xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 1c09002cbee8e797a0f8bbfc5678dfadd39ee1a7)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
108 
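// For illustration, a call such as
//
//	struct stat stat;
//	status_t status = FS_CALL(vnode, read_stat, &stat);
//
// expands (in the non-KDEBUG case) to
//
//	vnode->ops->read_stat(vnode->mount->volume, vnode, &stat)
//
// i.e. the macros merely dispatch to the respective hook of the vnode's
// (or mount's) file system, passing the volume along.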
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd()); this does not
112 	// depend on PATH_MAX.
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and covers_vnode fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount is mounted and it has been made sure that it won't be
127 	unmounted (e.g. by holding a reference to a vnode of that mount), read
128 	access to those fields is always safe, even without additional locking.
129 	Moreover, while mounted the mount holds a reference to covers_vnode,
130 	thus making the access path vnode->mount->covers_vnode->mount->... safe
131 	if a reference to vnode is held (note that for the root mount
132 	covers_vnode is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	fs_mount() and fs_unmount() hold the lock for the duration of their
218 	operation. That is, locking the lock ensures that no FS is mounted or
219 	unmounted. In particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
224 
225 	The thread trying to lock the lock must not hold sVnodeLock or
226 	sMountMutex.
227 */
228 static recursive_lock sMountOpLock;
229 
230 /*!	\brief Guards sVnodeTable.
231 
232 	The holder is allowed read/write access to sVnodeTable and to
233 	any unbusy vnode in that table, save to the immutable fields (device, id,
234 	private_node, mount) to which only read-only access is allowed.
235 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count,
236 	as well as the busy, removed, and unused flags, and the vnode's type, can
237 	also be written while holding a read lock on sVnodeLock *and* the vnode's
238 	own lock. Write access to covered_by requires write-locking sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not have this lock held when calling create_sem(), as this
242 	might call vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
497 
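// Typical use of VNodePutter (an illustrative sketch only):
//
//	struct vnode* vnode;
//	status_t status = get_vnode(mountID, vnodeID, &vnode, true, false);
//	if (status != B_OK)
//		return status;
//	VNodePutter vnodePutter(vnode);
//	// ... work with the vnode; put_vnode() runs automatically when
//	// vnodePutter goes out of scope, unless Detach() has transferred
//	// the reference elsewhere.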
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
539 
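// FDCloser implements the same RAII idiom for file descriptors
// (illustrative sketch; open_fd() stands in for any FD-creating call):
//
//	int fd = open_fd(...);
//	if (fd < 0)
//		return fd;
//	FDCloser fdCloser(fd, kernel);
//	// ... on the success path, hand the FD over to the caller:
//	return fdCloser.Detach();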
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
549 		status_t status, generic_size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, sizeof(generic_io_vec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %llu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, (uint64)fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%llx, %llu)", (uint64)fVecs[i].base, (uint64)fVecs[i].length);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %llu",
581 			fFlags, fStatus, (uint64)fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
590 	generic_io_vec*		fVecs;
591 	uint32			fCount;
592 	uint32			fFlags;
593 	generic_size_t			fBytesRequested;
594 	status_t		fStatus;
595 	generic_size_t			fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
603 		status_t status, generic_size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
622 		status_t status, generic_size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
668 
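// These two callbacks plug into the generic khash table; presumably the
// table is set up along these lines (sketch, cf. vfs_init()):
//
//	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
//		offsetof(struct fs_mount, next), &mount_compare, &mount_hash);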
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
671 	Note, you must hold the sMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to the file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length)) {
750 		// the name already seems to be the module's file name
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
770 
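// Example (illustrative): get_file_system_name("file_systems/bfs/v1")
// returns a newly allocated "bfs", while get_file_system_name("bfs")
// simply returns strdup("bfs").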
771 
772 /*!	Accepts a list of file system names separated by colons, one for each
773 	layer, and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
793 			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
803 
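// Example (illustrative, assuming a layered FS setup "bfs:write_overlay"):
// layer 0 yields "bfs", layer 1 yields "write_overlay", and layer 2
// yields NULL.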
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
876 	created. In either case -- but not if an error occurs -- the function
877 	write locks \c sVnodeLock and keeps it locked for the caller when
878 	returning. On error the lock is not held on return.
879 
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
907 	// look up the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep the vnode lock locked
938 	return B_OK;
939 }
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted; in that case the changes
954 	// would be discarded anyway
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
965 	// ever drop to 0. Deleting the file cache now, will cause the next to last
966 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
1071 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1072 	node is busy in the first place, and 2. vnode_used() must be called for the
1073 	node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
1114 	\param reenter \c true, if this function is called (indirectly) from within
1115 		   a file system.
1116 	\return \c B_OK, if everything went fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
1292 		// Check whether the node is still unused -- since we only append to
1293 		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
1297 		// recently used unused vnode and we rather don't free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
1322 
1323 
1324 static void
1325 free_unused_vnodes()
1326 {
1327 	free_unused_vnodes(
1328 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1329 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1330 }
1331 
1332 
1333 static void
1334 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1335 {
1336 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1337 
1338 	free_unused_vnodes(level);
1339 }
1340 
1341 
1342 static inline void
1343 put_advisory_locking(struct advisory_locking* locking)
1344 {
1345 	release_sem(locking->lock);
1346 }
1347 
1348 
1349 /*!	Returns the advisory_locking object of the \a vnode in case it
1350 	has one, and locks it.
1351 	You have to call put_advisory_locking() when you're done with
1352 	it.
1353 	Note, you must not have the vnode mutex locked when calling
1354 	this function.
1355 */
1356 static struct advisory_locking*
1357 get_advisory_locking(struct vnode* vnode)
1358 {
1359 	rw_lock_read_lock(&sVnodeLock);
1360 	vnode->Lock();
1361 
1362 	struct advisory_locking* locking = vnode->advisory_locking;
1363 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1364 
1365 	vnode->Unlock();
1366 	rw_lock_read_unlock(&sVnodeLock);
1367 
1368 	if (lock >= 0)
1369 		lock = acquire_sem(lock);
1370 	if (lock < 0) {
1371 		// This means the locking has been deleted in the meantime
1372 		// or had never existed in the first place - otherwise, we
1373 		// would get the lock at some point.
1374 		return NULL;
1375 	}
1376 
1377 	return locking;
1378 }
1379 
1380 
1381 /*!	Creates a locked advisory_locking object, and attaches it to the
1382 	given \a vnode.
1383 	Returns B_OK in case of success; if the vnode got such an object
1384 	from someone else in the meantime, you'll still get that one
1385 	locked.
1386 */
1387 static status_t
1388 create_advisory_locking(struct vnode* vnode)
1389 {
1390 	if (vnode == NULL)
1391 		return B_FILE_ERROR;
1392 
1393 	ObjectDeleter<advisory_locking> lockingDeleter;
1394 	struct advisory_locking* locking = NULL;
1395 
1396 	while (get_advisory_locking(vnode) == NULL) {
1397 		// no locking object set on the vnode yet, create one
1398 		if (locking == NULL) {
1399 			locking = new(std::nothrow) advisory_locking;
1400 			if (locking == NULL)
1401 				return B_NO_MEMORY;
1402 			lockingDeleter.SetTo(locking);
1403 
1404 			locking->wait_sem = create_sem(0, "advisory lock");
1405 			if (locking->wait_sem < 0)
1406 				return locking->wait_sem;
1407 
1408 			locking->lock = create_sem(0, "advisory locking");
1409 			if (locking->lock < 0)
1410 				return locking->lock;
1411 		}
1412 
1413 		// set our newly created locking object
1414 		ReadLocker _(sVnodeLock);
1415 		AutoLocker<Vnode> nodeLocker(vnode);
1416 		if (vnode->advisory_locking == NULL) {
1417 			vnode->advisory_locking = locking;
1418 			lockingDeleter.Detach();
1419 			return B_OK;
1420 		}
1421 	}
1422 
1423 	// The vnode already had a locking object. That's just as well.
1424 
1425 	return B_OK;
1426 }
1427 
1428 
1429 /*!	Retrieves the first lock that has been set by the current team.
1430 */
1431 static status_t
1432 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1433 {
1434 	struct advisory_locking* locking = get_advisory_locking(vnode);
1435 	if (locking == NULL)
1436 		return B_BAD_VALUE;
1437 
1438 	// TODO: this should probably get the flock by its file descriptor!
1439 	team_id team = team_get_current_team_id();
1440 	status_t status = B_BAD_VALUE;
1441 
1442 	LockList::Iterator iterator = locking->locks.GetIterator();
1443 	while (iterator.HasNext()) {
1444 		struct advisory_lock* lock = iterator.Next();
1445 
1446 		if (lock->team == team) {
1447 			flock->l_start = lock->start;
1448 			flock->l_len = lock->end - lock->start + 1;
1449 			status = B_OK;
1450 			break;
1451 		}
1452 	}
1453 
1454 	put_advisory_locking(locking);
1455 	return status;
1456 }
1457 
1458 
1459 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1460 	with the advisory_lock \a lock.
1461 */
1462 static bool
1463 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1464 {
1465 	if (flock == NULL)
1466 		return true;
1467 
1468 	return lock->start <= flock->l_start - 1 + flock->l_len
1469 		&& lock->end >= flock->l_start;
1470 }
1471 
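// Worked example (illustrative): an advisory_lock covering bytes
// [100, 199] and a normalized flock with l_start = 150, l_len = 100
// (i.e. bytes [150, 249]) intersect, since 100 <= 249 (= 150 - 1 + 100)
// and 199 >= 150.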
1472 
1473 /*!	Removes the specified lock, or all locks of the calling team
1474 	if \a flock is NULL.
1475 */
1476 static status_t
1477 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1478 {
1479 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1480 
1481 	struct advisory_locking* locking = get_advisory_locking(vnode);
1482 	if (locking == NULL)
1483 		return B_OK;
1484 
1485 	// TODO: use the thread ID instead??
1486 	team_id team = team_get_current_team_id();
1487 	pid_t session = thread_get_current_thread()->team->session_id;
1488 
1489 	// find matching lock entries
1490 
1491 	LockList::Iterator iterator = locking->locks.GetIterator();
1492 	while (iterator.HasNext()) {
1493 		struct advisory_lock* lock = iterator.Next();
1494 		bool removeLock = false;
1495 
1496 		if (lock->session == session)
1497 			removeLock = true;
1498 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1499 			bool endsBeyond = false;
1500 			bool startsBefore = false;
1501 			if (flock != NULL) {
1502 				startsBefore = lock->start < flock->l_start;
1503 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1504 			}
1505 
1506 			if (!startsBefore && !endsBeyond) {
1507 				// lock is completely contained in flock
1508 				removeLock = true;
1509 			} else if (startsBefore && !endsBeyond) {
1510 				// cut the end of the lock
1511 				lock->end = flock->l_start - 1;
1512 			} else if (!startsBefore && endsBeyond) {
1513 				// cut the start of the lock
1514 				lock->start = flock->l_start + flock->l_len;
1515 			} else {
1516 				// divide the lock into two locks
1517 				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
1518 				if (secondLock == NULL) {
1519 					// TODO: we should probably revert the locks we already
1520 					// changed... (ie. allocate upfront)
1521 					put_advisory_locking(locking);
1522 					return B_NO_MEMORY;
1523 				}
1524 
1525 				secondLock->team = lock->team;
1526 				secondLock->session = lock->session;
1527 				// values must already be normalized when getting here
1528 				secondLock->start = flock->l_start + flock->l_len;
1529 				secondLock->end = lock->end;
1530 					// the original end -- copied before this lock is cut
1531 				secondLock->shared = lock->shared;
1532 
				lock->end = flock->l_start - 1;
1533 
1534 				locking->locks.Add(secondLock);
1535 			}
1536 		}
1537 
1538 		if (removeLock) {
1539 			// this lock is no longer used
1540 			iterator.Remove();
1541 			free(lock);
1542 		}
1543 	}
1544 
1545 	bool removeLocking = locking->locks.IsEmpty();
1546 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1547 
1548 	put_advisory_locking(locking);
1549 
1550 	if (removeLocking) {
1551 		// We can remove the whole advisory locking structure; it's no
1552 		// longer used
1553 		locking = get_advisory_locking(vnode);
1554 		if (locking != NULL) {
1555 			ReadLocker locker(sVnodeLock);
1556 			AutoLocker<Vnode> nodeLocker(vnode);
1557 
1558 			// the locking could have been changed in the mean time
1559 			if (locking->locks.IsEmpty()) {
1560 				vnode->advisory_locking = NULL;
1561 				nodeLocker.Unlock();
1562 				locker.Unlock();
1563 
1564 				// we've detached the locking from the vnode, so we can
1565 				// safely delete it
1566 				delete locking;
1567 			} else {
1568 				// the locking is in use again
1569 				nodeLocker.Unlock();
1570 				locker.Unlock();
1571 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1572 			}
1573 		}
1574 	}
1575 
1576 	return B_OK;
1577 }
1578 
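// Worked example (illustrative): releasing the region [100, 199]
// (l_start = 100, l_len = 100) from a lock covering [0, 999] hits the
// "divide" case above: the second lock gets the range [200, 999], and
// the original lock is truncated to [0, 99].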
1579 
1580 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1581 	will wait for the lock to become available, if there are any collisions
1582 	(if \a wait is \c false, B_WOULD_BLOCK or B_PERMISSION_DENIED is returned).
1583 
1584 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1585 	BSD flock() semantics are used, that is, all children can unlock the file
1586 	in question (we even allow parents to remove the lock, though, but that
1587 	seems to be in line to what the BSD's are doing).
1588 	seems to be in line with what the BSDs are doing).
1589 static status_t
1590 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1591 	bool wait)
1592 {
1593 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1594 		vnode, flock, wait ? "yes" : "no"));
1595 
1596 	bool shared = flock->l_type == F_RDLCK;
1597 	status_t status = B_OK;
1598 
1599 	// TODO: do deadlock detection!
1600 
1601 	struct advisory_locking* locking;
1602 
1603 	while (true) {
1604 		// if this vnode has an advisory_locking structure attached,
1605 		// lock that one and search for any colliding file lock
1606 		status = create_advisory_locking(vnode);
1607 		if (status != B_OK)
1608 			return status;
1609 
1610 		locking = vnode->advisory_locking;
1611 		team_id team = team_get_current_team_id();
1612 		sem_id waitForLock = -1;
1613 
1614 		// test for collisions
1615 		LockList::Iterator iterator = locking->locks.GetIterator();
1616 		while (iterator.HasNext()) {
1617 			struct advisory_lock* lock = iterator.Next();
1618 
1619 			// TODO: locks from the same team might be joinable!
1620 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1621 				// locks do overlap
1622 				if (!shared || !lock->shared) {
1623 					// we need to wait
1624 					waitForLock = locking->wait_sem;
1625 					break;
1626 				}
1627 			}
1628 		}
1629 
1630 		if (waitForLock < 0)
1631 			break;
1632 
1633 		// We need to wait. Do that or fail now, if we've been asked not to.
1634 
1635 		if (!wait) {
1636 			put_advisory_locking(locking);
1637 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1638 		}
1639 
1640 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1641 			B_CAN_INTERRUPT, 0);
1642 		if (status != B_OK && status != B_BAD_SEM_ID)
1643 			return status;
1644 
1645 		// We have been notified, but we need to re-lock the locking object. So
1646 		// go another round...
1647 	}
1648 
1649 	// install new lock
1650 
1651 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1652 		sizeof(struct advisory_lock));
1653 	if (lock == NULL) {
1654 		put_advisory_locking(locking);
1655 		return B_NO_MEMORY;
1656 	}
1657 
1658 	lock->team = team_get_current_team_id();
1659 	lock->session = session;
1660 	// values must already be normalized when getting here
1661 	lock->start = flock->l_start;
1662 	lock->end = flock->l_start - 1 + flock->l_len;
1663 	lock->shared = shared;
1664 
1665 	locking->locks.Add(lock);
1666 	put_advisory_locking(locking);
1667 
1668 	return status;
1669 }
1670 
1671 
1672 /*!	Normalizes the \a flock structure to make it easier to compare the
1673 	structure with others. The l_start and l_len fields are set to absolute
1674 	values according to the l_whence field.
1675 */
1676 static status_t
1677 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1678 {
1679 	switch (flock->l_whence) {
1680 		case SEEK_SET:
1681 			break;
1682 		case SEEK_CUR:
1683 			flock->l_start += descriptor->pos;
1684 			break;
1685 		case SEEK_END:
1686 		{
1687 			struct vnode* vnode = descriptor->u.vnode;
1688 			struct stat stat;
1689 			status_t status;
1690 
1691 			if (!HAS_FS_CALL(vnode, read_stat))
1692 				return B_NOT_SUPPORTED;
1693 
1694 			status = FS_CALL(vnode, read_stat, &stat);
1695 			if (status != B_OK)
1696 				return status;
1697 
1698 			flock->l_start += stat.st_size;
1699 			break;
1700 		}
1701 		default:
1702 			return B_BAD_VALUE;
1703 	}
1704 
1705 	if (flock->l_start < 0)
1706 		flock->l_start = 0;
1707 	if (flock->l_len == 0)
1708 		flock->l_len = OFF_MAX;
1709 
1710 	// don't let the offset and length overflow
1711 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1712 		flock->l_len = OFF_MAX - flock->l_start;
1713 
1714 	if (flock->l_len < 0) {
1715 		// a negative length reverses the region
1716 		flock->l_start += flock->l_len;
1717 		flock->l_len = -flock->l_len;
1718 	}
1719 
1720 	return B_OK;
1721 }
1722 
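// Worked example (illustrative): with the descriptor's position at 200,
// a flock of { l_whence = SEEK_CUR, l_start = 100, l_len = -50 } is
// first shifted to l_start = 300; the negative length then reverses the
// region, yielding l_start = 250, l_len = 50, i.e. bytes [250, 299].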
1723 
1724 static void
1725 replace_vnode_if_disconnected(struct fs_mount* mount,
1726 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1727 	struct vnode* fallBack, bool lockRootLock)
1728 {
1729 	if (lockRootLock)
1730 		mutex_lock(&sIOContextRootLock);
1731 
1732 	struct vnode* obsoleteVnode = NULL;
1733 
1734 	if (vnode != NULL && vnode->mount == mount
1735 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1736 		obsoleteVnode = vnode;
1737 
1738 		if (vnode == mount->root_vnode) {
1739 			// redirect the vnode to the covered vnode
1740 			vnode = mount->covers_vnode;
1741 		} else
1742 			vnode = fallBack;
1743 
1744 		if (vnode != NULL)
1745 			inc_vnode_ref_count(vnode);
1746 	}
1747 
1748 	if (lockRootLock)
1749 		mutex_unlock(&sIOContextRootLock);
1750 
1751 	if (obsoleteVnode != NULL)
1752 		put_vnode(obsoleteVnode);
1753 }
1754 
1755 
1756 /*!	Disconnects all file descriptors that are associated with the
1757 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1758 	\a mount object.
1759 
1760 	Note, after you've called this function, there might still be ongoing
1761 	accesses - those already in progress won't be interrupted.
1762 	However, any subsequent access will fail.
1763 
1764 	This is not a cheap function and should be used with care and rarely.
1765 	TODO: there is currently no means to stop a blocking read/write!
1766 */
1767 void
1768 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1769 	struct vnode* vnodeToDisconnect)
1770 {
1771 	// iterate over all teams and peek into their file descriptors
1772 	int32 nextTeamID = 0;
1773 
1774 	while (true) {
1775 		struct io_context* context = NULL;
1776 		bool contextLocked = false;
1777 		Team* team = NULL;
1778 		team_id lastTeamID;
1779 
1780 		cpu_status state = disable_interrupts();
1781 		SpinLocker teamsLock(gTeamSpinlock);
1782 
1783 		lastTeamID = peek_next_thread_id();
1784 		if (nextTeamID < lastTeamID) {
1785 			// get next valid team
1786 			while (nextTeamID < lastTeamID
1787 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1788 				nextTeamID++;
1789 			}
1790 
1791 			if (team) {
1792 				context = (io_context*)team->io_context;
1793 
1794 				// Some acrobatics to lock the context in a safe way
1795 				// (cf. _kern_get_next_fd_info() for details).
1796 				GRAB_THREAD_LOCK();
1797 				teamsLock.Unlock();
1798 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1799 					== B_OK;
1800 				RELEASE_THREAD_LOCK();
1801 
1802 				nextTeamID++;
1803 			}
1804 		}
1805 
1806 		teamsLock.Unlock();
1807 		restore_interrupts(state);
1808 
1809 		if (context == NULL)
1810 			break;
1811 
1812 		// we now have a context - since we couldn't lock it while having
1813 		// safe access to the team structure, we now need to lock the mutex
1814 		// manually
1815 
1816 		if (!contextLocked) {
1817 			// team seems to be gone, go over to the next team
1818 			continue;
1819 		}
1820 
1821 		// the team cannot be deleted completely while we're owning its
1822 		// io_context mutex, so we can safely play with it now
1823 
1824 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1825 			sRoot, true);
1826 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1827 			sRoot, false);
1828 
1829 		for (uint32 i = 0; i < context->table_size; i++) {
1830 			if (struct file_descriptor* descriptor = context->fds[i]) {
1831 				inc_fd_ref_count(descriptor);
1832 
1833 				// if this descriptor points at this mount, we
1834 				// need to disconnect it to be able to unmount
1835 				struct vnode* vnode = fd_vnode(descriptor);
1836 				if (vnodeToDisconnect != NULL) {
1837 					if (vnode == vnodeToDisconnect)
1838 						disconnect_fd(descriptor);
1839 				} else if ((vnode != NULL && vnode->mount == mount)
1840 					|| (vnode == NULL && descriptor->u.mount == mount))
1841 					disconnect_fd(descriptor);
1842 
1843 				put_fd(descriptor);
1844 			}
1845 		}
1846 
1847 		mutex_unlock(&context->io_mutex);
1848 	}
1849 }
1850 
1851 
1852 /*!	\brief Gets the root node of the current IO context.
1853 	If \a kernel is \c true, the kernel IO context will be used.
1854 	The caller obtains a reference to the returned node.
1855 */
1856 struct vnode*
1857 get_root_vnode(bool kernel)
1858 {
1859 	if (!kernel) {
1860 		// Get the root vnode from the team's io context
1861 		struct io_context* context = get_current_io_context(kernel);
1862 
1863 		mutex_lock(&sIOContextRootLock);
1864 
1865 		struct vnode* root = context->root;
1866 		if (root != NULL)
1867 			inc_vnode_ref_count(root);
1868 
1869 		mutex_unlock(&sIOContextRootLock);
1870 
1871 		if (root != NULL)
1872 			return root;
1873 
1874 		// That should never happen.
1875 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1876 			"root\n", team_get_current_team_id());
1877 	}
1878 
1879 	inc_vnode_ref_count(sRoot);
1880 	return sRoot;
1881 }
1882 
1883 
1884 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1885 		   by.
1886 
	Given an arbitrary vnode, the function checks whether the node is covered
	by the root of a volume. If it is, the function obtains a reference to the
	volume root node and returns it.

	\param vnode The vnode in question.
	\return The volume root vnode the given vnode is covered by, if it is
			indeed a mount point, or \c NULL otherwise.
1894 */
1895 static struct vnode*
1896 resolve_mount_point_to_volume_root(struct vnode* vnode)
1897 {
1898 	if (!vnode)
1899 		return NULL;
1900 
1901 	struct vnode* volumeRoot = NULL;
1902 
1903 	rw_lock_read_lock(&sVnodeLock);
1904 
1905 	if (vnode->covered_by) {
1906 		volumeRoot = vnode->covered_by;
1907 		inc_vnode_ref_count(volumeRoot);
1908 	}
1909 
1910 	rw_lock_read_unlock(&sVnodeLock);
1911 
1912 	return volumeRoot;
1913 }
1914 
1915 
1916 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1917 		   by.
1918 
	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the node is covered by the root of a volume. If it is, the
	function returns the mount and node ID of the volume root node. Otherwise
	it simply returns the supplied mount and node ID.
1923 
1924 	In case of error (e.g. the supplied node could not be found) the variables
1925 	for storing the resolved mount and node ID remain untouched and an error
1926 	code is returned.
1927 
1928 	\param mountID The mount ID of the vnode in question.
1929 	\param nodeID The node ID of the vnode in question.
1930 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1931 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1932 	\return
1933 	- \c B_OK, if everything went fine,
1934 	- another error code, if something went wrong.
1935 */
1936 status_t
1937 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1938 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1939 {
1940 	// get the node
1941 	struct vnode* node;
1942 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1943 	if (error != B_OK)
1944 		return error;
1945 
1946 	// resolve the node
1947 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1948 	if (resolvedNode) {
1949 		put_vnode(node);
1950 		node = resolvedNode;
1951 	}
1952 
1953 	// set the return values
1954 	*resolvedMountID = node->device;
1955 	*resolvedNodeID = node->id;
1956 
1957 	put_vnode(node);
1958 
1959 	return B_OK;
1960 }
1961 
1962 
1963 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1964 
1965 	Given an arbitrary vnode, the function checks, whether the node is the
1966 	root of a volume. If it is (and if it is not "/"), the function obtains
1967 	a reference to the underlying mount point node and returns it.
1968 
1969 	\param vnode The vnode in question (caller must have a reference).
1970 	\return The mount point vnode the vnode covers, if it is indeed a volume
1971 			root and not "/", or \c NULL otherwise.
1972 */
1973 static struct vnode*
1974 resolve_volume_root_to_mount_point(struct vnode* vnode)
1975 {
1976 	if (!vnode)
1977 		return NULL;
1978 
1979 	struct vnode* mountPoint = NULL;
1980 
1981 	struct fs_mount* mount = vnode->mount;
1982 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1983 		mountPoint = mount->covers_vnode;
1984 		inc_vnode_ref_count(mountPoint);
1985 	}
1986 
1987 	return mountPoint;
1988 }
1989 
1990 
1991 /*!	\brief Gets the directory path and leaf name for a given path.
1992 
1993 	The supplied \a path is transformed to refer to the directory part of
1994 	the entry identified by the original path, and into the buffer \a filename
1995 	the leaf name of the original entry is written.
1996 	Neither the returned path nor the leaf name can be expected to be
1997 	canonical.
1998 
1999 	\param path The path to be analyzed. Must be able to store at least one
2000 		   additional character.
2001 	\param filename The buffer into which the leaf name will be written.
2002 		   Must be of size B_FILE_NAME_LENGTH at least.
2003 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2004 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2005 		   if the given path name is empty.
2006 */
2007 static status_t
2008 get_dir_path_and_leaf(char* path, char* filename)
2009 {
2010 	if (*path == '\0')
2011 		return B_ENTRY_NOT_FOUND;
2012 
2013 	char* last = strrchr(path, '/');
2014 		// '/' are not allowed in file names!
2015 
2016 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2017 
2018 	if (last == NULL) {
		// this path is a single segment with no '/' in it, e.g. "foo"
2021 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2022 			return B_NAME_TOO_LONG;
2023 
2024 		strcpy(path, ".");
2025 	} else {
2026 		last++;
2027 		if (last[0] == '\0') {
2028 			// special case: the path ends in one or more '/' - remove them
2029 			while (*--last == '/' && last != path);
2030 			last[1] = '\0';
2031 
2032 			if (last == path && last[0] == '/') {
2033 				// This path points to the root of the file system
2034 				strcpy(filename, ".");
2035 				return B_OK;
2036 			}
2037 			for (; last != path && *(last - 1) != '/'; last--);
2038 				// rewind to the start of the leaf before the '/'
2039 		}
2040 
2041 		// normal leaf: replace the leaf portion of the path with a '.'
2042 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2043 			return B_NAME_TOO_LONG;
2044 
2045 		last[0] = '.';
2046 		last[1] = '\0';
2047 	}
2048 	return B_OK;
2049 }
2050 
2051 
2052 static status_t
2053 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2054 	bool traverse, bool kernel, struct vnode** _vnode)
2055 {
2056 	char clonedName[B_FILE_NAME_LENGTH + 1];
2057 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2058 		return B_NAME_TOO_LONG;
2059 
2060 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2061 	struct vnode* directory;
2062 
2063 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2064 	if (status < 0)
2065 		return status;
2066 
2067 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2068 		_vnode, NULL);
2069 }
2070 
2071 
2072 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2073 	and returns the respective vnode.
2074 	On success a reference to the vnode is acquired for the caller.
2075 */
2076 static status_t
2077 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2078 {
2079 	ino_t id;
2080 
2081 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2082 		return get_vnode(dir->device, id, _vnode, true, false);
2083 
2084 	status_t status = FS_CALL(dir, lookup, name, &id);
2085 	if (status != B_OK)
2086 		return status;
2087 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// have a reference and just need to look the node up.
2090 	rw_lock_read_lock(&sVnodeLock);
2091 	*_vnode = lookup_vnode(dir->device, id);
2092 	rw_lock_read_unlock(&sVnodeLock);
2093 
2094 	if (*_vnode == NULL) {
2095 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2096 			"0x%Lx)\n", dir->device, id);
2097 		return B_ENTRY_NOT_FOUND;
2098 	}
2099 
2100 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2101 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2102 //		(*_vnode)->mount->id, (*_vnode)->id);
2103 
2104 	return B_OK;
2105 }
2106 
2107 
2108 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2109 	\a path must not be NULL.
2110 	If it returns successfully, \a path contains the name of the last path
2111 	component. This function clobbers the buffer pointed to by \a path only
2112 	if it does contain more than one component.
2113 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2114 	it is successful or not!
2115 */
2116 static status_t
2117 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2118 	int count, struct io_context* ioContext, struct vnode** _vnode,
2119 	ino_t* _parentID)
2120 {
2121 	status_t status = B_OK;
2122 	ino_t lastParentID = vnode->id;
2123 
2124 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2125 
2126 	if (path == NULL) {
2127 		put_vnode(vnode);
2128 		return B_BAD_VALUE;
2129 	}
2130 
2131 	if (*path == '\0') {
2132 		put_vnode(vnode);
2133 		return B_ENTRY_NOT_FOUND;
2134 	}
2135 
2136 	while (true) {
2137 		struct vnode* nextVnode;
2138 		char* nextPath;
2139 
2140 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2141 			path));
2142 
2143 		// done?
2144 		if (path[0] == '\0')
2145 			break;
2146 
2147 		// walk to find the next path component ("path" will point to a single
2148 		// path component), and filter out multiple slashes
2149 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2150 				nextPath++);
2151 
2152 		if (*nextPath == '/') {
2153 			*nextPath = '\0';
2154 			do
2155 				nextPath++;
2156 			while (*nextPath == '/');
2157 		}
2158 
2159 		// See if the '..' is at the root of a mount and move to the covered
2160 		// vnode so we pass the '..' path to the underlying filesystem.
2161 		// Also prevent breaking the root of the IO context.
2162 		if (strcmp("..", path) == 0) {
2163 			if (vnode == ioContext->root) {
2164 				// Attempted prison break! Keep it contained.
2165 				path = nextPath;
2166 				continue;
2167 			} else if (vnode->mount->root_vnode == vnode
2168 				&& vnode->mount->covers_vnode) {
2169 				nextVnode = vnode->mount->covers_vnode;
2170 				inc_vnode_ref_count(nextVnode);
2171 				put_vnode(vnode);
2172 				vnode = nextVnode;
2173 			}
2174 		}
2175 
2176 		// check if vnode is really a directory
2177 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2178 			status = B_NOT_A_DIRECTORY;
2179 
2180 		// Check if we have the right to search the current directory vnode.
2181 		// If a file system doesn't have the access() function, we assume that
2182 		// searching a directory is always allowed
2183 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2184 			status = FS_CALL(vnode, access, X_OK);
2185 
2186 		// Tell the filesystem to get the vnode of this path component (if we
2187 		// got the permission from the call above)
2188 		if (status == B_OK)
2189 			status = lookup_dir_entry(vnode, path, &nextVnode);
2190 
2191 		if (status != B_OK) {
2192 			put_vnode(vnode);
2193 			return status;
2194 		}
2195 
2196 		// If the new node is a symbolic link, resolve it (if we've been told
2197 		// to do it)
2198 		if (S_ISLNK(nextVnode->Type())
2199 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2200 			size_t bufferSize;
2201 			char* buffer;
2202 
2203 			TRACE(("traverse link\n"));
2204 
2205 			// it's not exactly nice style using goto in this way, but hey,
2206 			// it works :-/
2207 			if (count + 1 > B_MAX_SYMLINKS) {
2208 				status = B_LINK_LIMIT;
2209 				goto resolve_link_error;
2210 			}
2211 
2212 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2213 			if (buffer == NULL) {
2214 				status = B_NO_MEMORY;
2215 				goto resolve_link_error;
2216 			}
2217 
2218 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2219 				bufferSize--;
2220 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2221 				// null-terminate
2222 				if (status >= 0)
2223 					buffer[bufferSize] = '\0';
2224 			} else
2225 				status = B_BAD_VALUE;
2226 
2227 			if (status != B_OK) {
2228 				free(buffer);
2229 
2230 		resolve_link_error:
2231 				put_vnode(vnode);
2232 				put_vnode(nextVnode);
2233 
2234 				return status;
2235 			}
2236 			put_vnode(nextVnode);
2237 
2238 			// Check if we start from the root directory or the current
2239 			// directory ("vnode" still points to that one).
2240 			// Cut off all leading slashes if it's the root directory
2241 			path = buffer;
2242 			bool absoluteSymlink = false;
2243 			if (path[0] == '/') {
2244 				// we don't need the old directory anymore
2245 				put_vnode(vnode);
2246 
2247 				while (*++path == '/')
2248 					;
2249 
2250 				mutex_lock(&sIOContextRootLock);
2251 				vnode = ioContext->root;
2252 				inc_vnode_ref_count(vnode);
2253 				mutex_unlock(&sIOContextRootLock);
2254 
2255 				absoluteSymlink = true;
2256 			}
2257 
2258 			inc_vnode_ref_count(vnode);
2259 				// balance the next recursion - we will decrement the
2260 				// ref_count of the vnode, no matter if we succeeded or not
2261 
2262 			if (absoluteSymlink && *path == '\0') {
2263 				// symlink was just "/"
2264 				nextVnode = vnode;
2265 			} else {
2266 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2267 					ioContext, &nextVnode, &lastParentID);
2268 			}
2269 
2270 			free(buffer);
2271 
2272 			if (status != B_OK) {
2273 				put_vnode(vnode);
2274 				return status;
2275 			}
2276 		} else
2277 			lastParentID = vnode->id;
2278 
2279 		// decrease the ref count on the old dir we just looked up into
2280 		put_vnode(vnode);
2281 
2282 		path = nextPath;
2283 		vnode = nextVnode;
2284 
2285 		// see if we hit a mount point
2286 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2287 		if (mountPoint) {
2288 			put_vnode(vnode);
2289 			vnode = mountPoint;
2290 		}
2291 	}
2292 
2293 	*_vnode = vnode;
2294 	if (_parentID)
2295 		*_parentID = lastParentID;
2296 
2297 	return B_OK;
2298 }
2299 
2300 
2301 static status_t
2302 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2303 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2304 {
2305 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2306 		get_current_io_context(kernel), _vnode, _parentID);
2307 }
2308 
2309 
2310 static status_t
2311 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2312 	ino_t* _parentID, bool kernel)
2313 {
2314 	struct vnode* start = NULL;
2315 
2316 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2317 
2318 	if (!path)
2319 		return B_BAD_VALUE;
2320 
2321 	if (*path == '\0')
2322 		return B_ENTRY_NOT_FOUND;
2323 
2324 	// figure out if we need to start at root or at cwd
2325 	if (*path == '/') {
2326 		if (sRoot == NULL) {
2327 			// we're a bit early, aren't we?
2328 			return B_ERROR;
2329 		}
2330 
2331 		while (*++path == '/')
2332 			;
2333 		start = get_root_vnode(kernel);
2334 
2335 		if (*path == '\0') {
2336 			*_vnode = start;
2337 			return B_OK;
2338 		}
2339 
2340 	} else {
2341 		struct io_context* context = get_current_io_context(kernel);
2342 
2343 		mutex_lock(&context->io_mutex);
2344 		start = context->cwd;
2345 		if (start != NULL)
2346 			inc_vnode_ref_count(start);
2347 		mutex_unlock(&context->io_mutex);
2348 
2349 		if (start == NULL)
2350 			return B_ERROR;
2351 	}
2352 
2353 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2354 		_parentID);
2355 }
2356 
2357 
/*! Returns the vnode for the next-to-last segment of the path, and returns
	the leaf name in \a filename.
2360 	The path buffer must be able to store at least one additional character.
2361 */
2362 static status_t
2363 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2364 	bool kernel)
2365 {
2366 	status_t status = get_dir_path_and_leaf(path, filename);
2367 	if (status != B_OK)
2368 		return status;
2369 
2370 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2371 }
2372 
2373 
2374 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2375 		   to by a FD + path pair.
2376 
	\a path must always be given. \a fd might be omitted, in which case
	\a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2382 
2383 	The caller has the responsibility to call put_vnode() on the returned
2384 	directory vnode.
2385 
2386 	\param fd The FD. May be < 0.
2387 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2388 	       is modified by this function. It must have at least room for a
2389 	       string one character longer than the path it contains.
2390 	\param _vnode A pointer to a variable the directory vnode shall be written
2391 		   into.
2392 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2393 		   the leaf name of the specified entry will be written.
2394 	\param kernel \c true, if invoked from inside the kernel, \c false if
2395 		   invoked from userland.
2396 	\return \c B_OK, if everything went fine, another error code otherwise.
2397 */
2398 static status_t
2399 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2400 	char* filename, bool kernel)
2401 {
2402 	if (!path)
2403 		return B_BAD_VALUE;
2404 	if (*path == '\0')
2405 		return B_ENTRY_NOT_FOUND;
2406 	if (fd < 0)
2407 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2408 
2409 	status_t status = get_dir_path_and_leaf(path, filename);
2410 	if (status != B_OK)
2411 		return status;
2412 
2413 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2414 }
2415 
2416 
2417 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2418 		   to by a vnode + path pair.
2419 
	\a path must always be given. \a vnode might be omitted, in which case
	\a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2425 
2426 	The caller has the responsibility to call put_vnode() on the returned
2427 	directory vnode.
2428 
2429 	\param vnode The vnode. May be \c NULL.
2430 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2431 	       is modified by this function. It must have at least room for a
2432 	       string one character longer than the path it contains.
2433 	\param _vnode A pointer to a variable the directory vnode shall be written
2434 		   into.
2435 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2436 		   the leaf name of the specified entry will be written.
2437 	\param kernel \c true, if invoked from inside the kernel, \c false if
2438 		   invoked from userland.
2439 	\return \c B_OK, if everything went fine, another error code otherwise.
2440 */
2441 static status_t
2442 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2443 	struct vnode** _vnode, char* filename, bool kernel)
2444 {
2445 	if (!path)
2446 		return B_BAD_VALUE;
2447 	if (*path == '\0')
2448 		return B_ENTRY_NOT_FOUND;
2449 	if (vnode == NULL || path[0] == '/')
2450 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2451 
2452 	status_t status = get_dir_path_and_leaf(path, filename);
2453 	if (status != B_OK)
2454 		return status;
2455 
2456 	inc_vnode_ref_count(vnode);
2457 		// vnode_path_to_vnode() always decrements the ref count
2458 
2459 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2460 }
2461 
2462 
2463 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2464 */
2465 static status_t
2466 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2467 	size_t bufferSize, struct io_context* ioContext)
2468 {
2469 	if (bufferSize < sizeof(struct dirent))
2470 		return B_BAD_VALUE;
2471 
2472 	// See if vnode is the root of a mount and move to the covered
2473 	// vnode so we get the underlying file system
2474 	VNodePutter vnodePutter;
2475 	if (vnode->mount->root_vnode == vnode
2476 		&& vnode->mount->covers_vnode != NULL) {
2477 		vnode = vnode->mount->covers_vnode;
2478 		inc_vnode_ref_count(vnode);
2479 		vnodePutter.SetTo(vnode);
2480 	}
2481 
2482 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2483 		// The FS supports getting the name of a vnode.
2484 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2485 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2486 			return B_OK;
2487 	}
2488 
2489 	// The FS doesn't support getting the name of a vnode. So we search the
2490 	// parent directory for the vnode, if the caller let us.
2491 
2492 	if (parent == NULL)
2493 		return B_NOT_SUPPORTED;
2494 
2495 	void* cookie;
2496 
2497 	status_t status = FS_CALL(parent, open_dir, &cookie);
2498 	if (status >= B_OK) {
2499 		while (true) {
2500 			uint32 num = 1;
2501 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2502 				&num);
2503 			if (status != B_OK)
2504 				break;
2505 			if (num == 0) {
2506 				status = B_ENTRY_NOT_FOUND;
2507 				break;
2508 			}
2509 
2510 			if (vnode->id == buffer->d_ino) {
2511 				// found correct entry!
2512 				break;
2513 			}
2514 		}
2515 
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
			// the cookie was created by open_dir() on the parent directory
2518 	}
2519 	return status;
2520 }
2521 
2522 
2523 static status_t
2524 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2525 	size_t nameSize, bool kernel)
2526 {
2527 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2528 	struct dirent* dirent = (struct dirent*)buffer;
2529 
2530 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2531 		get_current_io_context(kernel));
2532 	if (status != B_OK)
2533 		return status;
2534 
2535 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2536 		return B_BUFFER_OVERFLOW;
2537 
2538 	return B_OK;
2539 }
2540 
2541 
2542 /*!	Gets the full path to a given directory vnode.
2543 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2544 	file system doesn't support this call, it will fall back to iterating
2545 	through the parent directory to get the name of the child.
2546 
2547 	To protect against circular loops, it supports a maximum tree depth
2548 	of 256 levels.
2549 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to guarantee that the returned path
	stays valid, as paths aren't stable anyway: the path to a file can change
	at any time.

	It might be a good idea, though, to check if the returned path exists
	in the calling function (it's not done here for efficiency reasons).
2556 */
2557 static status_t
2558 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2559 	bool kernel)
2560 {
2561 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2562 
2563 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2564 		return B_BAD_VALUE;
2565 
2566 	if (!S_ISDIR(vnode->Type()))
2567 		return B_NOT_A_DIRECTORY;
2568 
2569 	char* path = buffer;
2570 	int32 insert = bufferSize;
2571 	int32 maxLevel = 256;
2572 	int32 length;
2573 	status_t status;
2574 	struct io_context* ioContext = get_current_io_context(kernel);
2575 
2576 	// we don't use get_vnode() here because this call is more
2577 	// efficient and does all we need from get_vnode()
2578 	inc_vnode_ref_count(vnode);
2579 
2580 	if (vnode != ioContext->root) {
2581 		// we don't hit the IO context root
2582 		// resolve a volume root to its mount point
2583 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2584 		if (mountPoint) {
2585 			put_vnode(vnode);
2586 			vnode = mountPoint;
2587 		}
2588 	}
2589 
2590 	path[--insert] = '\0';
2591 		// the path is filled right to left
2592 
2593 	while (true) {
2594 		// the name buffer is also used for fs_read_dir()
2595 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2596 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2597 		struct vnode* parentVnode;
2598 		ino_t parentID;
2599 
2600 		// lookup the parent vnode
2601 		if (vnode == ioContext->root) {
2602 			// we hit the IO context root
2603 			parentVnode = vnode;
2604 			inc_vnode_ref_count(vnode);
2605 		} else {
2606 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2607 			if (status != B_OK)
2608 				goto out;
2609 		}
2610 
2611 		// get the node's name
2612 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2613 			sizeof(nameBuffer), ioContext);
2614 
2615 		if (vnode != ioContext->root) {
2616 			// we don't hit the IO context root
2617 			// resolve a volume root to its mount point
2618 			struct vnode* mountPoint
2619 				= resolve_volume_root_to_mount_point(parentVnode);
2620 			if (mountPoint) {
2621 				put_vnode(parentVnode);
2622 				parentVnode = mountPoint;
2623 				parentID = parentVnode->id;
2624 			}
2625 		}
2626 
2627 		bool hitRoot = (parentVnode == vnode);
2628 
2629 		// release the current vnode, we only need its parent from now on
2630 		put_vnode(vnode);
2631 		vnode = parentVnode;
2632 
2633 		if (status != B_OK)
2634 			goto out;
2635 
2636 		if (hitRoot) {
2637 			// we have reached "/", which means we have constructed the full
2638 			// path
2639 			break;
2640 		}
2641 
2642 		// TODO: add an explicit check for loops in about 10 levels to do
2643 		// real loop detection
2644 
		// don't go deeper than 'maxLevel' to prevent circular loops
2646 		if (maxLevel-- < 0) {
2647 			status = B_LINK_LIMIT;
2648 			goto out;
2649 		}
2650 
2651 		// add the name in front of the current path
2652 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2653 		length = strlen(name);
2654 		insert -= length;
2655 		if (insert <= 0) {
2656 			status = B_RESULT_NOT_REPRESENTABLE;
2657 			goto out;
2658 		}
2659 		memcpy(path + insert, name, length);
2660 		path[--insert] = '/';
2661 	}
2662 
2663 	// the root dir will result in an empty path: fix it
2664 	if (path[insert] == '\0')
2665 		path[--insert] = '/';
2666 
2667 	TRACE(("  path is: %s\n", path + insert));
2668 
2669 	// move the path to the start of the buffer
2670 	length = bufferSize - insert;
2671 	memmove(buffer, path + insert, length);
2672 
2673 out:
2674 	put_vnode(vnode);
2675 	return status;
2676 }
2677 
2678 
2679 /*!	Checks the length of every path component, and adds a '.'
2680 	if the path ends in a slash.
2681 	The given path buffer must be able to store at least one
2682 	additional character.
2683 */
2684 static status_t
2685 check_path(char* to)
2686 {
2687 	int32 length = 0;
2688 
2689 	// check length of every path component
2690 
2691 	while (*to) {
2692 		char* begin;
2693 		if (*to == '/')
2694 			to++, length++;
2695 
2696 		begin = to;
2697 		while (*to != '/' && *to)
2698 			to++, length++;
2699 
2700 		if (to - begin > B_FILE_NAME_LENGTH)
2701 			return B_NAME_TOO_LONG;
2702 	}
2703 
2704 	if (length == 0)
2705 		return B_ENTRY_NOT_FOUND;
2706 
2707 	// complete path if there is a slash at the end
2708 
2709 	if (*(to - 1) == '/') {
2710 		if (length > B_PATH_NAME_LENGTH - 2)
2711 			return B_NAME_TOO_LONG;
2712 
2713 		to[0] = '.';
2714 		to[1] = '\0';
2715 	}
2716 
2717 	return B_OK;
2718 }
2719 
2720 
2721 static struct file_descriptor*
2722 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2723 {
2724 	struct file_descriptor* descriptor
2725 		= get_fd(get_current_io_context(kernel), fd);
2726 	if (descriptor == NULL)
2727 		return NULL;
2728 
2729 	struct vnode* vnode = fd_vnode(descriptor);
2730 	if (vnode == NULL) {
2731 		put_fd(descriptor);
2732 		return NULL;
2733 	}
2734 
2735 	// ToDo: when we can close a file descriptor at any point, investigate
2736 	//	if this is still valid to do (accessing the vnode without ref_count
2737 	//	or locking)
2738 	*_vnode = vnode;
2739 	return descriptor;
2740 }
2741 
2742 
2743 static struct vnode*
2744 get_vnode_from_fd(int fd, bool kernel)
2745 {
2746 	struct file_descriptor* descriptor;
2747 	struct vnode* vnode;
2748 
2749 	descriptor = get_fd(get_current_io_context(kernel), fd);
2750 	if (descriptor == NULL)
2751 		return NULL;
2752 
2753 	vnode = fd_vnode(descriptor);
2754 	if (vnode != NULL)
2755 		inc_vnode_ref_count(vnode);
2756 
2757 	put_fd(descriptor);
2758 	return vnode;
2759 }
2760 
2761 
2762 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2763 	only the path will be considered. In this case, the \a path must not be
2764 	NULL.
2765 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2766 	and should be NULL for files.
2767 */
2768 static status_t
2769 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2770 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2771 {
2772 	if (fd < 0 && !path)
2773 		return B_BAD_VALUE;
2774 
2775 	if (path != NULL && *path == '\0')
2776 		return B_ENTRY_NOT_FOUND;
2777 
2778 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2779 		// no FD or absolute path
2780 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2781 	}
2782 
2783 	// FD only, or FD + relative path
2784 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2785 	if (!vnode)
2786 		return B_FILE_ERROR;
2787 
2788 	if (path != NULL) {
2789 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2790 			_vnode, _parentID);
2791 	}
2792 
2793 	// there is no relative path to take into account
2794 
2795 	*_vnode = vnode;
2796 	if (_parentID)
2797 		*_parentID = -1;
2798 
2799 	return B_OK;
2800 }
2801 
2802 
2803 static int
2804 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2805 	void* cookie, int openMode, bool kernel)
2806 {
2807 	struct file_descriptor* descriptor;
2808 	int fd;
2809 
2810 	// If the vnode is locked, we don't allow creating a new file/directory
2811 	// file_descriptor for it
2812 	if (vnode && vnode->mandatory_locked_by != NULL
2813 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2814 		return B_BUSY;
2815 
2816 	descriptor = alloc_fd();
2817 	if (!descriptor)
2818 		return B_NO_MEMORY;
2819 
2820 	if (vnode)
2821 		descriptor->u.vnode = vnode;
2822 	else
2823 		descriptor->u.mount = mount;
2824 	descriptor->cookie = cookie;
2825 
2826 	switch (type) {
2827 		// vnode types
2828 		case FDTYPE_FILE:
2829 			descriptor->ops = &sFileOps;
2830 			break;
2831 		case FDTYPE_DIR:
2832 			descriptor->ops = &sDirectoryOps;
2833 			break;
2834 		case FDTYPE_ATTR:
2835 			descriptor->ops = &sAttributeOps;
2836 			break;
2837 		case FDTYPE_ATTR_DIR:
2838 			descriptor->ops = &sAttributeDirectoryOps;
2839 			break;
2840 
2841 		// mount types
2842 		case FDTYPE_INDEX_DIR:
2843 			descriptor->ops = &sIndexDirectoryOps;
2844 			break;
2845 		case FDTYPE_QUERY:
2846 			descriptor->ops = &sQueryOps;
2847 			break;
2848 
2849 		default:
2850 			panic("get_new_fd() called with unknown type %d\n", type);
2851 			break;
2852 	}
2853 	descriptor->type = type;
2854 	descriptor->open_mode = openMode;
2855 
2856 	io_context* context = get_current_io_context(kernel);
2857 	fd = new_fd(context, descriptor);
2858 	if (fd < 0) {
2859 		free(descriptor);
2860 		return B_NO_MORE_FDS;
2861 	}
2862 
2863 	mutex_lock(&context->io_mutex);
2864 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2865 	mutex_unlock(&context->io_mutex);
2866 
2867 	return fd;
2868 }
2869 
2870 
2871 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2872 	vfs_normalize_path(). See there for more documentation.
2873 */
2874 static status_t
2875 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2876 {
2877 	VNodePutter dirPutter;
2878 	struct vnode* dir = NULL;
2879 	status_t error;
2880 
2881 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2882 		// get dir vnode + leaf name
2883 		struct vnode* nextDir;
2884 		char leaf[B_FILE_NAME_LENGTH];
2885 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2886 		if (error != B_OK)
2887 			return error;
2888 
2889 		dir = nextDir;
2890 		strcpy(path, leaf);
2891 		dirPutter.SetTo(dir);
2892 
2893 		// get file vnode, if we shall resolve links
2894 		bool fileExists = false;
2895 		struct vnode* fileVnode;
2896 		VNodePutter fileVnodePutter;
2897 		if (traverseLink) {
2898 			inc_vnode_ref_count(dir);
2899 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2900 					NULL) == B_OK) {
2901 				fileVnodePutter.SetTo(fileVnode);
2902 				fileExists = true;
2903 			}
2904 		}
2905 
2906 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2907 			// we're done -- construct the path
2908 			bool hasLeaf = true;
2909 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2910 				// special cases "." and ".." -- get the dir, forget the leaf
2911 				inc_vnode_ref_count(dir);
2912 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2913 					&nextDir, NULL);
2914 				if (error != B_OK)
2915 					return error;
2916 				dir = nextDir;
2917 				dirPutter.SetTo(dir);
2918 				hasLeaf = false;
2919 			}
2920 
2921 			// get the directory path
2922 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2923 			if (error != B_OK)
2924 				return error;
2925 
2926 			// append the leaf name
2927 			if (hasLeaf) {
2928 				// insert a directory separator if this is not the file system
2929 				// root
2930 				if ((strcmp(path, "/") != 0
2931 					&& strlcat(path, "/", pathSize) >= pathSize)
2932 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2933 					return B_NAME_TOO_LONG;
2934 				}
2935 			}
2936 
2937 			return B_OK;
2938 		}
2939 
2940 		// read link
2941 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2942 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2943 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2944 			if (error != B_OK)
2945 				return error;
2946 			path[bufferSize] = '\0';
2947 		} else
2948 			return B_BAD_VALUE;
2949 	}
2950 
2951 	return B_LINK_LIMIT;
2952 }
2953 
2954 
2955 #ifdef ADD_DEBUGGER_COMMANDS
2956 
2957 
2958 static void
2959 _dump_advisory_locking(advisory_locking* locking)
2960 {
2961 	if (locking == NULL)
2962 		return;
2963 
2964 	kprintf("   lock:        %ld", locking->lock);
2965 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2966 
2967 	int32 index = 0;
2968 	LockList::Iterator iterator = locking->locks.GetIterator();
2969 	while (iterator.HasNext()) {
2970 		struct advisory_lock* lock = iterator.Next();
2971 
2972 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2973 		kprintf("        start:  %Ld\n", lock->start);
2974 		kprintf("        end:    %Ld\n", lock->end);
2975 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2976 	}
2977 }
2978 
2979 
2980 static void
2981 _dump_mount(struct fs_mount* mount)
2982 {
2983 	kprintf("MOUNT: %p\n", mount);
2984 	kprintf(" id:            %ld\n", mount->id);
2985 	kprintf(" device_name:   %s\n", mount->device_name);
2986 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2987 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2988 	kprintf(" partition:     %p\n", mount->partition);
2989 	kprintf(" lock:          %p\n", &mount->rlock);
2990 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2991 		mount->owns_file_device ? " owns_file_device" : "");
2992 
2993 	fs_volume* volume = mount->volume;
2994 	while (volume != NULL) {
2995 		kprintf(" volume %p:\n", volume);
2996 		kprintf("  layer:            %ld\n", volume->layer);
2997 		kprintf("  private_volume:   %p\n", volume->private_volume);
2998 		kprintf("  ops:              %p\n", volume->ops);
2999 		kprintf("  file_system:      %p\n", volume->file_system);
3000 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3001 		volume = volume->super_volume;
3002 	}
3003 
3004 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3005 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3006 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3007 	set_debug_variable("_partition", (addr_t)mount->partition);
3008 }
3009 
3010 
3011 static bool
3012 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3013 	const char* name)
3014 {
3015 	bool insertSlash = buffer[bufferSize] != '\0';
3016 	size_t nameLength = strlen(name);
3017 
3018 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3019 		return false;
3020 
3021 	if (insertSlash)
3022 		buffer[--bufferSize] = '/';
3023 
3024 	bufferSize -= nameLength;
3025 	memcpy(buffer + bufferSize, name, nameLength);
3026 
3027 	return true;
3028 }
3029 
3030 
3031 static bool
3032 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3033 	ino_t nodeID)
3034 {
3035 	if (bufferSize == 0)
3036 		return false;
3037 
3038 	bool insertSlash = buffer[bufferSize] != '\0';
3039 	if (insertSlash)
3040 		buffer[--bufferSize] = '/';
3041 
3042 	size_t size = snprintf(buffer, bufferSize,
3043 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3044 	if (size > bufferSize) {
3045 		if (insertSlash)
3046 			bufferSize++;
3047 		return false;
3048 	}
3049 
3050 	if (size < bufferSize)
3051 		memmove(buffer + bufferSize - size, buffer, size);
3052 
3053 	bufferSize -= size;
3054 	return true;
3055 }
3056 
3057 
3058 static char*
3059 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3060 	bool& _truncated)
3061 {
3062 	// null-terminate the path
3063 	buffer[--bufferSize] = '\0';
3064 
3065 	while (true) {
3066 		while (vnode->mount->root_vnode == vnode
3067 				&& vnode->mount->covers_vnode != NULL) {
3068 			vnode = vnode->mount->covers_vnode;
3069 		}
3070 
3071 		if (vnode == sRoot) {
3072 			_truncated = bufferSize == 0;
3073 			if (!_truncated)
3074 				buffer[--bufferSize] = '/';
3075 			return buffer + bufferSize;
3076 		}
3077 
3078 		// resolve the name
3079 		ino_t dirID;
3080 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3081 			vnode->id, dirID);
3082 		if (name == NULL) {
3083 			// Failed to resolve the name -- prepend "<dev,node>/".
3084 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3085 				vnode->mount->id, vnode->id);
3086 			return buffer + bufferSize;
3087 		}
3088 
3089 		// prepend the name
3090 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3091 			_truncated = true;
3092 			return buffer + bufferSize;
3093 		}
3094 
3095 		// resolve the directory node
3096 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3097 		if (nextVnode == NULL) {
3098 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3099 				vnode->mount->id, dirID);
3100 			return buffer + bufferSize;
3101 		}
3102 
3103 		vnode = nextVnode;
3104 	}
3105 }
3106 
3107 
3108 static void
3109 _dump_vnode(struct vnode* vnode, bool printPath)
3110 {
3111 	kprintf("VNODE: %p\n", vnode);
3112 	kprintf(" device:        %ld\n", vnode->device);
3113 	kprintf(" id:            %Ld\n", vnode->id);
3114 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3115 	kprintf(" private_node:  %p\n", vnode->private_node);
3116 	kprintf(" mount:         %p\n", vnode->mount);
3117 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3118 	kprintf(" cache:         %p\n", vnode->cache);
3119 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3120 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3121 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3122 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3123 
3124 	_dump_advisory_locking(vnode->advisory_locking);
3125 
3126 	if (printPath) {
3127 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3128 		if (buffer != NULL) {
3129 			bool truncated;
3130 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3131 				B_PATH_NAME_LENGTH, truncated);
3132 			if (path != NULL) {
3133 				kprintf(" path:          ");
3134 				if (truncated)
3135 					kputs("<truncated>/");
3136 				kputs(path);
3137 				kputs("\n");
3138 			} else
3139 				kprintf("Failed to resolve vnode path.\n");
3140 
3141 			debug_free(buffer);
3142 		} else
3143 			kprintf("Failed to allocate memory for constructing the path.\n");
3144 	}
3145 
3146 	set_debug_variable("_node", (addr_t)vnode->private_node);
3147 	set_debug_variable("_mount", (addr_t)vnode->mount);
3148 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3149 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3150 }
3151 
3152 
3153 static int
3154 dump_mount(int argc, char** argv)
3155 {
3156 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3157 		kprintf("usage: %s [id|address]\n", argv[0]);
3158 		return 0;
3159 	}
3160 
3161 	uint32 id = parse_expression(argv[1]);
3162 	struct fs_mount* mount = NULL;
3163 
3164 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3165 	if (mount == NULL) {
3166 		if (IS_USER_ADDRESS(id)) {
3167 			kprintf("fs_mount not found\n");
3168 			return 0;
3169 		}
3170 		mount = (fs_mount*)id;
3171 	}
3172 
3173 	_dump_mount(mount);
3174 	return 0;
3175 }
3176 
3177 
3178 static int
3179 dump_mounts(int argc, char** argv)
3180 {
3181 	if (argc != 1) {
3182 		kprintf("usage: %s\n", argv[0]);
3183 		return 0;
3184 	}
3185 
3186 	kprintf("address     id root       covers     cookie     fs_name\n");
3187 
3188 	struct hash_iterator iterator;
3189 	struct fs_mount* mount;
3190 
3191 	hash_open(sMountsTable, &iterator);
3192 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3193 			!= NULL) {
3194 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3195 			mount->covers_vnode, mount->volume->private_volume,
3196 			mount->volume->file_system_name);
3197 
3198 		fs_volume* volume = mount->volume;
3199 		while (volume->super_volume != NULL) {
3200 			volume = volume->super_volume;
3201 			kprintf("                                     %p %s\n",
3202 				volume->private_volume, volume->file_system_name);
3203 		}
3204 	}
3205 
3206 	hash_close(sMountsTable, &iterator, false);
3207 	return 0;
3208 }
3209 
3210 
3211 static int
3212 dump_vnode(int argc, char** argv)
3213 {
3214 	bool printPath = false;
3215 	int argi = 1;
3216 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3217 		printPath = true;
3218 		argi++;
3219 	}
3220 
3221 	if (argi >= argc || argi + 2 < argc) {
3222 		print_debugger_command_usage(argv[0]);
3223 		return 0;
3224 	}
3225 
3226 	struct vnode* vnode = NULL;
3227 
3228 	if (argi + 1 == argc) {
3229 		vnode = (struct vnode*)parse_expression(argv[argi]);
3230 		if (IS_USER_ADDRESS(vnode)) {
3231 			kprintf("invalid vnode address\n");
3232 			return 0;
3233 		}
3234 		_dump_vnode(vnode, printPath);
3235 		return 0;
3236 	}
3237 
3238 	struct hash_iterator iterator;
3239 	dev_t device = parse_expression(argv[argi]);
3240 	ino_t id = parse_expression(argv[argi + 1]);
3241 
3242 	hash_open(sVnodeTable, &iterator);
3243 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3244 		if (vnode->id != id || vnode->device != device)
3245 			continue;
3246 
3247 		_dump_vnode(vnode, printPath);
3248 	}
3249 
3250 	hash_close(sVnodeTable, &iterator, false);
3251 	return 0;
3252 }
3253 
3254 
3255 static int
3256 dump_vnodes(int argc, char** argv)
3257 {
3258 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3259 		kprintf("usage: %s [device]\n", argv[0]);
3260 		return 0;
3261 	}
3262 
3263 	// restrict dumped nodes to a certain device if requested
3264 	dev_t device = parse_expression(argv[1]);
3265 
3266 	struct hash_iterator iterator;
3267 	struct vnode* vnode;
3268 
3269 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3270 		"flags\n");
3271 
3272 	hash_open(sVnodeTable, &iterator);
3273 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3274 		if (vnode->device != device)
3275 			continue;
3276 
3277 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3278 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3279 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3280 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3281 	}
3282 
3283 	hash_close(sVnodeTable, &iterator, false);
3284 	return 0;
3285 }
3286 
3287 
3288 static int
3289 dump_vnode_caches(int argc, char** argv)
3290 {
3291 	struct hash_iterator iterator;
3292 	struct vnode* vnode;
3293 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3295 		kprintf("usage: %s [device]\n", argv[0]);
3296 		return 0;
3297 	}
3298 
3299 	// restrict dumped nodes to a certain device if requested
3300 	dev_t device = -1;
3301 	if (argc > 1)
3302 		device = parse_expression(argv[1]);
3303 
3304 	kprintf("address    dev     inode cache          size   pages\n");
3305 
3306 	hash_open(sVnodeTable, &iterator);
3307 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3308 		if (vnode->cache == NULL)
3309 			continue;
3310 		if (device != -1 && vnode->device != device)
3311 			continue;
3312 
3313 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3314 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3315 				/ B_PAGE_SIZE, vnode->cache->page_count);
3316 	}
3317 
3318 	hash_close(sVnodeTable, &iterator, false);
3319 	return 0;
3320 }
3321 
3322 
3323 int
3324 dump_io_context(int argc, char** argv)
3325 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3327 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3328 		return 0;
3329 	}
3330 
3331 	struct io_context* context = NULL;
3332 
3333 	if (argc > 1) {
3334 		uint32 num = parse_expression(argv[1]);
3335 		if (IS_KERNEL_ADDRESS(num))
3336 			context = (struct io_context*)num;
3337 		else {
3338 			Team* team = team_get_team_struct_locked(num);
3339 			if (team == NULL) {
3340 				kprintf("could not find team with ID %ld\n", num);
3341 				return 0;
3342 			}
3343 			context = (struct io_context*)team->io_context;
3344 		}
3345 	} else
3346 		context = get_current_io_context(true);
3347 
3348 	kprintf("I/O CONTEXT: %p\n", context);
3349 	kprintf(" root vnode:\t%p\n", context->root);
3350 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3351 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3352 	kprintf(" max fds:\t%lu\n", context->table_size);
3353 
3354 	if (context->num_used_fds)
3355 		kprintf("   no.  type         ops  ref  open  mode         pos"
3356 			"      cookie\n");
3357 
3358 	for (uint32 i = 0; i < context->table_size; i++) {
3359 		struct file_descriptor* fd = context->fds[i];
3360 		if (fd == NULL)
3361 			continue;
3362 
3363 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3364 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3365 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3366 			fd->pos, fd->cookie,
3367 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3368 				? "mount" : "vnode",
3369 			fd->u.vnode);
3370 	}
3371 
3372 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3373 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3374 
3375 	set_debug_variable("_cwd", (addr_t)context->cwd);
3376 
3377 	return 0;
3378 }
3379 
3380 
3381 int
3382 dump_vnode_usage(int argc, char** argv)
3383 {
3384 	if (argc != 1) {
3385 		kprintf("usage: %s\n", argv[0]);
3386 		return 0;
3387 	}
3388 
3389 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3390 		kMaxUnusedVnodes);
3391 
3392 	struct hash_iterator iterator;
3393 	hash_open(sVnodeTable, &iterator);
3394 
3395 	uint32 count = 0;
3396 	struct vnode* vnode;
3397 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3398 		count++;
3399 	}
3400 
3401 	hash_close(sVnodeTable, &iterator, false);
3402 
3403 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3404 	return 0;
3405 }
3406 
3407 #endif	// ADD_DEBUGGER_COMMANDS
3408 
3409 /*!	Clears an iovec array of physical pages.
3410 	Returns in \a _bytes the number of bytes successfully cleared.
3411 */
3412 static status_t
3413 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3414 {
3415 	size_t bytes = *_bytes;
3416 	size_t index = 0;
3417 
3418 	while (bytes > 0) {
3419 		size_t length = min_c(vecs[index].iov_len, bytes);
3420 
3421 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3422 			length);
3423 		if (status != B_OK) {
3424 			*_bytes -= bytes;
3425 			return status;
3426 		}
3427 
		bytes -= length;
		index++;
			// move on to the next vec, or we would clear the same one again
3429 	}
3430 
3431 	return B_OK;
3432 }
3433 
3434 
3435 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3436 	and calls the file system hooks to read/write the request to disk.
3437 */
3438 static status_t
3439 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3440 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3441 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3442 	bool doWrite)
3443 {
3444 	if (fileVecCount == 0) {
3445 		// There are no file vecs at this offset, so we're obviously trying
3446 		// to access the file outside of its bounds
3447 		return B_BAD_VALUE;
3448 	}
3449 
3450 	size_t numBytes = *_numBytes;
3451 	uint32 fileVecIndex;
3452 	size_t vecOffset = *_vecOffset;
3453 	uint32 vecIndex = *_vecIndex;
3454 	status_t status;
3455 	size_t size;
3456 
3457 	if (!doWrite && vecOffset == 0) {
3458 		// now directly read the data from the device
3459 		// the first file_io_vec can be read directly
3460 
3461 		if (fileVecs[0].length < numBytes)
3462 			size = fileVecs[0].length;
3463 		else
3464 			size = numBytes;
3465 
3466 		if (fileVecs[0].offset >= 0) {
3467 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3468 				&vecs[vecIndex], vecCount - vecIndex, &size);
3469 		} else {
3470 			// sparse read
3471 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3472 		}
3473 		if (status != B_OK)
3474 			return status;
3475 
3476 		// TODO: this is a work-around for buggy device drivers!
3477 		//	When our own drivers honour the length, we can:
3478 		//	a) also use this direct I/O for writes (otherwise, it would
3479 		//	   overwrite precious data)
3480 		//	b) panic if the term below is true (at least for writes)
3481 		if (size > fileVecs[0].length) {
3482 			//dprintf("warning: device driver %p doesn't respect total length "
3483 			//	"in read_pages() call!\n", ref->device);
3484 			size = fileVecs[0].length;
3485 		}
3486 
3487 		ASSERT(size <= fileVecs[0].length);
3488 
3489 		// If the file portion was contiguous, we're already done now
3490 		if (size == numBytes)
3491 			return B_OK;
3492 
3493 		// if we reached the end of the file, we can return as well
3494 		if (size != fileVecs[0].length) {
3495 			*_numBytes = size;
3496 			return B_OK;
3497 		}
3498 
3499 		fileVecIndex = 1;
3500 
3501 		// first, find out where we have to continue in our iovecs
3502 		for (; vecIndex < vecCount; vecIndex++) {
3503 			if (size < vecs[vecIndex].iov_len)
3504 				break;
3505 
3506 			size -= vecs[vecIndex].iov_len;
3507 		}
3508 
3509 		vecOffset = size;
3510 	} else {
3511 		fileVecIndex = 0;
3512 		size = 0;
3513 	}
3514 
3515 	// Too bad, let's process the rest of the file_io_vecs
3516 
3517 	size_t totalSize = size;
3518 	size_t bytesLeft = numBytes - size;
3519 
3520 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3521 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3522 		off_t fileOffset = fileVec.offset;
3523 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3524 
3525 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3526 
3527 		// process the complete fileVec
3528 		while (fileLeft > 0) {
3529 			iovec tempVecs[MAX_TEMP_IO_VECS];
3530 			uint32 tempCount = 0;
3531 
3532 			// size tracks how much of what is left of the current fileVec
3533 			// (fileLeft) has been assigned to tempVecs
3534 			size = 0;
3535 
3536 			// assign what is left of the current fileVec to the tempVecs
3537 			for (size = 0; size < fileLeft && vecIndex < vecCount
3538 					&& tempCount < MAX_TEMP_IO_VECS;) {
3539 				// try to satisfy one iovec per iteration (or as much as
3540 				// possible)
3541 
3542 				// bytes left of the current iovec
3543 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3544 				if (vecLeft == 0) {
3545 					vecOffset = 0;
3546 					vecIndex++;
3547 					continue;
3548 				}
3549 
3550 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3551 					vecIndex, vecOffset, size));
3552 
3553 				// actually available bytes
3554 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3555 
3556 				tempVecs[tempCount].iov_base
3557 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3558 				tempVecs[tempCount].iov_len = tempVecSize;
3559 				tempCount++;
3560 
3561 				size += tempVecSize;
3562 				vecOffset += tempVecSize;
3563 			}
3564 
3565 			size_t bytes = size;
3566 
3567 			if (fileOffset == -1) {
3568 				if (doWrite) {
3569 					panic("sparse write attempt: vnode %p", vnode);
3570 					status = B_IO_ERROR;
3571 				} else {
3572 					// sparse read
3573 					status = zero_pages(tempVecs, tempCount, &bytes);
3574 				}
3575 			} else if (doWrite) {
3576 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3577 					tempVecs, tempCount, &bytes);
3578 			} else {
3579 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3580 					tempVecs, tempCount, &bytes);
3581 			}
3582 			if (status != B_OK)
3583 				return status;
3584 
3585 			totalSize += bytes;
3586 			bytesLeft -= size;
3587 			if (fileOffset >= 0)
3588 				fileOffset += size;
3589 			fileLeft -= size;
3590 			//dprintf("-> file left = %Lu\n", fileLeft);
3591 
3592 			if (size != bytes || vecIndex >= vecCount) {
3593 				// there are no more bytes or iovecs, let's bail out
3594 				*_numBytes = totalSize;
3595 				return B_OK;
3596 			}
3597 		}
3598 	}
3599 
3600 	*_vecIndex = vecIndex;
3601 	*_vecOffset = vecOffset;
3602 	*_numBytes = totalSize;
3603 	return B_OK;
3604 }
3605 
3606 
3607 //	#pragma mark - public API for file systems
3608 
3609 
3610 extern "C" status_t
3611 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3612 	fs_vnode_ops* ops)
3613 {
3614 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3615 		volume, volume->id, vnodeID, privateNode));
3616 
3617 	if (privateNode == NULL)
3618 		return B_BAD_VALUE;
3619 
3620 	// create the node
3621 	bool nodeCreated;
3622 	struct vnode* vnode;
3623 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3624 		nodeCreated);
3625 	if (status != B_OK)
3626 		return status;
3627 
3628 	WriteLocker nodeLocker(sVnodeLock, true);
3629 		// create_new_vnode_and_lock() has locked for us
3630 
3631 	// file system integrity check:
3632 	// test if the vnode already exists and bail out if this is the case!
3633 	if (!nodeCreated) {
3634 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3635 			volume->id, vnodeID, privateNode, vnode->private_node);
3636 		return B_ERROR;
3637 	}
3638 
3639 	vnode->private_node = privateNode;
3640 	vnode->ops = ops;
3641 	vnode->SetUnpublished(true);
3642 
3643 	TRACE(("returns: %s\n", strerror(status)));
3644 
3645 	return status;
3646 }
3647 
3648 
3649 extern "C" status_t
3650 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3651 	fs_vnode_ops* ops, int type, uint32 flags)
3652 {
3653 	FUNCTION(("publish_vnode()\n"));
3654 
3655 	WriteLocker locker(sVnodeLock);
3656 
3657 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3658 
3659 	bool nodeCreated = false;
3660 	if (vnode == NULL) {
3661 		if (privateNode == NULL)
3662 			return B_BAD_VALUE;
3663 
3664 		// create the node
3665 		locker.Unlock();
3666 			// create_new_vnode_and_lock() will re-lock for us on success
3667 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3668 			nodeCreated);
3669 		if (status != B_OK)
3670 			return status;
3671 
3672 		locker.SetTo(sVnodeLock, true);
3673 	}
3674 
3675 	if (nodeCreated) {
3676 		vnode->private_node = privateNode;
3677 		vnode->ops = ops;
3678 		vnode->SetUnpublished(true);
3679 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3680 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3681 		// already known, but not published
3682 	} else
3683 		return B_BAD_VALUE;
3684 
3685 	bool publishSpecialSubNode = false;
3686 
3687 	vnode->SetType(type);
3688 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3689 	publishSpecialSubNode = is_special_node_type(type)
3690 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3691 
3692 	status_t status = B_OK;
3693 
3694 	// create sub vnodes, if necessary
3695 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3696 		locker.Unlock();
3697 
3698 		fs_volume* subVolume = volume;
3699 		if (volume->sub_volume != NULL) {
3700 			while (status == B_OK && subVolume->sub_volume != NULL) {
3701 				subVolume = subVolume->sub_volume;
3702 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3703 					vnode);
3704 			}
3705 		}
3706 
3707 		if (status == B_OK && publishSpecialSubNode)
3708 			status = create_special_sub_node(vnode, flags);
3709 
3710 		if (status != B_OK) {
3711 			// error -- clean up the created sub vnodes
3712 			while (subVolume->super_volume != volume) {
3713 				subVolume = subVolume->super_volume;
3714 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3715 			}
3716 		}
3717 
3718 		if (status == B_OK) {
3719 			ReadLocker vnodesReadLocker(sVnodeLock);
3720 			AutoLocker<Vnode> nodeLocker(vnode);
3721 			vnode->SetBusy(false);
3722 			vnode->SetUnpublished(false);
3723 		} else {
3724 			locker.Lock();
3725 			hash_remove(sVnodeTable, vnode);
3726 			remove_vnode_from_mount_list(vnode, vnode->mount);
3727 			free(vnode);
3728 		}
3729 	} else {
3730 		// we still hold the write lock -- mark the node unbusy and published
3731 		vnode->SetBusy(false);
3732 		vnode->SetUnpublished(false);
3733 	}
3734 
3735 	TRACE(("returns: %s\n", strerror(status)));
3736 
3737 	return status;
3738 }
3739 
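/* Usage sketch (illustrative only): a file system typically registers a
   freshly created node in two steps -- new_vnode() makes it known to the
   VFS but leaves it busy and unpublished, publish_vnode() then makes it
   available to lookups. The my_fs_* and gMyFSVnodeOps names below are
   hypothetical, not part of this file:

	static status_t
	my_fs_register_node(fs_volume* volume, ino_t id, void* privateNode)
	{
		status_t status = new_vnode(volume, id, privateNode,
			&gMyFSVnodeOps);
		if (status != B_OK)
			return status;

		// ... create the on-disk entry; call remove_vnode() on failure ...

		return publish_vnode(volume, id, privateNode, &gMyFSVnodeOps,
			S_IFREG | 0644, 0);
	}
*/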
3740 
3741 extern "C" status_t
3742 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3743 {
3744 	struct vnode* vnode;
3745 
3746 	if (volume == NULL)
3747 		return B_BAD_VALUE;
3748 
3749 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3750 	if (status != B_OK)
3751 		return status;
3752 
3753 	// If this is a layered FS, we need to get the node cookie for the requested
3754 	// layer.
3755 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3756 		fs_vnode resolvedNode;
3757 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3758 			&resolvedNode);
3759 		if (status != B_OK) {
3760 			panic("get_vnode(): Failed to get super node for vnode %p, "
3761 				"volume: %p", vnode, volume);
3762 			put_vnode(vnode);
3763 			return status;
3764 		}
3765 
3766 		if (_privateNode != NULL)
3767 			*_privateNode = resolvedNode.private_node;
3768 	} else if (_privateNode != NULL)
3769 		*_privateNode = vnode->private_node;
3770 
3771 	return B_OK;
3772 }
3773 
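/* Usage sketch (illustrative only): every successful get_vnode() must be
   balanced by a put_vnode() on the same ID. Hypothetical FS code
   accessing one of its own nodes:

	void* privateNode;
	if (get_vnode(volume, parentID, &privateNode) == B_OK) {
		// privateNode is whatever the FS passed to new_vnode()/
		// publish_vnode() for that ID
		...
		put_vnode(volume, parentID);
	}
*/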
3774 
3775 extern "C" status_t
3776 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3777 {
3778 	struct vnode* vnode;
3779 
3780 	rw_lock_read_lock(&sVnodeLock);
3781 	vnode = lookup_vnode(volume->id, vnodeID);
3782 	rw_lock_read_unlock(&sVnodeLock);
3783 
3784 	if (vnode == NULL)
3785 		return B_BAD_VALUE;
3786 
3787 	inc_vnode_ref_count(vnode);
3788 	return B_OK;
3789 }
3790 
3791 
3792 extern "C" status_t
3793 put_vnode(fs_volume* volume, ino_t vnodeID)
3794 {
3795 	struct vnode* vnode;
3796 
3797 	rw_lock_read_lock(&sVnodeLock);
3798 	vnode = lookup_vnode(volume->id, vnodeID);
3799 	rw_lock_read_unlock(&sVnodeLock);
3800 
3801 	if (vnode == NULL)
3802 		return B_BAD_VALUE;
3803 
3804 	dec_vnode_ref_count(vnode, false, true);
3805 	return B_OK;
3806 }
3807 
3808 
3809 extern "C" status_t
3810 remove_vnode(fs_volume* volume, ino_t vnodeID)
3811 {
3812 	ReadLocker locker(sVnodeLock);
3813 
3814 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3815 	if (vnode == NULL)
3816 		return B_ENTRY_NOT_FOUND;
3817 
3818 	if (vnode->covered_by != NULL) {
3819 		// this vnode is in use
3820 		return B_BUSY;
3821 	}
3822 
3823 	vnode->Lock();
3824 
3825 	vnode->SetRemoved(true);
3826 	bool removeUnpublished = false;
3827 
3828 	if (vnode->IsUnpublished()) {
3829 		// prepare the vnode for deletion
3830 		removeUnpublished = true;
3831 		vnode->SetBusy(true);
3832 	}
3833 
3834 	vnode->Unlock();
3835 	locker.Unlock();
3836 
3837 	if (removeUnpublished) {
3838 		// If the vnode hasn't been published yet, we delete it here
3839 		atomic_add(&vnode->ref_count, -1);
3840 		free_vnode(vnode, true);
3841 	}
3842 
3843 	return B_OK;
3844 }
3845 
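/* Usage sketch (illustrative only): a file system's unlink() hook would
   remove the directory entry and then mark the node for deletion; the
   node is actually destroyed only when its last reference is put.
   my_fs_delete_entry() is a hypothetical helper:

	static status_t
	my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
	{
		ino_t id;
		status_t status = my_fs_delete_entry(dir->private_node, name, &id);
		if (status != B_OK)
			return status;

		return remove_vnode(volume, id);
	}
*/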
3846 
3847 extern "C" status_t
3848 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3849 {
3850 	struct vnode* vnode;
3851 
3852 	rw_lock_read_lock(&sVnodeLock);
3853 
3854 	vnode = lookup_vnode(volume->id, vnodeID);
3855 	if (vnode) {
3856 		AutoLocker<Vnode> nodeLocker(vnode);
3857 		vnode->SetRemoved(false);
3858 	}
3859 
3860 	rw_lock_read_unlock(&sVnodeLock);
3861 	return B_OK;
3862 }
3863 
3864 
3865 extern "C" status_t
3866 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3867 {
3868 	ReadLocker _(sVnodeLock);
3869 
3870 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3871 		if (_removed != NULL)
3872 			*_removed = vnode->IsRemoved();
3873 		return B_OK;
3874 	}
3875 
3876 	return B_BAD_VALUE;
3877 }
3878 
3879 
3880 extern "C" fs_volume*
3881 volume_for_vnode(fs_vnode* _vnode)
3882 {
3883 	if (_vnode == NULL)
3884 		return NULL;
3885 
3886 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3887 	return vnode->mount->volume;
3888 }
3889 
3890 
3891 #if 0
3892 extern "C" status_t
3893 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3894 	size_t* _numBytes)
3895 {
3896 	struct file_descriptor* descriptor;
3897 	struct vnode* vnode;
3898 
3899 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3900 	if (descriptor == NULL)
3901 		return B_FILE_ERROR;
3902 
3903 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3904 		count, 0, _numBytes);
3905 
3906 	put_fd(descriptor);
3907 	return status;
3908 }
3909 
3910 
3911 extern "C" status_t
3912 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3913 	size_t* _numBytes)
3914 {
3915 	struct file_descriptor* descriptor;
3916 	struct vnode* vnode;
3917 
3918 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3919 	if (descriptor == NULL)
3920 		return B_FILE_ERROR;
3921 
3922 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3923 		count, 0, _numBytes);
3924 
3925 	put_fd(descriptor);
3926 	return status;
3927 }
3928 #endif
3929 
3930 
3931 extern "C" status_t
3932 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3933 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3934 	size_t* _bytes)
3935 {
3936 	struct file_descriptor* descriptor;
3937 	struct vnode* vnode;
3938 
3939 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3940 	if (descriptor == NULL)
3941 		return B_FILE_ERROR;
3942 
3943 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3944 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3945 		false);
3946 
3947 	put_fd(descriptor);
3948 	return status;
3949 }
3950 
3951 
3952 extern "C" status_t
3953 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3954 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3955 	size_t* _bytes)
3956 {
3957 	struct file_descriptor* descriptor;
3958 	struct vnode* vnode;
3959 
3960 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3961 	if (descriptor == NULL)
3962 		return B_FILE_ERROR;
3963 
3964 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3965 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3966 		true);
3967 
3968 	put_fd(descriptor);
3969 	return status;
3970 }
3971 
3972 
3973 extern "C" status_t
3974 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3975 {
3976 	// look up the mount -- the caller is required to make sure that the mount
3977 	// won't go away
3978 	MutexLocker locker(sMountMutex);
3979 	struct fs_mount* mount = find_mount(mountID);
3980 	if (mount == NULL)
3981 		return B_BAD_VALUE;
3982 	locker.Unlock();
3983 
3984 	return mount->entry_cache.Add(dirID, name, nodeID);
3985 }
3986 
3987 
3988 extern "C" status_t
3989 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3990 {
3991 	// look up the mount -- the caller is required to make sure that the mount
3992 	// won't go away
3993 	MutexLocker locker(sMountMutex);
3994 	struct fs_mount* mount = find_mount(mountID);
3995 	if (mount == NULL)
3996 		return B_BAD_VALUE;
3997 	locker.Unlock();
3998 
3999 	return mount->entry_cache.Remove(dirID, name);
4000 }
4001 
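/* Usage sketch (illustrative only): a file system can seed the entry
   cache after resolving an entry so that subsequent path lookups are
   answered without calling back into the FS, and must invalidate the
   entry again when it disappears:

	entry_cache_add(volume->id, directoryID, "somefile", nodeID);
	// ... later, when the entry is unlinked:
	entry_cache_remove(volume->id, directoryID, "somefile");
*/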
4002 
4003 //	#pragma mark - private VFS API
4004 //	Functions the VFS exports for other parts of the kernel
4005 
4006 
4007 /*! Acquires another reference to the vnode that has to be released
4008 	by calling vfs_put_vnode().
4009 */
4010 void
4011 vfs_acquire_vnode(struct vnode* vnode)
4012 {
4013 	inc_vnode_ref_count(vnode);
4014 }
4015 
4016 
4017 /*! This is currently called from file_cache_create() only.
4018 	It's probably a temporary solution as long as devfs requires that
4019 	fs_read_pages()/fs_write_pages() are called with the standard
4020 	open cookie and not with a device cookie.
4021 	If that's done differently, remove this call; it has no other
4022 	purpose.
4023 */
4024 extern "C" status_t
4025 vfs_get_cookie_from_fd(int fd, void** _cookie)
4026 {
4027 	struct file_descriptor* descriptor;
4028 
4029 	descriptor = get_fd(get_current_io_context(true), fd);
4030 	if (descriptor == NULL)
4031 		return B_FILE_ERROR;
4032 
4033 	*_cookie = descriptor->cookie;
4034 	return B_OK;
4035 }
4036 
4037 
4038 extern "C" status_t
4039 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4040 {
4041 	*vnode = get_vnode_from_fd(fd, kernel);
4042 
4043 	if (*vnode == NULL)
4044 		return B_FILE_ERROR;
4045 
4046 	return B_NO_ERROR;
4047 }
4048 
4049 
4050 extern "C" status_t
4051 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4052 {
4053 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4054 		path, kernel));
4055 
4056 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4057 	if (pathBuffer.InitCheck() != B_OK)
4058 		return B_NO_MEMORY;
4059 
4060 	char* buffer = pathBuffer.LockBuffer();
4061 	strlcpy(buffer, path, pathBuffer.BufferSize());
4062 
4063 	struct vnode* vnode;
4064 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4065 	if (status != B_OK)
4066 		return status;
4067 
4068 	*_vnode = vnode;
4069 	return B_OK;
4070 }
4071 
4072 
4073 extern "C" status_t
4074 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4075 {
4076 	struct vnode* vnode;
4077 
4078 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4079 	if (status != B_OK)
4080 		return status;
4081 
4082 	*_vnode = vnode;
4083 	return B_OK;
4084 }
4085 
4086 
4087 extern "C" status_t
4088 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4089 	const char* name, struct vnode** _vnode)
4090 {
4091 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4092 }
4093 
4094 
4095 extern "C" void
4096 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4097 {
4098 	*_mountID = vnode->device;
4099 	*_vnodeID = vnode->id;
4100 }
4101 
4102 
4103 /*!
4104 	Helper function abstracting the process of "converting" a given
4105 	vnode-pointer to a fs_vnode-pointer.
4106 	Currently only used in bindfs.
4107 */
4108 extern "C" fs_vnode*
4109 vfs_fsnode_for_vnode(struct vnode* vnode)
4110 {
4111 	return vnode;
4112 }
4113 
4114 
4115 /*!
4116 	Calls fs_open() on the given vnode and returns a new
4117 	file descriptor for it
4118 */
4119 int
4120 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4121 {
4122 	return open_vnode(vnode, openMode, kernel);
4123 }
4124 
4125 
4126 /*!	Looks up a vnode with the given mount and vnode ID.
4127 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4128 	to the node.
4129 	It's currently only used by file_cache_create().
4130 */
4131 extern "C" status_t
4132 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4133 {
4134 	rw_lock_read_lock(&sVnodeLock);
4135 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4136 	rw_lock_read_unlock(&sVnodeLock);
4137 
4138 	if (vnode == NULL)
4139 		return B_ERROR;
4140 
4141 	*_vnode = vnode;
4142 	return B_OK;
4143 }
4144 
4145 
4146 extern "C" status_t
4147 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4148 	bool traverseLeafLink, bool kernel, void** _node)
4149 {
4150 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4151 		volume, path, kernel));
4152 
4153 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4154 	if (pathBuffer.InitCheck() != B_OK)
4155 		return B_NO_MEMORY;
4156 
4157 	fs_mount* mount;
4158 	status_t status = get_mount(volume->id, &mount);
4159 	if (status != B_OK)
4160 		return status;
4161 
4162 	char* buffer = pathBuffer.LockBuffer();
4163 	strlcpy(buffer, path, pathBuffer.BufferSize());
4164 
4165 	struct vnode* vnode = mount->root_vnode;
4166 
4167 	if (buffer[0] == '/')
4168 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4169 	else {
4170 		inc_vnode_ref_count(vnode);
4171 			// vnode_path_to_vnode() releases a reference to the starting vnode
4172 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4173 			kernel, &vnode, NULL);
4174 	}
4175 
4176 	put_mount(mount);
4177 
4178 	if (status != B_OK)
4179 		return status;
4180 
4181 	if (vnode->device != volume->id) {
4182 		// wrong mount ID - must not gain access to foreign file system nodes
4183 		put_vnode(vnode);
4184 		return B_BAD_VALUE;
4185 	}
4186 
4187 	// Use get_vnode() to resolve the cookie for the right layer.
4188 	status = get_vnode(volume, vnode->id, _node);
4189 	put_vnode(vnode);
4190 
4191 	return status;
4192 }
4193 
4194 
4195 status_t
4196 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4197 	struct stat* stat, bool kernel)
4198 {
4199 	status_t status;
4200 
4201 	if (path) {
4202 		// path given: get the stat of the node referred to by (fd, path)
4203 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4204 		if (pathBuffer.InitCheck() != B_OK)
4205 			return B_NO_MEMORY;
4206 
4207 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4208 			traverseLeafLink, stat, kernel);
4209 	} else {
4210 		// no path given: get the FD and use the FD operation
4211 		struct file_descriptor* descriptor
4212 			= get_fd(get_current_io_context(kernel), fd);
4213 		if (descriptor == NULL)
4214 			return B_FILE_ERROR;
4215 
4216 		if (descriptor->ops->fd_read_stat)
4217 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4218 		else
4219 			status = B_NOT_SUPPORTED;
4220 
4221 		put_fd(descriptor);
4222 	}
4223 
4224 	return status;
4225 }
4226 
4227 
4228 /*!	Finds the full path to the file that contains the module \a moduleName,
4229 	puts it into \a pathBuffer, and returns B_OK for success.
4230 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4231 	\c B_ENTRY_NOT_FOUND if no file could be found.
4232 	\a pathBuffer is clobbered in any case and must not be relied on if this
4233 	function returns unsuccessfully.
4234 	\a basePath and \a pathBuffer must not point to the same space.
4235 */
4236 status_t
4237 vfs_get_module_path(const char* basePath, const char* moduleName,
4238 	char* pathBuffer, size_t bufferSize)
4239 {
4240 	struct vnode* dir;
4241 	struct vnode* file;
4242 	status_t status;
4243 	size_t length;
4244 	char* path;
4245 
4246 	if (bufferSize == 0
4247 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4248 		return B_BUFFER_OVERFLOW;
4249 
4250 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4251 	if (status != B_OK)
4252 		return status;
4253 
4254 	// the path buffer has been clobbered by the above call
4255 	length = strlcpy(pathBuffer, basePath, bufferSize);
4256 	if (pathBuffer[length - 1] != '/')
4257 		pathBuffer[length++] = '/';
4258 
4259 	path = pathBuffer + length;
4260 	bufferSize -= length;
4261 
4262 	while (moduleName) {
4263 		char* nextPath = strchr(moduleName, '/');
4264 		if (nextPath == NULL)
4265 			length = strlen(moduleName);
4266 		else {
4267 			length = nextPath - moduleName;
4268 			nextPath++;
4269 		}
4270 
4271 		if (length + 1 >= bufferSize) {
4272 			status = B_BUFFER_OVERFLOW;
4273 			goto err;
4274 		}
4275 
4276 		memcpy(path, moduleName, length);
4277 		path[length] = '\0';
4278 		moduleName = nextPath;
4279 
4280 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4281 		if (status != B_OK) {
4282 			// vnode_path_to_vnode() has already released the reference to dir
4283 			return status;
4284 		}
4285 
4286 		if (S_ISDIR(file->Type())) {
4287 			// go to the next directory
4288 			path[length] = '/';
4289 			path[length + 1] = '\0';
4290 			path += length + 1;
4291 			bufferSize -= length + 1;
4292 
4293 			dir = file;
4294 		} else if (S_ISREG(file->Type())) {
4295 			// it's a file so it should be what we've searched for
4296 			put_vnode(file);
4297 
4298 			return B_OK;
4299 		} else {
4300 			TRACE(("vfs_get_module_path(): something is strange here: "
4301 				"0x%08lx...\n", file->Type()));
4302 			status = B_ERROR;
4303 			dir = file;
4304 			goto err;
4305 		}
4306 	}
4307 
4308 	// if we got here, the moduleName just pointed to a directory, not to
4309 	// a real module - what should we do in this case?
4310 	status = B_ENTRY_NOT_FOUND;
4311 
4312 err:
4313 	put_vnode(dir);
4314 	return status;
4315 }
4316 
4317 
4318 /*!	\brief Normalizes a given path.
4319 
4320 	The path must refer to an existing or non-existing entry in an existing
4321 	directory; that is, after chopping off the leaf component, the remaining
4322 	path must refer to an existing directory.
4323 
4324 	The returned path will be canonical in that it will be absolute, will not
4325 	contain any "." or ".." components or duplicate occurrences of '/'s,
4326 	and none of the directory components will be symbolic links.
4327 
4328 	Any two paths referring to the same entry will result in the same
4329 	normalized path (well, that is pretty much the definition of `normalized',
4330 	isn't it :-).
4331 
4332 	\param path The path to be normalized.
4333 	\param buffer The buffer into which the normalized path will be written.
4334 		   May be the same one as \a path.
4335 	\param bufferSize The size of \a buffer.
4336 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4337 	\param kernel \c true, if the IO context of the kernel shall be used,
4338 		   otherwise that of the team this thread belongs to. Only relevant,
4339 		   if the path is relative (to get the CWD).
4340 	\return \c B_OK if everything went fine, another error code otherwise.
4341 */
4342 status_t
4343 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4344 	bool traverseLink, bool kernel)
4345 {
4346 	if (!path || !buffer || bufferSize < 1)
4347 		return B_BAD_VALUE;
4348 
4349 	if (path != buffer) {
4350 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4351 			return B_BUFFER_OVERFLOW;
4352 	}
4353 
4354 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4355 }
4356 
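/* Usage sketch (illustrative only), normalizing a path in place; the
   shown result assumes no symlinks are involved:

	char path[B_PATH_NAME_LENGTH];
	strlcpy(path, "/boot/home/../home/./Desktop", sizeof(path));
	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK)
		dprintf("-> %s\n", path);	// "/boot/home/Desktop"
*/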
4357 
4358 /*!	\brief Creates a special node in the file system.
4359 
4360 	The caller gets a reference to the newly created node (which is passed
4361 	back through \a _createdVnode) and is responsible for releasing it.
4362 
4363 	\param path The path where to create the entry for the node. Can be \c NULL,
4364 		in which case the node is created without an entry in the root FS -- it
4365 		will automatically be deleted when the last reference has been released.
4366 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4367 		the target file system will just create the node with its standard
4368 		operations. Depending on the type of the node a subnode might be created
4369 		automatically, though.
4370 	\param mode The type and permissions for the node to be created.
4371 	\param flags Flags to be passed to the creating FS.
4372 	\param kernel \c true, if called in the kernel context (relevant only if
4373 		\a path is not \c NULL and not absolute).
4374 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4375 		file system creating the node, with the private data pointer and
4376 		operations for the super node. Can be \c NULL.
4377 	\param _createdVnode Pointer to pre-allocated storage where to store the
4378 		pointer to the newly created node.
4379 	\return \c B_OK, if everything went fine, another error code otherwise.
4380 */
4381 status_t
4382 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4383 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4384 	struct vnode** _createdVnode)
4385 {
4386 	struct vnode* dirNode;
4387 	char _leaf[B_FILE_NAME_LENGTH];
4388 	char* leaf = NULL;
4389 
4390 	if (path) {
4391 		// We've got a path. Get the dir vnode and the leaf name.
4392 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4393 		if (tmpPathBuffer.InitCheck() != B_OK)
4394 			return B_NO_MEMORY;
4395 
4396 		char* tmpPath = tmpPathBuffer.LockBuffer();
4397 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4398 			return B_NAME_TOO_LONG;
4399 
4400 		// get the dir vnode and the leaf name
4401 		leaf = _leaf;
4402 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4403 		if (error != B_OK)
4404 			return error;
4405 	} else {
4406 		// No path. Create the node in the root FS.
4407 		dirNode = sRoot;
4408 		inc_vnode_ref_count(dirNode);
4409 	}
4410 
4411 	VNodePutter _(dirNode);
4412 
4413 	// check support for creating special nodes
4414 	if (!HAS_FS_CALL(dirNode, create_special_node))
4415 		return B_UNSUPPORTED;
4416 
4417 	// create the node
4418 	fs_vnode superVnode;
4419 	ino_t nodeID;
4420 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4421 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4422 	if (status != B_OK)
4423 		return status;
4424 
4425 	// look up the node
4426 	rw_lock_read_lock(&sVnodeLock);
4427 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4428 	rw_lock_read_unlock(&sVnodeLock);
4429 
4430 	if (*_createdVnode == NULL) {
4431 		panic("vfs_create_special_node(): lookup of node failed");
4432 		return B_ERROR;
4433 	}
4434 
4435 	return B_OK;
4436 }
4437 
4438 
4439 extern "C" void
4440 vfs_put_vnode(struct vnode* vnode)
4441 {
4442 	put_vnode(vnode);
4443 }
4444 
4445 
4446 extern "C" status_t
4447 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4448 {
4449 	// Get current working directory from io context
4450 	struct io_context* context = get_current_io_context(false);
4451 	status_t status = B_OK;
4452 
4453 	mutex_lock(&context->io_mutex);
4454 
4455 	if (context->cwd != NULL) {
4456 		*_mountID = context->cwd->device;
4457 		*_vnodeID = context->cwd->id;
4458 	} else
4459 		status = B_ERROR;
4460 
4461 	mutex_unlock(&context->io_mutex);
4462 	return status;
4463 }
4464 
4465 
4466 status_t
4467 vfs_unmount(dev_t mountID, uint32 flags)
4468 {
4469 	return fs_unmount(NULL, mountID, flags, true);
4470 }
4471 
4472 
4473 extern "C" status_t
4474 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4475 {
4476 	struct vnode* vnode;
4477 
4478 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4479 	if (status != B_OK)
4480 		return status;
4481 
4482 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4483 	put_vnode(vnode);
4484 	return B_OK;
4485 }
4486 
4487 
4488 extern "C" void
4489 vfs_free_unused_vnodes(int32 level)
4490 {
4491 	vnode_low_resource_handler(NULL,
4492 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4493 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4494 		level);
4495 }
4496 
4497 
4498 extern "C" bool
4499 vfs_can_page(struct vnode* vnode, void* cookie)
4500 {
4501 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4502 
4503 	if (HAS_FS_CALL(vnode, can_page))
4504 		return FS_CALL(vnode, can_page, cookie);
4505 	return false;
4506 }
4507 
4508 
4509 extern "C" status_t
4510 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4511 	const generic_io_vec* vecs, size_t count, uint32 flags,
4512 	generic_size_t* _numBytes)
4513 {
4514 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4515 		pos));
4516 
4517 #if VFS_PAGES_IO_TRACING
4518 	generic_size_t bytesRequested = *_numBytes;
4519 #endif
4520 
4521 	IORequest request;
4522 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4523 	if (status == B_OK) {
4524 		status = vfs_vnode_io(vnode, cookie, &request);
4525 		if (status == B_OK)
4526 			status = request.Wait();
4527 		*_numBytes = request.TransferredBytes();
4528 	}
4529 
4530 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4531 		status, *_numBytes));
4532 
4533 	return status;
4534 }
4535 
4536 
4537 extern "C" status_t
4538 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4539 	const generic_io_vec* vecs, size_t count, uint32 flags,
4540 	generic_size_t* _numBytes)
4541 {
4542 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4543 		pos));
4544 
4545 #if VFS_PAGES_IO_TRACING
4546 	generic_size_t bytesRequested = *_numBytes;
4547 #endif
4548 
4549 	IORequest request;
4550 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4551 	if (status == B_OK) {
4552 		status = vfs_vnode_io(vnode, cookie, &request);
4553 		if (status == B_OK)
4554 			status = request.Wait();
4555 		*_numBytes = request.TransferredBytes();
4556 	}
4557 
4558 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4559 		status, *_numBytes));
4560 
4561 	return status;
4562 }
4563 
4564 
4565 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4566 	will be created if \a allocate is \c true.
4567 	On success, the function also grabs a reference to the cache it
4568 	returns.
4569 */
4570 extern "C" status_t
4571 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4572 {
4573 	if (vnode->cache != NULL) {
4574 		vnode->cache->AcquireRef();
4575 		*_cache = vnode->cache;
4576 		return B_OK;
4577 	}
4578 
4579 	rw_lock_read_lock(&sVnodeLock);
4580 	vnode->Lock();
4581 
4582 	status_t status = B_OK;
4583 
4584 	// The cache could have been created in the meantime
4585 	if (vnode->cache == NULL) {
4586 		if (allocate) {
4587 			// TODO: actually the vnode needs to be busy already here, or
4588 			//	else this won't work...
4589 			bool wasBusy = vnode->IsBusy();
4590 			vnode->SetBusy(true);
4591 
4592 			vnode->Unlock();
4593 			rw_lock_read_unlock(&sVnodeLock);
4594 
4595 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4596 
4597 			rw_lock_read_lock(&sVnodeLock);
4598 			vnode->Lock();
4599 			vnode->SetBusy(wasBusy);
4600 		} else
4601 			status = B_BAD_VALUE;
4602 	}
4603 
4604 	vnode->Unlock();
4605 	rw_lock_read_unlock(&sVnodeLock);
4606 
4607 	if (status == B_OK) {
4608 		vnode->cache->AcquireRef();
4609 		*_cache = vnode->cache;
4610 	}
4611 
4612 	return status;
4613 }
4614 
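/* Usage sketch (illustrative only): the file cache/VM use this to map a
   vnode to its VMCache; the acquired reference must be released again:

	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
		// ... use the cache ...
		cache->ReleaseRef();
	}
*/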
4615 
4616 status_t
4617 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4618 	file_io_vec* vecs, size_t* _count)
4619 {
4620 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4621 		vnode, vecs, offset, size));
4622 
4623 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4624 }
4625 
4626 
4627 status_t
4628 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4629 {
4630 	status_t status = FS_CALL(vnode, read_stat, stat);
4631 
4632 	// fill in the st_dev and st_ino fields
4633 	if (status == B_OK) {
4634 		stat->st_dev = vnode->device;
4635 		stat->st_ino = vnode->id;
4636 		stat->st_rdev = -1;
4637 	}
4638 
4639 	return status;
4640 }
4641 
4642 
4643 status_t
4644 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4645 {
4646 	struct vnode* vnode;
4647 	status_t status = get_vnode(device, inode, &vnode, true, false);
4648 	if (status != B_OK)
4649 		return status;
4650 
4651 	status = FS_CALL(vnode, read_stat, stat);
4652 
4653 	// fill in the st_dev and st_ino fields
4654 	if (status == B_OK) {
4655 		stat->st_dev = vnode->device;
4656 		stat->st_ino = vnode->id;
4657 		stat->st_rdev = -1;
4658 	}
4659 
4660 	put_vnode(vnode);
4661 	return status;
4662 }
4663 
4664 
4665 status_t
4666 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4667 {
4668 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4669 }
4670 
4671 
4672 status_t
4673 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4674 	char* path, size_t pathLength)
4675 {
4676 	struct vnode* vnode;
4677 	status_t status;
4678 
4679 	// filter invalid leaf names
4680 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4681 		return B_BAD_VALUE;
4682 
4683 	// get the vnode matching the dir's node_ref
4684 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4685 		// special cases "." and "..": we can directly get the vnode of the
4686 		// referenced directory
4687 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4688 		leaf = NULL;
4689 	} else
4690 		status = get_vnode(device, inode, &vnode, true, false);
4691 	if (status != B_OK)
4692 		return status;
4693 
4694 	// get the directory path
4695 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4696 	put_vnode(vnode);
4697 		// we don't need the vnode anymore
4698 	if (status != B_OK)
4699 		return status;
4700 
4701 	// append the leaf name
4702 	if (leaf) {
4703 		// insert a directory separator if this is not the file system root
4704 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4705 				>= pathLength)
4706 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4707 			return B_NAME_TOO_LONG;
4708 		}
4709 	}
4710 
4711 	return B_OK;
4712 }
4713 
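/* Usage sketch (illustrative only): reconstructing an absolute path from
   a (device, directory, leaf) triple:

	char path[B_PATH_NAME_LENGTH];
	if (vfs_entry_ref_to_path(device, directoryID, "somefile", path,
			sizeof(path)) == B_OK)
		dprintf("entry resolves to %s\n", path);
*/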
4714 
4715 /*!	If the given descriptor locked its vnode, that lock will be released. */
4716 void
4717 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4718 {
4719 	struct vnode* vnode = fd_vnode(descriptor);
4720 
4721 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4722 		vnode->mandatory_locked_by = NULL;
4723 }
4724 
4725 
4726 /*!	Closes all file descriptors of the specified I/O context that
4727 	have the O_CLOEXEC flag set.
4728 */
4729 void
4730 vfs_exec_io_context(io_context* context)
4731 {
4732 	uint32 i;
4733 
4734 	for (i = 0; i < context->table_size; i++) {
4735 		mutex_lock(&context->io_mutex);
4736 
4737 		struct file_descriptor* descriptor = context->fds[i];
4738 		bool remove = false;
4739 
4740 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4741 			context->fds[i] = NULL;
4742 			context->num_used_fds--;
4743 
4744 			remove = true;
4745 		}
4746 
4747 		mutex_unlock(&context->io_mutex);
4748 
4749 		if (remove) {
4750 			close_fd(descriptor);
4751 			put_fd(descriptor);
4752 		}
4753 	}
4754 }
4755 
4756 
4757 /*! Sets up a new io_context structure, and inherits the properties
4758 	of the parent io_context if one is given.
4759 */
4760 io_context*
4761 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4762 {
4763 	io_context* context = (io_context*)malloc(sizeof(io_context));
4764 	if (context == NULL)
4765 		return NULL;
4766 
4767 	TIOC(NewIOContext(context, parentContext));
4768 
4769 	memset(context, 0, sizeof(io_context));
4770 	context->ref_count = 1;
4771 
4772 	MutexLocker parentLocker;
4773 
4774 	size_t tableSize;
4775 	if (parentContext) {
4776 		parentLocker.SetTo(parentContext->io_mutex, false);
4777 		tableSize = parentContext->table_size;
4778 	} else
4779 		tableSize = DEFAULT_FD_TABLE_SIZE;
4780 
4781 	// allocate space for FDs and their close-on-exec flags
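	// (a single allocation, laid out as: file_descriptor* table,
	// select_info* table, then a bitmap with one close-on-exec bit per
	// FD -- see the pointer setup below)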
4782 	context->fds = (file_descriptor**)malloc(
4783 		sizeof(struct file_descriptor*) * tableSize
4784 		+ sizeof(struct select_sync*) * tableSize
4785 		+ (tableSize + 7) / 8);
4786 	if (context->fds == NULL) {
4787 		free(context);
4788 		return NULL;
4789 	}
4790 
4791 	context->select_infos = (select_info**)(context->fds + tableSize);
4792 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4793 
4794 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4795 		+ sizeof(struct select_sync*) * tableSize
4796 		+ (tableSize + 7) / 8);
4797 
4798 	mutex_init(&context->io_mutex, "I/O context");
4799 
4800 	// Copy all parent file descriptors
4801 
4802 	if (parentContext) {
4803 		size_t i;
4804 
4805 		mutex_lock(&sIOContextRootLock);
4806 		context->root = parentContext->root;
4807 		if (context->root)
4808 			inc_vnode_ref_count(context->root);
4809 		mutex_unlock(&sIOContextRootLock);
4810 
4811 		context->cwd = parentContext->cwd;
4812 		if (context->cwd)
4813 			inc_vnode_ref_count(context->cwd);
4814 
4815 		for (i = 0; i < tableSize; i++) {
4816 			struct file_descriptor* descriptor = parentContext->fds[i];
4817 
4818 			if (descriptor != NULL) {
4819 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4820 				if (closeOnExec && purgeCloseOnExec)
4821 					continue;
4822 
4823 				TFD(InheritFD(context, i, descriptor, parentContext));
4824 
4825 				context->fds[i] = descriptor;
4826 				context->num_used_fds++;
4827 				atomic_add(&descriptor->ref_count, 1);
4828 				atomic_add(&descriptor->open_count, 1);
4829 
4830 				if (closeOnExec)
4831 					fd_set_close_on_exec(context, i, true);
4832 			}
4833 		}
4834 
4835 		parentLocker.Unlock();
4836 	} else {
4837 		context->root = sRoot;
4838 		context->cwd = sRoot;
4839 
4840 		if (context->root)
4841 			inc_vnode_ref_count(context->root);
4842 
4843 		if (context->cwd)
4844 			inc_vnode_ref_count(context->cwd);
4845 	}
4846 
4847 	context->table_size = tableSize;
4848 
4849 	list_init(&context->node_monitors);
4850 	context->max_monitors = DEFAULT_NODE_MONITORS;
4851 
4852 	return context;
4853 }
4854 
4855 
4856 static status_t
4857 vfs_free_io_context(io_context* context)
4858 {
4859 	uint32 i;
4860 
4861 	TIOC(FreeIOContext(context));
4862 
4863 	if (context->root)
4864 		put_vnode(context->root);
4865 
4866 	if (context->cwd)
4867 		put_vnode(context->cwd);
4868 
4869 	mutex_lock(&context->io_mutex);
4870 
4871 	for (i = 0; i < context->table_size; i++) {
4872 		if (struct file_descriptor* descriptor = context->fds[i]) {
4873 			close_fd(descriptor);
4874 			put_fd(descriptor);
4875 		}
4876 	}
4877 
4878 	mutex_destroy(&context->io_mutex);
4879 
4880 	remove_node_monitors(context);
4881 	free(context->fds);
4882 	free(context);
4883 
4884 	return B_OK;
4885 }
4886 
4887 
4888 void
4889 vfs_get_io_context(io_context* context)
4890 {
4891 	atomic_add(&context->ref_count, 1);
4892 }
4893 
4894 
4895 void
4896 vfs_put_io_context(io_context* context)
4897 {
4898 	if (atomic_add(&context->ref_count, -1) == 1)
4899 		vfs_free_io_context(context);
4900 }
4901 
4902 
4903 static status_t
4904 vfs_resize_fd_table(struct io_context* context, const int newSize)
4905 {
4906 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4907 		return B_BAD_VALUE;
4908 
4909 	TIOC(ResizeIOContext(context, newSize));
4910 
4911 	MutexLocker _(context->io_mutex);
4912 
4913 	int oldSize = context->table_size;
4914 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4915 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4916 
4917 	// If the tables shrink, make sure none of the fds being dropped are in use.
4918 	if (newSize < oldSize) {
4919 		for (int i = oldSize; i-- > newSize;) {
4920 			if (context->fds[i])
4921 				return B_BUSY;
4922 		}
4923 	}
4924 
4925 	// store pointers to the old tables
4926 	file_descriptor** oldFDs = context->fds;
4927 	select_info** oldSelectInfos = context->select_infos;
4928 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4929 
4930 	// allocate new tables
4931 	file_descriptor** newFDs = (file_descriptor**)malloc(
4932 		sizeof(struct file_descriptor*) * newSize
4933 		+ sizeof(struct select_sync*) * newSize
4934 		+ newCloseOnExitBitmapSize);
4935 	if (newFDs == NULL)
4936 		return B_NO_MEMORY;
4937 
4938 	context->fds = newFDs;
4939 	context->select_infos = (select_info**)(context->fds + newSize);
4940 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4941 	context->table_size = newSize;
4942 
4943 	// copy entries from old tables
4944 	int toCopy = min_c(oldSize, newSize);
4945 
4946 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4947 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4948 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4949 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4950 
4951 	// clear additional entries, if the tables grow
4952 	if (newSize > oldSize) {
4953 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4954 		memset(context->select_infos + oldSize, 0,
4955 			sizeof(void*) * (newSize - oldSize));
4956 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4957 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4958 	}
4959 
4960 	free(oldFDs);
4961 
4962 	return B_OK;
4963 }
4964 
4965 
4966 static status_t
4967 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4968 {
4969 	int	status = B_OK;
4970 
4971 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4972 		return B_BAD_VALUE;
4973 
4974 	mutex_lock(&context->io_mutex);
4975 
4976 	if ((size_t)newSize < context->num_monitors) {
4977 		status = B_BUSY;
4978 		goto out;
4979 	}
4980 	context->max_monitors = newSize;
4981 
4982 out:
4983 	mutex_unlock(&context->io_mutex);
4984 	return status;
4985 }
4986 
4987 
4988 int
4989 vfs_getrlimit(int resource, struct rlimit* rlp)
4990 {
4991 	if (!rlp)
4992 		return B_BAD_ADDRESS;
4993 
4994 	switch (resource) {
4995 		case RLIMIT_NOFILE:
4996 		{
4997 			struct io_context* context = get_current_io_context(false);
4998 			MutexLocker _(context->io_mutex);
4999 
5000 			rlp->rlim_cur = context->table_size;
5001 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5002 			return 0;
5003 		}
5004 
5005 		case RLIMIT_NOVMON:
5006 		{
5007 			struct io_context* context = get_current_io_context(false);
5008 			MutexLocker _(context->io_mutex);
5009 
5010 			rlp->rlim_cur = context->max_monitors;
5011 			rlp->rlim_max = MAX_NODE_MONITORS;
5012 			return 0;
5013 		}
5014 
5015 		default:
5016 			return B_BAD_VALUE;
5017 	}
5018 }
5019 
5020 
5021 int
5022 vfs_setrlimit(int resource, const struct rlimit* rlp)
5023 {
5024 	if (!rlp)
5025 		return B_BAD_ADDRESS;
5026 
5027 	switch (resource) {
5028 		case RLIMIT_NOFILE:
5029 			/* TODO: check getuid() */
5030 			if (rlp->rlim_max != RLIM_SAVED_MAX
5031 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5032 				return B_NOT_ALLOWED;
5033 
5034 			return vfs_resize_fd_table(get_current_io_context(false),
5035 				rlp->rlim_cur);
5036 
5037 		case RLIMIT_NOVMON:
5038 			/* TODO: check getuid() */
5039 			if (rlp->rlim_max != RLIM_SAVED_MAX
5040 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5041 				return B_NOT_ALLOWED;
5042 
5043 			return vfs_resize_monitor_table(get_current_io_context(false),
5044 				rlp->rlim_cur);
5045 
5046 		default:
5047 			return B_BAD_VALUE;
5048 	}
5049 }
5050 
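/* Usage sketch (illustrative only): these two back the POSIX
   getrlimit()/setrlimit() calls, e.g. for growing the current team's FD
   table:

	struct rlimit rl;
	rl.rlim_cur = 1024;					// requested new table size
	rl.rlim_max = MAX_FD_TABLE_SIZE;	// the hard limit may not change
	if (vfs_setrlimit(RLIMIT_NOFILE, &rl) != 0)
		dprintf("could not grow the FD table\n");
*/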
5051 
5052 status_t
5053 vfs_init(kernel_args* args)
5054 {
5055 	vnode::StaticInit();
5056 
5057 	struct vnode dummyVnode;
5058 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5059 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5060 	if (sVnodeTable == NULL)
5061 		panic("vfs_init: error creating vnode hash table\n");
5062 
5063 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5064 
5065 	struct fs_mount dummyMount;
5066 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5067 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5068 	if (sMountsTable == NULL)
5069 		panic("vfs_init: error creating mounts hash table\n");
5070 
5071 	node_monitor_init();
5072 
5073 	sRoot = NULL;
5074 
5075 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5076 
5077 	if (block_cache_init() != B_OK)
5078 		return B_ERROR;
5079 
5080 #ifdef ADD_DEBUGGER_COMMANDS
5081 	// add some debugger commands
5082 	add_debugger_command_etc("vnode", &dump_vnode,
5083 		"Print info about the specified vnode",
5084 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5085 		"Prints information about the vnode specified by address <vnode> or\n"
5086 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5087 		"constructed and printed. It might not be possible to construct a\n"
5088 		"complete path, though.\n",
5089 		0);
5090 	add_debugger_command("vnodes", &dump_vnodes,
5091 		"list all vnodes (from the specified device)");
5092 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5093 		"list all vnode caches");
5094 	add_debugger_command("mount", &dump_mount,
5095 		"info about the specified fs_mount");
5096 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5097 	add_debugger_command("io_context", &dump_io_context,
5098 		"info about the I/O context");
5099 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5100 		"info about vnode usage");
5101 #endif
5102 
5103 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5104 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5105 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5106 		0);
5107 
5108 	file_map_init();
5109 
5110 	return file_cache_init();
5111 }
5112 
5113 
5114 //	#pragma mark - fd_ops implementations
5115 
5116 
5117 /*!
5118 	Calls fs_open() on the given vnode and returns a new
5119 	file descriptor for it
5120 */
5121 static int
5122 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5123 {
5124 	void* cookie;
5125 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5126 	if (status != B_OK)
5127 		return status;
5128 
5129 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5130 	if (fd < 0) {
5131 		FS_CALL(vnode, close, cookie);
5132 		FS_CALL(vnode, free_cookie, cookie);
5133 	}
5134 	return fd;
5135 }
5136 
5137 
5138 /*!
5139 	Creates the entry \a name in \a directory (or, unless O_EXCL is given,
5140 	opens the existing one), and returns a new file descriptor for it
5141 */
5142 static int
5143 create_vnode(struct vnode* directory, const char* name, int openMode,
5144 	int perms, bool kernel)
5145 {
5146 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5147 	status_t status = B_ERROR;
5148 	struct vnode* vnode;
5149 	void* cookie;
5150 	ino_t newID;
5151 
5152 	// This is somewhat tricky: If the entry already exists, the FS responsible
5153 	// for the directory might not necessarily also be the one responsible for
5154 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5155 	// we can actually never call the create() hook without O_EXCL. Instead we
5156 	// try to look the entry up first. If it already exists, we just open the
5157 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5158 	// introduces a race condition, since someone else might have created the
5159 	// entry in the meantime. We hope the respective FS returns the correct
5160 	// error code, and we retry (up to 3 times).
5161 
5162 	for (int i = 0; i < 3 && status != B_OK; i++) {
5163 		// look the node up
5164 		status = lookup_dir_entry(directory, name, &vnode);
5165 		if (status == B_OK) {
5166 			VNodePutter putter(vnode);
5167 
5168 			if ((openMode & O_EXCL) != 0)
5169 				return B_FILE_EXISTS;
5170 
5171 			// If the node is a symlink, we have to follow it, unless
5172 			// O_NOTRAVERSE is set.
5173 			if (S_ISLNK(vnode->Type()) && traverse) {
5174 				putter.Put();
5175 				char clonedName[B_FILE_NAME_LENGTH + 1];
5176 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5177 						>= B_FILE_NAME_LENGTH) {
5178 					return B_NAME_TOO_LONG;
5179 				}
5180 
5181 				inc_vnode_ref_count(directory);
5182 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5183 					kernel, &vnode, NULL);
5184 				if (status != B_OK)
5185 					return status;
5186 
5187 				putter.SetTo(vnode);
5188 			}
5189 
5190 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5191 				put_vnode(vnode);
5192 				return B_LINK_LIMIT;
5193 			}
5194 
5195 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5196 			// on success keep the vnode reference for the FD
5197 			if (fd >= 0)
5198 				putter.Detach();
5199 
5200 			return fd;
5201 		}
5202 
5203 		// it doesn't exist yet -- try to create it
5204 
5205 		if (!HAS_FS_CALL(directory, create))
5206 			return B_READ_ONLY_DEVICE;
5207 
5208 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5209 			&cookie, &newID);
5210 		if (status != B_OK
5211 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5212 			return status;
5213 		}
5214 	}
5215 
5216 	if (status != B_OK)
5217 		return status;
5218 
5219 	// the node has been created successfully
5220 
5221 	rw_lock_read_lock(&sVnodeLock);
5222 	vnode = lookup_vnode(directory->device, newID);
5223 	rw_lock_read_unlock(&sVnodeLock);
5224 
5225 	if (vnode == NULL) {
5226 		panic("vfs: fs_create() returned success but there is no vnode, "
5227 			"mount ID %ld!\n", directory->device);
5228 		return B_BAD_VALUE;
5229 	}
5230 
5231 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5232 	if (fd >= 0)
5233 		return fd;
5234 
5235 	status = fd;
5236 
5237 	// something went wrong, clean up
5238 
5239 	FS_CALL(vnode, close, cookie);
5240 	FS_CALL(vnode, free_cookie, cookie);
5241 	put_vnode(vnode);
5242 
5243 	FS_CALL(directory, unlink, name);
5244 
5245 	return status;
5246 }
5247 
5248 
5249 /*! Calls fs open_dir() on the given vnode and returns a new
5250 	file descriptor for it
5251 */
5252 static int
5253 open_dir_vnode(struct vnode* vnode, bool kernel)
5254 {
5255 	void* cookie;
5256 	int status;
5257 
5258 	status = FS_CALL(vnode, open_dir, &cookie);
5259 	if (status != B_OK)
5260 		return status;
5261 
5262 	// directory is opened, create a fd
5263 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5264 	if (status >= 0)
5265 		return status;
5266 
5267 	FS_CALL(vnode, close_dir, cookie);
5268 	FS_CALL(vnode, free_dir_cookie, cookie);
5269 
5270 	return status;
5271 }
5272 
5273 
5274 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5275 	file descriptor for it.
5276 	Used by attr_dir_open() and attr_dir_open_fd().
5277 */
5278 static int
5279 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5280 {
5281 	void* cookie;
5282 	int status;
5283 
5284 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5285 		return B_NOT_SUPPORTED;
5286 
5287 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5288 	if (status != B_OK)
5289 		return status;
5290 
5291 	// directory is opened, create a fd
5292 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5293 	if (status >= 0)
5294 		return status;
5295 
5296 	FS_CALL(vnode, close_attr_dir, cookie);
5297 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5298 
5299 	return status;
5300 }
5301 
5302 
5303 static int
5304 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5305 	int openMode, int perms, bool kernel)
5306 {
5307 	struct vnode* directory;
5308 	int status;
5309 
5310 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5311 		"kernel %d\n", name, openMode, perms, kernel));
5312 
5313 	// get directory to put the new file in
5314 	status = get_vnode(mountID, directoryID, &directory, true, false);
5315 	if (status != B_OK)
5316 		return status;
5317 
5318 	status = create_vnode(directory, name, openMode, perms, kernel);
5319 	put_vnode(directory);
5320 
5321 	return status;
5322 }
5323 
5324 
5325 static int
5326 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5327 {
5328 	char name[B_FILE_NAME_LENGTH];
5329 	struct vnode* directory;
5330 	int status;
5331 
5332 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5333 		openMode, perms, kernel));
5334 
5335 	// get directory to put the new file in
5336 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5337 	if (status < 0)
5338 		return status;
5339 
5340 	status = create_vnode(directory, name, openMode, perms, kernel);
5341 
5342 	put_vnode(directory);
5343 	return status;
5344 }
5345 
5346 
5347 static int
5348 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5349 	int openMode, bool kernel)
5350 {
5351 	if (name == NULL || *name == '\0')
5352 		return B_BAD_VALUE;
5353 
5354 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5355 		mountID, directoryID, name, openMode));
5356 
5357 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5358 
5359 	// get the vnode matching the entry_ref
5360 	struct vnode* vnode;
5361 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5362 		kernel, &vnode);
5363 	if (status != B_OK)
5364 		return status;
5365 
5366 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5367 		put_vnode(vnode);
5368 		return B_LINK_LIMIT;
5369 	}
5370 
5371 	int newFD = open_vnode(vnode, openMode, kernel);
5372 	if (newFD >= 0) {
5373 		// The vnode reference has been transferred to the FD
5374 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5375 			directoryID, vnode->id, name);
5376 	} else
5377 		put_vnode(vnode);
5378 
5379 	return newFD;
5380 }
5381 
5382 
5383 static int
5384 file_open(int fd, char* path, int openMode, bool kernel)
5385 {
5386 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5387 
5388 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5389 		fd, path, openMode, kernel));
5390 
5391 	// get the vnode matching the vnode + path combination
5392 	struct vnode* vnode;
5393 	ino_t parentID;
5394 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5395 		&parentID, kernel);
5396 	if (status != B_OK)
5397 		return status;
5398 
5399 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5400 		put_vnode(vnode);
5401 		return B_LINK_LIMIT;
5402 	}
5403 
5404 	// open the vnode
5405 	int newFD = open_vnode(vnode, openMode, kernel);
5406 	if (newFD >= 0) {
5407 		// The vnode reference has been transferred to the FD
5408 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5409 			vnode->device, parentID, vnode->id, NULL);
5410 	} else
5411 		put_vnode(vnode);
5412 
5413 	return newFD;
5414 }
5415 
5416 
5417 static status_t
5418 file_close(struct file_descriptor* descriptor)
5419 {
5420 	struct vnode* vnode = descriptor->u.vnode;
5421 	status_t status = B_OK;
5422 
5423 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5424 
5425 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5426 		vnode->id);
5427 	if (HAS_FS_CALL(vnode, close)) {
5428 		status = FS_CALL(vnode, close, descriptor->cookie);
5429 	}
5430 
5431 	if (status == B_OK) {
5432 		// remove all outstanding locks for this team
5433 		release_advisory_lock(vnode, NULL);
5434 	}
5435 	return status;
5436 }
5437 
5438 
5439 static void
5440 file_free_fd(struct file_descriptor* descriptor)
5441 {
5442 	struct vnode* vnode = descriptor->u.vnode;
5443 
5444 	if (vnode != NULL) {
5445 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5446 		put_vnode(vnode);
5447 	}
5448 }
5449 
5450 
5451 static status_t
5452 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5453 	size_t* length)
5454 {
5455 	struct vnode* vnode = descriptor->u.vnode;
5456 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5457 		*length));
5458 
5459 	if (S_ISDIR(vnode->Type()))
5460 		return B_IS_A_DIRECTORY;
5461 
5462 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5463 }
5464 
5465 
5466 static status_t
5467 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5468 	size_t* length)
5469 {
5470 	struct vnode* vnode = descriptor->u.vnode;
5471 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5472 
5473 	if (S_ISDIR(vnode->Type()))
5474 		return B_IS_A_DIRECTORY;
5475 	if (!HAS_FS_CALL(vnode, write))
5476 		return B_READ_ONLY_DEVICE;
5477 
5478 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5479 }
5480 
5481 
5482 static off_t
5483 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5484 {
5485 	struct vnode* vnode = descriptor->u.vnode;
5486 	off_t offset;
5487 
5488 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5489 
5490 	// some kinds of files are not seekable
5491 	switch (vnode->Type() & S_IFMT) {
5492 		case S_IFIFO:
5493 		case S_IFSOCK:
5494 			return ESPIPE;
5495 
5496 		// The Open Group Base Specs don't single out any file types other
5497 		// than pipes, FIFOs, and sockets, so we allow seeking all the others.
5498 		case S_IFREG:
5499 		case S_IFBLK:
5500 		case S_IFDIR:
5501 		case S_IFLNK:
5502 		case S_IFCHR:
5503 			break;
5504 	}
5505 
5506 	switch (seekType) {
5507 		case SEEK_SET:
5508 			offset = 0;
5509 			break;
5510 		case SEEK_CUR:
5511 			offset = descriptor->pos;
5512 			break;
5513 		case SEEK_END:
5514 		{
5515 			// stat() the node
5516 			if (!HAS_FS_CALL(vnode, read_stat))
5517 				return B_NOT_SUPPORTED;
5518 
5519 			struct stat stat;
5520 			status_t status = FS_CALL(vnode, read_stat, &stat);
5521 			if (status != B_OK)
5522 				return status;
5523 
5524 			offset = stat.st_size;
5525 			break;
5526 		}
5527 		default:
5528 			return B_BAD_VALUE;
5529 	}
5530 
5531 	// assumes off_t is 64 bits wide
5532 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5533 		return B_BUFFER_OVERFLOW;
5534 
5535 	pos += offset;
5536 	if (pos < 0)
5537 		return B_BAD_VALUE;
5538 
5539 	return descriptor->pos = pos;
5540 }
5541 
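/* Worked example for the overflow guard above (illustrative): with
   offset == LONGLONG_MAX - 10 (a huge file) and pos == 20, we have
   LONGLONG_MAX - offset == 10 < 20, so the seek is rejected instead of
   letting pos + offset wrap around to a negative value. */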
5542 
5543 static status_t
5544 file_select(struct file_descriptor* descriptor, uint8 event,
5545 	struct selectsync* sync)
5546 {
5547 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5548 
5549 	struct vnode* vnode = descriptor->u.vnode;
5550 
5551 	// If the FS has no select() hook, notify select() now.
5552 	if (!HAS_FS_CALL(vnode, select))
5553 		return notify_select_event(sync, event);
5554 
5555 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5556 }
5557 
5558 
5559 static status_t
5560 file_deselect(struct file_descriptor* descriptor, uint8 event,
5561 	struct selectsync* sync)
5562 {
5563 	struct vnode* vnode = descriptor->u.vnode;
5564 
5565 	if (!HAS_FS_CALL(vnode, deselect))
5566 		return B_OK;
5567 
5568 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5569 }
5570 
5571 
5572 static status_t
5573 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5574 	bool kernel)
5575 {
5576 	struct vnode* vnode;
5577 	status_t status;
5578 
5579 	if (name == NULL || *name == '\0')
5580 		return B_BAD_VALUE;
5581 
5582 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5583 		"perms = %d)\n", mountID, parentID, name, perms));
5584 
5585 	status = get_vnode(mountID, parentID, &vnode, true, false);
5586 	if (status != B_OK)
5587 		return status;
5588 
5589 	if (HAS_FS_CALL(vnode, create_dir))
5590 		status = FS_CALL(vnode, create_dir, name, perms);
5591 	else
5592 		status = B_READ_ONLY_DEVICE;
5593 
5594 	put_vnode(vnode);
5595 	return status;
5596 }
5597 
5598 
5599 static status_t
5600 dir_create(int fd, char* path, int perms, bool kernel)
5601 {
5602 	char filename[B_FILE_NAME_LENGTH];
5603 	struct vnode* vnode;
5604 	status_t status;
5605 
5606 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5607 		kernel));
5608 
5609 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5610 	if (status < 0)
5611 		return status;
5612 
5613 	if (HAS_FS_CALL(vnode, create_dir))
5614 		status = FS_CALL(vnode, create_dir, filename, perms);
5615 	else
5616 		status = B_READ_ONLY_DEVICE;
5617 
5618 	put_vnode(vnode);
5619 	return status;
5620 }
5621 
5622 
5623 static int
5624 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5625 {
5626 	struct vnode* vnode;
5627 	int status;
5628 
5629 	FUNCTION(("dir_open_entry_ref()\n"));
5630 
5631 	if (name && *name == '\0')
5632 		return B_BAD_VALUE;
5633 
5634 	// get the vnode matching the entry_ref/node_ref
5635 	if (name) {
5636 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5637 			&vnode);
5638 	} else
5639 		status = get_vnode(mountID, parentID, &vnode, true, false);
5640 	if (status != B_OK)
5641 		return status;
5642 
5643 	int newFD = open_dir_vnode(vnode, kernel);
5644 	if (newFD >= 0) {
5645 		// The vnode reference has been transferred to the FD
5646 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5647 			vnode->id, name);
5648 	} else
5649 		put_vnode(vnode);
5650 
5651 	return newFD;
5652 }
5653 
5654 
5655 static int
5656 dir_open(int fd, char* path, bool kernel)
5657 {
5658 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5659 		kernel));
5660 
5661 	// get the vnode matching the vnode + path combination
5662 	struct vnode* vnode = NULL;
5663 	ino_t parentID;
5664 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5665 		kernel);
5666 	if (status != B_OK)
5667 		return status;
5668 
5669 	// open the dir
5670 	int newFD = open_dir_vnode(vnode, kernel);
5671 	if (newFD >= 0) {
5672 		// The vnode reference has been transferred to the FD
5673 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5674 			parentID, vnode->id, NULL);
5675 	} else
5676 		put_vnode(vnode);
5677 
5678 	return newFD;
5679 }
5680 
5681 
5682 static status_t
5683 dir_close(struct file_descriptor* descriptor)
5684 {
5685 	struct vnode* vnode = descriptor->u.vnode;
5686 
5687 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5688 
5689 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5690 		vnode->id);
5691 	if (HAS_FS_CALL(vnode, close_dir))
5692 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5693 
5694 	return B_OK;
5695 }
5696 
5697 
5698 static void
5699 dir_free_fd(struct file_descriptor* descriptor)
5700 {
5701 	struct vnode* vnode = descriptor->u.vnode;
5702 
5703 	if (vnode != NULL) {
5704 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5705 		put_vnode(vnode);
5706 	}
5707 }
5708 
5709 
5710 static status_t
5711 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5712 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5713 {
5714 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5715 		bufferSize, _count);
5716 }
5717 
5718 
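/*!	Fixes up a dirent as returned by a file system: fills in \c d_pdev and
	\c d_pino with the parent's IDs, makes the ".." entry of an FS root
	directory refer to the logical parent directory across the mount
	boundary (unless that would bypass the IO context's root), and
	replaces the IDs of covered vnodes with those of the covering
	(mounted) ones.
*/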
5719 static status_t
5720 fix_dirent(struct vnode* parent, struct dirent* entry,
5721 	struct io_context* ioContext)
5722 {
5723 	// set d_pdev and d_pino
5724 	entry->d_pdev = parent->device;
5725 	entry->d_pino = parent->id;
5726 
5727 	// If this is the ".." entry and the directory is the root of a FS,
5728 	// we need to replace d_dev and d_ino with the actual values.
5729 	if (strcmp(entry->d_name, "..") == 0
5730 		&& parent->mount->root_vnode == parent
5731 		&& parent->mount->covers_vnode) {
5732 		inc_vnode_ref_count(parent);
5733 			// vnode_path_to_vnode() puts the node
5734 
5735 		// Make sure the IO context root is not bypassed.
5736 		if (parent == ioContext->root) {
5737 			entry->d_dev = parent->device;
5738 			entry->d_ino = parent->id;
5739 		} else {
5740 			// ".." is guaranteed not to be clobbered by this call
5741 			struct vnode* vnode;
5742 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5743 				ioContext, &vnode, NULL);
5744 
5745 			if (status == B_OK) {
5746 				entry->d_dev = vnode->device;
5747 				entry->d_ino = vnode->id;
5748 			}
5749 		}
5750 	} else {
5751 		// resolve mount points
5752 		ReadLocker _(&sVnodeLock);
5753 
5754 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5755 		if (vnode != NULL) {
5756 			if (vnode->covered_by != NULL) {
5757 				entry->d_dev = vnode->covered_by->device;
5758 				entry->d_ino = vnode->covered_by->id;
5759 			}
5760 		}
5761 	}
5762 
5763 	return B_OK;
5764 }
5765 
5766 
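/*!	Reads directory entries into \a buffer via the FS's \c read_dir hook
	and adjusts each returned entry via fix_dirent(); consecutive entries
	are located using their \c d_reclen fields. The number of entries read
	is returned in \a _count.
*/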
5767 static status_t
5768 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5769 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5770 {
5771 	if (!HAS_FS_CALL(vnode, read_dir))
5772 		return B_NOT_SUPPORTED;
5773 
5774 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5775 		_count);
5776 	if (error != B_OK)
5777 		return error;
5778 
5779 	// we need to adjust the read dirents
5780 	uint32 count = *_count;
5781 	for (uint32 i = 0; i < count; i++) {
5782 		error = fix_dirent(vnode, buffer, ioContext);
5783 		if (error != B_OK)
5784 			return error;
5785 
5786 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5787 	}
5788 
5789 	return error;
5790 }
5791 
5792 
5793 static status_t
5794 dir_rewind(struct file_descriptor* descriptor)
5795 {
5796 	struct vnode* vnode = descriptor->u.vnode;
5797 
5798 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5799 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5800 	}
5801 
5802 	return B_NOT_SUPPORTED;
5803 }
5804 
5805 
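/*!	Removes the directory given by \a fd + \a path. Trailing "/" and "/."
	components are stripped from the path first; removing "." or ".." is
	refused with \c B_NOT_ALLOWED.
*/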
5806 static status_t
5807 dir_remove(int fd, char* path, bool kernel)
5808 {
5809 	char name[B_FILE_NAME_LENGTH];
5810 	struct vnode* directory;
5811 	status_t status;
5812 
5813 	if (path != NULL) {
5814 		// we need to make sure our path name doesn't end with "/", ".",
5815 		// or ".."
5816 		char* lastSlash;
5817 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5818 			char* leaf = lastSlash + 1;
5819 			if (!strcmp(leaf, ".."))
5820 				return B_NOT_ALLOWED;
5821 
5822 			// omit multiple slashes
5823 			while (lastSlash > path && lastSlash[-1] == '/')
5824 				lastSlash--;
5825 
5826 			if (leaf[0] != '\0'
5827 				&& strcmp(leaf, ".") != 0) {
5828 				break;
5829 			}
5830 			// "name/" -> "name", or "name/." -> "name"
5831 			lastSlash[0] = '\0';
5832 		}
5833 
5834 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5835 			return B_NOT_ALLOWED;
5836 	}
5837 
5838 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5839 	if (status != B_OK)
5840 		return status;
5841 
5842 	if (HAS_FS_CALL(directory, remove_dir))
5843 		status = FS_CALL(directory, remove_dir, name);
5844 	else
5845 		status = B_READ_ONLY_DEVICE;
5846 
5847 	put_vnode(directory);
5848 	return status;
5849 }
5850 
5851 
5852 static status_t
5853 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5854 	size_t length)
5855 {
5856 	struct vnode* vnode = descriptor->u.vnode;
5857 
5858 	if (HAS_FS_CALL(vnode, ioctl))
5859 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5860 
5861 	return B_DEV_INVALID_IOCTL;
5862 }
5863 
5864 
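/*!	Implements the fcntl() operations F_SETFD/F_GETFD, F_SETFL/F_GETFL,
	F_DUPFD, and the advisory locking ops F_GETLK/F_SETLK/F_SETLKW on the
	descriptor given by \a fd. For the locking ops \a argument is
	interpreted as a userland pointer to a \c flock structure.
*/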
5865 static status_t
5866 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5867 {
5868 	struct flock flock;
5869 
5870 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5871 		fd, op, argument, kernel ? "kernel" : "user"));
5872 
5873 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5874 		fd);
5875 	if (descriptor == NULL)
5876 		return B_FILE_ERROR;
5877 
5878 	struct vnode* vnode = fd_vnode(descriptor);
5879 
5880 	status_t status = B_OK;
5881 
5882 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5883 		if (descriptor->type != FDTYPE_FILE)
5884 			status = B_BAD_VALUE;
5885 		else if (user_memcpy(&flock, (struct flock*)argument,
5886 				sizeof(struct flock)) != B_OK)
5887 			status = B_BAD_ADDRESS;
5888 
5889 		if (status != B_OK) {
5890 			put_fd(descriptor);
5891 			return status;
5892 		}
5893 	}
5894 
5895 	switch (op) {
5896 		case F_SETFD:
5897 		{
5898 			struct io_context* context = get_current_io_context(kernel);
5899 			// Set file descriptor flags
5900 
5901 			// O_CLOEXEC is the only flag available at this time
5902 			mutex_lock(&context->io_mutex);
5903 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5904 			mutex_unlock(&context->io_mutex);
5905 
5906 			status = B_OK;
5907 			break;
5908 		}
5909 
5910 		case F_GETFD:
5911 		{
5912 			struct io_context* context = get_current_io_context(kernel);
5913 
5914 			// Get file descriptor flags
5915 			mutex_lock(&context->io_mutex);
5916 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5917 			mutex_unlock(&context->io_mutex);
5918 			break;
5919 		}
5920 
5921 		case F_SETFL:
5922 			// Set file descriptor open mode
5923 
5924 			// we only accept changes to O_APPEND and O_NONBLOCK
5925 			argument &= O_APPEND | O_NONBLOCK;
5926 			if (descriptor->ops->fd_set_flags != NULL) {
5927 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5928 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5929 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5930 					(int)argument);
5931 			} else
5932 				status = B_NOT_SUPPORTED;
5933 
5934 			if (status == B_OK) {
5935 				// update this descriptor's open_mode field
5936 				descriptor->open_mode = (descriptor->open_mode
5937 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5938 			}
5939 
5940 			break;
5941 
5942 		case F_GETFL:
5943 			// Get file descriptor open mode
5944 			status = descriptor->open_mode;
5945 			break;
5946 
5947 		case F_DUPFD:
5948 		{
5949 			struct io_context* context = get_current_io_context(kernel);
5950 
5951 			status = new_fd_etc(context, descriptor, (int)argument);
5952 			if (status >= 0) {
5953 				mutex_lock(&context->io_mutex);
5954 				fd_set_close_on_exec(context, fd, false);
5955 				mutex_unlock(&context->io_mutex);
5956 
5957 				atomic_add(&descriptor->ref_count, 1);
5958 			}
5959 			break;
5960 		}
5961 
5962 		case F_GETLK:
5963 			if (vnode != NULL) {
5964 				status = get_advisory_lock(vnode, &flock);
5965 				if (status == B_OK) {
5966 					// copy back flock structure
5967 					status = user_memcpy((struct flock*)argument, &flock,
5968 						sizeof(struct flock));
5969 				}
5970 			} else
5971 				status = B_BAD_VALUE;
5972 			break;
5973 
5974 		case F_SETLK:
5975 		case F_SETLKW:
5976 			status = normalize_flock(descriptor, &flock);
5977 			if (status != B_OK)
5978 				break;
5979 
5980 			if (vnode == NULL) {
5981 				status = B_BAD_VALUE;
5982 			} else if (flock.l_type == F_UNLCK) {
5983 				status = release_advisory_lock(vnode, &flock);
5984 			} else {
5985 				// the open mode must match the lock type
5986 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5987 						&& flock.l_type == F_WRLCK)
5988 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5989 						&& flock.l_type == F_RDLCK))
5990 					status = B_FILE_ERROR;
5991 				else {
5992 					status = acquire_advisory_lock(vnode, -1,
5993 						&flock, op == F_SETLKW);
5994 				}
5995 			}
5996 			break;
5997 
5998 		// ToDo: add support for more ops?
5999 
6000 		default:
6001 			status = B_BAD_VALUE;
6002 	}
6003 
6004 	put_fd(descriptor);
6005 	return status;
6006 }
6007 
6008 
6009 static status_t
6010 common_sync(int fd, bool kernel)
6011 {
6012 	struct file_descriptor* descriptor;
6013 	struct vnode* vnode;
6014 	status_t status;
6015 
6016 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6017 
6018 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6019 	if (descriptor == NULL)
6020 		return B_FILE_ERROR;
6021 
6022 	if (HAS_FS_CALL(vnode, fsync))
6023 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6024 	else
6025 		status = B_NOT_SUPPORTED;
6026 
6027 	put_fd(descriptor);
6028 	return status;
6029 }
6030 
6031 
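/*!	Acquires the mandatory node lock for the node referred to by \a fd by
	atomically setting \c vnode::mandatory_locked_by to the descriptor.
	Returns \c B_BUSY, if another descriptor already holds the lock.
*/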
6032 static status_t
6033 common_lock_node(int fd, bool kernel)
6034 {
6035 	struct file_descriptor* descriptor;
6036 	struct vnode* vnode;
6037 
6038 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6039 	if (descriptor == NULL)
6040 		return B_FILE_ERROR;
6041 
6042 	status_t status = B_OK;
6043 
6044 	// We need to set the lock atomically - someone
6045 	// else might try to set one at the same time
6046 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6047 			(file_descriptor*)NULL) != NULL)
6048 		status = B_BUSY;
6049 
6050 	put_fd(descriptor);
6051 	return status;
6052 }
6053 
6054 
6055 static status_t
6056 common_unlock_node(int fd, bool kernel)
6057 {
6058 	struct file_descriptor* descriptor;
6059 	struct vnode* vnode;
6060 
6061 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6062 	if (descriptor == NULL)
6063 		return B_FILE_ERROR;
6064 
6065 	status_t status = B_OK;
6066 
6067 	// We need to clear the lock atomically - someone
6068 	// else might set a new one at the same time
6069 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6070 			(file_descriptor*)NULL, descriptor) != descriptor)
6071 		status = B_BAD_VALUE;
6072 
6073 	put_fd(descriptor);
6074 	return status;
6075 }
6076 
6077 
6078 static status_t
6079 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6080 	bool kernel)
6081 {
6082 	struct vnode* vnode;
6083 	status_t status;
6084 
6085 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6086 	if (status != B_OK)
6087 		return status;
6088 
6089 	if (HAS_FS_CALL(vnode, read_symlink)) {
6090 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6091 	} else
6092 		status = B_BAD_VALUE;
6093 
6094 	put_vnode(vnode);
6095 	return status;
6096 }
6097 
6098 
6099 static status_t
6100 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6101 	bool kernel)
6102 {
6103 	// path validity checks have to be in the calling function!
6104 	char name[B_FILE_NAME_LENGTH];
6105 	struct vnode* vnode;
6106 	status_t status;
6107 
6108 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6109 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6110 
6111 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6112 	if (status != B_OK)
6113 		return status;
6114 
6115 	if (HAS_FS_CALL(vnode, create_symlink))
6116 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6117 	else {
6118 		status = HAS_FS_CALL(vnode, write)
6119 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
6120 	}
6121 
6122 	put_vnode(vnode);
6123 
6124 	return status;
6125 }
6126 
6127 
6128 static status_t
6129 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6130 	bool traverseLeafLink, bool kernel)
6131 {
6132 	// path validity checks have to be in the calling function!
6133 
6134 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6135 		toPath, kernel));
6136 
6137 	char name[B_FILE_NAME_LENGTH];
6138 	struct vnode* directory;
6139 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6140 		kernel);
6141 	if (status != B_OK)
6142 		return status;
6143 
6144 	struct vnode* vnode;
6145 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6146 		kernel);
6147 	if (status != B_OK)
6148 		goto err;
6149 
6150 	if (directory->mount != vnode->mount) {
6151 		status = B_CROSS_DEVICE_LINK;
6152 		goto err1;
6153 	}
6154 
6155 	if (HAS_FS_CALL(directory, link))
6156 		status = FS_CALL(directory, link, name, vnode);
6157 	else
6158 		status = B_READ_ONLY_DEVICE;
6159 
6160 err1:
6161 	put_vnode(vnode);
6162 err:
6163 	put_vnode(directory);
6164 
6165 	return status;
6166 }
6167 
6168 
6169 static status_t
6170 common_unlink(int fd, char* path, bool kernel)
6171 {
6172 	char filename[B_FILE_NAME_LENGTH];
6173 	struct vnode* vnode;
6174 	status_t status;
6175 
6176 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6177 		kernel));
6178 
6179 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6180 	if (status < 0)
6181 		return status;
6182 
6183 	if (HAS_FS_CALL(vnode, unlink))
6184 		status = FS_CALL(vnode, unlink, filename);
6185 	else
6186 		status = B_READ_ONLY_DEVICE;
6187 
6188 	put_vnode(vnode);
6189 
6190 	return status;
6191 }
6192 
6193 
6194 static status_t
6195 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6196 {
6197 	struct vnode* vnode;
6198 	status_t status;
6199 
6200 	// TODO: honor effectiveUserGroup argument
6201 
6202 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6203 	if (status != B_OK)
6204 		return status;
6205 
6206 	if (HAS_FS_CALL(vnode, access))
6207 		status = FS_CALL(vnode, access, mode);
6208 	else
6209 		status = B_OK;
6210 
6211 	put_vnode(vnode);
6212 
6213 	return status;
6214 }
6215 
6216 
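/*!	Renames the entry \a path relative to \a fd to \a newPath relative to
	\a newFD. Both entries must reside on the same volume; empty names,
	".", "..", and renaming an entry onto itself are rejected.
*/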
6217 static status_t
6218 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6219 {
6220 	struct vnode* fromVnode;
6221 	struct vnode* toVnode;
6222 	char fromName[B_FILE_NAME_LENGTH];
6223 	char toName[B_FILE_NAME_LENGTH];
6224 	status_t status;
6225 
6226 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6227 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6228 
6229 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6230 	if (status != B_OK)
6231 		return status;
6232 
6233 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6234 	if (status != B_OK)
6235 		goto err1;
6236 
6237 	if (fromVnode->device != toVnode->device) {
6238 		status = B_CROSS_DEVICE_LINK;
6239 		goto err2;
6240 	}
6241 
6242 	if (fromName[0] == '\0' || toName[0] == '\0'
6243 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6244 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6245 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6246 		status = B_BAD_VALUE;
6247 		goto err2;
6248 	}
6249 
6250 	if (HAS_FS_CALL(fromVnode, rename))
6251 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6252 	else
6253 		status = B_READ_ONLY_DEVICE;
6254 
6255 err2:
6256 	put_vnode(toVnode);
6257 err1:
6258 	put_vnode(fromVnode);
6259 
6260 	return status;
6261 }
6262 
6263 
6264 static status_t
6265 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6266 {
6267 	struct vnode* vnode = descriptor->u.vnode;
6268 
6269 	FUNCTION(("common_read_stat: stat %p\n", stat));
6270 
6271 	// TODO: remove this once all file systems properly set them!
6272 	stat->st_crtim.tv_nsec = 0;
6273 	stat->st_ctim.tv_nsec = 0;
6274 	stat->st_mtim.tv_nsec = 0;
6275 	stat->st_atim.tv_nsec = 0;
6276 
6277 	status_t status = FS_CALL(vnode, read_stat, stat);
6278 
6279 	// fill in the st_dev and st_ino fields
6280 	if (status == B_OK) {
6281 		stat->st_dev = vnode->device;
6282 		stat->st_ino = vnode->id;
6283 		stat->st_rdev = -1;
6284 	}
6285 
6286 	return status;
6287 }
6288 
6289 
6290 static status_t
6291 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6292 	int statMask)
6293 {
6294 	struct vnode* vnode = descriptor->u.vnode;
6295 
6296 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6297 		vnode, stat, statMask));
6298 
6299 	if (!HAS_FS_CALL(vnode, write_stat))
6300 		return B_READ_ONLY_DEVICE;
6301 
6302 	return FS_CALL(vnode, write_stat, stat, statMask);
6303 }
6304 
6305 
6306 static status_t
6307 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6308 	struct stat* stat, bool kernel)
6309 {
6310 	struct vnode* vnode;
6311 	status_t status;
6312 
6313 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6314 		stat));
6315 
6316 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6317 		kernel);
6318 	if (status < 0)
6319 		return status;
6320 
6321 	status = FS_CALL(vnode, read_stat, stat);
6322 
6323 	// fill in the st_dev and st_ino fields
6324 	if (status == B_OK) {
6325 		stat->st_dev = vnode->device;
6326 		stat->st_ino = vnode->id;
6327 		stat->st_rdev = -1;
6328 	}
6329 
6330 	put_vnode(vnode);
6331 	return status;
6332 }
6333 
6334 
6335 static status_t
6336 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6337 	const struct stat* stat, int statMask, bool kernel)
6338 {
6339 	struct vnode* vnode;
6340 	status_t status;
6341 
6342 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6343 		"kernel %d\n", fd, path, stat, statMask, kernel));
6344 
6345 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6346 		kernel);
6347 	if (status < 0)
6348 		return status;
6349 
6350 	if (HAS_FS_CALL(vnode, write_stat))
6351 		status = FS_CALL(vnode, write_stat, stat, statMask);
6352 	else
6353 		status = B_READ_ONLY_DEVICE;
6354 
6355 	put_vnode(vnode);
6356 
6357 	return status;
6358 }
6359 
6360 
6361 static int
6362 attr_dir_open(int fd, char* path, bool kernel)
6363 {
6364 	struct vnode* vnode;
6365 	int status;
6366 
6367 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6368 		kernel));
6369 
6370 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6371 	if (status != B_OK)
6372 		return status;
6373 
6374 	status = open_attr_dir_vnode(vnode, kernel);
6375 	if (status < 0)
6376 		put_vnode(vnode);
6377 
6378 	return status;
6379 }
6380 
6381 
6382 static status_t
6383 attr_dir_close(struct file_descriptor* descriptor)
6384 {
6385 	struct vnode* vnode = descriptor->u.vnode;
6386 
6387 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6388 
6389 	if (HAS_FS_CALL(vnode, close_attr_dir))
6390 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6391 
6392 	return B_OK;
6393 }
6394 
6395 
6396 static void
6397 attr_dir_free_fd(struct file_descriptor* descriptor)
6398 {
6399 	struct vnode* vnode = descriptor->u.vnode;
6400 
6401 	if (vnode != NULL) {
6402 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6403 		put_vnode(vnode);
6404 	}
6405 }
6406 
6407 
6408 static status_t
6409 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6410 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6411 {
6412 	struct vnode* vnode = descriptor->u.vnode;
6413 
6414 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6415 
6416 	if (HAS_FS_CALL(vnode, read_attr_dir))
6417 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6418 			bufferSize, _count);
6419 
6420 	return B_NOT_SUPPORTED;
6421 }
6422 
6423 
6424 static status_t
6425 attr_dir_rewind(struct file_descriptor* descriptor)
6426 {
6427 	struct vnode* vnode = descriptor->u.vnode;
6428 
6429 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6430 
6431 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6432 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6433 
6434 	return B_NOT_SUPPORTED;
6435 }
6436 
6437 
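/*!	Creates and opens the attribute \a name on the node given by
	\a fd + \a path and returns a new file descriptor for it. If getting
	the descriptor fails, the freshly created attribute is removed again.
*/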
6438 static int
6439 attr_create(int fd, char* path, const char* name, uint32 type,
6440 	int openMode, bool kernel)
6441 {
6442 	if (name == NULL || *name == '\0')
6443 		return B_BAD_VALUE;
6444 
6445 	struct vnode* vnode;
6446 	status_t status = fd_and_path_to_vnode(fd, path,
6447 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6448 	if (status != B_OK)
6449 		return status;
6450 
6451 	if (!HAS_FS_CALL(vnode, create_attr)) {
6452 		status = B_READ_ONLY_DEVICE;
6453 		goto err;
6454 	}
6455 
6456 	void* cookie;
6457 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6458 	if (status != B_OK)
6459 		goto err;
6460 
6461 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6462 	if (fd >= 0)
6463 		return fd;
6464 
6465 	status = fd;
6466 
6467 	FS_CALL(vnode, close_attr, cookie);
6468 	FS_CALL(vnode, free_attr_cookie, cookie);
6469 
6470 	FS_CALL(vnode, remove_attr, name);
6471 
6472 err:
6473 	put_vnode(vnode);
6474 
6475 	return status;
6476 }
6477 
6478 
6479 static int
6480 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6481 {
6482 	if (name == NULL || *name == '\0')
6483 		return B_BAD_VALUE;
6484 
6485 	struct vnode* vnode;
6486 	status_t status = fd_and_path_to_vnode(fd, path,
6487 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6488 	if (status != B_OK)
6489 		return status;
6490 
6491 	if (!HAS_FS_CALL(vnode, open_attr)) {
6492 		status = B_NOT_SUPPORTED;
6493 		goto err;
6494 	}
6495 
6496 	void* cookie;
6497 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6498 	if (status != B_OK)
6499 		goto err;
6500 
6501 	// now we only need a file descriptor for this attribute and we're done
6502 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6503 	if (fd >= 0)
6504 		return fd;
6505 
6506 	status = fd;
6507 
6508 	FS_CALL(vnode, close_attr, cookie);
6509 	FS_CALL(vnode, free_attr_cookie, cookie);
6510 
6511 err:
6512 	put_vnode(vnode);
6513 
6514 	return status;
6515 }
6516 
6517 
6518 static status_t
6519 attr_close(struct file_descriptor* descriptor)
6520 {
6521 	struct vnode* vnode = descriptor->u.vnode;
6522 
6523 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6524 
6525 	if (HAS_FS_CALL(vnode, close_attr))
6526 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6527 
6528 	return B_OK;
6529 }
6530 
6531 
6532 static void
6533 attr_free_fd(struct file_descriptor* descriptor)
6534 {
6535 	struct vnode* vnode = descriptor->u.vnode;
6536 
6537 	if (vnode != NULL) {
6538 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6539 		put_vnode(vnode);
6540 	}
6541 }
6542 
6543 
6544 static status_t
6545 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6546 	size_t* length)
6547 {
6548 	struct vnode* vnode = descriptor->u.vnode;
6549 
6550 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6551 		*length));
6552 
6553 	if (!HAS_FS_CALL(vnode, read_attr))
6554 		return B_NOT_SUPPORTED;
6555 
6556 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6557 }
6558 
6559 
6560 static status_t
6561 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6562 	size_t* length)
6563 {
6564 	struct vnode* vnode = descriptor->u.vnode;
6565 
6566 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6567 	if (!HAS_FS_CALL(vnode, write_attr))
6568 		return B_NOT_SUPPORTED;
6569 
6570 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6571 }
6572 
6573 
6574 static off_t
6575 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6576 {
6577 	off_t offset;
6578 
6579 	switch (seekType) {
6580 		case SEEK_SET:
6581 			offset = 0;
6582 			break;
6583 		case SEEK_CUR:
6584 			offset = descriptor->pos;
6585 			break;
6586 		case SEEK_END:
6587 		{
6588 			struct vnode* vnode = descriptor->u.vnode;
6589 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6590 				return B_NOT_SUPPORTED;
6591 
6592 			struct stat stat;
6593 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6594 				&stat);
6595 			if (status != B_OK)
6596 				return status;
6597 
6598 			offset = stat.st_size;
6599 			break;
6600 		}
6601 		default:
6602 			return B_BAD_VALUE;
6603 	}
6604 
6605 	// assumes off_t is 64 bits wide
6606 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6607 		return B_BUFFER_OVERFLOW;
6608 
6609 	pos += offset;
6610 	if (pos < 0)
6611 		return B_BAD_VALUE;
6612 
6613 	return descriptor->pos = pos;
6614 }
6615 
6616 
6617 static status_t
6618 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6619 {
6620 	struct vnode* vnode = descriptor->u.vnode;
6621 
6622 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6623 
6624 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6625 		return B_NOT_SUPPORTED;
6626 
6627 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6628 }
6629 
6630 
6631 static status_t
6632 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6633 	int statMask)
6634 {
6635 	struct vnode* vnode = descriptor->u.vnode;
6636 
6637 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6638 
6639 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6640 		return B_READ_ONLY_DEVICE;
6641 
6642 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6643 }
6644 
6645 
6646 static status_t
6647 attr_remove(int fd, const char* name, bool kernel)
6648 {
6649 	struct file_descriptor* descriptor;
6650 	struct vnode* vnode;
6651 	status_t status;
6652 
6653 	if (name == NULL || *name == '\0')
6654 		return B_BAD_VALUE;
6655 
6656 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6657 		kernel));
6658 
6659 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6660 	if (descriptor == NULL)
6661 		return B_FILE_ERROR;
6662 
6663 	if (HAS_FS_CALL(vnode, remove_attr))
6664 		status = FS_CALL(vnode, remove_attr, name);
6665 	else
6666 		status = B_READ_ONLY_DEVICE;
6667 
6668 	put_fd(descriptor);
6669 
6670 	return status;
6671 }
6672 
6673 
6674 static status_t
6675 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6676 	bool kernel)
6677 {
6678 	struct file_descriptor* fromDescriptor;
6679 	struct file_descriptor* toDescriptor;
6680 	struct vnode* fromVnode;
6681 	struct vnode* toVnode;
6682 	status_t status;
6683 
6684 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6685 		|| *toName == '\0')
6686 		return B_BAD_VALUE;
6687 
6688 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6689 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6690 
6691 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6692 	if (fromDescriptor == NULL)
6693 		return B_FILE_ERROR;
6694 
6695 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6696 	if (toDescriptor == NULL) {
6697 		status = B_FILE_ERROR;
6698 		goto err;
6699 	}
6700 
6701 	// are the files on the same volume?
6702 	if (fromVnode->device != toVnode->device) {
6703 		status = B_CROSS_DEVICE_LINK;
6704 		goto err1;
6705 	}
6706 
6707 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6708 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6709 	} else
6710 		status = B_READ_ONLY_DEVICE;
6711 
6712 err1:
6713 	put_fd(toDescriptor);
6714 err:
6715 	put_fd(fromDescriptor);
6716 
6717 	return status;
6718 }
6719 
6720 
6721 static int
6722 index_dir_open(dev_t mountID, bool kernel)
6723 {
6724 	struct fs_mount* mount;
6725 	void* cookie;
6726 
6727 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6728 
6729 	status_t status = get_mount(mountID, &mount);
6730 	if (status != B_OK)
6731 		return status;
6732 
6733 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6734 		status = B_NOT_SUPPORTED;
6735 		goto error;
6736 	}
6737 
6738 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6739 	if (status != B_OK)
6740 		goto error;
6741 
6742 	// get fd for the index directory
6743 	int fd;
6744 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6745 	if (fd >= 0)
6746 		return fd;
6747 
6748 	// something went wrong
6749 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6750 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6751 
6752 	status = fd;
6753 
6754 error:
6755 	put_mount(mount);
6756 	return status;
6757 }
6758 
6759 
6760 static status_t
6761 index_dir_close(struct file_descriptor* descriptor)
6762 {
6763 	struct fs_mount* mount = descriptor->u.mount;
6764 
6765 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6766 
6767 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6768 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6769 
6770 	return B_OK;
6771 }
6772 
6773 
6774 static void
6775 index_dir_free_fd(struct file_descriptor* descriptor)
6776 {
6777 	struct fs_mount* mount = descriptor->u.mount;
6778 
6779 	if (mount != NULL) {
6780 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6781 		put_mount(mount);
6782 	}
6783 }
6784 
6785 
6786 static status_t
6787 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6788 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6789 {
6790 	struct fs_mount* mount = descriptor->u.mount;
6791 
6792 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6793 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6794 			bufferSize, _count);
6795 	}
6796 
6797 	return B_NOT_SUPPORTED;
6798 }
6799 
6800 
6801 static status_t
6802 index_dir_rewind(struct file_descriptor* descriptor)
6803 {
6804 	struct fs_mount* mount = descriptor->u.mount;
6805 
6806 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6807 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6808 
6809 	return B_NOT_SUPPORTED;
6810 }
6811 
6812 
6813 static status_t
6814 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6815 	bool kernel)
6816 {
6817 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6818 		name, kernel));
6819 
6820 	struct fs_mount* mount;
6821 	status_t status = get_mount(mountID, &mount);
6822 	if (status != B_OK)
6823 		return status;
6824 
6825 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6826 		status = B_READ_ONLY_DEVICE;
6827 		goto out;
6828 	}
6829 
6830 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6831 
6832 out:
6833 	put_mount(mount);
6834 	return status;
6835 }
6836 
6837 
6838 #if 0
6839 static status_t
6840 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6841 {
6842 	struct vnode* vnode = descriptor->u.vnode;
6843 
6844 	// ToDo: currently unused!
6845 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6846 	if (!HAS_FS_CALL(vnode, read_index_stat))
6847 		return B_NOT_SUPPORTED;
6848 
6849 	return B_NOT_SUPPORTED;
6850 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6851 }
6852 
6853 
6854 static void
6855 index_free_fd(struct file_descriptor* descriptor)
6856 {
6857 	struct vnode* vnode = descriptor->u.vnode;
6858 
6859 	if (vnode != NULL) {
6860 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6861 		put_vnode(vnode);
6862 	}
6863 }
6864 #endif
6865 
6866 
6867 static status_t
6868 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6869 	bool kernel)
6870 {
6871 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6872 		mountID, name, kernel));
6873 
6874 	struct fs_mount* mount;
6875 	status_t status = get_mount(mountID, &mount);
6876 	if (status != B_OK)
6877 		return status;
6878 
6879 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6880 		status = B_NOT_SUPPORTED;
6881 		goto out;
6882 	}
6883 
6884 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6885 
6886 out:
6887 	put_mount(mount);
6888 	return status;
6889 }
6890 
6891 
6892 static status_t
6893 index_remove(dev_t mountID, const char* name, bool kernel)
6894 {
6895 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6896 		name, kernel));
6897 
6898 	struct fs_mount* mount;
6899 	status_t status = get_mount(mountID, &mount);
6900 	if (status != B_OK)
6901 		return status;
6902 
6903 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6904 		status = B_READ_ONLY_DEVICE;
6905 		goto out;
6906 	}
6907 
6908 	status = FS_MOUNT_CALL(mount, remove_index, name);
6909 
6910 out:
6911 	put_mount(mount);
6912 	return status;
6913 }
6914 
6915 
6916 /*!	TODO: the query FS API is still pretty much the same as in R5.
6917 		It would be nice if queries got some more kernel support;
6918 		for example, query parsing should be moved into the
6919 		kernel.
6920 */
6921 static int
6922 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6923 	int32 token, bool kernel)
6924 {
6925 	struct fs_mount* mount;
6926 	void* cookie;
6927 
6928 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6929 		query, kernel));
6930 
6931 	status_t status = get_mount(device, &mount);
6932 	if (status != B_OK)
6933 		return status;
6934 
6935 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6936 		status = B_NOT_SUPPORTED;
6937 		goto error;
6938 	}
6939 
6940 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6941 		&cookie);
6942 	if (status != B_OK)
6943 		goto error;
6944 
6945 	// get fd for the query
6946 	int fd;
6947 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6948 	if (fd >= 0)
6949 		return fd;
6950 
6951 	status = fd;
6952 
6953 	// something went wrong
6954 	FS_MOUNT_CALL(mount, close_query, cookie);
6955 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6956 
6957 error:
6958 	put_mount(mount);
6959 	return status;
6960 }
6961 
6962 
6963 static status_t
6964 query_close(struct file_descriptor* descriptor)
6965 {
6966 	struct fs_mount* mount = descriptor->u.mount;
6967 
6968 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6969 
6970 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6971 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6972 
6973 	return B_OK;
6974 }
6975 
6976 
6977 static void
6978 query_free_fd(struct file_descriptor* descriptor)
6979 {
6980 	struct fs_mount* mount = descriptor->u.mount;
6981 
6982 	if (mount != NULL) {
6983 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6984 		put_mount(mount);
6985 	}
6986 }
6987 
6988 
6989 static status_t
6990 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6991 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6992 {
6993 	struct fs_mount* mount = descriptor->u.mount;
6994 
6995 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6996 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6997 			bufferSize, _count);
6998 	}
6999 
7000 	return B_NOT_SUPPORTED;
7001 }
7002 
7003 
7004 static status_t
7005 query_rewind(struct file_descriptor* descriptor)
7006 {
7007 	struct fs_mount* mount = descriptor->u.mount;
7008 
7009 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7010 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7011 
7012 	return B_NOT_SUPPORTED;
7013 }
7014 
7015 
7016 //	#pragma mark - General File System functions
7017 
7018 
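/*!	Mounts the file system \a fsName at the directory given by \a path.
	If \a device is given and the FS is not virtual, the corresponding
	partition is looked up via the disk device manager, creating a file
	device for image files, if necessary; a NULL \a fsName is then taken
	from the partition's disk system. All FS layer volumes are created and
	mounted, the root vnode is looked up, and the covered vnode (if any)
	is connected to it. Returns the ID of the new mount on success.
*/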
7019 static dev_t
7020 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7021 	const char* args, bool kernel)
7022 {
7023 	struct ::fs_mount* mount;
7024 	status_t status = B_OK;
7025 	fs_volume* volume = NULL;
7026 	int32 layer = 0;
7027 
7028 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7029 
7030 	// The path is always safe, we just have to make sure that fsName is
7031 	// at least superficially valid - we can't make any assumptions about
7032 	// args, though. A NULL fsName is OK if a device was given and the FS
7033 	// is not virtual; we'll get the name from the DDM later.
7034 	if (fsName == NULL) {
7035 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7036 			return B_BAD_VALUE;
7037 	} else if (fsName[0] == '\0')
7038 		return B_BAD_VALUE;
7039 
7040 	RecursiveLocker mountOpLocker(sMountOpLock);
7041 
7042 	// Helper to delete a newly created file device on failure.
7043 	// Not exactly beautiful, but helps to keep the code below cleaner.
7044 	struct FileDeviceDeleter {
7045 		FileDeviceDeleter() : id(-1) {}
7046 		~FileDeviceDeleter()
7047 		{
7048 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7049 		}
7050 
7051 		partition_id id;
7052 	} fileDeviceDeleter;
7053 
7054 	// If the file system is not a "virtual" one, the device argument should
7055 	// point to a real file/device (if given at all).
7056 	// get the partition
7057 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7058 	KPartition* partition = NULL;
7059 	KPath normalizedDevice;
7060 	bool newlyCreatedFileDevice = false;
7061 
7062 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7063 		// normalize the device path
7064 		status = normalizedDevice.SetTo(device, true);
7065 		if (status != B_OK)
7066 			return status;
7067 
7068 		// get a corresponding partition from the DDM
7069 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7070 		if (partition == NULL) {
7071 			// Partition not found: This either means the user supplied
7072 			// an invalid path, or the path refers to an image file. We try
7073 			// to let the DDM create a file device for the path.
7074 			partition_id deviceID = ddm->CreateFileDevice(
7075 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7076 			if (deviceID >= 0) {
7077 				partition = ddm->RegisterPartition(deviceID);
7078 				if (newlyCreatedFileDevice)
7079 					fileDeviceDeleter.id = deviceID;
7080 			}
7081 		}
7082 
7083 		if (!partition) {
7084 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7085 				normalizedDevice.Path()));
7086 			return B_ENTRY_NOT_FOUND;
7087 		}
7088 
7089 		device = normalizedDevice.Path();
7090 			// correct path to file device
7091 	}
7092 	PartitionRegistrar partitionRegistrar(partition, true);
7093 
7094 	// Write lock the partition's device. For the time being, we keep the lock
7095 	// until we're done mounting -- not nice, but it ensures that no one
7096 	// is interfering.
7097 	// TODO: Just mark the partition busy while mounting!
7098 	KDiskDevice* diskDevice = NULL;
7099 	if (partition) {
7100 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7101 		if (!diskDevice) {
7102 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7103 			return B_ERROR;
7104 		}
7105 	}
7106 
7107 	DeviceWriteLocker writeLocker(diskDevice, true);
7108 		// this takes over the write lock acquired before
7109 
7110 	if (partition != NULL) {
7111 		// make sure that the partition is not busy
7112 		if (partition->IsBusy()) {
7113 			TRACE(("fs_mount(): Partition is busy.\n"));
7114 			return B_BUSY;
7115 		}
7116 
7117 		// if no FS name had been supplied, we get it from the partition
7118 		if (fsName == NULL) {
7119 			KDiskSystem* diskSystem = partition->DiskSystem();
7120 			if (!diskSystem) {
7121 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7122 					"recognize it.\n"));
7123 				return B_BAD_VALUE;
7124 			}
7125 
7126 			if (!diskSystem->IsFileSystem()) {
7127 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7128 					"partitioning system.\n"));
7129 				return B_BAD_VALUE;
7130 			}
7131 
7132 			// The disk system name will not change, and the KDiskSystem
7133 			// object will not go away while the disk device is locked (and
7134 			// the partition has a reference to it), so this is safe.
7135 			fsName = diskSystem->Name();
7136 		}
7137 	}
7138 
7139 	mount = new(std::nothrow) (struct ::fs_mount);
7140 	if (mount == NULL)
7141 		return B_NO_MEMORY;
7142 
7143 	mount->device_name = strdup(device);
7144 		// "device" can be NULL
7145 
7146 	status = mount->entry_cache.Init();
7147 	if (status != B_OK)
7148 		goto err1;
7149 
7150 	// initialize structure
7151 	mount->id = sNextMountID++;
7152 	mount->partition = NULL;
7153 	mount->root_vnode = NULL;
7154 	mount->covers_vnode = NULL;
7155 	mount->unmounting = false;
7156 	mount->owns_file_device = false;
7157 	mount->volume = NULL;
7158 
7159 	// build up the volume(s)
7160 	while (true) {
7161 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7162 		if (layerFSName == NULL) {
7163 			if (layer == 0) {
7164 				status = B_NO_MEMORY;
7165 				goto err1;
7166 			}
7167 
7168 			break;
7169 		}
7170 
7171 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7172 		if (volume == NULL) {
7173 			status = B_NO_MEMORY;
7174 			free(layerFSName);
7175 			goto err1;
7176 		}
7177 
7178 		volume->id = mount->id;
7179 		volume->partition = partition != NULL ? partition->ID() : -1;
7180 		volume->layer = layer++;
7181 		volume->private_volume = NULL;
7182 		volume->ops = NULL;
7183 		volume->sub_volume = NULL;
7184 		volume->super_volume = NULL;
7185 		volume->file_system = NULL;
7186 		volume->file_system_name = NULL;
7187 
7188 		volume->file_system_name = get_file_system_name(layerFSName);
7189 		if (volume->file_system_name == NULL) {
7190 			status = B_NO_MEMORY;
7191 			free(layerFSName);
7192 			free(volume);
7193 			goto err1;
7194 		}
7195 
7196 		volume->file_system = get_file_system(layerFSName);
7197 		if (volume->file_system == NULL) {
7198 			status = B_DEVICE_NOT_FOUND;
7199 			free(layerFSName);
7200 			free(volume->file_system_name);
7201 			free(volume);
7202 			goto err1;
7203 		}
7204 
7205 		if (mount->volume == NULL)
7206 			mount->volume = volume;
7207 		else {
7208 			volume->super_volume = mount->volume;
7209 			mount->volume->sub_volume = volume;
7210 			mount->volume = volume;
7211 		}
7212 	}
7213 
7214 	// insert mount struct into list before we call FS's mount() function
7215 	// so that vnodes can be created for this mount
7216 	mutex_lock(&sMountMutex);
7217 	hash_insert(sMountsTable, mount);
7218 	mutex_unlock(&sMountMutex);
7219 
7220 	ino_t rootID;
7221 
7222 	if (!sRoot) {
7223 		// we haven't mounted anything yet
7224 		if (strcmp(path, "/") != 0) {
7225 			status = B_ERROR;
7226 			goto err2;
7227 		}
7228 
7229 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7230 			args, &rootID);
7231 		if (status != 0)
7232 			goto err2;
7233 	} else {
7234 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7235 		if (status != B_OK)
7236 			goto err2;
7237 
7238 		// make sure covers_vnode is a directory
7239 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7240 			status = B_NOT_A_DIRECTORY;
7241 			goto err3;
7242 		}
7243 
7244 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7245 			// this is already a mount point
7246 			status = B_BUSY;
7247 			goto err3;
7248 		}
7249 
7250 		// mount it/them
7251 		fs_volume* volume = mount->volume;
7252 		while (volume) {
7253 			status = volume->file_system->mount(volume, device, flags, args,
7254 				&rootID);
7255 			if (status != B_OK) {
7256 				if (volume->sub_volume)
7257 					goto err4;
7258 				goto err3;
7259 			}
7260 
7261 			volume = volume->super_volume;
7262 		}
7263 
7264 		volume = mount->volume;
7265 		while (volume) {
7266 			if (volume->ops->all_layers_mounted != NULL)
7267 				volume->ops->all_layers_mounted(volume);
7268 			volume = volume->super_volume;
7269 		}
7270 	}
7271 
7272 	// the root node is supposed to be owned by the file system - it must
7273 	// exist at this point
7274 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7275 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7276 		panic("fs_mount: file system does not own its root node!\n");
7277 		status = B_ERROR;
7278 		goto err4;
7279 	}
7280 
7281 	// No race here, since fs_mount() is the only function changing
7282 	// covers_vnode (and holds sMountOpLock at that time).
7283 	rw_lock_write_lock(&sVnodeLock);
7284 	if (mount->covers_vnode)
7285 		mount->covers_vnode->covered_by = mount->root_vnode;
7286 	rw_lock_write_unlock(&sVnodeLock);
7287 
7288 	if (!sRoot) {
7289 		sRoot = mount->root_vnode;
7290 		mutex_lock(&sIOContextRootLock);
7291 		get_current_io_context(true)->root = sRoot;
7292 		mutex_unlock(&sIOContextRootLock);
7293 		inc_vnode_ref_count(sRoot);
7294 	}
7295 
7296 	// supply the partition (if any) with the mount cookie and mark it mounted
7297 	if (partition) {
7298 		partition->SetMountCookie(mount->volume->private_volume);
7299 		partition->SetVolumeID(mount->id);
7300 
7301 		// keep a partition reference as long as the partition is mounted
7302 		partitionRegistrar.Detach();
7303 		mount->partition = partition;
7304 		mount->owns_file_device = newlyCreatedFileDevice;
7305 		fileDeviceDeleter.id = -1;
7306 	}
7307 
7308 	notify_mount(mount->id,
7309 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7310 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7311 
7312 	return mount->id;
7313 
7314 err4:
7315 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7316 err3:
7317 	if (mount->covers_vnode != NULL)
7318 		put_vnode(mount->covers_vnode);
7319 err2:
7320 	mutex_lock(&sMountMutex);
7321 	hash_remove(sMountsTable, mount);
7322 	mutex_unlock(&sMountMutex);
7323 err1:
7324 	delete mount;
7325 
7326 	return status;
7327 }
7328 
7329 
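/*!	Unmounts the volume given by \a path or \a mountID. While any of the
	volume's vnodes are still referenced, the function fails with
	\c B_BUSY, unless \c B_FORCE_UNMOUNT is given, in which case all open
	file descriptors on the mount are disconnected first. On success the
	FS's \c unmount hook is called and the mount structure is deleted.
*/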
7330 static status_t
7331 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7332 {
7333 	struct fs_mount* mount;
7334 	status_t err;
7335 
7336 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7337 		kernel));
7338 
7339 	struct vnode* pathVnode = NULL;
7340 	if (path != NULL) {
7341 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7342 		if (err != B_OK)
7343 			return B_ENTRY_NOT_FOUND;
7344 	}
7345 
7346 	RecursiveLocker mountOpLocker(sMountOpLock);
7347 
7348 	// this lock is not strictly necessary, but is taken in the KDEBUG
7349 	// case to keep the ASSERT in find_mount() working.
7350 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7351 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7352 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7353 	if (mount == NULL) {
7354 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7355 			pathVnode);
7356 	}
7357 
7358 	if (path != NULL) {
7359 		put_vnode(pathVnode);
7360 
7361 		if (mount->root_vnode != pathVnode) {
7362 			// not a mount point
7363 			return B_BAD_VALUE;
7364 		}
7365 	}
7366 
7367 	// if the volume is associated with a partition, lock the device of the
7368 	// partition as long as we are unmounting
7369 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7370 	KPartition* partition = mount->partition;
7371 	KDiskDevice* diskDevice = NULL;
7372 	if (partition != NULL) {
7373 		if (partition->Device() == NULL) {
7374 			dprintf("fs_unmount(): There is no device!\n");
7375 			return B_ERROR;
7376 		}
7377 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7378 		if (!diskDevice) {
7379 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7380 			return B_ERROR;
7381 		}
7382 	}
7383 	DeviceWriteLocker writeLocker(diskDevice, true);
7384 
7385 	// make sure that the partition is not busy
7386 	if (partition != NULL) {
7387 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7388 			TRACE(("fs_unmount(): Partition is busy.\n"));
7389 			return B_BUSY;
7390 		}
7391 	}
7392 
7393 	// grab the vnode master mutex to keep someone from creating
7394 	// a vnode while we're figuring out if we can continue
7395 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7396 
7397 	bool disconnectedDescriptors = false;
7398 
7399 	while (true) {
7400 		bool busy = false;
7401 
7402 		// cycle through the list of vnodes associated with this mount and
7403 		// make sure all of them are not busy or have refs on them
7404 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7405 		while (struct vnode* vnode = iterator.Next()) {
7406 			// The root vnode ref_count needs to be 1 here (the mount has a
7407 			// reference).
7408 			if (vnode->IsBusy()
7409 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7410 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7411 				// there are still vnodes in use on this mount, so we cannot
7412 				// unmount yet
7413 				busy = true;
7414 				break;
7415 			}
7416 		}
7417 
7418 		if (!busy)
7419 			break;
7420 
7421 		if ((flags & B_FORCE_UNMOUNT) == 0)
7422 			return B_BUSY;
7423 
7424 		if (disconnectedDescriptors) {
7425 			// wait a bit until the last access is finished, and then try again
7426 			vnodesWriteLocker.Unlock();
7427 			snooze(100000);
7428 			// TODO: if there is some kind of bug that prevents the ref counts
7429 			// from getting back to zero, this will fall into an endless loop...
7430 			vnodesWriteLocker.Lock();
7431 			continue;
7432 		}
7433 
7434 		// the file system is still busy - but we're forced to unmount it,
7435 		// so let's disconnect all open file descriptors
7436 
7437 		mount->unmounting = true;
7438 			// prevent new vnodes from being created
7439 
7440 		vnodesWriteLocker.Unlock();
7441 
7442 		disconnect_mount_or_vnode_fds(mount, NULL);
7443 		disconnectedDescriptors = true;
7444 
7445 		vnodesWriteLocker.Lock();
7446 	}
7447 
7448 	// we can safely continue, mark all of the vnodes busy and this mount
7449 	// structure in unmounting state
7450 	mount->unmounting = true;
7451 
7452 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7453 	while (struct vnode* vnode = iterator.Next()) {
7454 		vnode->SetBusy(true);
7455 		vnode_to_be_freed(vnode);
7456 	}
7457 
7458 	// The ref_count of the root node is 1 at this point, see above why this is
7459 	mount->root_vnode->ref_count--;
7460 	vnode_to_be_freed(mount->root_vnode);
7461 
7462 	mount->covers_vnode->covered_by = NULL;
7463 
7464 	vnodesWriteLocker.Unlock();
7465 
7466 	put_vnode(mount->covers_vnode);
7467 
7468 	// Free all vnodes associated with this mount.
7469 	// They will be removed from the mount list by free_vnode(), so
7470 	// we don't have to do that ourselves.
7471 	while (struct vnode* vnode = mount->vnodes.Head())
7472 		free_vnode(vnode, false);
7473 
7474 	// remove the mount structure from the hash table
7475 	mutex_lock(&sMountMutex);
7476 	hash_remove(sMountsTable, mount);
7477 	mutex_unlock(&sMountMutex);
7478 
7479 	mountOpLocker.Unlock();
7480 
7481 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7482 	notify_unmount(mount->id);
7483 
7484 	// dereference the partition and mark it unmounted
7485 	if (partition) {
7486 		partition->SetVolumeID(-1);
7487 		partition->SetMountCookie(NULL);
7488 
7489 		if (mount->owns_file_device)
7490 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7491 		partition->Unregister();
7492 	}
7493 
7494 	delete mount;
7495 	return B_OK;
7496 }
7497 
7498 
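/*!	Writes back the modified pages of all file caches of the volume given
	by \a device, using a marker vnode to safely resume iteration of the
	mount's vnode list between locking cycles, and afterwards calls the
	FS's \c sync hook, if any.
*/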
7499 static status_t
7500 fs_sync(dev_t device)
7501 {
7502 	struct fs_mount* mount;
7503 	status_t status = get_mount(device, &mount);
7504 	if (status != B_OK)
7505 		return status;
7506 
7507 	struct vnode marker;
7508 	memset(&marker, 0, sizeof(marker));
7509 	marker.SetBusy(true);
7510 	marker.SetRemoved(true);
7511 
7512 	// First, synchronize all file caches
7513 
7514 	while (true) {
7515 		WriteLocker locker(sVnodeLock);
7516 			// Note: That's the easy way, which is probably OK for sync(),
7517 			// since it's a relatively rare call and doesn't need to allow for
7518 			// a lot of concurrency. Using a read lock would be possible, but
7519 			// also more involved, since we'd have to lock the individual nodes
7520 			// and take care of the locking order, which we might not want to
7521 			// do while holding fs_mount::rlock.
7522 
7523 		// synchronize access to vnode list
7524 		recursive_lock_lock(&mount->rlock);
7525 
7526 		struct vnode* vnode;
7527 		if (!marker.IsRemoved()) {
7528 			vnode = mount->vnodes.GetNext(&marker);
7529 			mount->vnodes.Remove(&marker);
7530 			marker.SetRemoved(true);
7531 		} else
7532 			vnode = mount->vnodes.First();
7533 
7534 		while (vnode != NULL && (vnode->cache == NULL
7535 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7536 			// TODO: we could track writes (and writable mapped vnodes)
7537 			//	and have a simple flag that we could test for here
7538 			vnode = mount->vnodes.GetNext(vnode);
7539 		}
7540 
7541 		if (vnode != NULL) {
7542 			// insert marker vnode again
7543 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7544 			marker.SetRemoved(false);
7545 		}
7546 
7547 		recursive_lock_unlock(&mount->rlock);
7548 
7549 		if (vnode == NULL)
7550 			break;
7551 
7552 		vnode = lookup_vnode(mount->id, vnode->id);
7553 		if (vnode == NULL || vnode->IsBusy())
7554 			continue;
7555 
7556 		if (vnode->ref_count == 0) {
7557 			// this vnode has been unused before
7558 			vnode_used(vnode);
7559 		}
7560 		inc_vnode_ref_count(vnode);
7561 
7562 		locker.Unlock();
7563 
7564 		if (vnode->cache != NULL && !vnode->IsRemoved())
7565 			vnode->cache->WriteModified();
7566 
7567 		put_vnode(vnode);
7568 	}
7569 
7570 	// And then, let the file systems do their synchronizing work
7571 
7572 	if (HAS_FS_MOUNT_CALL(mount, sync))
7573 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7574 
7575 	put_mount(mount);
7576 	return status;
7577 }
7578 
7579 
7580 static status_t
7581 fs_read_info(dev_t device, struct fs_info* info)
7582 {
7583 	struct fs_mount* mount;
7584 	status_t status = get_mount(device, &mount);
7585 	if (status != B_OK)
7586 		return status;
7587 
7588 	memset(info, 0, sizeof(struct fs_info));
7589 
7590 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7591 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7592 
7593 	// fill in info the file system doesn't (have to) know about
7594 	if (status == B_OK) {
7595 		info->dev = mount->id;
7596 		info->root = mount->root_vnode->id;
7597 
7598 		fs_volume* volume = mount->volume;
7599 		while (volume->super_volume != NULL)
7600 			volume = volume->super_volume;
7601 
7602 		strlcpy(info->fsh_name, volume->file_system_name,
7603 			sizeof(info->fsh_name));
7604 		if (mount->device_name != NULL) {
7605 			strlcpy(info->device_name, mount->device_name,
7606 				sizeof(info->device_name));
7607 		}
7608 	}
7609 
7610 	// even if the call is not supported by the file system, we still
7611 	// return the parts that we filled in ourselves
7612 
7613 	put_mount(mount);
7614 	return status;
7615 }
7616 
7617 
7618 static status_t
7619 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7620 {
7621 	struct fs_mount* mount;
7622 	status_t status = get_mount(device, &mount);
7623 	if (status != B_OK)
7624 		return status;
7625 
7626 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7627 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7628 	else
7629 		status = B_READ_ONLY_DEVICE;
7630 
7631 	put_mount(mount);
7632 	return status;
7633 }
7634 
7635 
7636 static dev_t
7637 fs_next_device(int32* _cookie)
7638 {
7639 	struct fs_mount* mount = NULL;
7640 	dev_t device = *_cookie;
7641 
7642 	mutex_lock(&sMountMutex);
7643 
7644 	// Since device IDs are assigned sequentially, this algorithm works
7645 	// well enough. It makes sure that the device list returned is sorted,
7646 	// and that no device is skipped when an already visited device has
7647 	// been unmounted.
7648 
7649 	while (device < sNextMountID) {
7650 		mount = find_mount(device++);
7651 		if (mount != NULL && mount->volume->private_volume != NULL)
7652 			break;
7653 	}
7654 
7655 	*_cookie = device;
7656 
7657 	if (mount != NULL)
7658 		device = mount->id;
7659 	else
7660 		device = B_BAD_VALUE;
7661 
7662 	mutex_unlock(&sMountMutex);
7663 
7664 	return device;
7665 }
7666 
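// A minimal iteration sketch (this is the same pattern _kern_sync() uses
// via next_dev()):
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0)
//		; // visit "device"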
7667 
7668 ssize_t
7669 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7670 	void *buffer, size_t readBytes)
7671 {
7672 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7673 	if (attrFD < 0)
7674 		return attrFD;
7675 
7676 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7677 
7678 	_kern_close(attrFD);
7679 
7680 	return bytesRead;
7681 }
7682 
7683 
7684 static status_t
7685 get_cwd(char* buffer, size_t size, bool kernel)
7686 {
7687 	// Get current working directory from io context
7688 	struct io_context* context = get_current_io_context(kernel);
7689 	status_t status;
7690 
7691 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
7692 
7693 	mutex_lock(&context->io_mutex);
7694 
7695 	struct vnode* vnode = context->cwd;
7696 	if (vnode)
7697 		inc_vnode_ref_count(vnode);
7698 
7699 	mutex_unlock(&context->io_mutex);
7700 
7701 	if (vnode) {
7702 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7703 		put_vnode(vnode);
7704 	} else
7705 		status = B_ERROR;
7706 
7707 	return status;
7708 }
7709 
7710 
7711 static status_t
7712 set_cwd(int fd, char* path, bool kernel)
7713 {
7714 	struct io_context* context;
7715 	struct vnode* vnode = NULL;
7716 	struct vnode* oldDirectory;
7717 	status_t status;
7718 
7719 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7720 
7721 	// Get vnode for passed path, and bail if it failed
7722 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7723 	if (status < 0)
7724 		return status;
7725 
7726 	if (!S_ISDIR(vnode->Type())) {
7727 		// nope, can't cwd to here
7728 		status = B_NOT_A_DIRECTORY;
7729 		goto err;
7730 	}
7731 
7732 	// Get current io context and lock
7733 	context = get_current_io_context(kernel);
7734 	mutex_lock(&context->io_mutex);
7735 
7736 	// save the old current working directory first
7737 	oldDirectory = context->cwd;
7738 	context->cwd = vnode;
7739 
7740 	mutex_unlock(&context->io_mutex);
7741 
7742 	if (oldDirectory)
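	// Only release the reference to the previous cwd after dropping the
	// mutex; put_vnode() may trigger file system activity that we don't
	// want to perform while holding it.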
7743 		put_vnode(oldDirectory);
7744 
7745 	return B_NO_ERROR;
7746 
7747 err:
7748 	put_vnode(vnode);
7749 	return status;
7750 }
7751 
7752 
7753 //	#pragma mark - kernel mirrored syscalls
7754 
7755 
7756 dev_t
7757 _kern_mount(const char* path, const char* device, const char* fsName,
7758 	uint32 flags, const char* args, size_t argsLength)
7759 {
7760 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7761 	if (pathBuffer.InitCheck() != B_OK)
7762 		return B_NO_MEMORY;
7763 
7764 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7765 }
7766 
7767 
7768 status_t
7769 _kern_unmount(const char* path, uint32 flags)
7770 {
7771 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7772 	if (pathBuffer.InitCheck() != B_OK)
7773 		return B_NO_MEMORY;
7774 
7775 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7776 }
7777 
7778 
7779 status_t
7780 _kern_read_fs_info(dev_t device, struct fs_info* info)
7781 {
7782 	if (info == NULL)
7783 		return B_BAD_VALUE;
7784 
7785 	return fs_read_info(device, info);
7786 }
7787 
7788 
7789 status_t
7790 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7791 {
7792 	if (info == NULL)
7793 		return B_BAD_VALUE;
7794 
7795 	return fs_write_info(device, info, mask);
7796 }
7797 
7798 
7799 status_t
7800 _kern_sync(void)
7801 {
7802 	// Note: _kern_sync() is also called from _user_sync()
7803 	int32 cookie = 0;
7804 	dev_t device;
7805 	while ((device = next_dev(&cookie)) >= 0) {
7806 		status_t status = fs_sync(device);
7807 		if (status != B_OK && status != B_BAD_VALUE) {
7808 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7809 				strerror(status));
7810 		}
7811 	}
7812 
7813 	return B_OK;
7814 }
7815 
7816 
7817 dev_t
7818 _kern_next_device(int32* _cookie)
7819 {
7820 	return fs_next_device(_cookie);
7821 }
7822 
7823 
7824 status_t
7825 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7826 	size_t infoSize)
7827 {
7828 	if (infoSize != sizeof(fd_info))
7829 		return B_BAD_VALUE;
7830 
7831 	struct io_context* context = NULL;
7832 	Team* team = NULL;
7833 
7834 	cpu_status state = disable_interrupts();
7835 	GRAB_TEAM_LOCK();
7836 
7837 	bool contextLocked = false;
7838 	team = team_get_team_struct_locked(teamID);
7839 	if (team) {
7840 		// We cannot lock the IO context while holding the team lock, nor can
7841 		// we just drop the team lock, since it might be deleted in the
7842 		// meantime. team_remove_team() acquires the thread lock when removing
7843 		// the team from the team hash table, though. Hence we switch to the
7844 		// thread lock and use mutex_lock_threads_locked().
7845 		context = (io_context*)team->io_context;
7846 
7847 		GRAB_THREAD_LOCK();
7848 		RELEASE_TEAM_LOCK();
7849 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7850 		RELEASE_THREAD_LOCK();
7851 	} else
7852 		RELEASE_TEAM_LOCK();
7853 
7854 	restore_interrupts(state);
7855 
7856 	if (!contextLocked) {
7857 		// team doesn't exist or seems to be gone
7858 		return B_BAD_TEAM_ID;
7859 	}
7860 
7861 	// the team cannot be deleted completely while we're owning its
7862 	// io_context mutex, so we can safely play with it now
7863 
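	// The cookie is simply the index of the next FD slot to inspect; on
	// success it is advanced past the returned slot, so repeated calls
	// enumerate the entire FD table.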
7864 	uint32 slot = *_cookie;
7865 
7866 	struct file_descriptor* descriptor;
7867 	while (slot < context->table_size
7868 		&& (descriptor = context->fds[slot]) == NULL) {
7869 		slot++;
7870 	}
7871 
7872 	if (slot >= context->table_size) {
7873 		mutex_unlock(&context->io_mutex);
7874 		return B_ENTRY_NOT_FOUND;
7875 	}
7876 
7877 	info->number = slot;
7878 	info->open_mode = descriptor->open_mode;
7879 
7880 	struct vnode* vnode = fd_vnode(descriptor);
7881 	if (vnode != NULL) {
7882 		info->device = vnode->device;
7883 		info->node = vnode->id;
7884 	} else if (descriptor->u.mount != NULL) {
7885 		info->device = descriptor->u.mount->id;
7886 		info->node = -1;
7887 	}
7888 
7889 	mutex_unlock(&context->io_mutex);
7890 
7891 	*_cookie = slot + 1;
7892 	return B_OK;
7893 }
7894 
7895 
7896 int
7897 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7898 	int perms)
7899 {
7900 	if ((openMode & O_CREAT) != 0) {
7901 		return file_create_entry_ref(device, inode, name, openMode, perms,
7902 			true);
7903 	}
7904 
7905 	return file_open_entry_ref(device, inode, name, openMode, true);
7906 }
7907 
7908 
7909 /*!	\brief Opens a node specified by a FD + path pair.
7910 
7911 	At least one of \a fd and \a path must be specified.
7912 	If only \a fd is given, the function opens the node identified by this
7913 	FD. If only a path is given, this path is opened. If both are given and
7914 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7915 	of the directory (!) identified by \a fd.
7916 
7917 	\param fd The FD. May be < 0.
7918 	\param path The absolute or relative path. May be \c NULL.
7919 	\param openMode The open mode.
7920 	\return A FD referring to the newly opened node, or an error code,
7921 			if an error occurs.
7922 */
7923 int
7924 _kern_open(int fd, const char* path, int openMode, int perms)
7925 {
7926 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7927 	if (pathBuffer.InitCheck() != B_OK)
7928 		return B_NO_MEMORY;
7929 
7930 	if (openMode & O_CREAT)
7931 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7932 
7933 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7934 }
7935 
7936 
7937 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7938 
7939 	The supplied name may be \c NULL, in which case directory identified
7940 	by \a device and \a inode will be opened. Otherwise \a device and
7941 	\a inode identify the parent directory of the directory to be opened
7942 	and \a name its entry name.
7943 
7944 	\param device If \a name is specified the ID of the device the parent
7945 		   directory of the directory to be opened resides on, otherwise
7946 		   the device of the directory itself.
7947 	\param inode If \a name is specified the node ID of the parent
7948 		   directory of the directory to be opened, otherwise node ID of the
7949 		   directory itself.
7950 	\param name The entry name of the directory to be opened. If \c NULL,
7951 		   the \a device + \a inode pair identify the node to be opened.
7952 	\return The FD of the newly opened directory or an error code, if
7953 			something went wrong.
7954 */
7955 int
7956 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7957 {
7958 	return dir_open_entry_ref(device, inode, name, true);
7959 }
7960 
7961 
7962 /*!	\brief Opens a directory specified by a FD + path pair.
7963 
7964 	At least one of \a fd and \a path must be specified.
7965 	If only \a fd is given, the function opens the directory identified by this
7966 	FD. If only a path is given, this path is opened. If both are given and
7967 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7968 	of the directory (!) identified by \a fd.
7969 
7970 	\param fd The FD. May be < 0.
7971 	\param path The absolute or relative path. May be \c NULL.
7972 	\return A FD referring to the newly opened directory, or an error code,
7973 			if an error occurs.
7974 */
7975 int
7976 _kern_open_dir(int fd, const char* path)
7977 {
7978 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7979 	if (pathBuffer.InitCheck() != B_OK)
7980 		return B_NO_MEMORY;
7981 
7982 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7983 }
7984 
7985 
7986 status_t
7987 _kern_fcntl(int fd, int op, uint32 argument)
7988 {
7989 	return common_fcntl(fd, op, argument, true);
7990 }
7991 
7992 
7993 status_t
7994 _kern_fsync(int fd)
7995 {
7996 	return common_sync(fd, true);
7997 }
7998 
7999 
8000 status_t
8001 _kern_lock_node(int fd)
8002 {
8003 	return common_lock_node(fd, true);
8004 }
8005 
8006 
8007 status_t
8008 _kern_unlock_node(int fd)
8009 {
8010 	return common_unlock_node(fd, true);
8011 }
8012 
8013 
8014 status_t
8015 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8016 	int perms)
8017 {
8018 	return dir_create_entry_ref(device, inode, name, perms, true);
8019 }
8020 
8021 
8022 /*!	\brief Creates a directory specified by a FD + path pair.
8023 
8024 	\a path must always be specified (it contains the name of the new directory
8025 	at least). If only a path is given, this path identifies the location at
8026 	which the directory shall be created. If both \a fd and \a path are given
8027 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8028 	of the directory (!) identified by \a fd.
8029 
8030 	\param fd The FD. May be < 0.
8031 	\param path The absolute or relative path. Must not be \c NULL.
8032 	\param perms The access permissions the new directory shall have.
8033 	\return \c B_OK, if the directory has been created successfully, another
8034 			error code otherwise.
8035 */
8036 status_t
8037 _kern_create_dir(int fd, const char* path, int perms)
8038 {
8039 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8040 	if (pathBuffer.InitCheck() != B_OK)
8041 		return B_NO_MEMORY;
8042 
8043 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8044 }
8045 
8046 
8047 status_t
8048 _kern_remove_dir(int fd, const char* path)
8049 {
8050 	if (path) {
8051 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8052 		if (pathBuffer.InitCheck() != B_OK)
8053 			return B_NO_MEMORY;
8054 
8055 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8056 	}
8057 
8058 	return dir_remove(fd, NULL, true);
8059 }
8060 
8061 
8062 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8063 
8064 	At least one of \a fd and \a path must be specified.
8065 	If only \a fd is given, the symlink to be read is the node
8066 	identified by this FD. If only a path is given, this path identifies the
8067 	symlink to be read. If both are given and the path is absolute, \a fd is
8068 	ignored; a relative path is reckoned off of the directory (!) identified
8069 	by \a fd.
8070 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8071 	will still be updated to reflect the required buffer size.
8072 
8073 	\param fd The FD. May be < 0.
8074 	\param path The absolute or relative path. May be \c NULL.
8075 	\param buffer The buffer into which the contents of the symlink shall be
8076 		   written.
8077 	\param _bufferSize A pointer to the size of the supplied buffer.
8078 	\return The length of the link on success or an appropriate error code.
8079 */
8080 status_t
8081 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8082 {
8083 	if (path) {
8084 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8085 		if (pathBuffer.InitCheck() != B_OK)
8086 			return B_NO_MEMORY;
8087 
8088 		return common_read_link(fd, pathBuffer.LockBuffer(),
8089 			buffer, _bufferSize, true);
8090 	}
8091 
8092 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8093 }
8094 
8095 
8096 /*!	\brief Creates a symlink specified by a FD + path pair.
8097 
8098 	\a path must always be specified (it contains the name of the new symlink
8099 	at least). If only a path is given, this path identifies the location at
8100 	which the symlink shall be created. If both \a fd and \a path are given and
8101 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8102 	of the directory (!) identified by \a fd.
8103 
8104 	\param fd The FD. May be < 0.
8105 	\param path The path of the symlink to be created. Must not be \c NULL.
	\param toPath The path the new symlink shall point to.
8106 	\param mode The access permissions the new symlink shall have.
8107 	\return \c B_OK, if the symlink has been created successfully, another
8108 			error code otherwise.
8109 */
8110 status_t
8111 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8112 {
8113 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8114 	if (pathBuffer.InitCheck() != B_OK)
8115 		return B_NO_MEMORY;
8116 
8117 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8118 		toPath, mode, true);
8119 }
8120 
8121 
8122 status_t
8123 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8124 	bool traverseLeafLink)
8125 {
8126 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8127 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8128 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8129 		return B_NO_MEMORY;
8130 
8131 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8132 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8133 }
8134 
8135 
8136 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8137 
8138 	\a path must always be specified (it contains at least the name of the entry
8139 	to be deleted). If only a path is given, this path identifies the entry
8140 	directly. If both \a fd and \a path are given and the path is absolute,
8141 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8142 	identified by \a fd.
8143 
8144 	\param fd The FD. May be < 0.
8145 	\param path The absolute or relative path. Must not be \c NULL.
8146 	\return \c B_OK, if the entry has been removed successfully, another
8147 			error code otherwise.
8148 */
8149 status_t
8150 _kern_unlink(int fd, const char* path)
8151 {
8152 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8153 	if (pathBuffer.InitCheck() != B_OK)
8154 		return B_NO_MEMORY;
8155 
8156 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8157 }
8158 
8159 
8160 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8161 		   by another FD + path pair.
8162 
8163 	\a oldPath and \a newPath must always be specified (they contain at least
8164 	the name of the entry). If only a path is given, this path identifies the
8165 	entry directly. If both a FD and a path are given and the path is absolute,
8166 	the FD is ignored; a relative path is reckoned off of the directory (!)
8167 	identified by the respective FD.
8168 
8169 	\param oldFD The FD of the old location. May be < 0.
8170 	\param oldPath The absolute or relative path of the old location. Must not
8171 		   be \c NULL.
8172 	\param newFD The FD of the new location. May be < 0.
8173 	\param newPath The absolute or relative path of the new location. Must not
8174 		   be \c NULL.
8175 	\return \c B_OK, if the entry has been moved successfully, another
8176 			error code otherwise.
8177 */
8178 status_t
8179 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8180 {
8181 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8182 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8183 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8184 		return B_NO_MEMORY;
8185 
8186 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8187 		newFD, newPathBuffer.LockBuffer(), true);
8188 }
8189 
8190 
8191 status_t
8192 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8193 {
8194 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8195 	if (pathBuffer.InitCheck() != B_OK)
8196 		return B_NO_MEMORY;
8197 
8198 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8199 		true);
8200 }
8201 
8202 
8203 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8204 
8205 	If only \a fd is given, the stat operation associated with the type
8206 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8207 	given, this path identifies the entry for whose node to retrieve the
8208 	stat data. If both \a fd and \a path are given and the path is absolute,
8209 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8210 	identified by \a fd and specifies the entry whose stat data shall be
8211 	retrieved.
8212 
8213 	\param fd The FD. May be < 0.
8214 	\param path The absolute or relative path. Must not be \c NULL.
8215 	\param traverseLeafLink If \a path is given, \c true specifies that the
8216 		   function shall not stop at symlinks, but traverse them.
8217 	\param stat The buffer the stat data shall be written into.
8218 	\param statSize The size of the supplied stat buffer.
8219 	\return \c B_OK, if the stat data has been read successfully, another
8220 			error code otherwise.
8221 */
8222 status_t
8223 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8224 	struct stat* stat, size_t statSize)
8225 {
8226 	struct stat completeStat;
8227 	struct stat* originalStat = NULL;
8228 	status_t status;
8229 
8230 	if (statSize > sizeof(struct stat))
8231 		return B_BAD_VALUE;
8232 
8233 	// This supports different stat extensions: if the caller's struct stat
	// is smaller, read into a complete one and copy back only statSize bytes.
8234 	if (statSize < sizeof(struct stat)) {
8235 		originalStat = stat;
8236 		stat = &completeStat;
8237 	}
8238 
8239 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8240 
8241 	if (status == B_OK && originalStat != NULL)
8242 		memcpy(originalStat, stat, statSize);
8243 
8244 	return status;
8245 }
8246 
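// Usage sketch (with a hypothetical, smaller "old_stat" structure): callers
// compiled against an older struct stat still work, since only statSize
// bytes are copied back:
//
//	struct old_stat oldStat;
//		// assumed: sizeof(old_stat) < sizeof(struct stat), same layout prefix
//	status_t error = _kern_read_stat(fd, NULL, false,
//		(struct stat*)&oldStat, sizeof(oldStat));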
8247 
8248 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8249 
8250 	If only \a fd is given, the stat operation associated with the type
8251 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8252 	given, this path identifies the entry for whose node to write the
8253 	stat data. If both \a fd and \a path are given and the path is absolute,
8254 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8255 	identified by \a fd and specifies the entry whose stat data shall be
8256 	written.
8257 
8258 	\param fd The FD. May be < 0.
8259 	\param path The absolute or relative path. Must not be \c NULL.
8260 	\param traverseLeafLink If \a path is given, \c true specifies that the
8261 		   function shall not stop at symlinks, but traverse them.
8262 	\param stat The buffer containing the stat data to be written.
8263 	\param statSize The size of the supplied stat buffer.
8264 	\param statMask A mask specifying which parts of the stat data shall be
8265 		   written.
8266 	\return \c B_OK, if the stat data has been written successfully,
8267 			another error code otherwise.
8268 */
8269 status_t
8270 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8271 	const struct stat* stat, size_t statSize, int statMask)
8272 {
8273 	struct stat completeStat;
8274 
8275 	if (statSize > sizeof(struct stat))
8276 		return B_BAD_VALUE;
8277 
8278 	// This supports different stat extensions: zero the fields a smaller
	// caller structure doesn't provide, then work on a complete struct stat.
8279 	if (statSize < sizeof(struct stat)) {
8280 		memset((uint8*)&completeStat + statSize, 0,
8281 			sizeof(struct stat) - statSize);
8282 		memcpy(&completeStat, stat, statSize);
8283 		stat = &completeStat;
8284 	}
8285 
8286 	status_t status;
8287 
8288 	if (path) {
8289 		// path given: write the stat of the node referred to by (fd, path)
8290 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8291 		if (pathBuffer.InitCheck() != B_OK)
8292 			return B_NO_MEMORY;
8293 
8294 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8295 			traverseLeafLink, stat, statMask, true);
8296 	} else {
8297 		// no path given: get the FD and use the FD operation
8298 		struct file_descriptor* descriptor
8299 			= get_fd(get_current_io_context(true), fd);
8300 		if (descriptor == NULL)
8301 			return B_FILE_ERROR;
8302 
8303 		if (descriptor->ops->fd_write_stat)
8304 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8305 		else
8306 			status = B_NOT_SUPPORTED;
8307 
8308 		put_fd(descriptor);
8309 	}
8310 
8311 	return status;
8312 }
8313 
8314 
8315 int
8316 _kern_open_attr_dir(int fd, const char* path)
8317 {
8318 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8319 	if (pathBuffer.InitCheck() != B_OK)
8320 		return B_NO_MEMORY;
8321 
8322 	if (path != NULL)
8323 		pathBuffer.SetTo(path);
8324 
8325 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8326 }
8327 
8328 
8329 int
8330 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8331 	int openMode)
8332 {
8333 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8334 	if (pathBuffer.InitCheck() != B_OK)
8335 		return B_NO_MEMORY;
8336 
8337 	if ((openMode & O_CREAT) != 0) {
8338 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8339 			true);
8340 	}
8341 
8342 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8343 }
8344 
8345 
8346 status_t
8347 _kern_remove_attr(int fd, const char* name)
8348 {
8349 	return attr_remove(fd, name, true);
8350 }
8351 
8352 
8353 status_t
8354 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8355 	const char* toName)
8356 {
8357 	return attr_rename(fromFile, fromName, toFile, toName, true);
8358 }
8359 
8360 
8361 int
8362 _kern_open_index_dir(dev_t device)
8363 {
8364 	return index_dir_open(device, true);
8365 }
8366 
8367 
8368 status_t
8369 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8370 {
8371 	return index_create(device, name, type, flags, true);
8372 }
8373 
8374 
8375 status_t
8376 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8377 {
8378 	return index_name_read_stat(device, name, stat, true);
8379 }
8380 
8381 
8382 status_t
8383 _kern_remove_index(dev_t device, const char* name)
8384 {
8385 	return index_remove(device, name, true);
8386 }
8387 
8388 
8389 status_t
8390 _kern_getcwd(char* buffer, size_t size)
8391 {
8392 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8393 
8394 	// Call vfs to get current working directory
8395 	return get_cwd(buffer, size, true);
8396 }
8397 
8398 
8399 status_t
8400 _kern_setcwd(int fd, const char* path)
8401 {
8402 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8403 	if (pathBuffer.InitCheck() != B_OK)
8404 		return B_NO_MEMORY;
8405 
8406 	if (path != NULL)
8407 		pathBuffer.SetTo(path);
8408 
8409 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8410 }
8411 
8412 
8413 //	#pragma mark - userland syscalls
8414 
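// The _user_*() variants below share a common pattern: validate userland
// pointers with IS_USER_ADDRESS(), copy arguments into kernel buffers via
// user_strlcpy()/user_memcpy(), and then invoke the shared worker functions
// with kernel == false.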
8415 
8416 dev_t
8417 _user_mount(const char* userPath, const char* userDevice,
8418 	const char* userFileSystem, uint32 flags, const char* userArgs,
8419 	size_t argsLength)
8420 {
8421 	char fileSystem[B_FILE_NAME_LENGTH];
8422 	KPath path, device;
8423 	char* args = NULL;
8424 	status_t status;
8425 
8426 	if (!IS_USER_ADDRESS(userPath)
8427 		|| !IS_USER_ADDRESS(userFileSystem)
8428 		|| !IS_USER_ADDRESS(userDevice))
8429 		return B_BAD_ADDRESS;
8430 
8431 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8432 		return B_NO_MEMORY;
8433 
8434 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8435 		return B_BAD_ADDRESS;
8436 
8437 	if (userFileSystem != NULL
8438 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8439 		return B_BAD_ADDRESS;
8440 
8441 	if (userDevice != NULL
8442 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8443 			< B_OK)
8444 		return B_BAD_ADDRESS;
8445 
8446 	if (userArgs != NULL && argsLength > 0) {
8447 		// this is a safety restriction
8448 		if (argsLength >= 65536)
8449 			return B_NAME_TOO_LONG;
8450 
8451 		args = (char*)malloc(argsLength + 1);
8452 		if (args == NULL)
8453 			return B_NO_MEMORY;
8454 
8455 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8456 			free(args);
8457 			return B_BAD_ADDRESS;
8458 		}
8459 	}
8460 	path.UnlockBuffer();
8461 	device.UnlockBuffer();
8462 
8463 	status = fs_mount(path.LockBuffer(),
8464 		userDevice != NULL ? device.Path() : NULL,
8465 		userFileSystem ? fileSystem : NULL, flags, args, false);
8466 
8467 	free(args);
8468 	return status;
8469 }
8470 
8471 
8472 status_t
8473 _user_unmount(const char* userPath, uint32 flags)
8474 {
8475 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8476 	if (pathBuffer.InitCheck() != B_OK)
8477 		return B_NO_MEMORY;
8478 
8479 	char* path = pathBuffer.LockBuffer();
8480 
8481 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8482 		return B_BAD_ADDRESS;
8483 
8484 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8485 }
8486 
8487 
8488 status_t
8489 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8490 {
8491 	struct fs_info info;
8492 	status_t status;
8493 
8494 	if (userInfo == NULL)
8495 		return B_BAD_VALUE;
8496 
8497 	if (!IS_USER_ADDRESS(userInfo))
8498 		return B_BAD_ADDRESS;
8499 
8500 	status = fs_read_info(device, &info);
8501 	if (status != B_OK)
8502 		return status;
8503 
8504 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8505 		return B_BAD_ADDRESS;
8506 
8507 	return B_OK;
8508 }
8509 
8510 
8511 status_t
8512 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8513 {
8514 	struct fs_info info;
8515 
8516 	if (userInfo == NULL)
8517 		return B_BAD_VALUE;
8518 
8519 	if (!IS_USER_ADDRESS(userInfo)
8520 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8521 		return B_BAD_ADDRESS;
8522 
8523 	return fs_write_info(device, &info, mask);
8524 }
8525 
8526 
8527 dev_t
8528 _user_next_device(int32* _userCookie)
8529 {
8530 	int32 cookie;
8531 	dev_t device;
8532 
8533 	if (!IS_USER_ADDRESS(_userCookie)
8534 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8535 		return B_BAD_ADDRESS;
8536 
8537 	device = fs_next_device(&cookie);
8538 
8539 	if (device >= B_OK) {
8540 		// update user cookie
8541 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8542 			return B_BAD_ADDRESS;
8543 	}
8544 
8545 	return device;
8546 }
8547 
8548 
8549 status_t
8550 _user_sync(void)
8551 {
8552 	return _kern_sync();
8553 }
8554 
8555 
8556 status_t
8557 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8558 	size_t infoSize)
8559 {
8560 	struct fd_info info;
8561 	uint32 cookie;
8562 
8563 	// only root can do this (or should root's group be enough?)
8564 	if (geteuid() != 0)
8565 		return B_NOT_ALLOWED;
8566 
8567 	if (infoSize != sizeof(fd_info))
8568 		return B_BAD_VALUE;
8569 
8570 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8571 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8572 		return B_BAD_ADDRESS;
8573 
8574 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8575 	if (status != B_OK)
8576 		return status;
8577 
8578 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8579 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8580 		return B_BAD_ADDRESS;
8581 
8582 	return status;
8583 }
8584 
8585 
8586 status_t
8587 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8588 	char* userPath, size_t pathLength)
8589 {
8590 	if (!IS_USER_ADDRESS(userPath))
8591 		return B_BAD_ADDRESS;
8592 
8593 	KPath path(B_PATH_NAME_LENGTH + 1);
8594 	if (path.InitCheck() != B_OK)
8595 		return B_NO_MEMORY;
8596 
8597 	// copy the leaf name onto the stack
8598 	char stackLeaf[B_FILE_NAME_LENGTH];
8599 	if (leaf) {
8600 		if (!IS_USER_ADDRESS(leaf))
8601 			return B_BAD_ADDRESS;
8602 
8603 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8604 		if (length < 0)
8605 			return length;
8606 		if (length >= B_FILE_NAME_LENGTH)
8607 			return B_NAME_TOO_LONG;
8608 
8609 		leaf = stackLeaf;
8610 	}
8611 
8612 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8613 		path.LockBuffer(), path.BufferSize());
8614 	if (status != B_OK)
8615 		return status;
8616 
8617 	path.UnlockBuffer();
8618 
8619 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8620 	if (length < 0)
8621 		return length;
8622 	if (length >= (int)pathLength)
8623 		return B_BUFFER_OVERFLOW;
8624 
8625 	return B_OK;
8626 }
8627 
8628 
8629 status_t
8630 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8631 {
8632 	if (userPath == NULL || buffer == NULL)
8633 		return B_BAD_VALUE;
8634 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8635 		return B_BAD_ADDRESS;
8636 
8637 	// copy path from userland
8638 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8639 	if (pathBuffer.InitCheck() != B_OK)
8640 		return B_NO_MEMORY;
8641 	char* path = pathBuffer.LockBuffer();
8642 
8643 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8644 		return B_BAD_ADDRESS;
8645 
8646 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8647 		false);
8648 	if (error != B_OK)
8649 		return error;
8650 
8651 	// copy back to userland
8652 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8653 	if (len < 0)
8654 		return len;
8655 	if (len >= B_PATH_NAME_LENGTH)
8656 		return B_BUFFER_OVERFLOW;
8657 
8658 	return B_OK;
8659 }
8660 
8661 
8662 int
8663 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8664 	int openMode, int perms)
8665 {
8666 	char name[B_FILE_NAME_LENGTH];
8667 
8668 	if (userName == NULL || device < 0 || inode < 0)
8669 		return B_BAD_VALUE;
8670 	if (!IS_USER_ADDRESS(userName)
8671 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8672 		return B_BAD_ADDRESS;
8673 
8674 	if ((openMode & O_CREAT) != 0) {
8675 		return file_create_entry_ref(device, inode, name, openMode, perms,
8676 		 false);
8677 	}
8678 
8679 	return file_open_entry_ref(device, inode, name, openMode, false);
8680 }
8681 
8682 
8683 int
8684 _user_open(int fd, const char* userPath, int openMode, int perms)
8685 {
8686 	KPath path(B_PATH_NAME_LENGTH + 1);
8687 	if (path.InitCheck() != B_OK)
8688 		return B_NO_MEMORY;
8689 
8690 	char* buffer = path.LockBuffer();
8691 
8692 	if (!IS_USER_ADDRESS(userPath)
8693 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8694 		return B_BAD_ADDRESS;
8695 
8696 	if ((openMode & O_CREAT) != 0)
8697 		return file_create(fd, buffer, openMode, perms, false);
8698 
8699 	return file_open(fd, buffer, openMode, false);
8700 }
8701 
8702 
8703 int
8704 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8705 {
8706 	if (userName != NULL) {
8707 		char name[B_FILE_NAME_LENGTH];
8708 
8709 		if (!IS_USER_ADDRESS(userName)
8710 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8711 			return B_BAD_ADDRESS;
8712 
8713 		return dir_open_entry_ref(device, inode, name, false);
8714 	}
8715 	return dir_open_entry_ref(device, inode, NULL, false);
8716 }
8717 
8718 
8719 int
8720 _user_open_dir(int fd, const char* userPath)
8721 {
8722 	if (userPath == NULL)
8723 		return dir_open(fd, NULL, false);
8724 
8725 	KPath path(B_PATH_NAME_LENGTH + 1);
8726 	if (path.InitCheck() != B_OK)
8727 		return B_NO_MEMORY;
8728 
8729 	char* buffer = path.LockBuffer();
8730 
8731 	if (!IS_USER_ADDRESS(userPath)
8732 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8733 		return B_BAD_ADDRESS;
8734 
8735 	return dir_open(fd, buffer, false);
8736 }
8737 
8738 
8739 /*!	\brief Opens a directory's parent directory and returns the entry name
8740 		   of the former.
8741 
8742 	Aside from the fact that it returns the directory's entry name, this
8743 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
8744 	really is equivalent if \a userName is \c NULL.
8745 
8746 	If a name buffer is supplied and the name does not fit the buffer, the
8747 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8748 
8749 	\param fd A FD referring to a directory.
8750 	\param userName Buffer the directory's entry name shall be written into.
8751 		   May be \c NULL.
8752 	\param nameLength Size of the name buffer.
8753 	\return The file descriptor of the opened parent directory, if everything
8754 			went fine, an error code otherwise.
8755 */
8756 int
8757 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8758 {
8759 	bool kernel = false;
8760 
8761 	if (userName && !IS_USER_ADDRESS(userName))
8762 		return B_BAD_ADDRESS;
8763 
8764 	// open the parent dir
8765 	int parentFD = dir_open(fd, (char*)"..", kernel);
8766 	if (parentFD < 0)
8767 		return parentFD;
8768 	FDCloser fdCloser(parentFD, kernel);
8769 
8770 	if (userName) {
8771 		// get the vnodes
8772 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8773 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8774 		VNodePutter parentVNodePutter(parentVNode);
8775 		VNodePutter dirVNodePutter(dirVNode);
8776 		if (!parentVNode || !dirVNode)
8777 			return B_FILE_ERROR;
8778 
8779 		// get the vnode name
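		// A dirent is variable-length; reserve enough stack space for the
		// longest possible entry name.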
8780 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8781 		struct dirent* buffer = (struct dirent*)_buffer;
8782 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8783 			sizeof(_buffer), get_current_io_context(false));
8784 		if (status != B_OK)
8785 			return status;
8786 
8787 		// copy the name to the userland buffer
8788 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8789 		if (len < 0)
8790 			return len;
8791 		if (len >= (int)nameLength)
8792 			return B_BUFFER_OVERFLOW;
8793 	}
8794 
8795 	return fdCloser.Detach();
8796 }
8797 
8798 
8799 status_t
8800 _user_fcntl(int fd, int op, uint32 argument)
8801 {
8802 	status_t status = common_fcntl(fd, op, argument, false);
8803 	if (op == F_SETLKW)
8804 		syscall_restart_handle_post(status);
8805 
8806 	return status;
8807 }
8808 
8809 
8810 status_t
8811 _user_fsync(int fd)
8812 {
8813 	return common_sync(fd, false);
8814 }
8815 
8816 
8817 status_t
8818 _user_flock(int fd, int operation)
8819 {
8820 	FUNCTION(("_user_flock(fd = %d, operation = %d)\n", fd, operation));
8821 
8822 	// Check if the operation is valid
8823 	switch (operation & ~LOCK_NB) {
8824 		case LOCK_UN:
8825 		case LOCK_SH:
8826 		case LOCK_EX:
8827 			break;
8828 
8829 		default:
8830 			return B_BAD_VALUE;
8831 	}
8832 
8833 	struct file_descriptor* descriptor;
8834 	struct vnode* vnode;
8835 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8836 	if (descriptor == NULL)
8837 		return B_FILE_ERROR;
8838 
8839 	if (descriptor->type != FDTYPE_FILE) {
8840 		put_fd(descriptor);
8841 		return B_BAD_VALUE;
8842 	}
8843 
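	// flock() always locks the whole file, so build an advisory lock record
	// covering everything from offset 0 up to the maximum file size.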
8844 	struct flock flock;
8845 	flock.l_start = 0;
8846 	flock.l_len = OFF_MAX;
8847 	flock.l_whence = 0;
8848 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8849 
8850 	status_t status;
8851 	if ((operation & LOCK_UN) != 0)
8852 		status = release_advisory_lock(vnode, &flock);
8853 	else {
8854 		status = acquire_advisory_lock(vnode,
8855 			thread_get_current_thread()->team->session_id, &flock,
8856 			(operation & LOCK_NB) == 0);
8857 	}
8858 
8859 	syscall_restart_handle_post(status);
8860 
8861 	put_fd(descriptor);
8862 	return status;
8863 }
8864 
8865 
8866 status_t
8867 _user_lock_node(int fd)
8868 {
8869 	return common_lock_node(fd, false);
8870 }
8871 
8872 
8873 status_t
8874 _user_unlock_node(int fd)
8875 {
8876 	return common_unlock_node(fd, false);
8877 }
8878 
8879 
8880 status_t
8881 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8882 	int perms)
8883 {
8884 	char name[B_FILE_NAME_LENGTH];
8885 	status_t status;
8886 
8887 	if (!IS_USER_ADDRESS(userName))
8888 		return B_BAD_ADDRESS;
8889 
8890 	status = user_strlcpy(name, userName, sizeof(name));
8891 	if (status < 0)
8892 		return status;
8893 
8894 	return dir_create_entry_ref(device, inode, name, perms, false);
8895 }
8896 
8897 
8898 status_t
8899 _user_create_dir(int fd, const char* userPath, int perms)
8900 {
8901 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8902 	if (pathBuffer.InitCheck() != B_OK)
8903 		return B_NO_MEMORY;
8904 
8905 	char* path = pathBuffer.LockBuffer();
8906 
8907 	if (!IS_USER_ADDRESS(userPath)
8908 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8909 		return B_BAD_ADDRESS;
8910 
8911 	return dir_create(fd, path, perms, false);
8912 }
8913 
8914 
8915 status_t
8916 _user_remove_dir(int fd, const char* userPath)
8917 {
8918 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8919 	if (pathBuffer.InitCheck() != B_OK)
8920 		return B_NO_MEMORY;
8921 
8922 	char* path = pathBuffer.LockBuffer();
8923 
8924 	if (userPath != NULL) {
8925 		if (!IS_USER_ADDRESS(userPath)
8926 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8927 			return B_BAD_ADDRESS;
8928 	}
8929 
8930 	return dir_remove(fd, userPath ? path : NULL, false);
8931 }
8932 
8933 
8934 status_t
8935 _user_read_link(int fd, const char* userPath, char* userBuffer,
8936 	size_t* userBufferSize)
8937 {
8938 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8939 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8940 		return B_NO_MEMORY;
8941 
8942 	size_t bufferSize;
8943 
8944 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8945 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8946 		return B_BAD_ADDRESS;
8947 
8948 	char* path = pathBuffer.LockBuffer();
8949 	char* buffer = linkBuffer.LockBuffer();
8950 
8951 	if (userPath) {
8952 		if (!IS_USER_ADDRESS(userPath)
8953 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8954 			return B_BAD_ADDRESS;
8955 
8956 		if (bufferSize > B_PATH_NAME_LENGTH)
8957 			bufferSize = B_PATH_NAME_LENGTH;
8958 	}
8959 
8960 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8961 		&bufferSize, false);
8962 
8963 	// we also update the bufferSize in case of errors
8964 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8965 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8966 		return B_BAD_ADDRESS;
8967 
8968 	if (status != B_OK)
8969 		return status;
8970 
8971 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8972 		return B_BAD_ADDRESS;
8973 
8974 	return B_OK;
8975 }
8976 
8977 
8978 status_t
8979 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8980 	int mode)
8981 {
8982 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8983 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8984 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8985 		return B_NO_MEMORY;
8986 
8987 	char* path = pathBuffer.LockBuffer();
8988 	char* toPath = toPathBuffer.LockBuffer();
8989 
8990 	if (!IS_USER_ADDRESS(userPath)
8991 		|| !IS_USER_ADDRESS(userToPath)
8992 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8993 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8994 		return B_BAD_ADDRESS;
8995 
8996 	return common_create_symlink(fd, path, toPath, mode, false);
8997 }
8998 
8999 
9000 status_t
9001 _user_create_link(int pathFD, const char* userPath, int toFD,
9002 	const char* userToPath, bool traverseLeafLink)
9003 {
9004 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9005 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9006 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9007 		return B_NO_MEMORY;
9008 
9009 	char* path = pathBuffer.LockBuffer();
9010 	char* toPath = toPathBuffer.LockBuffer();
9011 
9012 	if (!IS_USER_ADDRESS(userPath)
9013 		|| !IS_USER_ADDRESS(userToPath)
9014 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9015 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9016 		return B_BAD_ADDRESS;
9017 
9018 	status_t status = check_path(toPath);
9019 	if (status != B_OK)
9020 		return status;
9021 
9022 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9023 		false);
9024 }
9025 
9026 
9027 status_t
9028 _user_unlink(int fd, const char* userPath)
9029 {
9030 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9031 	if (pathBuffer.InitCheck() != B_OK)
9032 		return B_NO_MEMORY;
9033 
9034 	char* path = pathBuffer.LockBuffer();
9035 
9036 	if (!IS_USER_ADDRESS(userPath)
9037 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9038 		return B_BAD_ADDRESS;
9039 
9040 	return common_unlink(fd, path, false);
9041 }
9042 
9043 
9044 status_t
9045 _user_rename(int oldFD, const char* userOldPath, int newFD,
9046 	const char* userNewPath)
9047 {
9048 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9049 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9050 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9051 		return B_NO_MEMORY;
9052 
9053 	char* oldPath = oldPathBuffer.LockBuffer();
9054 	char* newPath = newPathBuffer.LockBuffer();
9055 
9056 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9057 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9058 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9059 		return B_BAD_ADDRESS;
9060 
9061 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9062 }
9063 
9064 
9065 status_t
9066 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9067 {
9068 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9069 	if (pathBuffer.InitCheck() != B_OK)
9070 		return B_NO_MEMORY;
9071 
9072 	char* path = pathBuffer.LockBuffer();
9073 
9074 	if (!IS_USER_ADDRESS(userPath)
9075 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9076 		return B_BAD_ADDRESS;
9077 	}
9078 
9079 	// split into directory vnode and filename path
9080 	char filename[B_FILE_NAME_LENGTH];
9081 	struct vnode* dir;
9082 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9083 	if (status != B_OK)
9084 		return status;
9085 
9086 	VNodePutter _(dir);
9087 
9088 	// the underlying FS needs to support creating FIFOs
9089 	if (!HAS_FS_CALL(dir, create_special_node))
9090 		return B_UNSUPPORTED;
9091 
9092 	// create the entry	-- the FIFO sub node is set up automatically
9093 	fs_vnode superVnode;
9094 	ino_t nodeID;
9095 	status = FS_CALL(dir, create_special_node, filename, NULL,
9096 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9097 
9098 	// create_special_node() acquired a reference for us that we don't need.
9099 	if (status == B_OK)
9100 		put_vnode(dir->mount->volume, nodeID);
9101 
9102 	return status;
9103 }
9104 
9105 
9106 status_t
9107 _user_create_pipe(int* userFDs)
9108 {
9109 	// rootfs should support creating FIFOs, but let's be sure
9110 	if (!HAS_FS_CALL(sRoot, create_special_node))
9111 		return B_UNSUPPORTED;
9112 
9113 	// create the node	-- the FIFO sub node is set up automatically
9114 	fs_vnode superVnode;
9115 	ino_t nodeID;
9116 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9117 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9118 	if (status != B_OK)
9119 		return status;
9120 
9121 	// We've got one reference to the node and need another one.
9122 	struct vnode* vnode;
9123 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9124 	if (status != B_OK) {
9125 		// that should not happen
9126 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9127 			sRoot->mount->id, nodeID);
9128 		return status;
9129 	}
9130 
9131 	// Everything looks good so far. Open two FDs, one for reading and one
9132 	// for writing.
9133 	int fds[2];
9134 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9135 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9136 
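	// The FDClosers close the FDs again on the error paths; on success we
	// Detach() them below so the descriptors stay open for userland.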
9137 	FDCloser closer0(fds[0], false);
9138 	FDCloser closer1(fds[1], false);
9139 
9140 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9141 
9142 	// copy FDs to userland
9143 	if (status == B_OK) {
9144 		if (!IS_USER_ADDRESS(userFDs)
9145 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9146 			status = B_BAD_ADDRESS;
9147 		}
9148 	}
9149 
9150 	// keep FDs, if everything went fine
9151 	if (status == B_OK) {
9152 		closer0.Detach();
9153 		closer1.Detach();
9154 	}
9155 
9156 	return status;
9157 }
9158 
9159 
9160 status_t
9161 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9162 {
9163 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9164 	if (pathBuffer.InitCheck() != B_OK)
9165 		return B_NO_MEMORY;
9166 
9167 	char* path = pathBuffer.LockBuffer();
9168 
9169 	if (!IS_USER_ADDRESS(userPath)
9170 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9171 		return B_BAD_ADDRESS;
9172 
9173 	return common_access(fd, path, mode, effectiveUserGroup, false);
9174 }
9175 
9176 
9177 status_t
9178 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9179 	struct stat* userStat, size_t statSize)
9180 {
9181 	struct stat stat;
9182 	status_t status;
9183 
9184 	if (statSize > sizeof(struct stat))
9185 		return B_BAD_VALUE;
9186 
9187 	if (!IS_USER_ADDRESS(userStat))
9188 		return B_BAD_ADDRESS;
9189 
9190 	if (userPath) {
9191 		// path given: get the stat of the node referred to by (fd, path)
9192 		if (!IS_USER_ADDRESS(userPath))
9193 			return B_BAD_ADDRESS;
9194 
9195 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9196 		if (pathBuffer.InitCheck() != B_OK)
9197 			return B_NO_MEMORY;
9198 
9199 		char* path = pathBuffer.LockBuffer();
9200 
9201 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9202 		if (length < B_OK)
9203 			return length;
9204 		if (length >= B_PATH_NAME_LENGTH)
9205 			return B_NAME_TOO_LONG;
9206 
9207 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9208 	} else {
9209 		// no path given: get the FD and use the FD operation
9210 		struct file_descriptor* descriptor
9211 			= get_fd(get_current_io_context(false), fd);
9212 		if (descriptor == NULL)
9213 			return B_FILE_ERROR;
9214 
9215 		if (descriptor->ops->fd_read_stat)
9216 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9217 		else
9218 			status = B_NOT_SUPPORTED;
9219 
9220 		put_fd(descriptor);
9221 	}
9222 
9223 	if (status != B_OK)
9224 		return status;
9225 
9226 	return user_memcpy(userStat, &stat, statSize);
9227 }
9228 
9229 
9230 status_t
9231 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9232 	const struct stat* userStat, size_t statSize, int statMask)
9233 {
9234 	if (statSize > sizeof(struct stat))
9235 		return B_BAD_VALUE;
9236 
9237 	struct stat stat;
9238 
9239 	if (!IS_USER_ADDRESS(userStat)
9240 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9241 		return B_BAD_ADDRESS;
9242 
9243 	// clear additional stat fields
9244 	if (statSize < sizeof(struct stat))
9245 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9246 
9247 	status_t status;
9248 
9249 	if (userPath) {
9250 		// path given: write the stat of the node referred to by (fd, path)
9251 		if (!IS_USER_ADDRESS(userPath))
9252 			return B_BAD_ADDRESS;
9253 
9254 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9255 		if (pathBuffer.InitCheck() != B_OK)
9256 			return B_NO_MEMORY;
9257 
9258 		char* path = pathBuffer.LockBuffer();
9259 
9260 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9261 		if (length < B_OK)
9262 			return length;
9263 		if (length >= B_PATH_NAME_LENGTH)
9264 			return B_NAME_TOO_LONG;
9265 
9266 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9267 			statMask, false);
9268 	} else {
9269 		// no path given: get the FD and use the FD operation
9270 		struct file_descriptor* descriptor
9271 			= get_fd(get_current_io_context(false), fd);
9272 		if (descriptor == NULL)
9273 			return B_FILE_ERROR;
9274 
9275 		if (descriptor->ops->fd_write_stat) {
9276 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9277 				statMask);
9278 		} else
9279 			status = B_NOT_SUPPORTED;
9280 
9281 		put_fd(descriptor);
9282 	}
9283 
9284 	return status;
9285 }
9286 
9287 
9288 int
9289 _user_open_attr_dir(int fd, const char* userPath)
9290 {
9291 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9292 	if (pathBuffer.InitCheck() != B_OK)
9293 		return B_NO_MEMORY;
9294 
9295 	char* path = pathBuffer.LockBuffer();
9296 
9297 	if (userPath != NULL) {
9298 		if (!IS_USER_ADDRESS(userPath)
9299 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9300 			return B_BAD_ADDRESS;
9301 	}
9302 
9303 	return attr_dir_open(fd, userPath ? path : NULL, false);
9304 }
9305 
9306 
9307 ssize_t
9308 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9309 	size_t readBytes)
9310 {
9311 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9312 	if (attr < 0)
9313 		return attr;
9314 
9315 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9316 	_user_close(attr);
9317 
9318 	return bytes;
9319 }
9320 
9321 
9322 ssize_t
9323 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9324 	const void* buffer, size_t writeBytes)
9325 {
9326 	// Try to support the BeOS typical truncation as well as the position
9327 	// argument: a write at position 0 truncates the attribute first.
9328 	int attr = attr_create(fd, NULL, attribute, type,
9329 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9330 	if (attr < 0)
9331 		return attr;
9332 
9333 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9334 	_user_close(attr);
9335 
9336 	return bytes;
9337 }
9338 
9339 
9340 status_t
9341 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9342 {
9343 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9344 	if (attr < 0)
9345 		return attr;
9346 
9347 	struct file_descriptor* descriptor
9348 		= get_fd(get_current_io_context(false), attr);
9349 	if (descriptor == NULL) {
9350 		_user_close(attr);
9351 		return B_FILE_ERROR;
9352 	}
9353 
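	// Map the attribute's stat data onto the BeOS attr_info structure;
	// type and size are all it carries.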
9354 	struct stat stat;
9355 	status_t status;
9356 	if (descriptor->ops->fd_read_stat)
9357 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9358 	else
9359 		status = B_NOT_SUPPORTED;
9360 
9361 	put_fd(descriptor);
9362 	_user_close(attr);
9363 
9364 	if (status == B_OK) {
9365 		attr_info info;
9366 		info.type = stat.st_type;
9367 		info.size = stat.st_size;
9368 
9369 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9370 			return B_BAD_ADDRESS;
9371 	}
9372 
9373 	return status;
9374 }
9375 
9376 
9377 int
9378 _user_open_attr(int fd, const char* userPath, const char* userName,
9379 	uint32 type, int openMode)
9380 {
9381 	char name[B_FILE_NAME_LENGTH];
9382 
9383 	if (!IS_USER_ADDRESS(userName)
9384 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9385 		return B_BAD_ADDRESS;
9386 
9387 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9388 	if (pathBuffer.InitCheck() != B_OK)
9389 		return B_NO_MEMORY;
9390 
9391 	char* path = pathBuffer.LockBuffer();
9392 
9393 	if (userPath != NULL) {
9394 		if (!IS_USER_ADDRESS(userPath)
9395 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9396 			return B_BAD_ADDRESS;
9397 	}
9398 
9399 	if ((openMode & O_CREAT) != 0) {
9400 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9401 			false);
9402 	}
9403 
9404 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9405 }
9406 
9407 
9408 status_t
9409 _user_remove_attr(int fd, const char* userName)
9410 {
9411 	char name[B_FILE_NAME_LENGTH];
9412 
9413 	if (!IS_USER_ADDRESS(userName)
9414 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9415 		return B_BAD_ADDRESS;
9416 
9417 	return attr_remove(fd, name, false);
9418 }
9419 
9420 
9421 status_t
9422 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9423 	const char* userToName)
9424 {
9425 	if (!IS_USER_ADDRESS(userFromName)
9426 		|| !IS_USER_ADDRESS(userToName))
9427 		return B_BAD_ADDRESS;
9428 
9429 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9430 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9431 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9432 		return B_NO_MEMORY;
9433 
9434 	char* fromName = fromNameBuffer.LockBuffer();
9435 	char* toName = toNameBuffer.LockBuffer();
9436 
9437 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9438 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9439 		return B_BAD_ADDRESS;
9440 
9441 	return attr_rename(fromFile, fromName, toFile, toName, false);
9442 }
9443 
9444 
9445 int
9446 _user_open_index_dir(dev_t device)
9447 {
9448 	return index_dir_open(device, false);
9449 }
9450 
9451 
9452 status_t
9453 _user_create_index(dev_t device, const char* userName, uint32 type,
9454 	uint32 flags)
9455 {
9456 	char name[B_FILE_NAME_LENGTH];
9457 
9458 	if (!IS_USER_ADDRESS(userName)
9459 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9460 		return B_BAD_ADDRESS;
9461 
9462 	return index_create(device, name, type, flags, false);
9463 }
9464 
9465 
9466 status_t
9467 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9468 {
9469 	char name[B_FILE_NAME_LENGTH];
9470 	struct stat stat;
9471 	status_t status;
9472 
9473 	if (!IS_USER_ADDRESS(userName)
9474 		|| !IS_USER_ADDRESS(userStat)
9475 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9476 		return B_BAD_ADDRESS;
9477 
9478 	status = index_name_read_stat(device, name, &stat, false);
9479 	if (status == B_OK) {
9480 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9481 			return B_BAD_ADDRESS;
9482 	}
9483 
9484 	return status;
9485 }
9486 
9487 
9488 status_t
9489 _user_remove_index(dev_t device, const char* userName)
9490 {
9491 	char name[B_FILE_NAME_LENGTH];
9492 
9493 	if (!IS_USER_ADDRESS(userName)
9494 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9495 		return B_BAD_ADDRESS;
9496 
9497 	return index_remove(device, name, false);
9498 }
9499 
9500 
9501 status_t
9502 _user_getcwd(char* userBuffer, size_t size)
9503 {
9504 	if (size == 0)
9505 		return B_BAD_VALUE;
9506 	if (!IS_USER_ADDRESS(userBuffer))
9507 		return B_BAD_ADDRESS;
9508 
9509 	if (size > kMaxPathLength)
9510 		size = kMaxPathLength;
9511 
9512 	KPath pathBuffer(size);
9513 	if (pathBuffer.InitCheck() != B_OK)
9514 		return B_NO_MEMORY;
9515 
9516 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9517 
9518 	char* path = pathBuffer.LockBuffer();
9519 
9520 	status_t status = get_cwd(path, size, false);
9521 	if (status != B_OK)
9522 		return status;
9523 
9524 	// Copy back the result
9525 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9526 		return B_BAD_ADDRESS;
9527 
9528 	return status;
9529 }
9530 
9531 
9532 status_t
9533 _user_setcwd(int fd, const char* userPath)
9534 {
9535 	TRACE(("user_setcwd: path = %p\n", userPath));
9536 
9537 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9538 	if (pathBuffer.InitCheck() != B_OK)
9539 		return B_NO_MEMORY;
9540 
9541 	char* path = pathBuffer.LockBuffer();
9542 
9543 	if (userPath != NULL) {
9544 		if (!IS_USER_ADDRESS(userPath)
9545 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9546 			return B_BAD_ADDRESS;
9547 	}
9548 
9549 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9550 }
9551 
9552 
9553 status_t
9554 _user_change_root(const char* userPath)
9555 {
9556 	// only root is allowed to chroot()
9557 	if (geteuid() != 0)
9558 		return B_NOT_ALLOWED;
9559 
9560 	// alloc path buffer
9561 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9562 	if (pathBuffer.InitCheck() != B_OK)
9563 		return B_NO_MEMORY;
9564 
9565 	// copy userland path to kernel
9566 	char* path = pathBuffer.LockBuffer();
9567 	if (userPath != NULL) {
9568 		if (!IS_USER_ADDRESS(userPath)
9569 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9570 			return B_BAD_ADDRESS;
9571 	}
9572 
9573 	// get the vnode
9574 	struct vnode* vnode;
9575 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9576 	if (status != B_OK)
9577 		return status;
9578 
9579 	// set the new root
9580 	struct io_context* context = get_current_io_context(false);
9581 	mutex_lock(&sIOContextRootLock);
9582 	struct vnode* oldRoot = context->root;
9583 	context->root = vnode;
9584 	mutex_unlock(&sIOContextRootLock);
9585 
9586 	put_vnode(oldRoot);
9587 
9588 	return B_OK;
9589 }
9590 
9591 
9592 int
9593 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9594 	uint32 flags, port_id port, int32 token)
9595 {
9596 	char* query;
9597 
9598 	if (device < 0 || userQuery == NULL || queryLength == 0)
9599 		return B_BAD_VALUE;
9600 
9601 	// this is a safety restriction
9602 	if (queryLength >= 65536)
9603 		return B_NAME_TOO_LONG;
9604 
9605 	query = (char*)malloc(queryLength + 1);
9606 	if (query == NULL)
9607 		return B_NO_MEMORY;
9608 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9609 		free(query);
9610 		return B_BAD_ADDRESS;
9611 	}
9612 
9613 	int fd = query_open(device, query, flags, port, token, false);
9614 
9615 	free(query);
9616 	return fd;
9617 }
9618 
9619 
9620 #include "vfs_request_io.cpp"
9621