xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 14b32de1d5efe99b4c6d4ef8c25df47eb009cf0f)
/*
 * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <khash.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL op " #op " is NULL"), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
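

/* Illustrative sketch (comments only; "someVnode" and "status" are
   hypothetical) of how the dispatch macros above are typically used:

	struct stat stat;
	if (HAS_FS_CALL(someVnode, read_stat))
		status = FS_CALL(someVnode, read_stat, &stat);
	else
		status = B_UNSUPPORTED;

   With KDEBUG enabled, FS_CALL() panics when the FS does not implement the
   requested hook, instead of jumping through a NULL pointer.
*/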


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()); this does not depend
	// on PATH_MAX.


struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted, the mount holds a reference to the root_vnode->covers
	vnode, thus making the access path
	vnode->mount->root_vnode->covers->mount->... safe if a reference to vnode
	is held (note that for the root mount root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		recursive_lock_init(&rlock, "mount rlock");
	}

	~fs_mount()
	{
		recursive_lock_destroy(&rlock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	recursive_lock	rlock;	// guards the vnodes list
		// TODO: Make this a mutex! It is never used recursively.
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};
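
/* A sketch (comments only; "vnode" is hypothetical and assumed to be
   referenced) of the access path described in the documentation above:

	struct fs_mount* mount = vnode->mount;
		// safe: the reference prevents unmounting
	struct vnode* root = mount->root_vnode;
		// initialized in fs_mount(), immutable afterwards
	struct vnode* covered = root->covers;
		// NULL only for the root mount
	if (covered != NULL) {
		// the mount holds a reference to "covered", so this remains valid
		dev_t parentMountID = covered->mount->id;
	}
*/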

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks, though.
*/
static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold this lock during their whole operation.
	That is, locking it ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields immutable after initialization of the fs_mount structures in
	  sMountsTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountMutex.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");

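/* Note: a summary of the lock ordering implied by the documentation above
   (an inference from those comments, not an authoritative table):
   sMountOpLock must be acquired before sVnodeLock, and sVnodeLock before
   sMountMutex (cf. get_mount(), which read-locks sVnodeLock and then locks
   sMountMutex). sIOContextRootLock may be acquired while sVnodeLock is
   read-locked (cf. replace_vnode_if_disconnected()). */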

#define VNODE_HASH_TABLE_SIZE 1024
static hash_table* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static hash_table* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


// VNodePutter
class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};

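/* Typical usage sketch for VNodePutter (illustrative; the arguments to
   fd_and_path_to_vnode() are hypothetical):

	struct vnode* vnode;
	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL,
		kernel);
	if (status != B_OK)
		return status;
	VNodePutter vnodePutter(vnode);
		// put_vnode() happens automatically when the putter goes out of
		// scope -- unless Detach() transfers the reference to the caller
*/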

class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

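/* Typical usage sketch for FDCloser (illustrative; "openMode" and "kernel"
   are hypothetical):

	int fd = open_vnode(vnode, openMode, kernel);
	if (fd < 0)
		return fd;
	FDCloser fdCloser(fd, kernel);
	// ... further setup that may fail and return early ...
	return fdCloser.Detach();
		// on success, detach so the FD stays open for the caller
*/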

#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


static int
mount_compare(void* _m, const void* _key)
{
	struct fs_mount* mount = (fs_mount*)_m;
	const dev_t* id = (dev_t*)_key;

	if (mount->id == *id)
		return 0;

	return -1;
}


static uint32
mount_hash(void* _m, const void* _key, uint32 range)
{
	struct fs_mount* mount = (fs_mount*)_m;
	const dev_t* id = (dev_t*)_key;

	if (mount)
		return mount->id % range;

	return (uint32)*id % range;
}


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(mount->root_vnode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}

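/* Examples for the two helpers above (derived from their documentation):
   get_file_system("bfs") loads the module "file_systems/bfs/v1", while
   get_file_system("file_systems/bfs/v1") uses the given name verbatim;
   get_file_system_name() maps both spellings to a malloc'ed "bfs". */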

/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}

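/* Example (hypothetical layer names): for the list "fs1:fs2",
   get_file_system_name_for_layer(names, 0) returns "fs1",
   get_file_system_name_for_layer(names, 1) returns "fs2", and any higher
   layer yields NULL. */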

static int
vnode_compare(void* _vnode, const void* _key)
{
	struct vnode* vnode = (struct vnode*)_vnode;
	const struct vnode_hash_key* key = (vnode_hash_key*)_key;

	if (vnode->device == key->device && vnode->id == key->vnode)
		return 0;

	return -1;
}


static uint32
vnode_hash(void* _vnode, const void* _key, uint32 range)
{
	struct vnode* vnode = (struct vnode*)_vnode;
	const struct vnode_hash_key* key = (vnode_hash_key*)_key;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	if (vnode != NULL)
		return VHASH(vnode->device, vnode->id) % range;

	return VHASH(key->device, key->vnode) % range;

#undef VHASH
}

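/* Sketch of the VHASH scheme above with hypothetical values: for
   device == 3 and the 64-bit node ID 0x100000002, the two 32-bit halves of
   the ID are added and then XORed with the mount ID:

	uint32 slot = (((uint32)1 + (uint32)2) ^ (uint32)3) % range;
		// == 0 for any range, since (1 + 2) ^ 3 == 0
*/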

static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	RecursiveLocker _(mount->rlock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode*)hash_lookup(sVnodeTable, &key);
}


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function
	write locks \c sVnodeLock and keeps it locked for the caller when
	returning. On error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		free(vnode);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		rw_lock_write_unlock(&sVnodeLock);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count had the chance to
	// drop to 0 at all. Deleting the file cache now will cause the next to
	// last cache reference to be released, which will also release a (no
	// longer existing) vnode reference. To avoid problems, we set the vnode's
	// ref count so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	hash_remove(sVnodeTable, vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function shall wait (and retry) while the
		   node is busy, \c false to fail with \c B_BUSY right away.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = 2000;
		// try for 10 secs
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait || --tries < 0) {
			// vnode doesn't seem to become unbusy
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is not becoming unbusy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		snooze(5000); // 5 ms
		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			hash_remove(sVnodeTable, vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			free(vnode);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to
		// the tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for putting it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for putting it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for putting it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for putting it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place -- otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success -- even if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}

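/* Sketch of the test above with hypothetical values: an advisory_lock
   covering [100, 199] and an flock with l_start == 150, l_len == 100
   (i.e. [150, 249]) intersect, since 100 <= 150 + 100 - 1 and 199 >= 150. */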

/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// TODO: use the thread ID instead??
	team_id team = team_get_current_team_id();
	pid_t session = thread_get_current_thread()->team->session_id;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (lock->session == session)
			removeLock = true;
		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock
					= (struct advisory_lock*)malloc(
						sizeof(struct advisory_lock));
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
				secondLock->shared = lock->shared;

				// cut the end of the first part (only after the second lock
				// has taken over the original end)
				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}

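/* Sketch of the splitting case above with hypothetical values: releasing
   the range l_start == 120, l_len == 20 (i.e. [120, 139]) from a held lock
   [100, 199] truncates it to [100, 119] and adds a second lock [140, 199]. */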

/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(otherwise it returns B_WOULD_BLOCK for flock() style locks and
	B_PERMISSION_DENIED for POSIX style locks in this case).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = (struct advisory_lock*)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}


/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode* vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (!HAS_FS_CALL(vnode, read_stat))
				return B_UNSUPPORTED;

			status = FS_CALL(vnode, read_stat, &stat);
			if (status != B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}

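/* Example of the normalization above (hypothetical values): with
   l_whence == SEEK_SET, l_start == 200, and l_len == -100, the negative
   length reverses the region to l_start == 100, l_len == 100, i.e. bytes
   [100, 199]. An l_len of 0 is expanded to reach to OFF_MAX. */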

static void
replace_vnode_if_disconnected(struct fs_mount* mount,
	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
	struct vnode* fallBack, bool lockRootLock)
{
	struct vnode* givenVnode = vnode;
	bool vnodeReplaced = false;

	ReadLocker vnodeReadLocker(sVnodeLock);

	if (lockRootLock)
		mutex_lock(&sIOContextRootLock);

	while (vnode != NULL && vnode->mount == mount
		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
		if (vnode->covers != NULL) {
			// redirect the vnode to the covered vnode
			vnode = vnode->covers;
		} else
			vnode = fallBack;

		vnodeReplaced = true;
	}

	// If we've replaced the node, grab a reference for the new one.
	if (vnodeReplaced && vnode != NULL)
		inc_vnode_ref_count(vnode);

	if (lockRootLock)
		mutex_unlock(&sIOContextRootLock);

	vnodeReadLocker.Unlock();

	if (vnodeReplaced)
		put_vnode(givenVnode);
}
1870 
1871 
1872 /*!	Disconnects all file descriptors that are associated with the
1873 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1874 	\a mount object.
1875 
	Note that after you've called this function there might still be ongoing
	accesses - they won't be interrupted if they were already in progress.
	However, any subsequent access will fail.
1879 
1880 	This is not a cheap function and should be used with care and rarely.
1881 	TODO: there is currently no means to stop a blocking read/write!
1882 */
1883 static void
1884 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1885 	struct vnode* vnodeToDisconnect)
1886 {
1887 	// iterate over all teams and peek into their file descriptors
1888 	TeamListIterator teamIterator;
1889 	while (Team* team = teamIterator.Next()) {
1890 		BReference<Team> teamReference(team, true);
1891 
1892 		// lock the I/O context
1893 		io_context* context = team->io_context;
1894 		MutexLocker contextLocker(context->io_mutex);
1895 
1896 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1897 			sRoot, true);
1898 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1899 			sRoot, false);
1900 
1901 		for (uint32 i = 0; i < context->table_size; i++) {
1902 			if (struct file_descriptor* descriptor = context->fds[i]) {
1903 				inc_fd_ref_count(descriptor);
1904 
1905 				// if this descriptor points at this mount, we
1906 				// need to disconnect it to be able to unmount
1907 				struct vnode* vnode = fd_vnode(descriptor);
1908 				if (vnodeToDisconnect != NULL) {
1909 					if (vnode == vnodeToDisconnect)
1910 						disconnect_fd(descriptor);
1911 				} else if ((vnode != NULL && vnode->mount == mount)
1912 					|| (vnode == NULL && descriptor->u.mount == mount))
1913 					disconnect_fd(descriptor);
1914 
1915 				put_fd(descriptor);
1916 			}
1917 		}
1918 	}
1919 }
1920 
1921 
1922 /*!	\brief Gets the root node of the current IO context.
1923 	If \a kernel is \c true, the kernel IO context will be used.
1924 	The caller obtains a reference to the returned node.
1925 */
1926 struct vnode*
1927 get_root_vnode(bool kernel)
1928 {
1929 	if (!kernel) {
		// Get the root of the current team's IO context
1931 		struct io_context* context = get_current_io_context(kernel);
1932 
1933 		mutex_lock(&sIOContextRootLock);
1934 
1935 		struct vnode* root = context->root;
1936 		if (root != NULL)
1937 			inc_vnode_ref_count(root);
1938 
1939 		mutex_unlock(&sIOContextRootLock);
1940 
1941 		if (root != NULL)
1942 			return root;
1943 
1944 		// That should never happen.
1945 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1946 			"have a root\n", team_get_current_team_id());
1947 	}
1948 
1949 	inc_vnode_ref_count(sRoot);
1950 	return sRoot;
1951 }
1952 
1953 
1954 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1955 
1956 	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the vnode is covered by another vnode. If it is, the
1958 	function returns the mount and node ID of the covering vnode. Otherwise
1959 	it simply returns the supplied mount and node ID.
1960 
1961 	In case of error (e.g. the supplied node could not be found) the variables
1962 	for storing the resolved mount and node ID remain untouched and an error
1963 	code is returned.
1964 
1965 	\param mountID The mount ID of the vnode in question.
1966 	\param nodeID The node ID of the vnode in question.
1967 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1968 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1969 	\return
1970 	- \c B_OK, if everything went fine,
1971 	- another error code, if something went wrong.
1972 */
1973 status_t
1974 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1975 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1976 {
1977 	// get the node
1978 	struct vnode* node;
1979 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1980 	if (error != B_OK)
1981 		return error;
1982 
1983 	// resolve the node
1984 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1985 		put_vnode(node);
1986 		node = coveringNode;
1987 	}
1988 
1989 	// set the return values
1990 	*resolvedMountID = node->device;
1991 	*resolvedNodeID = node->id;
1992 
1993 	put_vnode(node);
1994 
1995 	return B_OK;
1996 }
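

// Usage sketch: resolving the (device, node) pair of a directory that
// serves as a mount point yields the root of the volume mounted on it; for
// any uncovered vnode the input IDs are returned unchanged.
#if 0
static void
example_resolve_covering(dev_t device, ino_t node)
{
	dev_t resolvedDevice;
	ino_t resolvedNode;
	if (vfs_resolve_vnode_to_covering_vnode(device, node, &resolvedDevice,
			&resolvedNode) == B_OK) {
		// (resolvedDevice, resolvedNode) now identifies the visible node
	}
}
#endif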
1997 
1998 
1999 /*!	\brief Gets the directory path and leaf name for a given path.
2000 
	The supplied \a path is transformed to refer to the directory part of
	the entry identified by the original path, and the leaf name of the
	original entry is written into the buffer \a filename.
2004 	Neither the returned path nor the leaf name can be expected to be
2005 	canonical.
2006 
2007 	\param path The path to be analyzed. Must be able to store at least one
2008 		   additional character.
2009 	\param filename The buffer into which the leaf name will be written.
2010 		   Must be of size B_FILE_NAME_LENGTH at least.
2011 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2012 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2013 		   if the given path name is empty.
2014 */
2015 static status_t
2016 get_dir_path_and_leaf(char* path, char* filename)
2017 {
2018 	if (*path == '\0')
2019 		return B_ENTRY_NOT_FOUND;
2020 
2021 	char* last = strrchr(path, '/');
2022 		// '/' are not allowed in file names!
2023 
2024 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2025 
2026 	if (last == NULL) {
		// this path is a single segment with no '/' in it
		// e.g. "foo"
2029 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2030 			return B_NAME_TOO_LONG;
2031 
2032 		strcpy(path, ".");
2033 	} else {
2034 		last++;
2035 		if (last[0] == '\0') {
2036 			// special case: the path ends in one or more '/' - remove them
2037 			while (*--last == '/' && last != path);
2038 			last[1] = '\0';
2039 
2040 			if (last == path && last[0] == '/') {
2041 				// This path points to the root of the file system
2042 				strcpy(filename, ".");
2043 				return B_OK;
2044 			}
2045 			for (; last != path && *(last - 1) != '/'; last--);
2046 				// rewind to the start of the leaf before the '/'
2047 		}
2048 
2049 		// normal leaf: replace the leaf portion of the path with a '.'
2050 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2051 			return B_NAME_TOO_LONG;
2052 
2053 		last[0] = '.';
2054 		last[1] = '\0';
2055 	}
2056 	return B_OK;
2057 }
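

// Worked examples (illustrative) of the in-place rewriting done by
// get_dir_path_and_leaf():
//   "/a/b/c"   ->  path "/a/b/."  filename "c"
//   "/a/b/c/"  ->  path "/a/b/."  filename "c"   (trailing slashes dropped)
//   "foo"      ->  path "."       filename "foo"
//   "/"        ->  path "/"       filename "."   (root of the file system)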
2058 
2059 
2060 static status_t
2061 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2062 	bool traverse, bool kernel, struct vnode** _vnode)
2063 {
2064 	char clonedName[B_FILE_NAME_LENGTH + 1];
2065 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2066 		return B_NAME_TOO_LONG;
2067 
2068 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2069 	struct vnode* directory;
2070 
2071 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2072 	if (status < 0)
2073 		return status;
2074 
2075 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2076 		_vnode, NULL);
2077 }
2078 
2079 
2080 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2081 	and returns the respective vnode.
2082 	On success a reference to the vnode is acquired for the caller.
2083 */
2084 static status_t
2085 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2086 {
2087 	ino_t id;
2088 
2089 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2090 		return get_vnode(dir->device, id, _vnode, true, false);
2091 
2092 	status_t status = FS_CALL(dir, lookup, name, &id);
2093 	if (status != B_OK)
2094 		return status;
2095 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// have a reference and just need to look the node up.
2098 	rw_lock_read_lock(&sVnodeLock);
2099 	*_vnode = lookup_vnode(dir->device, id);
2100 	rw_lock_read_unlock(&sVnodeLock);
2101 
2102 	if (*_vnode == NULL) {
2103 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2104 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2105 		return B_ENTRY_NOT_FOUND;
2106 	}
2107 
2108 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2109 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2110 //		(*_vnode)->mount->id, (*_vnode)->id);
2111 
2112 	return B_OK;
2113 }
2114 
2115 
2116 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2117 	\a path must not be NULL.
2118 	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this always decrements the ref_count of the starting \a vnode,
	whether it succeeds or fails!
2123 */
2124 static status_t
2125 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2126 	int count, struct io_context* ioContext, struct vnode** _vnode,
2127 	ino_t* _parentID)
2128 {
2129 	status_t status = B_OK;
2130 	ino_t lastParentID = vnode->id;
2131 
2132 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2133 
2134 	if (path == NULL) {
2135 		put_vnode(vnode);
2136 		return B_BAD_VALUE;
2137 	}
2138 
2139 	if (*path == '\0') {
2140 		put_vnode(vnode);
2141 		return B_ENTRY_NOT_FOUND;
2142 	}
2143 
2144 	while (true) {
2145 		struct vnode* nextVnode;
2146 		char* nextPath;
2147 
2148 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2149 			path));
2150 
2151 		// done?
2152 		if (path[0] == '\0')
2153 			break;
2154 
2155 		// walk to find the next path component ("path" will point to a single
2156 		// path component), and filter out multiple slashes
2157 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2158 				nextPath++);
2159 
2160 		if (*nextPath == '/') {
2161 			*nextPath = '\0';
2162 			do
2163 				nextPath++;
2164 			while (*nextPath == '/');
2165 		}
2166 
		// If the '..' is at a covering vnode, move to the covered vnode so
		// that we pass the '..' path to the underlying file system.
		// Also prevent escaping the root of the IO context.
2170 		if (strcmp("..", path) == 0) {
2171 			if (vnode == ioContext->root) {
2172 				// Attempted prison break! Keep it contained.
2173 				path = nextPath;
2174 				continue;
2175 			}
2176 
2177 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2178 				nextVnode = coveredVnode;
2179 				put_vnode(vnode);
2180 				vnode = nextVnode;
2181 			}
2182 		}
2183 
2184 		// check if vnode is really a directory
2185 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2186 			status = B_NOT_A_DIRECTORY;
2187 
2188 		// Check if we have the right to search the current directory vnode.
2189 		// If a file system doesn't have the access() function, we assume that
2190 		// searching a directory is always allowed
2191 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2192 			status = FS_CALL(vnode, access, X_OK);
2193 
2194 		// Tell the filesystem to get the vnode of this path component (if we
2195 		// got the permission from the call above)
2196 		if (status == B_OK)
2197 			status = lookup_dir_entry(vnode, path, &nextVnode);
2198 
2199 		if (status != B_OK) {
2200 			put_vnode(vnode);
2201 			return status;
2202 		}
2203 
2204 		// If the new node is a symbolic link, resolve it (if we've been told
2205 		// to do it)
2206 		if (S_ISLNK(nextVnode->Type())
2207 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2208 			size_t bufferSize;
2209 			char* buffer;
2210 
2211 			TRACE(("traverse link\n"));
2212 
2213 			// it's not exactly nice style using goto in this way, but hey,
2214 			// it works :-/
2215 			if (count + 1 > B_MAX_SYMLINKS) {
2216 				status = B_LINK_LIMIT;
2217 				goto resolve_link_error;
2218 			}
2219 
2220 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2221 			if (buffer == NULL) {
2222 				status = B_NO_MEMORY;
2223 				goto resolve_link_error;
2224 			}
2225 
2226 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2227 				bufferSize--;
2228 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2229 				// null-terminate
2230 				if (status >= 0)
2231 					buffer[bufferSize] = '\0';
2232 			} else
2233 				status = B_BAD_VALUE;
2234 
2235 			if (status != B_OK) {
2236 				free(buffer);
2237 
2238 		resolve_link_error:
2239 				put_vnode(vnode);
2240 				put_vnode(nextVnode);
2241 
2242 				return status;
2243 			}
2244 			put_vnode(nextVnode);
2245 
2246 			// Check if we start from the root directory or the current
2247 			// directory ("vnode" still points to that one).
2248 			// Cut off all leading slashes if it's the root directory
2249 			path = buffer;
2250 			bool absoluteSymlink = false;
2251 			if (path[0] == '/') {
2252 				// we don't need the old directory anymore
2253 				put_vnode(vnode);
2254 
2255 				while (*++path == '/')
2256 					;
2257 
2258 				mutex_lock(&sIOContextRootLock);
2259 				vnode = ioContext->root;
2260 				inc_vnode_ref_count(vnode);
2261 				mutex_unlock(&sIOContextRootLock);
2262 
2263 				absoluteSymlink = true;
2264 			}
2265 
2266 			inc_vnode_ref_count(vnode);
2267 				// balance the next recursion - we will decrement the
2268 				// ref_count of the vnode, no matter if we succeeded or not
2269 
2270 			if (absoluteSymlink && *path == '\0') {
2271 				// symlink was just "/"
2272 				nextVnode = vnode;
2273 			} else {
2274 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2275 					ioContext, &nextVnode, &lastParentID);
2276 			}
2277 
2278 			free(buffer);
2279 
2280 			if (status != B_OK) {
2281 				put_vnode(vnode);
2282 				return status;
2283 			}
2284 		} else
2285 			lastParentID = vnode->id;
2286 
2287 		// decrease the ref count on the old dir we just looked up into
2288 		put_vnode(vnode);
2289 
2290 		path = nextPath;
2291 		vnode = nextVnode;
2292 
2293 		// see if we hit a covered node
2294 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2295 			put_vnode(vnode);
2296 			vnode = coveringNode;
2297 		}
2298 	}
2299 
2300 	*_vnode = vnode;
2301 	if (_parentID)
2302 		*_parentID = lastParentID;
2303 
2304 	return B_OK;
2305 }
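

// Resolution sketch (illustrative): each loop iteration above consumes one
// path component - it checks that the current vnode is a searchable
// directory, looks the component up, follows symlinks (recursing at most
// B_MAX_SYMLINKS deep), and switches to a covering vnode when it steps onto
// a mount point. A caller hands in one vnode reference and, on success,
// gets one back in *_vnode:
#if 0
static status_t
example_lookup(struct vnode* startDir, bool kernel, struct vnode** _vnode)
{
	char path[] = "a/b/c";
		// must be mutable - the walk modifies the buffer
	inc_vnode_ref_count(startDir);
		// vnode_path_to_vnode() consumes one reference, success or not
	return vnode_path_to_vnode(startDir, path, true, 0,
		get_current_io_context(kernel), _vnode, NULL);
}
#endif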
2306 
2307 
2308 static status_t
2309 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2310 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2311 {
2312 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2313 		get_current_io_context(kernel), _vnode, _parentID);
2314 }
2315 
2316 
2317 static status_t
2318 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2319 	ino_t* _parentID, bool kernel)
2320 {
2321 	struct vnode* start = NULL;
2322 
2323 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2324 
2325 	if (!path)
2326 		return B_BAD_VALUE;
2327 
2328 	if (*path == '\0')
2329 		return B_ENTRY_NOT_FOUND;
2330 
2331 	// figure out if we need to start at root or at cwd
2332 	if (*path == '/') {
2333 		if (sRoot == NULL) {
2334 			// we're a bit early, aren't we?
2335 			return B_ERROR;
2336 		}
2337 
2338 		while (*++path == '/')
2339 			;
2340 		start = get_root_vnode(kernel);
2341 
2342 		if (*path == '\0') {
2343 			*_vnode = start;
2344 			return B_OK;
2345 		}
2346 
2347 	} else {
2348 		struct io_context* context = get_current_io_context(kernel);
2349 
2350 		mutex_lock(&context->io_mutex);
2351 		start = context->cwd;
2352 		if (start != NULL)
2353 			inc_vnode_ref_count(start);
2354 		mutex_unlock(&context->io_mutex);
2355 
2356 		if (start == NULL)
2357 			return B_ERROR;
2358 	}
2359 
2360 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2361 		_parentID);
2362 }
2363 
2364 
/*! Returns the vnode of the next-to-last segment of the path, and returns
	the last portion (the leaf name) in \a filename.
2367 	The path buffer must be able to store at least one additional character.
2368 */
2369 static status_t
2370 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2371 	bool kernel)
2372 {
2373 	status_t status = get_dir_path_and_leaf(path, filename);
2374 	if (status != B_OK)
2375 		return status;
2376 
2377 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2378 }
2379 
2380 
2381 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2382 		   to by a FD + path pair.
2383 
2384 	\a path must be given in either case. \a fd might be omitted, in which
2385 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2389 
2390 	The caller has the responsibility to call put_vnode() on the returned
2391 	directory vnode.
2392 
2393 	\param fd The FD. May be < 0.
2394 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2395 	       is modified by this function. It must have at least room for a
2396 	       string one character longer than the path it contains.
2397 	\param _vnode A pointer to a variable the directory vnode shall be written
2398 		   into.
2399 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2400 		   the leaf name of the specified entry will be written.
2401 	\param kernel \c true, if invoked from inside the kernel, \c false if
2402 		   invoked from userland.
2403 	\return \c B_OK, if everything went fine, another error code otherwise.
2404 */
2405 static status_t
2406 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2407 	char* filename, bool kernel)
2408 {
2409 	if (!path)
2410 		return B_BAD_VALUE;
2411 	if (*path == '\0')
2412 		return B_ENTRY_NOT_FOUND;
2413 	if (fd < 0)
2414 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2415 
2416 	status_t status = get_dir_path_and_leaf(path, filename);
2417 	if (status != B_OK)
2418 		return status;
2419 
2420 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2421 }
2422 
2423 
2424 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2425 		   to by a vnode + path pair.
2426 
2427 	\a path must be given in either case. \a vnode might be omitted, in which
2428 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2432 
2433 	The caller has the responsibility to call put_vnode() on the returned
2434 	directory vnode.
2435 
2436 	\param vnode The vnode. May be \c NULL.
2437 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2438 	       is modified by this function. It must have at least room for a
2439 	       string one character longer than the path it contains.
2440 	\param _vnode A pointer to a variable the directory vnode shall be written
2441 		   into.
2442 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2443 		   the leaf name of the specified entry will be written.
2444 	\param kernel \c true, if invoked from inside the kernel, \c false if
2445 		   invoked from userland.
2446 	\return \c B_OK, if everything went fine, another error code otherwise.
2447 */
2448 static status_t
2449 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2450 	struct vnode** _vnode, char* filename, bool kernel)
2451 {
2452 	if (!path)
2453 		return B_BAD_VALUE;
2454 	if (*path == '\0')
2455 		return B_ENTRY_NOT_FOUND;
2456 	if (vnode == NULL || path[0] == '/')
2457 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2458 
2459 	status_t status = get_dir_path_and_leaf(path, filename);
2460 	if (status != B_OK)
2461 		return status;
2462 
2463 	inc_vnode_ref_count(vnode);
2464 		// vnode_path_to_vnode() always decrements the ref count
2465 
2466 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2467 }
2468 
2469 
2470 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2471 */
2472 static status_t
2473 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2474 	size_t bufferSize, struct io_context* ioContext)
2475 {
2476 	if (bufferSize < sizeof(struct dirent))
2477 		return B_BAD_VALUE;
2478 
	// See if the vnode is covering another vnode and move to the covered
	// vnode so that we get the underlying file system
2481 	VNodePutter vnodePutter;
2482 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2483 		vnode = coveredVnode;
2484 		vnodePutter.SetTo(vnode);
2485 	}
2486 
2487 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2488 		// The FS supports getting the name of a vnode.
2489 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2490 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2491 			return B_OK;
2492 	}
2493 
2494 	// The FS doesn't support getting the name of a vnode. So we search the
2495 	// parent directory for the vnode, if the caller let us.
2496 
2497 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2498 		return B_UNSUPPORTED;
2499 
2500 	void* cookie;
2501 
2502 	status_t status = FS_CALL(parent, open_dir, &cookie);
2503 	if (status >= B_OK) {
2504 		while (true) {
2505 			uint32 num = 1;
2506 			// We use the FS hook directly instead of dir_read(), since we don't
2507 			// want the entries to be fixed. We have already resolved vnode to
2508 			// the covered node.
2509 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2510 				&num);
2511 			if (status != B_OK)
2512 				break;
2513 			if (num == 0) {
2514 				status = B_ENTRY_NOT_FOUND;
2515 				break;
2516 			}
2517 
2518 			if (vnode->id == buffer->d_ino) {
2519 				// found correct entry!
2520 				break;
2521 			}
2522 		}
2523 
		// the cookie was opened on the parent, so it must be closed there too
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2526 	}
2527 	return status;
2528 }
2529 
2530 
2531 static status_t
2532 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2533 	size_t nameSize, bool kernel)
2534 {
2535 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2536 	struct dirent* dirent = (struct dirent*)buffer;
2537 
2538 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2539 		get_current_io_context(kernel));
2540 	if (status != B_OK)
2541 		return status;
2542 
2543 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2544 		return B_BUFFER_OVERFLOW;
2545 
2546 	return B_OK;
2547 }
2548 
2549 
2550 /*!	Gets the full path to a given directory vnode.
2551 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2552 	file system doesn't support this call, it will fall back to iterating
2553 	through the parent directory to get the name of the child.
2554 
2555 	To protect against circular loops, it supports a maximum tree depth
2556 	of 256 levels.
2557 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path
	stays valid, as paths aren't safe anyway: the path to a file can change
	at any time.

	It might be a good idea, though, to check in the calling function whether
	the returned path exists (it's not done here for efficiency reasons).
2564 */
2565 static status_t
2566 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2567 	bool kernel)
2568 {
2569 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2570 
2571 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2572 		return B_BAD_VALUE;
2573 
2574 	if (!S_ISDIR(vnode->Type()))
2575 		return B_NOT_A_DIRECTORY;
2576 
2577 	char* path = buffer;
2578 	int32 insert = bufferSize;
2579 	int32 maxLevel = 256;
2580 	int32 length;
2581 	status_t status;
2582 	struct io_context* ioContext = get_current_io_context(kernel);
2583 
2584 	// we don't use get_vnode() here because this call is more
2585 	// efficient and does all we need from get_vnode()
2586 	inc_vnode_ref_count(vnode);
2587 
2588 	if (vnode != ioContext->root) {
2589 		// we don't hit the IO context root
2590 		// resolve a vnode to its covered vnode
2591 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2592 			put_vnode(vnode);
2593 			vnode = coveredVnode;
2594 		}
2595 	}
2596 
2597 	path[--insert] = '\0';
2598 		// the path is filled right to left
2599 
2600 	while (true) {
2601 		// the name buffer is also used for fs_read_dir()
2602 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2603 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2604 		struct vnode* parentVnode;
2605 
2606 		// lookup the parent vnode
2607 		if (vnode == ioContext->root) {
2608 			// we hit the IO context root
2609 			parentVnode = vnode;
2610 			inc_vnode_ref_count(vnode);
2611 		} else {
2612 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2613 			if (status != B_OK)
2614 				goto out;
2615 		}
2616 
2617 		// get the node's name
2618 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2619 			sizeof(nameBuffer), ioContext);
2620 
2621 		if (vnode != ioContext->root) {
2622 			// we don't hit the IO context root
2623 			// resolve a vnode to its covered vnode
2624 			if (Vnode* coveredVnode = get_covered_vnode(parentVnode)) {
2625 				put_vnode(parentVnode);
2626 				parentVnode = coveredVnode;
2627 			}
2628 		}
2629 
2630 		bool hitRoot = (parentVnode == vnode);
2631 
2632 		// release the current vnode, we only need its parent from now on
2633 		put_vnode(vnode);
2634 		vnode = parentVnode;
2635 
2636 		if (status != B_OK)
2637 			goto out;
2638 
2639 		if (hitRoot) {
2640 			// we have reached "/", which means we have constructed the full
2641 			// path
2642 			break;
2643 		}
2644 
2645 		// TODO: add an explicit check for loops in about 10 levels to do
2646 		// real loop detection
2647 
		// don't go deeper than 'maxLevel' to prevent circular loops
2649 		if (maxLevel-- < 0) {
2650 			status = B_LINK_LIMIT;
2651 			goto out;
2652 		}
2653 
2654 		// add the name in front of the current path
2655 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2656 		length = strlen(name);
2657 		insert -= length;
2658 		if (insert <= 0) {
2659 			status = B_RESULT_NOT_REPRESENTABLE;
2660 			goto out;
2661 		}
2662 		memcpy(path + insert, name, length);
2663 		path[--insert] = '/';
2664 	}
2665 
2666 	// the root dir will result in an empty path: fix it
2667 	if (path[insert] == '\0')
2668 		path[--insert] = '/';
2669 
2670 	TRACE(("  path is: %s\n", path + insert));
2671 
2672 	// move the path to the start of the buffer
2673 	length = bufferSize - insert;
2674 	memmove(buffer, path + insert, length);
2675 
2676 out:
2677 	put_vnode(vnode);
2678 	return status;
2679 }
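

// Usage sketch: this is the workhorse behind getcwd()-style functionality.
// The caller keeps its own reference across the call, since
// dir_vnode_to_path() acquires and releases one of its own:
#if 0
static status_t
example_cwd_path(char* buffer, size_t size, bool kernel)
{
	struct io_context* context = get_current_io_context(kernel);

	mutex_lock(&context->io_mutex);
	struct vnode* cwd = context->cwd;
	if (cwd != NULL)
		inc_vnode_ref_count(cwd);
	mutex_unlock(&context->io_mutex);

	if (cwd == NULL)
		return B_ERROR;

	status_t status = dir_vnode_to_path(cwd, buffer, size, kernel);
	put_vnode(cwd);
	return status;
}
#endif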
2680 
2681 
2682 /*!	Checks the length of every path component, and adds a '.'
2683 	if the path ends in a slash.
2684 	The given path buffer must be able to store at least one
2685 	additional character.
2686 */
2687 static status_t
2688 check_path(char* to)
2689 {
2690 	int32 length = 0;
2691 
2692 	// check length of every path component
2693 
2694 	while (*to) {
2695 		char* begin;
2696 		if (*to == '/')
2697 			to++, length++;
2698 
2699 		begin = to;
2700 		while (*to != '/' && *to)
2701 			to++, length++;
2702 
2703 		if (to - begin > B_FILE_NAME_LENGTH)
2704 			return B_NAME_TOO_LONG;
2705 	}
2706 
2707 	if (length == 0)
2708 		return B_ENTRY_NOT_FOUND;
2709 
2710 	// complete path if there is a slash at the end
2711 
2712 	if (*(to - 1) == '/') {
2713 		if (length > B_PATH_NAME_LENGTH - 2)
2714 			return B_NAME_TOO_LONG;
2715 
2716 		to[0] = '.';
2717 		to[1] = '\0';
2718 	}
2719 
2720 	return B_OK;
2721 }
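

// Worked example (illustrative): for a buffer containing "/boot/home/",
// check_path() verifies that "boot" and "home" don't exceed
// B_FILE_NAME_LENGTH and, because of the trailing slash, appends "." in
// place, yielding "/boot/home/." - hence the requirement that the buffer
// has room for one extra character.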
2722 
2723 
2724 static struct file_descriptor*
2725 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2726 {
2727 	struct file_descriptor* descriptor
2728 		= get_fd(get_current_io_context(kernel), fd);
2729 	if (descriptor == NULL)
2730 		return NULL;
2731 
2732 	struct vnode* vnode = fd_vnode(descriptor);
2733 	if (vnode == NULL) {
2734 		put_fd(descriptor);
2735 		return NULL;
2736 	}
2737 
2738 	// ToDo: when we can close a file descriptor at any point, investigate
2739 	//	if this is still valid to do (accessing the vnode without ref_count
2740 	//	or locking)
2741 	*_vnode = vnode;
2742 	return descriptor;
2743 }
2744 
2745 
2746 static struct vnode*
2747 get_vnode_from_fd(int fd, bool kernel)
2748 {
2749 	struct file_descriptor* descriptor;
2750 	struct vnode* vnode;
2751 
2752 	descriptor = get_fd(get_current_io_context(kernel), fd);
2753 	if (descriptor == NULL)
2754 		return NULL;
2755 
2756 	vnode = fd_vnode(descriptor);
2757 	if (vnode != NULL)
2758 		inc_vnode_ref_count(vnode);
2759 
2760 	put_fd(descriptor);
2761 	return vnode;
2762 }
2763 
2764 
2765 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2766 	only the path will be considered. In this case, the \a path must not be
2767 	NULL.
2768 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2769 	and should be NULL for files.
2770 */
2771 static status_t
2772 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2773 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2774 {
2775 	if (fd < 0 && !path)
2776 		return B_BAD_VALUE;
2777 
2778 	if (path != NULL && *path == '\0')
2779 		return B_ENTRY_NOT_FOUND;
2780 
2781 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2782 		// no FD or absolute path
2783 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2784 	}
2785 
2786 	// FD only, or FD + relative path
2787 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2788 	if (!vnode)
2789 		return B_FILE_ERROR;
2790 
2791 	if (path != NULL) {
2792 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2793 			_vnode, _parentID);
2794 	}
2795 
2796 	// there is no relative path to take into account
2797 
2798 	*_vnode = vnode;
2799 	if (_parentID)
2800 		*_parentID = -1;
2801 
2802 	return B_OK;
2803 }
2804 
2805 
2806 static int
2807 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2808 	void* cookie, int openMode, bool kernel)
2809 {
2810 	struct file_descriptor* descriptor;
2811 	int fd;
2812 
2813 	// If the vnode is locked, we don't allow creating a new file/directory
2814 	// file_descriptor for it
2815 	if (vnode && vnode->mandatory_locked_by != NULL
2816 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2817 		return B_BUSY;
2818 
2819 	descriptor = alloc_fd();
2820 	if (!descriptor)
2821 		return B_NO_MEMORY;
2822 
2823 	if (vnode)
2824 		descriptor->u.vnode = vnode;
2825 	else
2826 		descriptor->u.mount = mount;
2827 	descriptor->cookie = cookie;
2828 
2829 	switch (type) {
2830 		// vnode types
2831 		case FDTYPE_FILE:
2832 			descriptor->ops = &sFileOps;
2833 			break;
2834 		case FDTYPE_DIR:
2835 			descriptor->ops = &sDirectoryOps;
2836 			break;
2837 		case FDTYPE_ATTR:
2838 			descriptor->ops = &sAttributeOps;
2839 			break;
2840 		case FDTYPE_ATTR_DIR:
2841 			descriptor->ops = &sAttributeDirectoryOps;
2842 			break;
2843 
2844 		// mount types
2845 		case FDTYPE_INDEX_DIR:
2846 			descriptor->ops = &sIndexDirectoryOps;
2847 			break;
2848 		case FDTYPE_QUERY:
2849 			descriptor->ops = &sQueryOps;
2850 			break;
2851 
2852 		default:
2853 			panic("get_new_fd() called with unknown type %d\n", type);
2854 			break;
2855 	}
2856 	descriptor->type = type;
2857 	descriptor->open_mode = openMode;
2858 
2859 	io_context* context = get_current_io_context(kernel);
2860 	fd = new_fd(context, descriptor);
2861 	if (fd < 0) {
2862 		free(descriptor);
2863 		return B_NO_MORE_FDS;
2864 	}
2865 
2866 	mutex_lock(&context->io_mutex);
2867 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2868 	mutex_unlock(&context->io_mutex);
2869 
2870 	return fd;
2871 }
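

// Usage sketch: a typical open path pairs an FS cookie with a freshly
// allocated descriptor. "cookie" stands for whatever the FS's open hook
// returned; on failure the caller still owns both the vnode reference and
// the cookie and must clean them up itself:
#if 0
static int
example_wrap_open_cookie(struct vnode* vnode, void* cookie, int openMode,
	bool kernel)
{
	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
	if (fd < 0) {
		// undo the open and release our vnode reference ourselves
		FS_CALL(vnode, close, cookie);
		FS_CALL(vnode, free_cookie, cookie);
		put_vnode(vnode);
	}
	return fd;
}
#endif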
2872 
2873 
/*!	Normalizes \a path in place. It's otherwise semantically equivalent to
	vfs_normalize_path(). See there for more documentation.
2876 */
2877 static status_t
2878 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2879 {
2880 	VNodePutter dirPutter;
2881 	struct vnode* dir = NULL;
2882 	status_t error;
2883 
2884 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2885 		// get dir vnode + leaf name
2886 		struct vnode* nextDir;
2887 		char leaf[B_FILE_NAME_LENGTH];
2888 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2889 		if (error != B_OK)
2890 			return error;
2891 
2892 		dir = nextDir;
2893 		strcpy(path, leaf);
2894 		dirPutter.SetTo(dir);
2895 
2896 		// get file vnode, if we shall resolve links
2897 		bool fileExists = false;
2898 		struct vnode* fileVnode;
2899 		VNodePutter fileVnodePutter;
2900 		if (traverseLink) {
2901 			inc_vnode_ref_count(dir);
2902 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2903 					NULL) == B_OK) {
2904 				fileVnodePutter.SetTo(fileVnode);
2905 				fileExists = true;
2906 			}
2907 		}
2908 
2909 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2910 			// we're done -- construct the path
2911 			bool hasLeaf = true;
2912 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2913 				// special cases "." and ".." -- get the dir, forget the leaf
2914 				inc_vnode_ref_count(dir);
2915 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2916 					&nextDir, NULL);
2917 				if (error != B_OK)
2918 					return error;
2919 				dir = nextDir;
2920 				dirPutter.SetTo(dir);
2921 				hasLeaf = false;
2922 			}
2923 
2924 			// get the directory path
2925 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2926 			if (error != B_OK)
2927 				return error;
2928 
2929 			// append the leaf name
2930 			if (hasLeaf) {
2931 				// insert a directory separator if this is not the file system
2932 				// root
2933 				if ((strcmp(path, "/") != 0
2934 					&& strlcat(path, "/", pathSize) >= pathSize)
2935 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2936 					return B_NAME_TOO_LONG;
2937 				}
2938 			}
2939 
2940 			return B_OK;
2941 		}
2942 
2943 		// read link
2944 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2945 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2946 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2947 			if (error != B_OK)
2948 				return error;
2949 			path[bufferSize] = '\0';
2950 		} else
2951 			return B_BAD_VALUE;
2952 	}
2953 
2954 	return B_LINK_LIMIT;
2955 }
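

// Worked example (illustrative): "/boot//home/." normalizes to
// "/boot/home" - the "." leaf is folded into the directory, and
// dir_vnode_to_path() rebuilds the canonical path without redundant
// separators. A symlink leaf is re-read and the loop restarted, at most
// B_MAX_SYMLINKS times, when traverseLink is true.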
2956 
2957 
2958 #ifdef ADD_DEBUGGER_COMMANDS
2959 
2960 
2961 static void
2962 _dump_advisory_locking(advisory_locking* locking)
2963 {
2964 	if (locking == NULL)
2965 		return;
2966 
2967 	kprintf("   lock:        %" B_PRId32, locking->lock);
2968 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2969 
2970 	int32 index = 0;
2971 	LockList::Iterator iterator = locking->locks.GetIterator();
2972 	while (iterator.HasNext()) {
2973 		struct advisory_lock* lock = iterator.Next();
2974 
2975 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2976 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2977 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2978 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2979 	}
2980 }
2981 
2982 
2983 static void
2984 _dump_mount(struct fs_mount* mount)
2985 {
2986 	kprintf("MOUNT: %p\n", mount);
2987 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
2988 	kprintf(" device_name:   %s\n", mount->device_name);
2989 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2990 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2991 	kprintf(" partition:     %p\n", mount->partition);
2992 	kprintf(" lock:          %p\n", &mount->rlock);
2993 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2994 		mount->owns_file_device ? " owns_file_device" : "");
2995 
2996 	fs_volume* volume = mount->volume;
2997 	while (volume != NULL) {
2998 		kprintf(" volume %p:\n", volume);
2999 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3000 		kprintf("  private_volume:   %p\n", volume->private_volume);
3001 		kprintf("  ops:              %p\n", volume->ops);
3002 		kprintf("  file_system:      %p\n", volume->file_system);
3003 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3004 		volume = volume->super_volume;
3005 	}
3006 
3007 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3008 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3009 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3010 	set_debug_variable("_partition", (addr_t)mount->partition);
3011 }
3012 
3013 
3014 static bool
3015 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3016 	const char* name)
3017 {
3018 	bool insertSlash = buffer[bufferSize] != '\0';
3019 	size_t nameLength = strlen(name);
3020 
3021 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3022 		return false;
3023 
3024 	if (insertSlash)
3025 		buffer[--bufferSize] = '/';
3026 
3027 	bufferSize -= nameLength;
3028 	memcpy(buffer + bufferSize, name, nameLength);
3029 
3030 	return true;
3031 }
3032 
3033 
3034 static bool
3035 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3036 	ino_t nodeID)
3037 {
3038 	if (bufferSize == 0)
3039 		return false;
3040 
3041 	bool insertSlash = buffer[bufferSize] != '\0';
3042 	if (insertSlash)
3043 		buffer[--bufferSize] = '/';
3044 
3045 	size_t size = snprintf(buffer, bufferSize,
3046 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3047 	if (size > bufferSize) {
3048 		if (insertSlash)
3049 			bufferSize++;
3050 		return false;
3051 	}
3052 
3053 	if (size < bufferSize)
3054 		memmove(buffer + bufferSize - size, buffer, size);
3055 
3056 	bufferSize -= size;
3057 	return true;
3058 }
3059 
3060 
3061 static char*
3062 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3063 	bool& _truncated)
3064 {
3065 	// null-terminate the path
3066 	buffer[--bufferSize] = '\0';
3067 
3068 	while (true) {
3069 		while (vnode->covers != NULL)
3070 			vnode = vnode->covers;
3071 
3072 		if (vnode == sRoot) {
3073 			_truncated = bufferSize == 0;
3074 			if (!_truncated)
3075 				buffer[--bufferSize] = '/';
3076 			return buffer + bufferSize;
3077 		}
3078 
3079 		// resolve the name
3080 		ino_t dirID;
3081 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3082 			vnode->id, dirID);
3083 		if (name == NULL) {
3084 			// Failed to resolve the name -- prepend "<dev,node>/".
3085 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3086 				vnode->mount->id, vnode->id);
3087 			return buffer + bufferSize;
3088 		}
3089 
3090 		// prepend the name
3091 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3092 			_truncated = true;
3093 			return buffer + bufferSize;
3094 		}
3095 
3096 		// resolve the directory node
3097 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3098 		if (nextVnode == NULL) {
3099 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3100 				vnode->mount->id, dirID);
3101 			return buffer + bufferSize;
3102 		}
3103 
3104 		vnode = nextVnode;
3105 	}
3106 }
3107 
3108 
3109 static void
3110 _dump_vnode(struct vnode* vnode, bool printPath)
3111 {
3112 	kprintf("VNODE: %p\n", vnode);
3113 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3114 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3115 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3116 	kprintf(" private_node:  %p\n", vnode->private_node);
3117 	kprintf(" mount:         %p\n", vnode->mount);
3118 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3119 	kprintf(" covers:        %p\n", vnode->covers);
3120 	kprintf(" cache:         %p\n", vnode->cache);
3121 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3122 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3123 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3124 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3125 
3126 	_dump_advisory_locking(vnode->advisory_locking);
3127 
3128 	if (printPath) {
3129 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3130 		if (buffer != NULL) {
3131 			bool truncated;
3132 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3133 				B_PATH_NAME_LENGTH, truncated);
3134 			if (path != NULL) {
3135 				kprintf(" path:          ");
3136 				if (truncated)
3137 					kputs("<truncated>/");
3138 				kputs(path);
3139 				kputs("\n");
3140 			} else
3141 				kprintf("Failed to resolve vnode path.\n");
3142 
3143 			debug_free(buffer);
3144 		} else
3145 			kprintf("Failed to allocate memory for constructing the path.\n");
3146 	}
3147 
3148 	set_debug_variable("_node", (addr_t)vnode->private_node);
3149 	set_debug_variable("_mount", (addr_t)vnode->mount);
3150 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3151 	set_debug_variable("_covers", (addr_t)vnode->covers);
3152 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3153 }
3154 
3155 
3156 static int
3157 dump_mount(int argc, char** argv)
3158 {
3159 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3160 		kprintf("usage: %s [id|address]\n", argv[0]);
3161 		return 0;
3162 	}
3163 
3164 	ulong val = parse_expression(argv[1]);
3165 	uint32 id = val;
3166 
3167 	struct fs_mount* mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3168 	if (mount == NULL) {
3169 		if (IS_USER_ADDRESS(id)) {
3170 			kprintf("fs_mount not found\n");
3171 			return 0;
3172 		}
3173 		mount = (fs_mount*)val;
3174 	}
3175 
3176 	_dump_mount(mount);
3177 	return 0;
3178 }
3179 
3180 
3181 static int
3182 dump_mounts(int argc, char** argv)
3183 {
3184 	if (argc != 1) {
3185 		kprintf("usage: %s\n", argv[0]);
3186 		return 0;
3187 	}
3188 
3189 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3190 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3191 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3192 
3193 	struct hash_iterator iterator;
3194 	struct fs_mount* mount;
3195 
3196 	hash_open(sMountsTable, &iterator);
3197 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3198 			!= NULL) {
3199 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3200 			mount->root_vnode->covers, mount->volume->private_volume,
3201 			mount->volume->file_system_name);
3202 
3203 		fs_volume* volume = mount->volume;
3204 		while (volume->super_volume != NULL) {
3205 			volume = volume->super_volume;
3206 			kprintf("                                     %p %s\n",
3207 				volume->private_volume, volume->file_system_name);
3208 		}
3209 	}
3210 
3211 	hash_close(sMountsTable, &iterator, false);
3212 	return 0;
3213 }
3214 
3215 
3216 static int
3217 dump_vnode(int argc, char** argv)
3218 {
3219 	bool printPath = false;
3220 	int argi = 1;
3221 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3222 		printPath = true;
3223 		argi++;
3224 	}
3225 
3226 	if (argi >= argc || argi + 2 < argc) {
3227 		print_debugger_command_usage(argv[0]);
3228 		return 0;
3229 	}
3230 
3231 	struct vnode* vnode = NULL;
3232 
3233 	if (argi + 1 == argc) {
3234 		vnode = (struct vnode*)parse_expression(argv[argi]);
3235 		if (IS_USER_ADDRESS(vnode)) {
3236 			kprintf("invalid vnode address\n");
3237 			return 0;
3238 		}
3239 		_dump_vnode(vnode, printPath);
3240 		return 0;
3241 	}
3242 
3243 	struct hash_iterator iterator;
3244 	dev_t device = parse_expression(argv[argi]);
3245 	ino_t id = parse_expression(argv[argi + 1]);
3246 
3247 	hash_open(sVnodeTable, &iterator);
3248 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3249 		if (vnode->id != id || vnode->device != device)
3250 			continue;
3251 
3252 		_dump_vnode(vnode, printPath);
3253 	}
3254 
3255 	hash_close(sVnodeTable, &iterator, false);
3256 	return 0;
3257 }
3258 
3259 
3260 static int
3261 dump_vnodes(int argc, char** argv)
3262 {
3263 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3264 		kprintf("usage: %s [device]\n", argv[0]);
3265 		return 0;
3266 	}
3267 
3268 	// restrict dumped nodes to a certain device if requested
3269 	dev_t device = parse_expression(argv[1]);
3270 
3271 	struct hash_iterator iterator;
3272 	struct vnode* vnode;
3273 
3274 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3275 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3276 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3277 
3278 	hash_open(sVnodeTable, &iterator);
3279 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3280 		if (vnode->device != device)
3281 			continue;
3282 
3283 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3284 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3285 			vnode->private_node, vnode->advisory_locking,
3286 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3287 			vnode->IsUnpublished() ? "u" : "-");
3288 	}
3289 
3290 	hash_close(sVnodeTable, &iterator, false);
3291 	return 0;
3292 }
3293 
3294 
3295 static int
3296 dump_vnode_caches(int argc, char** argv)
3297 {
3298 	struct hash_iterator iterator;
3299 	struct vnode* vnode;
3300 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3302 		kprintf("usage: %s [device]\n", argv[0]);
3303 		return 0;
3304 	}
3305 
3306 	// restrict dumped nodes to a certain device if requested
3307 	dev_t device = -1;
3308 	if (argc > 1)
3309 		device = parse_expression(argv[1]);
3310 
3311 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3312 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3313 
3314 	hash_open(sVnodeTable, &iterator);
3315 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3316 		if (vnode->cache == NULL)
3317 			continue;
3318 		if (device != -1 && vnode->device != device)
3319 			continue;
3320 
3321 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3322 			vnode, vnode->device, vnode->id, vnode->cache,
3323 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3324 			vnode->cache->page_count);
3325 	}
3326 
3327 	hash_close(sVnodeTable, &iterator, false);
3328 	return 0;
3329 }
3330 
3331 
3332 int
3333 dump_io_context(int argc, char** argv)
3334 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3336 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3337 		return 0;
3338 	}
3339 
3340 	struct io_context* context = NULL;
3341 
3342 	if (argc > 1) {
3343 		ulong num = parse_expression(argv[1]);
3344 		if (IS_KERNEL_ADDRESS(num))
3345 			context = (struct io_context*)num;
3346 		else {
3347 			Team* team = team_get_team_struct_locked(num);
3348 			if (team == NULL) {
3349 				kprintf("could not find team with ID %lu\n", num);
3350 				return 0;
3351 			}
3352 			context = (struct io_context*)team->io_context;
3353 		}
3354 	} else
3355 		context = get_current_io_context(true);
3356 
3357 	kprintf("I/O CONTEXT: %p\n", context);
3358 	kprintf(" root vnode:\t%p\n", context->root);
3359 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3360 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3361 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3362 
3363 	if (context->num_used_fds) {
3364 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3365 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3366 	}
3367 
3368 	for (uint32 i = 0; i < context->table_size; i++) {
3369 		struct file_descriptor* fd = context->fds[i];
3370 		if (fd == NULL)
3371 			continue;
3372 
3373 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3374 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3375 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3376 			fd->pos, fd->cookie,
3377 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3378 				? "mount" : "vnode",
3379 			fd->u.vnode);
3380 	}
3381 
3382 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3383 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3384 
3385 	set_debug_variable("_cwd", (addr_t)context->cwd);
3386 
3387 	return 0;
3388 }
3389 
3390 
3391 int
3392 dump_vnode_usage(int argc, char** argv)
3393 {
3394 	if (argc != 1) {
3395 		kprintf("usage: %s\n", argv[0]);
3396 		return 0;
3397 	}
3398 
3399 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3400 		sUnusedVnodes, kMaxUnusedVnodes);
3401 
3402 	struct hash_iterator iterator;
3403 	hash_open(sVnodeTable, &iterator);
3404 
3405 	uint32 count = 0;
3406 	struct vnode* vnode;
3407 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3408 		count++;
3409 	}
3410 
3411 	hash_close(sVnodeTable, &iterator, false);
3412 
3413 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3414 		count - sUnusedVnodes);
3415 	return 0;
3416 }
3417 
3418 #endif	// ADD_DEBUGGER_COMMANDS
3419 
3420 /*!	Clears an iovec array of physical pages.
3421 	Returns in \a _bytes the number of bytes successfully cleared.
3422 */
3423 static status_t
3424 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3425 {
3426 	size_t bytes = *_bytes;
3427 	size_t index = 0;
3428 
3429 	while (bytes > 0) {
3430 		size_t length = min_c(vecs[index].iov_len, bytes);
3431 
3432 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3433 			length);
3434 		if (status != B_OK) {
3435 			*_bytes -= bytes;
3436 			return status;
3437 		}
3438 
		bytes -= length;
		index++;
			// advance to the next vec, or we would clear the same one forever
	}
3441 
3442 	return B_OK;
3443 }
3444 
3445 
3446 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3447 	and calls the file system hooks to read/write the request to disk.
3448 */
3449 static status_t
3450 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3451 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3452 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3453 	bool doWrite)
3454 {
3455 	if (fileVecCount == 0) {
3456 		// There are no file vecs at this offset, so we're obviously trying
3457 		// to access the file outside of its bounds
3458 		return B_BAD_VALUE;
3459 	}
3460 
3461 	size_t numBytes = *_numBytes;
3462 	uint32 fileVecIndex;
3463 	size_t vecOffset = *_vecOffset;
3464 	uint32 vecIndex = *_vecIndex;
3465 	status_t status;
3466 	size_t size;
3467 
3468 	if (!doWrite && vecOffset == 0) {
3469 		// now directly read the data from the device
3470 		// the first file_io_vec can be read directly
3471 
3472 		if (fileVecs[0].length < (off_t)numBytes)
3473 			size = fileVecs[0].length;
3474 		else
3475 			size = numBytes;
3476 
3477 		if (fileVecs[0].offset >= 0) {
3478 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3479 				&vecs[vecIndex], vecCount - vecIndex, &size);
3480 		} else {
3481 			// sparse read
3482 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3483 		}
3484 		if (status != B_OK)
3485 			return status;
3486 
3487 		// TODO: this is a work-around for buggy device drivers!
3488 		//	When our own drivers honour the length, we can:
3489 		//	a) also use this direct I/O for writes (otherwise, it would
3490 		//	   overwrite precious data)
3491 		//	b) panic if the term below is true (at least for writes)
3492 		if ((off_t)size > fileVecs[0].length) {
3493 			//dprintf("warning: device driver %p doesn't respect total length "
3494 			//	"in read_pages() call!\n", ref->device);
3495 			size = fileVecs[0].length;
3496 		}
3497 
3498 		ASSERT((off_t)size <= fileVecs[0].length);
3499 
3500 		// If the file portion was contiguous, we're already done now
3501 		if (size == numBytes)
3502 			return B_OK;
3503 
3504 		// if we reached the end of the file, we can return as well
3505 		if ((off_t)size != fileVecs[0].length) {
3506 			*_numBytes = size;
3507 			return B_OK;
3508 		}
3509 
3510 		fileVecIndex = 1;
3511 
3512 		// first, find out where we have to continue in our iovecs
3513 		for (; vecIndex < vecCount; vecIndex++) {
3514 			if (size < vecs[vecIndex].iov_len)
3515 				break;
3516 
3517 			size -= vecs[vecIndex].iov_len;
3518 		}
3519 
3520 		vecOffset = size;
3521 	} else {
3522 		fileVecIndex = 0;
3523 		size = 0;
3524 	}
3525 
3526 	// Too bad, let's process the rest of the file_io_vecs
3527 
3528 	size_t totalSize = size;
3529 	size_t bytesLeft = numBytes - size;
3530 
3531 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3532 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3533 		off_t fileOffset = fileVec.offset;
3534 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3535 
3536 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3537 			fileLeft));
3538 
3539 		// process the complete fileVec
3540 		while (fileLeft > 0) {
3541 			iovec tempVecs[MAX_TEMP_IO_VECS];
3542 			uint32 tempCount = 0;
3543 
3544 			// size tracks how much of what is left of the current fileVec
3545 			// (fileLeft) has been assigned to tempVecs
3546 			size = 0;
3547 
3548 			// assign what is left of the current fileVec to the tempVecs
3549 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3550 					&& tempCount < MAX_TEMP_IO_VECS;) {
3551 				// try to satisfy one iovec per iteration (or as much as
3552 				// possible)
3553 
3554 				// bytes left of the current iovec
3555 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3556 				if (vecLeft == 0) {
3557 					vecOffset = 0;
3558 					vecIndex++;
3559 					continue;
3560 				}
3561 
3562 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3563 					vecIndex, vecOffset, size));
3564 
3565 				// actually available bytes
3566 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3567 
3568 				tempVecs[tempCount].iov_base
3569 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3570 				tempVecs[tempCount].iov_len = tempVecSize;
3571 				tempCount++;
3572 
3573 				size += tempVecSize;
3574 				vecOffset += tempVecSize;
3575 			}
3576 
3577 			size_t bytes = size;
3578 
3579 			if (fileOffset == -1) {
3580 				if (doWrite) {
3581 					panic("sparse write attempt: vnode %p", vnode);
3582 					status = B_IO_ERROR;
3583 				} else {
3584 					// sparse read
3585 					status = zero_pages(tempVecs, tempCount, &bytes);
3586 				}
3587 			} else if (doWrite) {
3588 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3589 					tempVecs, tempCount, &bytes);
3590 			} else {
3591 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3592 					tempVecs, tempCount, &bytes);
3593 			}
3594 			if (status != B_OK)
3595 				return status;
3596 
3597 			totalSize += bytes;
3598 			bytesLeft -= size;
3599 			if (fileOffset >= 0)
3600 				fileOffset += size;
3601 			fileLeft -= size;
3602 			//dprintf("-> file left = %Lu\n", fileLeft);
3603 
3604 			if (size != bytes || vecIndex >= vecCount) {
3605 				// there are no more bytes or iovecs, let's bail out
3606 				*_numBytes = totalSize;
3607 				return B_OK;
3608 			}
3609 		}
3610 	}
3611 
3612 	*_vecIndex = vecIndex;
3613 	*_vecOffset = vecOffset;
3614 	*_numBytes = totalSize;
3615 	return B_OK;
3616 }
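

// Worked example (illustrative): reading 8 KiB that is split on disk into
// the file_io_vecs { offset 0, length 4096 } and { offset 65536, length
// 4096 } with a single 8 KiB iovec: the first fileVec is read directly via
// read_pages(), then tempVecs are built for the remaining half of the iovec
// (vecOffset == 4096) and a second read_pages() is issued at offset 65536.
// A fileVec offset of -1 denotes a sparse extent, which is zeroed via
// zero_pages() instead of being read.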
3617 
3618 
3619 //	#pragma mark - public API for file systems
3620 
3621 
3622 extern "C" status_t
3623 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3624 	fs_vnode_ops* ops)
3625 {
3626 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3627 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3628 
3629 	if (privateNode == NULL)
3630 		return B_BAD_VALUE;
3631 
3632 	// create the node
3633 	bool nodeCreated;
3634 	struct vnode* vnode;
3635 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3636 		nodeCreated);
3637 	if (status != B_OK)
3638 		return status;
3639 
3640 	WriteLocker nodeLocker(sVnodeLock, true);
3641 		// create_new_vnode_and_lock() has locked for us
3642 
3643 	// file system integrity check:
3644 	// test if the vnode already exists and bail out if this is the case!
3645 	if (!nodeCreated) {
3646 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3647 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3648 			vnode->private_node);
3649 		return B_ERROR;
3650 	}
3651 
3652 	vnode->private_node = privateNode;
3653 	vnode->ops = ops;
3654 	vnode->SetUnpublished(true);
3655 
3656 	TRACE(("returns: %s\n", strerror(status)));
3657 
3658 	return status;
3659 }
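

// Usage sketch for file system implementations: new_vnode() makes a node
// known to the VFS while keeping it invisible (busy and unpublished), which
// is useful while the on-disk entry is still being constructed;
// publish_vnode() below completes the pair. "gMyVnodeOps" and "myInode" are
// hypothetical FS-side names:
#if 0
static status_t
example_create_node(fs_volume* volume, ino_t id, void* myInode)
{
	status_t status = new_vnode(volume, id, myInode, &gMyVnodeOps);
	if (status != B_OK)
		return status;

	// ... write the new entry to disk ...

	return publish_vnode(volume, id, myInode, &gMyVnodeOps, S_IFREG, 0);
}
#endif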
3660 
3661 
3662 extern "C" status_t
3663 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3664 	fs_vnode_ops* ops, int type, uint32 flags)
3665 {
3666 	FUNCTION(("publish_vnode()\n"));
3667 
3668 	WriteLocker locker(sVnodeLock);
3669 
3670 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3671 
3672 	bool nodeCreated = false;
3673 	if (vnode == NULL) {
3674 		if (privateNode == NULL)
3675 			return B_BAD_VALUE;
3676 
3677 		// create the node
3678 		locker.Unlock();
3679 			// create_new_vnode_and_lock() will re-lock for us on success
3680 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3681 			nodeCreated);
3682 		if (status != B_OK)
3683 			return status;
3684 
3685 		locker.SetTo(sVnodeLock, true);
3686 	}
3687 
3688 	if (nodeCreated) {
3689 		vnode->private_node = privateNode;
3690 		vnode->ops = ops;
3691 		vnode->SetUnpublished(true);
3692 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3693 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3694 		// already known, but not published
3695 	} else
3696 		return B_BAD_VALUE;
3697 
3698 	bool publishSpecialSubNode = false;
3699 
3700 	vnode->SetType(type);
3701 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3702 	publishSpecialSubNode = is_special_node_type(type)
3703 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3704 
3705 	status_t status = B_OK;
3706 
3707 	// create sub vnodes, if necessary
3708 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3709 		locker.Unlock();
3710 
3711 		fs_volume* subVolume = volume;
3712 		if (volume->sub_volume != NULL) {
3713 			while (status == B_OK && subVolume->sub_volume != NULL) {
3714 				subVolume = subVolume->sub_volume;
3715 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3716 					vnode);
3717 			}
3718 		}
3719 
3720 		if (status == B_OK && publishSpecialSubNode)
3721 			status = create_special_sub_node(vnode, flags);
3722 
3723 		if (status != B_OK) {
3724 			// error -- clean up the created sub vnodes
3725 			while (subVolume->super_volume != volume) {
3726 				subVolume = subVolume->super_volume;
3727 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3728 			}
3729 		}
3730 
3731 		if (status == B_OK) {
3732 			ReadLocker vnodesReadLocker(sVnodeLock);
3733 			AutoLocker<Vnode> nodeLocker(vnode);
3734 			vnode->SetBusy(false);
3735 			vnode->SetUnpublished(false);
3736 		} else {
3737 			locker.Lock();
3738 			hash_remove(sVnodeTable, vnode);
3739 			remove_vnode_from_mount_list(vnode, vnode->mount);
3740 			free(vnode);
3741 		}
3742 	} else {
3743 		// we still hold the write lock -- mark the node unbusy and published
3744 		vnode->SetBusy(false);
3745 		vnode->SetUnpublished(false);
3746 	}
3747 
3748 	TRACE(("returns: %s\n", strerror(status)));
3749 
3750 	return status;
3751 }
3752 
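/* Illustrative sketch (not part of this file): the typical two-step use of
   new_vnode()/publish_vnode() by a file system. Everything prefixed "my_"
   or "gMy" is a hypothetical example, not a real API.

	status_t
	my_fs_create_node(fs_volume* volume, my_inode* inode)
	{
		// register the node with the VFS, still unpublished and thus
		// invisible to lookups
		status_t status = new_vnode(volume, inode->id, inode, &gMyVnodeOps);
		if (status != B_OK)
			return status;

		// (the FS would write its on-disk structures here; on failure,
		// remove_vnode() discards the unpublished node again)

		// make the node available to lookups and get_vnode()
		return publish_vnode(volume, inode->id, inode, &gMyVnodeOps,
			S_IFREG | 0644, 0);
	}
*/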
3753 
3754 extern "C" status_t
3755 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3756 {
3757 	struct vnode* vnode;
3758 
3759 	if (volume == NULL)
3760 		return B_BAD_VALUE;
3761 
3762 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3763 	if (status != B_OK)
3764 		return status;
3765 
3766 	// If this is a layered FS, we need to get the node cookie for the requested
3767 	// layer.
3768 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3769 		fs_vnode resolvedNode;
3770 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3771 			&resolvedNode);
3772 		if (status != B_OK) {
3773 			panic("get_vnode(): Failed to get super node for vnode %p, "
3774 				"volume: %p", vnode, volume);
3775 			put_vnode(vnode);
3776 			return status;
3777 		}
3778 
3779 		if (_privateNode != NULL)
3780 			*_privateNode = resolvedNode.private_node;
3781 	} else if (_privateNode != NULL)
3782 		*_privateNode = vnode->private_node;
3783 
3784 	return B_OK;
3785 }
3786 
3787 
3788 extern "C" status_t
3789 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3790 {
3791 	struct vnode* vnode;
3792 
3793 	rw_lock_read_lock(&sVnodeLock);
3794 	vnode = lookup_vnode(volume->id, vnodeID);
3795 	rw_lock_read_unlock(&sVnodeLock);
3796 
3797 	if (vnode == NULL)
3798 		return B_BAD_VALUE;
3799 
3800 	inc_vnode_ref_count(vnode);
3801 	return B_OK;
3802 }
3803 
3804 
3805 extern "C" status_t
3806 put_vnode(fs_volume* volume, ino_t vnodeID)
3807 {
3808 	struct vnode* vnode;
3809 
3810 	rw_lock_read_lock(&sVnodeLock);
3811 	vnode = lookup_vnode(volume->id, vnodeID);
3812 	rw_lock_read_unlock(&sVnodeLock);
3813 
3814 	if (vnode == NULL)
3815 		return B_BAD_VALUE;
3816 
3817 	dec_vnode_ref_count(vnode, false, true);
3818 	return B_OK;
3819 }
3820 
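/* Illustrative sketch: get_vnode()/put_vnode() bracket temporary access to
   another node from file system code. "childID" and my_use_node() are
   hypothetical:

	void* privateNode;
	status_t status = get_vnode(volume, childID, &privateNode);
	if (status == B_OK) {
		my_use_node(privateNode);	// work with the FS-private node
		put_vnode(volume, childID);	// release the reference again
	}
*/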
3821 
3822 extern "C" status_t
3823 remove_vnode(fs_volume* volume, ino_t vnodeID)
3824 {
3825 	ReadLocker locker(sVnodeLock);
3826 
3827 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3828 	if (vnode == NULL)
3829 		return B_ENTRY_NOT_FOUND;
3830 
3831 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3832 		// this vnode is in use
3833 		return B_BUSY;
3834 	}
3835 
3836 	vnode->Lock();
3837 
3838 	vnode->SetRemoved(true);
3839 	bool removeUnpublished = false;
3840 
3841 	if (vnode->IsUnpublished()) {
3842 		// prepare the vnode for deletion
3843 		removeUnpublished = true;
3844 		vnode->SetBusy(true);
3845 	}
3846 
3847 	vnode->Unlock();
3848 	locker.Unlock();
3849 
3850 	if (removeUnpublished) {
3851 		// If the vnode hasn't been published yet, we delete it here
3852 		atomic_add(&vnode->ref_count, -1);
3853 		free_vnode(vnode, true);
3854 	}
3855 
3856 	return B_OK;
3857 }
3858 
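/* Illustrative sketch: a file system typically calls remove_vnode() from
   its unlink hook once the last directory entry for a node is gone; the
   node is actually deleted when its last reference is put. The "my_"
   names are hypothetical:

	status_t
	my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
	{
		ino_t id;
		status_t status = my_remove_dir_entry(dir, name, &id);
			// hypothetical helper: removes the entry, returns the node ID
		if (status != B_OK)
			return status;

		return remove_vnode(volume, id);
	}
*/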
3859 
3860 extern "C" status_t
3861 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3862 {
3863 	struct vnode* vnode;
3864 
3865 	rw_lock_read_lock(&sVnodeLock);
3866 
3867 	vnode = lookup_vnode(volume->id, vnodeID);
3868 	if (vnode) {
3869 		AutoLocker<Vnode> nodeLocker(vnode);
3870 		vnode->SetRemoved(false);
3871 	}
3872 
3873 	rw_lock_read_unlock(&sVnodeLock);
3874 	return B_OK;
3875 }
3876 
3877 
3878 extern "C" status_t
3879 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3880 {
3881 	ReadLocker _(sVnodeLock);
3882 
3883 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3884 		if (_removed != NULL)
3885 			*_removed = vnode->IsRemoved();
3886 		return B_OK;
3887 	}
3888 
3889 	return B_BAD_VALUE;
3890 }
3891 
3892 
3893 extern "C" fs_volume*
3894 volume_for_vnode(fs_vnode* _vnode)
3895 {
3896 	if (_vnode == NULL)
3897 		return NULL;
3898 
3899 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3900 	return vnode->mount->volume;
3901 }
3902 
3903 
3904 #if 0
3905 extern "C" status_t
3906 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3907 	size_t* _numBytes)
3908 {
3909 	struct file_descriptor* descriptor;
3910 	struct vnode* vnode;
3911 
3912 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3913 	if (descriptor == NULL)
3914 		return B_FILE_ERROR;
3915 
3916 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3917 		count, 0, _numBytes);
3918 
3919 	put_fd(descriptor);
3920 	return status;
3921 }
3922 
3923 
3924 extern "C" status_t
3925 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3926 	size_t* _numBytes)
3927 {
3928 	struct file_descriptor* descriptor;
3929 	struct vnode* vnode;
3930 
3931 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3932 	if (descriptor == NULL)
3933 		return B_FILE_ERROR;
3934 
3935 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3936 		count, 0, _numBytes);
3937 
3938 	put_fd(descriptor);
3939 	return status;
3940 }
3941 #endif
3942 
3943 
3944 extern "C" status_t
3945 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3946 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3947 	size_t* _bytes)
3948 {
3949 	struct file_descriptor* descriptor;
3950 	struct vnode* vnode;
3951 
3952 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3953 	if (descriptor == NULL)
3954 		return B_FILE_ERROR;
3955 
3956 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3957 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3958 		false);
3959 
3960 	put_fd(descriptor);
3961 	return status;
3962 }
3963 
3964 
3965 extern "C" status_t
3966 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3967 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3968 	size_t* _bytes)
3969 {
3970 	struct file_descriptor* descriptor;
3971 	struct vnode* vnode;
3972 
3973 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3974 	if (descriptor == NULL)
3975 		return B_FILE_ERROR;
3976 
3977 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3978 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3979 		true);
3980 
3981 	put_fd(descriptor);
3982 	return status;
3983 }
3984 
3985 
3986 extern "C" status_t
3987 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3988 {
3989 	// lookup mount -- the caller is required to make sure that the mount
3990 	// won't go away
3991 	MutexLocker locker(sMountMutex);
3992 	struct fs_mount* mount = find_mount(mountID);
3993 	if (mount == NULL)
3994 		return B_BAD_VALUE;
3995 	locker.Unlock();
3996 
3997 	return mount->entry_cache.Add(dirID, name, nodeID);
3998 }
3999 
4000 
4001 extern "C" status_t
4002 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4003 {
4004 	// lookup mount -- the caller is required to make sure that the mount
4005 	// won't go away
4006 	MutexLocker locker(sMountMutex);
4007 	struct fs_mount* mount = find_mount(mountID);
4008 	if (mount == NULL)
4009 		return B_BAD_VALUE;
4010 	locker.Unlock();
4011 
4012 	return mount->entry_cache.Remove(dirID, name);
4013 }
4014 
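/* Illustrative sketch: a file system keeps the entry cache coherent by
   pairing these calls with its directory modifications, e.g. after the
   on-disk part of a rename has succeeded ("fromDirID" etc. are
   hypothetical):

	entry_cache_remove(volume->id, fromDirID, fromName);
	entry_cache_add(volume->id, toDirID, toName, nodeID);
*/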
4015 
4016 //	#pragma mark - private VFS API
4017 //	Functions the VFS exports for other parts of the kernel
4018 
4019 
4020 /*! Acquires another reference to the vnode that has to be released
4021 	by calling vfs_put_vnode().
4022 */
4023 void
4024 vfs_acquire_vnode(struct vnode* vnode)
4025 {
4026 	inc_vnode_ref_count(vnode);
4027 }
4028 
4029 
4030 /*! This is currently called from file_cache_create() only.
4031 	It's probably a temporary solution as long as devfs requires that
4032 	fs_read_pages()/fs_write_pages() are called with the standard
4033 	open cookie and not with a device cookie.
4034 	If that's done differently, remove this call; it has no other
4035 	purpose.
4036 */
4037 extern "C" status_t
4038 vfs_get_cookie_from_fd(int fd, void** _cookie)
4039 {
4040 	struct file_descriptor* descriptor;
4041 
4042 	descriptor = get_fd(get_current_io_context(true), fd);
4043 	if (descriptor == NULL)
4044 		return B_FILE_ERROR;
4045 
4046 	*_cookie = descriptor->cookie;
4047 	return B_OK;
4048 }
4049 
4050 
4051 extern "C" status_t
4052 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4053 {
4054 	*vnode = get_vnode_from_fd(fd, kernel);
4055 
4056 	if (*vnode == NULL)
4057 		return B_FILE_ERROR;
4058 
4059 	return B_NO_ERROR;
4060 }
4061 
4062 
4063 extern "C" status_t
4064 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4065 {
4066 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4067 		path, kernel));
4068 
4069 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4070 	if (pathBuffer.InitCheck() != B_OK)
4071 		return B_NO_MEMORY;
4072 
4073 	char* buffer = pathBuffer.LockBuffer();
4074 	strlcpy(buffer, path, pathBuffer.BufferSize());
4075 
4076 	struct vnode* vnode;
4077 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4078 	if (status != B_OK)
4079 		return status;
4080 
4081 	*_vnode = vnode;
4082 	return B_OK;
4083 }
4084 
4085 
4086 extern "C" status_t
4087 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4088 {
4089 	struct vnode* vnode;
4090 
4091 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4092 	if (status != B_OK)
4093 		return status;
4094 
4095 	*_vnode = vnode;
4096 	return B_OK;
4097 }
4098 
4099 
4100 extern "C" status_t
4101 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4102 	const char* name, struct vnode** _vnode)
4103 {
4104 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4105 }
4106 
4107 
4108 extern "C" void
4109 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4110 {
4111 	*_mountID = vnode->device;
4112 	*_vnodeID = vnode->id;
4113 }
4114 
4115 
4116 /*!
4117 	Helper function abstracting the process of "converting" a given
4118 	vnode-pointer to a fs_vnode-pointer.
4119 	Currently only used in bindfs.
4120 */
4121 extern "C" fs_vnode*
4122 vfs_fsnode_for_vnode(struct vnode* vnode)
4123 {
4124 	return vnode;
4125 }
4126 
4127 
4128 /*!
4129 	Calls fs_open() on the given vnode and returns a new
4130 	file descriptor for it
4131 */
4132 int
4133 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4134 {
4135 	return open_vnode(vnode, openMode, kernel);
4136 }
4137 
4138 
4139 /*!	Looks up a vnode with the given mount and vnode ID.
4140 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4141 	to the node.
4142 	It's currently only used by file_cache_create().
4143 */
4144 extern "C" status_t
4145 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4146 {
4147 	rw_lock_read_lock(&sVnodeLock);
4148 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4149 	rw_lock_read_unlock(&sVnodeLock);
4150 
4151 	if (vnode == NULL)
4152 		return B_ERROR;
4153 
4154 	*_vnode = vnode;
4155 	return B_OK;
4156 }
4157 
4158 
4159 extern "C" status_t
4160 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4161 	bool traverseLeafLink, bool kernel, void** _node)
4162 {
4163 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4164 		volume, path, kernel));
4165 
4166 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4167 	if (pathBuffer.InitCheck() != B_OK)
4168 		return B_NO_MEMORY;
4169 
4170 	fs_mount* mount;
4171 	status_t status = get_mount(volume->id, &mount);
4172 	if (status != B_OK)
4173 		return status;
4174 
4175 	char* buffer = pathBuffer.LockBuffer();
4176 	strlcpy(buffer, path, pathBuffer.BufferSize());
4177 
4178 	struct vnode* vnode = mount->root_vnode;
4179 
4180 	if (buffer[0] == '/')
4181 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4182 	else {
4183 		inc_vnode_ref_count(vnode);
4184 			// vnode_path_to_vnode() releases a reference to the starting vnode
4185 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4186 			kernel, &vnode, NULL);
4187 	}
4188 
4189 	put_mount(mount);
4190 
4191 	if (status != B_OK)
4192 		return status;
4193 
4194 	if (vnode->device != volume->id) {
4195 		// wrong mount ID - must not gain access to foreign file system nodes
4196 		put_vnode(vnode);
4197 		return B_BAD_VALUE;
4198 	}
4199 
4200 	// Use get_vnode() to resolve the cookie for the right layer.
4201 	status = get_vnode(volume, vnode->id, _node);
4202 	put_vnode(vnode);
4203 
4204 	return status;
4205 }
4206 
4207 
4208 status_t
4209 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4210 	struct stat* stat, bool kernel)
4211 {
4212 	status_t status;
4213 
4214 	if (path) {
4215 		// path given: get the stat of the node referred to by (fd, path)
4216 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4217 		if (pathBuffer.InitCheck() != B_OK)
4218 			return B_NO_MEMORY;
4219 
4220 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4221 			traverseLeafLink, stat, kernel);
4222 	} else {
4223 		// no path given: get the FD and use the FD operation
4224 		struct file_descriptor* descriptor
4225 			= get_fd(get_current_io_context(kernel), fd);
4226 		if (descriptor == NULL)
4227 			return B_FILE_ERROR;
4228 
4229 		if (descriptor->ops->fd_read_stat)
4230 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4231 		else
4232 			status = B_UNSUPPORTED;
4233 
4234 		put_fd(descriptor);
4235 	}
4236 
4237 	return status;
4238 }
4239 
4240 
4241 /*!	Finds the full path to the file that contains the module \a moduleName,
4242 	puts it into \a pathBuffer, and returns B_OK for success.
4243 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4244 	\c B_ENTRY_NOT_FOUND if no file could be found.
4245 	\a pathBuffer is clobbered in any case and must not be relied on if this
4246 	function returns unsuccessfully.
4247 	\a basePath and \a pathBuffer must not point to the same space.
4248 */
4249 status_t
4250 vfs_get_module_path(const char* basePath, const char* moduleName,
4251 	char* pathBuffer, size_t bufferSize)
4252 {
4253 	struct vnode* dir;
4254 	struct vnode* file;
4255 	status_t status;
4256 	size_t length;
4257 	char* path;
4258 
4259 	if (bufferSize == 0
4260 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4261 		return B_BUFFER_OVERFLOW;
4262 
4263 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4264 	if (status != B_OK)
4265 		return status;
4266 
4267 	// the path buffer has been clobbered by the above call
4268 	length = strlcpy(pathBuffer, basePath, bufferSize);
4269 	if (pathBuffer[length - 1] != '/')
4270 		pathBuffer[length++] = '/';
4271 
4272 	path = pathBuffer + length;
4273 	bufferSize -= length;
4274 
4275 	while (moduleName) {
4276 		char* nextPath = strchr(moduleName, '/');
4277 		if (nextPath == NULL)
4278 			length = strlen(moduleName);
4279 		else {
4280 			length = nextPath - moduleName;
4281 			nextPath++;
4282 		}
4283 
4284 		if (length + 1 >= bufferSize) {
4285 			status = B_BUFFER_OVERFLOW;
4286 			goto err;
4287 		}
4288 
4289 		memcpy(path, moduleName, length);
4290 		path[length] = '\0';
4291 		moduleName = nextPath;
4292 
4293 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4294 		if (status != B_OK) {
4295 			// vnode_path_to_vnode() has already released the reference to dir
4296 			return status;
4297 		}
4298 
4299 		if (S_ISDIR(file->Type())) {
4300 			// go to the next directory
4301 			path[length] = '/';
4302 			path[length + 1] = '\0';
4303 			path += length + 1;
4304 			bufferSize -= length + 1;
4305 
4306 			dir = file;
4307 		} else if (S_ISREG(file->Type())) {
4308 			// it's a file so it should be what we've searched for
4309 			put_vnode(file);
4310 
4311 			return B_OK;
4312 		} else {
4313 			TRACE(("vfs_get_module_path(): something is strange here: "
4314 				"0x%08" B_PRIx32 "...\n", file->Type()));
4315 			status = B_ERROR;
4316 			dir = file;
4317 			goto err;
4318 		}
4319 	}
4320 
4321 	// if we got here, the moduleName just pointed to a directory, not to
4322 	// a real module - what should we do in this case?
4323 	status = B_ENTRY_NOT_FOUND;
4324 
4325 err:
4326 	put_vnode(dir);
4327 	return status;
4328 }
4329 
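/* Illustrative sketch: with a basePath of "/boot/system/add-ons/kernel"
   and a moduleName of "bus_managers/pci/v1" (example values), the walk
   stops at the first regular file it hits, e.g. yielding
   "/boot/system/add-ons/kernel/bus_managers/pci":

	char path[B_PATH_NAME_LENGTH];
	status_t status = vfs_get_module_path("/boot/system/add-ons/kernel",
		"bus_managers/pci/v1", path, sizeof(path));
*/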
4330 
4331 /*!	\brief Normalizes a given path.
4332 
4333 	The path must refer to an existing or non-existing entry in an existing
4334 	directory; that is, after chopping off the leaf component, the remaining
4335 	path must refer to an existing directory.
4336 
4337 	The returned path will be canonical in that it will be absolute, will not
4338 	contain any "." or ".." components or duplicate occurrences of '/'s,
4339 	and none of the directory components will be symbolic links.
4340 
4341 	Any two paths referring to the same entry will result in the same
4342 	normalized path (well, that is pretty much the definition of `normalized',
4343 	isn't it :-).
4344 
4345 	\param path The path to be normalized.
4346 	\param buffer The buffer into which the normalized path will be written.
4347 		   May be the same one as \a path.
4348 	\param bufferSize The size of \a buffer.
4349 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4350 	\param kernel \c true, if the IO context of the kernel shall be used,
4351 		   otherwise that of the team this thread belongs to. Only relevant,
4352 		   if the path is relative (to get the CWD).
4353 	\return \c B_OK if everything went fine, another error code otherwise.
4354 */
4355 status_t
4356 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4357 	bool traverseLink, bool kernel)
4358 {
4359 	if (!path || !buffer || bufferSize < 1)
4360 		return B_BAD_VALUE;
4361 
4362 	if (path != buffer) {
4363 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4364 			return B_BUFFER_OVERFLOW;
4365 	}
4366 
4367 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4368 }
4369 
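/* Illustrative sketch: normalization may be done in place. With a CWD of
   "/boot/home" (example values), "foo/../Desktop//file" comes back as
   "/boot/home/Desktop/file":

	char path[B_PATH_NAME_LENGTH] = "foo/../Desktop//file";
	status_t status = vfs_normalize_path(path, path, sizeof(path), true,
		true);
*/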
4370 
4371 /*!	\brief Creates a special node in the file system.
4372 
4373 	The caller gets a reference to the newly created node (which is passed
4374 	back through \a _createdVnode) and is responsible for releasing it.
4375 
4376 	\param path The path where to create the entry for the node. Can be \c NULL,
4377 		in which case the node is created without an entry in the root FS -- it
4378 		will automatically be deleted when the last reference has been released.
4379 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4380 		the target file system will just create the node with its standard
4381 		operations. Depending on the type of the node a subnode might be created
4382 		automatically, though.
4383 	\param mode The type and permissions for the node to be created.
4384 	\param flags Flags to be passed to the creating FS.
4385 	\param kernel \c true, if called in the kernel context (relevant only if
4386 		\a path is not \c NULL and not absolute).
4387 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4388 		file system creating the node, with the private data pointer and
4389 		operations for the super node. Can be \c NULL.
4390 	\param _createdVnode Pointer to pre-allocated storage in which to store
4391 		the pointer to the newly created node.
4392 	\return \c B_OK, if everything went fine, another error code otherwise.
4393 */
4394 status_t
4395 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4396 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4397 	struct vnode** _createdVnode)
4398 {
4399 	struct vnode* dirNode;
4400 	char _leaf[B_FILE_NAME_LENGTH];
4401 	char* leaf = NULL;
4402 
4403 	if (path) {
4404 		// We've got a path. Get the dir vnode and the leaf name.
4405 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4406 		if (tmpPathBuffer.InitCheck() != B_OK)
4407 			return B_NO_MEMORY;
4408 
4409 		char* tmpPath = tmpPathBuffer.LockBuffer();
4410 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4411 			return B_NAME_TOO_LONG;
4412 
4413 		// get the dir vnode and the leaf name
4414 		leaf = _leaf;
4415 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4416 		if (error != B_OK)
4417 			return error;
4418 	} else {
4419 		// No path. Create the node in the root FS.
4420 		dirNode = sRoot;
4421 		inc_vnode_ref_count(dirNode);
4422 	}
4423 
4424 	VNodePutter _(dirNode);
4425 
4426 	// check support for creating special nodes
4427 	if (!HAS_FS_CALL(dirNode, create_special_node))
4428 		return B_UNSUPPORTED;
4429 
4430 	// create the node
4431 	fs_vnode superVnode;
4432 	ino_t nodeID;
4433 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4434 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4435 	if (status != B_OK)
4436 		return status;
4437 
4438 	// lookup the node
4439 	rw_lock_read_lock(&sVnodeLock);
4440 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4441 	rw_lock_read_unlock(&sVnodeLock);
4442 
4443 	if (*_createdVnode == NULL) {
4444 		panic("vfs_create_special_node(): lookup of node failed");
4445 		return B_ERROR;
4446 	}
4447 
4448 	return B_OK;
4449 }
4450 
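/* Illustrative sketch: creating an entry-less special node, e.g. an
   anonymous FIFO, in the root FS (the mode value is an example):

	struct vnode* createdVnode;
	status_t status = vfs_create_special_node(NULL, NULL, S_IFIFO | 0600,
		0, true, NULL, &createdVnode);
	if (status == B_OK)
		vfs_put_vnode(createdVnode);	// release once done with the node
*/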
4451 
4452 extern "C" void
4453 vfs_put_vnode(struct vnode* vnode)
4454 {
4455 	put_vnode(vnode);
4456 }
4457 
4458 
4459 extern "C" status_t
4460 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4461 {
4462 	// Get current working directory from io context
4463 	struct io_context* context = get_current_io_context(false);
4464 	status_t status = B_OK;
4465 
4466 	mutex_lock(&context->io_mutex);
4467 
4468 	if (context->cwd != NULL) {
4469 		*_mountID = context->cwd->device;
4470 		*_vnodeID = context->cwd->id;
4471 	} else
4472 		status = B_ERROR;
4473 
4474 	mutex_unlock(&context->io_mutex);
4475 	return status;
4476 }
4477 
4478 
4479 status_t
4480 vfs_unmount(dev_t mountID, uint32 flags)
4481 {
4482 	return fs_unmount(NULL, mountID, flags, true);
4483 }
4484 
4485 
4486 extern "C" status_t
4487 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4488 {
4489 	struct vnode* vnode;
4490 
4491 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4492 	if (status != B_OK)
4493 		return status;
4494 
4495 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4496 	put_vnode(vnode);
4497 	return B_OK;
4498 }
4499 
4500 
4501 extern "C" void
4502 vfs_free_unused_vnodes(int32 level)
4503 {
4504 	vnode_low_resource_handler(NULL,
4505 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4506 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4507 		level);
4508 }
4509 
4510 
4511 extern "C" bool
4512 vfs_can_page(struct vnode* vnode, void* cookie)
4513 {
4514 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4515 
4516 	if (HAS_FS_CALL(vnode, can_page))
4517 		return FS_CALL(vnode, can_page, cookie);
4518 	return false;
4519 }
4520 
4521 
4522 extern "C" status_t
4523 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4524 	const generic_io_vec* vecs, size_t count, uint32 flags,
4525 	generic_size_t* _numBytes)
4526 {
4527 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4528 		vecs, pos));
4529 
4530 #if VFS_PAGES_IO_TRACING
4531 	generic_size_t bytesRequested = *_numBytes;
4532 #endif
4533 
4534 	IORequest request;
4535 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4536 	if (status == B_OK) {
4537 		status = vfs_vnode_io(vnode, cookie, &request);
4538 		if (status == B_OK)
4539 			status = request.Wait();
4540 		*_numBytes = request.TransferredBytes();
4541 	}
4542 
4543 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4544 		status, *_numBytes));
4545 
4546 	return status;
4547 }
4548 
4549 
4550 extern "C" status_t
4551 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4552 	const generic_io_vec* vecs, size_t count, uint32 flags,
4553 	generic_size_t* _numBytes)
4554 {
4555 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4556 		vecs, pos));
4557 
4558 #if VFS_PAGES_IO_TRACING
4559 	generic_size_t bytesRequested = *_numBytes;
4560 #endif
4561 
4562 	IORequest request;
4563 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4564 	if (status == B_OK) {
4565 		status = vfs_vnode_io(vnode, cookie, &request);
4566 		if (status == B_OK)
4567 			status = request.Wait();
4568 		*_numBytes = request.TransferredBytes();
4569 	}
4570 
4571 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4572 		status, *_numBytes));
4573 
4574 	return status;
4575 }
4576 
4577 
4578 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4579 	created if \a allocate is \c true.
4580 	In case it's successful, it will also grab a reference to the cache
4581 	it returns.
4582 */
4583 extern "C" status_t
4584 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4585 {
4586 	if (vnode->cache != NULL) {
4587 		vnode->cache->AcquireRef();
4588 		*_cache = vnode->cache;
4589 		return B_OK;
4590 	}
4591 
4592 	rw_lock_read_lock(&sVnodeLock);
4593 	vnode->Lock();
4594 
4595 	status_t status = B_OK;
4596 
4597 	// The cache could have been created in the meantime
4598 	if (vnode->cache == NULL) {
4599 		if (allocate) {
4600 			// TODO: actually the vnode needs to be busy already here, or
4601 			//	else this won't work...
4602 			bool wasBusy = vnode->IsBusy();
4603 			vnode->SetBusy(true);
4604 
4605 			vnode->Unlock();
4606 			rw_lock_read_unlock(&sVnodeLock);
4607 
4608 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4609 
4610 			rw_lock_read_lock(&sVnodeLock);
4611 			vnode->Lock();
4612 			vnode->SetBusy(wasBusy);
4613 		} else
4614 			status = B_BAD_VALUE;
4615 	}
4616 
4617 	vnode->Unlock();
4618 	rw_lock_read_unlock(&sVnodeLock);
4619 
4620 	if (status == B_OK) {
4621 		vnode->cache->AcquireRef();
4622 		*_cache = vnode->cache;
4623 	}
4624 
4625 	return status;
4626 }
4627 
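/* Illustrative sketch: the caller owns one reference to the returned
   cache and has to release it again:

	VMCache* cache;
	if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK)
		cache->ReleaseRef();	// done with the cache
*/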
4628 
4629 status_t
4630 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4631 	file_io_vec* vecs, size_t* _count)
4632 {
4633 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4634 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4635 
4636 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4637 }
4638 
4639 
4640 status_t
4641 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4642 {
4643 	status_t status = FS_CALL(vnode, read_stat, stat);
4644 
4645 	// fill in the st_dev and st_ino fields
4646 	if (status == B_OK) {
4647 		stat->st_dev = vnode->device;
4648 		stat->st_ino = vnode->id;
4649 		stat->st_rdev = -1;
4650 	}
4651 
4652 	return status;
4653 }
4654 
4655 
4656 status_t
4657 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4658 {
4659 	struct vnode* vnode;
4660 	status_t status = get_vnode(device, inode, &vnode, true, false);
4661 	if (status != B_OK)
4662 		return status;
4663 
4664 	status = FS_CALL(vnode, read_stat, stat);
4665 
4666 	// fill in the st_dev and st_ino fields
4667 	if (status == B_OK) {
4668 		stat->st_dev = vnode->device;
4669 		stat->st_ino = vnode->id;
4670 		stat->st_rdev = -1;
4671 	}
4672 
4673 	put_vnode(vnode);
4674 	return status;
4675 }
4676 
4677 
4678 status_t
4679 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4680 {
4681 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4682 }
4683 
4684 
4685 status_t
4686 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4687 	char* path, size_t pathLength)
4688 {
4689 	struct vnode* vnode;
4690 	status_t status;
4691 
4692 	// filter invalid leaf names
4693 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4694 		return B_BAD_VALUE;
4695 
4696 	// get the vnode matching the dir's node_ref
4697 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4698 		// special cases "." and "..": we can directly get the vnode of the
4699 		// referenced directory
4700 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4701 		leaf = NULL;
4702 	} else
4703 		status = get_vnode(device, inode, &vnode, true, false);
4704 	if (status != B_OK)
4705 		return status;
4706 
4707 	// get the directory path
4708 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4709 	put_vnode(vnode);
4710 		// we don't need the vnode anymore
4711 	if (status != B_OK)
4712 		return status;
4713 
4714 	// append the leaf name
4715 	if (leaf) {
4716 		// insert a directory separator if this is not the file system root
4717 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4718 				>= pathLength)
4719 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4720 			return B_NAME_TOO_LONG;
4721 		}
4722 	}
4723 
4724 	return B_OK;
4725 }
4726 
4727 
4728 /*!	If the given descriptor locked its vnode, that lock will be released. */
4729 void
4730 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4731 {
4732 	struct vnode* vnode = fd_vnode(descriptor);
4733 
4734 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4735 		vnode->mandatory_locked_by = NULL;
4736 }
4737 
4738 
4739 /*!	Closes all file descriptors of the specified I/O context that
4740 	have the O_CLOEXEC flag set.
4741 */
4742 void
4743 vfs_exec_io_context(io_context* context)
4744 {
4745 	uint32 i;
4746 
4747 	for (i = 0; i < context->table_size; i++) {
4748 		mutex_lock(&context->io_mutex);
4749 
4750 		struct file_descriptor* descriptor = context->fds[i];
4751 		bool remove = false;
4752 
4753 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4754 			context->fds[i] = NULL;
4755 			context->num_used_fds--;
4756 
4757 			remove = true;
4758 		}
4759 
4760 		mutex_unlock(&context->io_mutex);
4761 
4762 		if (remove) {
4763 			close_fd(descriptor);
4764 			put_fd(descriptor);
4765 		}
4766 	}
4767 }
4768 
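/* Illustrative sketch: an FD is closed here if userland marked it
   close-on-exec before calling an exec*() function ("/tmp/example" is,
   of course, just an example):

	int fd = open("/tmp/example", O_RDONLY);
	fcntl(fd, F_SETFD, FD_CLOEXEC);
	// any subsequent exec*() ends up in vfs_exec_io_context(), which
	// closes fd in the new team image
*/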
4769 
4770 /*! Sets up a new io_context structure, and inherits the properties
4771 	of the parent io_context if one is given.
4772 */
4773 io_context*
4774 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4775 {
4776 	io_context* context = (io_context*)malloc(sizeof(io_context));
4777 	if (context == NULL)
4778 		return NULL;
4779 
4780 	TIOC(NewIOContext(context, parentContext));
4781 
4782 	memset(context, 0, sizeof(io_context));
4783 	context->ref_count = 1;
4784 
4785 	MutexLocker parentLocker;
4786 
4787 	size_t tableSize;
4788 	if (parentContext) {
4789 		parentLocker.SetTo(parentContext->io_mutex, false);
4790 		tableSize = parentContext->table_size;
4791 	} else
4792 		tableSize = DEFAULT_FD_TABLE_SIZE;
4793 
4794 	// allocate space for FDs and their close-on-exec flag
4795 	context->fds = (file_descriptor**)malloc(
4796 		sizeof(struct file_descriptor*) * tableSize
4797 		+ sizeof(struct select_sync*) * tableSize
4798 		+ (tableSize + 7) / 8);
4799 	if (context->fds == NULL) {
4800 		free(context);
4801 		return NULL;
4802 	}
4803 
4804 	context->select_infos = (select_info**)(context->fds + tableSize);
4805 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4806 
4807 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4808 		+ sizeof(struct select_sync*) * tableSize
4809 		+ (tableSize + 7) / 8);
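	// Example layout of this single allocation (64-bit pointers,
	// tableSize == 128; sizes are illustrative):
	//   fds:                128 * 8 bytes == 1024
	//   select_infos:       128 * 8 bytes == 1024
	//   fds_close_on_exec: (128 + 7) / 8  ==   16
	// i.e. one close-on-exec bit per FD, rounded up to whole bytes.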
4810 
4811 	mutex_init(&context->io_mutex, "I/O context");
4812 
4813 	// Copy all parent file descriptors
4814 
4815 	if (parentContext) {
4816 		size_t i;
4817 
4818 		mutex_lock(&sIOContextRootLock);
4819 		context->root = parentContext->root;
4820 		if (context->root)
4821 			inc_vnode_ref_count(context->root);
4822 		mutex_unlock(&sIOContextRootLock);
4823 
4824 		context->cwd = parentContext->cwd;
4825 		if (context->cwd)
4826 			inc_vnode_ref_count(context->cwd);
4827 
4828 		for (i = 0; i < tableSize; i++) {
4829 			struct file_descriptor* descriptor = parentContext->fds[i];
4830 
4831 			if (descriptor != NULL) {
4832 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4833 				if (closeOnExec && purgeCloseOnExec)
4834 					continue;
4835 
4836 				TFD(InheritFD(context, i, descriptor, parentContext));
4837 
4838 				context->fds[i] = descriptor;
4839 				context->num_used_fds++;
4840 				atomic_add(&descriptor->ref_count, 1);
4841 				atomic_add(&descriptor->open_count, 1);
4842 
4843 				if (closeOnExec)
4844 					fd_set_close_on_exec(context, i, true);
4845 			}
4846 		}
4847 
4848 		parentLocker.Unlock();
4849 	} else {
4850 		context->root = sRoot;
4851 		context->cwd = sRoot;
4852 
4853 		if (context->root)
4854 			inc_vnode_ref_count(context->root);
4855 
4856 		if (context->cwd)
4857 			inc_vnode_ref_count(context->cwd);
4858 	}
4859 
4860 	context->table_size = tableSize;
4861 
4862 	list_init(&context->node_monitors);
4863 	context->max_monitors = DEFAULT_NODE_MONITORS;
4864 
4865 	return context;
4866 }
4867 
4868 
4869 static status_t
4870 vfs_free_io_context(io_context* context)
4871 {
4872 	uint32 i;
4873 
4874 	TIOC(FreeIOContext(context));
4875 
4876 	if (context->root)
4877 		put_vnode(context->root);
4878 
4879 	if (context->cwd)
4880 		put_vnode(context->cwd);
4881 
4882 	mutex_lock(&context->io_mutex);
4883 
4884 	for (i = 0; i < context->table_size; i++) {
4885 		if (struct file_descriptor* descriptor = context->fds[i]) {
4886 			close_fd(descriptor);
4887 			put_fd(descriptor);
4888 		}
4889 	}
4890 
4891 	mutex_destroy(&context->io_mutex);
4892 
4893 	remove_node_monitors(context);
4894 	free(context->fds);
4895 	free(context);
4896 
4897 	return B_OK;
4898 }
4899 
4900 
4901 void
4902 vfs_get_io_context(io_context* context)
4903 {
4904 	atomic_add(&context->ref_count, 1);
4905 }
4906 
4907 
4908 void
4909 vfs_put_io_context(io_context* context)
4910 {
4911 	if (atomic_add(&context->ref_count, -1) == 1)
4912 		vfs_free_io_context(context);
4913 }
4914 
4915 
4916 static status_t
4917 vfs_resize_fd_table(struct io_context* context, const int newSize)
4918 {
4919 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4920 		return B_BAD_VALUE;
4921 
4922 	TIOC(ResizeIOContext(context, newSize));
4923 
4924 	MutexLocker _(context->io_mutex);
4925 
4926 	int oldSize = context->table_size;
4927 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4928 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4929 
4930 	// If the tables shrink, make sure none of the fds being dropped are in use.
4931 	if (newSize < oldSize) {
4932 		for (int i = oldSize; i-- > newSize;) {
4933 			if (context->fds[i])
4934 				return B_BUSY;
4935 		}
4936 	}
4937 
4938 	// store pointers to the old tables
4939 	file_descriptor** oldFDs = context->fds;
4940 	select_info** oldSelectInfos = context->select_infos;
4941 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4942 
4943 	// allocate new tables
4944 	file_descriptor** newFDs = (file_descriptor**)malloc(
4945 		sizeof(struct file_descriptor*) * newSize
4946 		+ sizeof(struct select_sync*) * newSize
4947 		+ newCloseOnExitBitmapSize);
4948 	if (newFDs == NULL)
4949 		return B_NO_MEMORY;
4950 
4951 	context->fds = newFDs;
4952 	context->select_infos = (select_info**)(context->fds + newSize);
4953 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4954 	context->table_size = newSize;
4955 
4956 	// copy entries from old tables
4957 	int toCopy = min_c(oldSize, newSize);
4958 
4959 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4960 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4961 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4962 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4963 
4964 	// clear additional entries, if the tables grow
4965 	if (newSize > oldSize) {
4966 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4967 		memset(context->select_infos + oldSize, 0,
4968 			sizeof(void*) * (newSize - oldSize));
4969 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4970 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4971 	}
4972 
4973 	free(oldFDs);
4974 
4975 	return B_OK;
4976 }
4977 
4978 
4979 static status_t
4980 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4981 {
4982 	status_t status = B_OK;
4983 
4984 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4985 		return B_BAD_VALUE;
4986 
4987 	mutex_lock(&context->io_mutex);
4988 
4989 	if ((size_t)newSize < context->num_monitors) {
4990 		status = B_BUSY;
4991 		goto out;
4992 	}
4993 	context->max_monitors = newSize;
4994 
4995 out:
4996 	mutex_unlock(&context->io_mutex);
4997 	return status;
4998 }
4999 
5000 
5001 status_t
5002 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5003 	ino_t* _mountPointNodeID)
5004 {
5005 	ReadLocker nodeLocker(sVnodeLock);
5006 	MutexLocker mountLocker(sMountMutex);
5007 
5008 	struct fs_mount* mount = find_mount(mountID);
5009 	if (mount == NULL)
5010 		return B_BAD_VALUE;
5011 
5012 	Vnode* mountPoint = mount->covers_vnode;
5013 
5014 	*_mountPointMountID = mountPoint->device;
5015 	*_mountPointNodeID = mountPoint->id;
5016 
5017 	return B_OK;
5018 }
5019 
5020 
5021 status_t
5022 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5023 	ino_t coveredNodeID)
5024 {
5025 	// get the vnodes
5026 	Vnode* vnode;
5027 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5028 	if (error != B_OK)
5029 		return B_BAD_VALUE;
5030 	VNodePutter vnodePutter(vnode);
5031 
5032 	Vnode* coveredVnode;
5033 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5034 		false);
5035 	if (error != B_OK)
5036 		return B_BAD_VALUE;
5037 	VNodePutter coveredVnodePutter(coveredVnode);
5038 
5039 	// establish the covered/covering links
5040 	WriteLocker locker(sVnodeLock);
5041 
5042 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5043 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5044 		return B_BUSY;
5045 	}
5046 
5047 	vnode->covers = coveredVnode;
5048 	vnode->SetCovering(true);
5049 
5050 	coveredVnode->covered_by = vnode;
5051 	coveredVnode->SetCovered(true);
5052 
5053 	// the vnodes now reference each other
5054 	inc_vnode_ref_count(vnode);
5055 	inc_vnode_ref_count(coveredVnode);
5056 
5057 	return B_OK;
5058 }
5059 
5060 
5061 int
5062 vfs_getrlimit(int resource, struct rlimit* rlp)
5063 {
5064 	if (!rlp)
5065 		return B_BAD_ADDRESS;
5066 
5067 	switch (resource) {
5068 		case RLIMIT_NOFILE:
5069 		{
5070 			struct io_context* context = get_current_io_context(false);
5071 			MutexLocker _(context->io_mutex);
5072 
5073 			rlp->rlim_cur = context->table_size;
5074 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5075 			return 0;
5076 		}
5077 
5078 		case RLIMIT_NOVMON:
5079 		{
5080 			struct io_context* context = get_current_io_context(false);
5081 			MutexLocker _(context->io_mutex);
5082 
5083 			rlp->rlim_cur = context->max_monitors;
5084 			rlp->rlim_max = MAX_NODE_MONITORS;
5085 			return 0;
5086 		}
5087 
5088 		default:
5089 			return B_BAD_VALUE;
5090 	}
5091 }
5092 
5093 
5094 int
5095 vfs_setrlimit(int resource, const struct rlimit* rlp)
5096 {
5097 	if (!rlp)
5098 		return B_BAD_ADDRESS;
5099 
5100 	switch (resource) {
5101 		case RLIMIT_NOFILE:
5102 			/* TODO: check getuid() */
5103 			if (rlp->rlim_max != RLIM_SAVED_MAX
5104 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5105 				return B_NOT_ALLOWED;
5106 
5107 			return vfs_resize_fd_table(get_current_io_context(false),
5108 				rlp->rlim_cur);
5109 
5110 		case RLIMIT_NOVMON:
5111 			/* TODO: check getuid() */
5112 			if (rlp->rlim_max != RLIM_SAVED_MAX
5113 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5114 				return B_NOT_ALLOWED;
5115 
5116 			return vfs_resize_monitor_table(get_current_io_context(false),
5117 				rlp->rlim_cur);
5118 
5119 		default:
5120 			return B_BAD_VALUE;
5121 	}
5122 }
5123 
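/* Illustrative sketch: the two functions above back the POSIX
   getrlimit()/setrlimit() calls, e.g. for a userland program growing its
   FD table (1024 is an example value):

	struct rlimit rl;
	getrlimit(RLIMIT_NOFILE, &rl);
	rl.rlim_cur = 1024;				// new soft limit for open FDs
	setrlimit(RLIMIT_NOFILE, &rl);	// ends up in vfs_resize_fd_table()
*/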
5124 
5125 status_t
5126 vfs_init(kernel_args* args)
5127 {
5128 	vnode::StaticInit();
5129 
5130 	struct vnode dummyVnode;
5131 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5132 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5133 	if (sVnodeTable == NULL)
5134 		panic("vfs_init: error creating vnode hash table\n");
5135 
5136 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5137 
5138 	struct fs_mount dummyMount;
5139 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5140 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5141 	if (sMountsTable == NULL)
5142 		panic("vfs_init: error creating mounts hash table\n");
5143 
5144 	node_monitor_init();
5145 
5146 	sRoot = NULL;
5147 
5148 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5149 
5150 	if (block_cache_init() != B_OK)
5151 		return B_ERROR;
5152 
5153 #ifdef ADD_DEBUGGER_COMMANDS
5154 	// add some debugger commands
5155 	add_debugger_command_etc("vnode", &dump_vnode,
5156 		"Print info about the specified vnode",
5157 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5158 		"Prints information about the vnode specified by address <vnode> or\n"
5159 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5160 		"constructed and printed. It might not be possible to construct a\n"
5161 		"complete path, though.\n",
5162 		0);
5163 	add_debugger_command("vnodes", &dump_vnodes,
5164 		"list all vnodes (from the specified device)");
5165 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5166 		"list all vnode caches");
5167 	add_debugger_command("mount", &dump_mount,
5168 		"info about the specified fs_mount");
5169 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5170 	add_debugger_command("io_context", &dump_io_context,
5171 		"info about the I/O context");
5172 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5173 		"info about vnode usage");
5174 #endif
5175 
5176 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5177 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5178 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5179 		0);
5180 
5181 	file_map_init();
5182 
5183 	return file_cache_init();
5184 }
5185 
5186 
5187 //	#pragma mark - fd_ops implementations
5188 
5189 
5190 /*!
5191 	Calls fs_open() on the given vnode and returns a new
5192 	file descriptor for it
5193 */
5194 static int
5195 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5196 {
5197 	void* cookie;
5198 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5199 	if (status != B_OK)
5200 		return status;
5201 
5202 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5203 	if (fd < 0) {
5204 		FS_CALL(vnode, close, cookie);
5205 		FS_CALL(vnode, free_cookie, cookie);
5206 	}
5207 	return fd;
5208 }
5209 
5210 
5211 /*!
5212 	Creates a new file in the given directory (or opens an existing entry,
5213 	unless O_EXCL is set) and returns a new file descriptor for it
5214 */
5215 static int
5216 create_vnode(struct vnode* directory, const char* name, int openMode,
5217 	int perms, bool kernel)
5218 {
5219 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5220 	status_t status = B_ERROR;
5221 	struct vnode* vnode;
5222 	void* cookie;
5223 	ino_t newID;
5224 
5225 	// This is somewhat tricky: If the entry already exists, the FS responsible
5226 	// for the directory might not necessarily also be the one responsible for
5227 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5228 	// we can actually never call the create() hook without O_EXCL. Instead we
5229 	// try to look the entry up first. If it already exists, we just open the
5230 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5231 	// introduces a race condition, since someone else might have created the
5232 	// entry in the meantime. We hope the respective FS returns the correct
5233 	// error code, in which case we retry (up to 3 times).
5234 
5235 	for (int i = 0; i < 3 && status != B_OK; i++) {
5236 		// look the node up
5237 		status = lookup_dir_entry(directory, name, &vnode);
5238 		if (status == B_OK) {
5239 			VNodePutter putter(vnode);
5240 
5241 			if ((openMode & O_EXCL) != 0)
5242 				return B_FILE_EXISTS;
5243 
5244 			// If the node is a symlink, we have to follow it, unless
5245 			// O_NOTRAVERSE is set.
5246 			if (S_ISLNK(vnode->Type()) && traverse) {
5247 				putter.Put();
5248 				char clonedName[B_FILE_NAME_LENGTH + 1];
5249 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5250 						>= B_FILE_NAME_LENGTH) {
5251 					return B_NAME_TOO_LONG;
5252 				}
5253 
5254 				inc_vnode_ref_count(directory);
5255 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5256 					kernel, &vnode, NULL);
5257 				if (status != B_OK)
5258 					return status;
5259 
5260 				putter.SetTo(vnode);
5261 			}
5262 
5263 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5264 				put_vnode(vnode);
5265 				return B_LINK_LIMIT;
5266 			}
5267 
5268 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5269 			// on success keep the vnode reference for the FD
5270 			if (fd >= 0)
5271 				putter.Detach();
5272 
5273 			return fd;
5274 		}
5275 
5276 		// it doesn't exist yet -- try to create it
5277 
5278 		if (!HAS_FS_CALL(directory, create))
5279 			return B_READ_ONLY_DEVICE;
5280 
5281 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5282 			&cookie, &newID);
5283 		if (status != B_OK
5284 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5285 			return status;
5286 		}
5287 	}
5288 
5289 	if (status != B_OK)
5290 		return status;
5291 
5292 	// the node has been created successfully
5293 
5294 	rw_lock_read_lock(&sVnodeLock);
5295 	vnode = lookup_vnode(directory->device, newID);
5296 	rw_lock_read_unlock(&sVnodeLock);
5297 
5298 	if (vnode == NULL) {
5299 		panic("vfs: fs_create() returned success but there is no vnode, "
5300 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5301 		return B_BAD_VALUE;
5302 	}
5303 
5304 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5305 	if (fd >= 0)
5306 		return fd;
5307 
5308 	status = fd;
5309 
5310 	// something went wrong, clean up
5311 
5312 	FS_CALL(vnode, close, cookie);
5313 	FS_CALL(vnode, free_cookie, cookie);
5314 	put_vnode(vnode);
5315 
5316 	FS_CALL(directory, unlink, name);
5317 
5318 	return status;
5319 }
5320 
5321 
5322 /*! Calls fs open_dir() on the given vnode and returns a new
5323 	file descriptor for it
5324 */
5325 static int
5326 open_dir_vnode(struct vnode* vnode, bool kernel)
5327 {
5328 	void* cookie;
5329 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5330 	if (status != B_OK)
5331 		return status;
5332 
5333 	// directory is opened, create a fd
5334 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5335 	if (status >= 0)
5336 		return status;
5337 
5338 	FS_CALL(vnode, close_dir, cookie);
5339 	FS_CALL(vnode, free_dir_cookie, cookie);
5340 
5341 	return status;
5342 }
5343 
5344 
5345 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5346 	file descriptor for it.
5347 	Used by attr_dir_open() and attr_dir_open_fd().
5348 */
5349 static int
5350 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5351 {
5352 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5353 		return B_UNSUPPORTED;
5354 
5355 	void* cookie;
5356 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5357 	if (status != B_OK)
5358 		return status;
5359 
5360 	// directory is opened, create a fd
5361 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5362 		kernel);
5363 	if (status >= 0)
5364 		return status;
5365 
5366 	FS_CALL(vnode, close_attr_dir, cookie);
5367 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5368 
5369 	return status;
5370 }
5371 
5372 
5373 static int
5374 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5375 	int openMode, int perms, bool kernel)
5376 {
5377 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5378 		"kernel %d\n", name, openMode, perms, kernel));
5379 
5380 	// get directory to put the new file in
5381 	struct vnode* directory;
5382 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5383 	if (status != B_OK)
5384 		return status;
5385 
5386 	status = create_vnode(directory, name, openMode, perms, kernel);
5387 	put_vnode(directory);
5388 
5389 	return status;
5390 }
5391 
5392 
5393 static int
5394 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5395 {
5396 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5397 		openMode, perms, kernel));
5398 
5399 	// get directory to put the new file in
5400 	char name[B_FILE_NAME_LENGTH];
5401 	struct vnode* directory;
5402 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5403 		kernel);
5404 	if (status < 0)
5405 		return status;
5406 
5407 	status = create_vnode(directory, name, openMode, perms, kernel);
5408 
5409 	put_vnode(directory);
5410 	return status;
5411 }
5412 
5413 
5414 static int
5415 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5416 	int openMode, bool kernel)
5417 {
5418 	if (name == NULL || *name == '\0')
5419 		return B_BAD_VALUE;
5420 
5421 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5422 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5423 
5424 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5425 
5426 	// get the vnode matching the entry_ref
5427 	struct vnode* vnode;
5428 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5429 		kernel, &vnode);
5430 	if (status != B_OK)
5431 		return status;
5432 
5433 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5434 		put_vnode(vnode);
5435 		return B_LINK_LIMIT;
5436 	}
5437 
5438 	int newFD = open_vnode(vnode, openMode, kernel);
5439 	if (newFD >= 0) {
5440 		// The vnode reference has been transferred to the FD
5441 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5442 			directoryID, vnode->id, name);
5443 	} else
5444 		put_vnode(vnode);
5445 
5446 	return newFD;
5447 }
5448 
5449 
5450 static int
5451 file_open(int fd, char* path, int openMode, bool kernel)
5452 {
5453 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5454 
5455 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5456 		fd, path, openMode, kernel));
5457 
5458 	// get the vnode matching the vnode + path combination
5459 	struct vnode* vnode;
5460 	ino_t parentID;
5461 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5462 		&parentID, kernel);
5463 	if (status != B_OK)
5464 		return status;
5465 
5466 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5467 		put_vnode(vnode);
5468 		return B_LINK_LIMIT;
5469 	}
5470 
5471 	// open the vnode
5472 	int newFD = open_vnode(vnode, openMode, kernel);
5473 	if (newFD >= 0) {
5474 		// The vnode reference has been transferred to the FD
5475 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5476 			vnode->device, parentID, vnode->id, NULL);
5477 	} else
5478 		put_vnode(vnode);
5479 
5480 	return newFD;
5481 }
5482 
5483 
5484 static status_t
5485 file_close(struct file_descriptor* descriptor)
5486 {
5487 	struct vnode* vnode = descriptor->u.vnode;
5488 	status_t status = B_OK;
5489 
5490 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5491 
5492 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5493 		vnode->id);
5494 	if (HAS_FS_CALL(vnode, close)) {
5495 		status = FS_CALL(vnode, close, descriptor->cookie);
5496 	}
5497 
5498 	if (status == B_OK) {
5499 		// remove all outstanding locks for this team
5500 		if (HAS_FS_CALL(vnode, release_lock))
5501 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5502 		else
5503 			status = release_advisory_lock(vnode, NULL);
5504 	}
5505 	return status;
5506 }
5507 
5508 
5509 static void
5510 file_free_fd(struct file_descriptor* descriptor)
5511 {
5512 	struct vnode* vnode = descriptor->u.vnode;
5513 
5514 	if (vnode != NULL) {
5515 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5516 		put_vnode(vnode);
5517 	}
5518 }
5519 
5520 
5521 static status_t
5522 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5523 	size_t* length)
5524 {
5525 	struct vnode* vnode = descriptor->u.vnode;
5526 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5527 		pos, length, *length));
5528 
5529 	if (S_ISDIR(vnode->Type()))
5530 		return B_IS_A_DIRECTORY;
5531 
5532 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5533 }
5534 
5535 
5536 static status_t
5537 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5538 	size_t* length)
5539 {
5540 	struct vnode* vnode = descriptor->u.vnode;
5541 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5542 		length));
5543 
5544 	if (S_ISDIR(vnode->Type()))
5545 		return B_IS_A_DIRECTORY;
5546 	if (!HAS_FS_CALL(vnode, write))
5547 		return B_READ_ONLY_DEVICE;
5548 
5549 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5550 }
5551 
5552 
5553 static off_t
5554 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5555 {
5556 	struct vnode* vnode = descriptor->u.vnode;
5557 	off_t offset;
5558 
5559 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5560 		seekType));
5561 
5562 	// some kinds of files are not seekable
5563 	switch (vnode->Type() & S_IFMT) {
5564 		case S_IFIFO:
5565 		case S_IFSOCK:
5566 			return ESPIPE;
5567 
5568 		// The Open Group Base Specs don't single out any file types besides
5569 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5570 		case S_IFREG:
5571 		case S_IFBLK:
5572 		case S_IFDIR:
5573 		case S_IFLNK:
5574 		case S_IFCHR:
5575 			break;
5576 	}
5577 
5578 	switch (seekType) {
5579 		case SEEK_SET:
5580 			offset = 0;
5581 			break;
5582 		case SEEK_CUR:
5583 			offset = descriptor->pos;
5584 			break;
5585 		case SEEK_END:
5586 		{
5587 			// stat() the node
5588 			if (!HAS_FS_CALL(vnode, read_stat))
5589 				return B_UNSUPPORTED;
5590 
5591 			struct stat stat;
5592 			status_t status = FS_CALL(vnode, read_stat, &stat);
5593 			if (status != B_OK)
5594 				return status;
5595 
5596 			offset = stat.st_size;
5597 			break;
5598 		}
5599 		default:
5600 			return B_BAD_VALUE;
5601 	}
5602 
5603 	// assumes off_t is 64 bits wide
5604 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5605 		return B_BUFFER_OVERFLOW;
5606 
5607 	pos += offset;
5608 	if (pos < 0)
5609 		return B_BAD_VALUE;
5610 
5611 	return descriptor->pos = pos;
5612 }
5613 
5614 
5615 static status_t
5616 file_select(struct file_descriptor* descriptor, uint8 event,
5617 	struct selectsync* sync)
5618 {
5619 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5620 
5621 	struct vnode* vnode = descriptor->u.vnode;
5622 
5623 	// If the FS has no select() hook, notify select() now.
5624 	if (!HAS_FS_CALL(vnode, select))
5625 		return notify_select_event(sync, event);
5626 
5627 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5628 }
5629 
5630 
5631 static status_t
5632 file_deselect(struct file_descriptor* descriptor, uint8 event,
5633 	struct selectsync* sync)
5634 {
5635 	struct vnode* vnode = descriptor->u.vnode;
5636 
5637 	if (!HAS_FS_CALL(vnode, deselect))
5638 		return B_OK;
5639 
5640 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5641 }
5642 
5643 
5644 static status_t
5645 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5646 	bool kernel)
5647 {
5648 	struct vnode* vnode;
5649 	status_t status;
5650 
5651 	if (name == NULL || *name == '\0')
5652 		return B_BAD_VALUE;
5653 
5654 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5655 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5656 
5657 	status = get_vnode(mountID, parentID, &vnode, true, false);
5658 	if (status != B_OK)
5659 		return status;
5660 
5661 	if (HAS_FS_CALL(vnode, create_dir))
5662 		status = FS_CALL(vnode, create_dir, name, perms);
5663 	else
5664 		status = B_READ_ONLY_DEVICE;
5665 
5666 	put_vnode(vnode);
5667 	return status;
5668 }
5669 
5670 
5671 static status_t
5672 dir_create(int fd, char* path, int perms, bool kernel)
5673 {
5674 	char filename[B_FILE_NAME_LENGTH];
5675 	struct vnode* vnode;
5676 	status_t status;
5677 
5678 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5679 		kernel));
5680 
5681 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5682 	if (status < 0)
5683 		return status;
5684 
5685 	if (HAS_FS_CALL(vnode, create_dir)) {
5686 		status = FS_CALL(vnode, create_dir, filename, perms);
5687 	} else
5688 		status = B_READ_ONLY_DEVICE;
5689 
5690 	put_vnode(vnode);
5691 	return status;
5692 }
5693 
5694 
5695 static int
5696 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5697 {
5698 	FUNCTION(("dir_open_entry_ref()\n"));
5699 
5700 	if (name && name[0] == '\0')
5701 		return B_BAD_VALUE;
5702 
5703 	// get the vnode matching the entry_ref/node_ref
5704 	struct vnode* vnode;
5705 	status_t status;
5706 	if (name) {
5707 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5708 			&vnode);
5709 	} else
5710 		status = get_vnode(mountID, parentID, &vnode, true, false);
5711 	if (status != B_OK)
5712 		return status;
5713 
5714 	int newFD = open_dir_vnode(vnode, kernel);
5715 	if (newFD >= 0) {
5716 		// The vnode reference has been transferred to the FD
5717 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5718 			vnode->id, name);
5719 	} else
5720 		put_vnode(vnode);
5721 
5722 	return newFD;
5723 }
5724 
5725 
5726 static int
5727 dir_open(int fd, char* path, bool kernel)
5728 {
5729 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5730 		kernel));
5731 
5732 	// get the vnode matching the vnode + path combination
5733 	struct vnode* vnode = NULL;
5734 	ino_t parentID;
5735 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5736 		kernel);
5737 	if (status != B_OK)
5738 		return status;
5739 
5740 	// open the dir
5741 	int newFD = open_dir_vnode(vnode, kernel);
5742 	if (newFD >= 0) {
5743 		// The vnode reference has been transferred to the FD
5744 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5745 			parentID, vnode->id, NULL);
5746 	} else
5747 		put_vnode(vnode);
5748 
5749 	return newFD;
5750 }
5751 
5752 
5753 static status_t
5754 dir_close(struct file_descriptor* descriptor)
5755 {
5756 	struct vnode* vnode = descriptor->u.vnode;
5757 
5758 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5759 
5760 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5761 		vnode->id);
5762 	if (HAS_FS_CALL(vnode, close_dir))
5763 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5764 
5765 	return B_OK;
5766 }
5767 
5768 
5769 static void
5770 dir_free_fd(struct file_descriptor* descriptor)
5771 {
5772 	struct vnode* vnode = descriptor->u.vnode;
5773 
5774 	if (vnode != NULL) {
5775 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5776 		put_vnode(vnode);
5777 	}
5778 }
5779 
5780 
5781 static status_t
5782 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5783 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5784 {
5785 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5786 		bufferSize, _count);
5787 }
5788 
5789 
5790 static status_t
5791 fix_dirent(struct vnode* parent, struct dirent* entry,
5792 	struct io_context* ioContext)
5793 {
5794 	// set d_pdev and d_pino
5795 	entry->d_pdev = parent->device;
5796 	entry->d_pino = parent->id;
5797 
5798 	// If this is the ".." entry and the directory is covering another
5799 	// vnode, we need to replace d_dev and d_ino with the actual values.
5800 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5801 		// Make sure the IO context root is not bypassed.
5802 		if (parent == ioContext->root) {
5803 			entry->d_dev = parent->device;
5804 			entry->d_ino = parent->id;
5805 		} else {
5806 			inc_vnode_ref_count(parent);
5807 				// vnode_path_to_vnode() puts the node
5808 
5809 			// ".." is guaranteed not to be clobbered by this call
5810 			struct vnode* vnode;
5811 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5812 				ioContext, &vnode, NULL);
5813 
5814 			if (status == B_OK) {
5815 				entry->d_dev = vnode->device;
5816 				entry->d_ino = vnode->id;
5817 				put_vnode(vnode);
5818 			}
5819 		}
5820 	} else {
5821 		// resolve covered vnodes
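		// If a volume is mounted on the entry's vnode, report the topmost
		// covering vnode's device/inode instead, so the entry appears as the
		// root of the mounted volume.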
5822 		ReadLocker _(&sVnodeLock);
5823 
5824 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5825 		if (vnode != NULL && vnode->covered_by != NULL) {
5826 			do {
5827 				vnode = vnode->covered_by;
5828 			} while (vnode->covered_by != NULL);
5829 
5830 			entry->d_dev = vnode->device;
5831 			entry->d_ino = vnode->id;
5832 		}
5833 	}
5834 
5835 	return B_OK;
5836 }
5837 
5838 
5839 static status_t
5840 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5841 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5842 {
5843 	if (!HAS_FS_CALL(vnode, read_dir))
5844 		return B_UNSUPPORTED;
5845 
5846 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5847 		_count);
5848 	if (error != B_OK)
5849 		return error;
5850 
5851 	// we need to adjust the read dirents
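	// fix_dirent() fills in the parent node info and resolves mount points;
	// the entries are packed back to back, each d_reclen bytes long.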
5852 	uint32 count = *_count;
5853 	for (uint32 i = 0; i < count; i++) {
5854 		error = fix_dirent(vnode, buffer, ioContext);
5855 		if (error != B_OK)
5856 			return error;
5857 
5858 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5859 	}
5860 
5861 	return error;
5862 }
5863 
5864 
5865 static status_t
5866 dir_rewind(struct file_descriptor* descriptor)
5867 {
5868 	struct vnode* vnode = descriptor->u.vnode;
5869 
5870 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5871 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5872 	}
5873 
5874 	return B_UNSUPPORTED;
5875 }
5876 
5877 
5878 static status_t
5879 dir_remove(int fd, char* path, bool kernel)
5880 {
5881 	char name[B_FILE_NAME_LENGTH];
5882 	struct vnode* directory;
5883 	status_t status;
5884 
5885 	if (path != NULL) {
5886 		// we need to make sure our path name doesn't end in "/", ".",
5887 		// or ".."
5888 		char* lastSlash;
5889 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5890 			char* leaf = lastSlash + 1;
5891 			if (!strcmp(leaf, ".."))
5892 				return B_NOT_ALLOWED;
5893 
5894 			// omit multiple slashes
5895 			while (lastSlash > path && lastSlash[-1] == '/')
5896 				lastSlash--;
5897 
5898 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
5899 				break;
5902 			// "name/" -> "name", or "name/." -> "name"
5903 			lastSlash[0] = '\0';
5904 		}
5905 
5906 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5907 			return B_NOT_ALLOWED;
5908 	}
5909 
5910 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5911 	if (status != B_OK)
5912 		return status;
5913 
5914 	if (HAS_FS_CALL(directory, remove_dir))
5915 		status = FS_CALL(directory, remove_dir, name);
5916 	else
5917 		status = B_READ_ONLY_DEVICE;
5918 
5919 	put_vnode(directory);
5920 	return status;
5921 }
5922 
5923 
5924 static status_t
5925 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5926 	size_t length)
5927 {
5928 	struct vnode* vnode = descriptor->u.vnode;
5929 
5930 	if (HAS_FS_CALL(vnode, ioctl))
5931 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5932 
5933 	return B_DEV_INVALID_IOCTL;
5934 }
5935 
5936 
5937 static status_t
5938 common_fcntl(int fd, int op, size_t argument, bool kernel)
5939 {
5940 	struct flock flock;
5941 
5942 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5943 		fd, op, argument, kernel ? "kernel" : "user"));
5944 
5945 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5946 		fd);
5947 	if (descriptor == NULL)
5948 		return B_FILE_ERROR;
5949 
5950 	struct vnode* vnode = fd_vnode(descriptor);
5951 
5952 	status_t status = B_OK;
5953 
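	// For the locking ops the argument is a userland struct flock*; copy it
	// to the kernel stack up front, before we do anything else.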
5954 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5955 		if (descriptor->type != FDTYPE_FILE)
5956 			status = B_BAD_VALUE;
5957 		else if (user_memcpy(&flock, (struct flock*)argument,
5958 				sizeof(struct flock)) != B_OK)
5959 			status = B_BAD_ADDRESS;
5960 
5961 		if (status != B_OK) {
5962 			put_fd(descriptor);
5963 			return status;
5964 		}
5965 	}
5966 
5967 	switch (op) {
5968 		case F_SETFD:
5969 		{
5970 			struct io_context* context = get_current_io_context(kernel);
5971 			// Set file descriptor flags
5972 
5973 			// FD_CLOEXEC is the only flag available at this time
5974 			mutex_lock(&context->io_mutex);
5975 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5976 			mutex_unlock(&context->io_mutex);
5977 
5978 			status = B_OK;
5979 			break;
5980 		}
5981 
5982 		case F_GETFD:
5983 		{
5984 			struct io_context* context = get_current_io_context(kernel);
5985 
5986 			// Get file descriptor flags
5987 			mutex_lock(&context->io_mutex);
5988 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5989 			mutex_unlock(&context->io_mutex);
5990 			break;
5991 		}
5992 
5993 		case F_SETFL:
5994 			// Set file descriptor open mode
5995 
5996 			// we only accept changes to O_APPEND and O_NONBLOCK
5997 			argument &= O_APPEND | O_NONBLOCK;
5998 			if (descriptor->ops->fd_set_flags != NULL) {
5999 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6000 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6001 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6002 					(int)argument);
6003 			} else
6004 				status = B_UNSUPPORTED;
6005 
6006 			if (status == B_OK) {
6007 				// update this descriptor's open_mode field
6008 				descriptor->open_mode = (descriptor->open_mode
6009 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6010 			}
6011 
6012 			break;
6013 
6014 		case F_GETFL:
6015 			// Get file descriptor open mode
6016 			status = descriptor->open_mode;
6017 			break;
6018 
6019 		case F_DUPFD:
6020 		{
6021 			struct io_context* context = get_current_io_context(kernel);
6022 
6023 			status = new_fd_etc(context, descriptor, (int)argument);
6024 			if (status >= 0) {
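				// POSIX requires the duplicate to start with FD_CLOEXEC
				// cleared; the new slot shares our file_descriptor, hence
				// the extra reference below.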
6025 				mutex_lock(&context->io_mutex);
6026 				fd_set_close_on_exec(context, status, false);
6027 				mutex_unlock(&context->io_mutex);
6028 
6029 				atomic_add(&descriptor->ref_count, 1);
6030 			}
6031 			break;
6032 		}
6033 
6034 		case F_GETLK:
6035 			if (vnode != NULL) {
6036 				struct flock normalizedLock;
6037 
6038 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6039 				status = normalize_flock(descriptor, &normalizedLock);
6040 				if (status != B_OK)
6041 					break;
6042 
6043 				if (HAS_FS_CALL(vnode, test_lock)) {
6044 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6045 						&normalizedLock);
6046 				} else
6047 					status = test_advisory_lock(vnode, &normalizedLock);
6048 				if (status == B_OK) {
6049 					if (normalizedLock.l_type == F_UNLCK) {
6050 						// no conflicting lock found, copy back the same struct
6051 						// we were given except change type to F_UNLCK
6052 						flock.l_type = F_UNLCK;
6053 						status = user_memcpy((struct flock*)argument, &flock,
6054 							sizeof(struct flock));
6055 					} else {
6056 						// a conflicting lock was found, copy back its range and
6057 						// type
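						// l_len == OFF_MAX denotes a lock extending to EOF;
						// POSIX reports such a lock with l_len == 0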
6058 						if (normalizedLock.l_len == OFF_MAX)
6059 							normalizedLock.l_len = 0;
6060 
6061 						status = user_memcpy((struct flock*)argument,
6062 							&normalizedLock, sizeof(struct flock));
6063 					}
6064 				}
6065 			} else
6066 				status = B_BAD_VALUE;
6067 			break;
6068 
6069 		case F_SETLK:
6070 		case F_SETLKW:
6071 			status = normalize_flock(descriptor, &flock);
6072 			if (status != B_OK)
6073 				break;
6074 
6075 			if (vnode == NULL) {
6076 				status = B_BAD_VALUE;
6077 			} else if (flock.l_type == F_UNLCK) {
6078 				if (HAS_FS_CALL(vnode, release_lock)) {
6079 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6080 						&flock);
6081 				} else
6082 					status = release_advisory_lock(vnode, &flock);
6083 			} else {
6084 				// the open mode must match the lock type
6085 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6086 						&& flock.l_type == F_WRLCK)
6087 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6088 						&& flock.l_type == F_RDLCK))
6089 					status = B_FILE_ERROR;
6090 				else {
6091 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6092 						status = FS_CALL(vnode, acquire_lock,
6093 							descriptor->cookie, &flock, op == F_SETLKW);
6094 					} else {
6095 						status = acquire_advisory_lock(vnode, -1,
6096 							&flock, op == F_SETLKW);
6097 					}
6098 				}
6099 			}
6100 			break;
6101 
6102 		// TODO: add support for more ops?
6103 
6104 		default:
6105 			status = B_BAD_VALUE;
6106 	}
6107 
6108 	put_fd(descriptor);
6109 	return status;
6110 }
6111 
6112 
6113 static status_t
6114 common_sync(int fd, bool kernel)
6115 {
6116 	struct file_descriptor* descriptor;
6117 	struct vnode* vnode;
6118 	status_t status;
6119 
6120 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6121 
6122 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6123 	if (descriptor == NULL)
6124 		return B_FILE_ERROR;
6125 
6126 	if (HAS_FS_CALL(vnode, fsync))
6127 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6128 	else
6129 		status = B_UNSUPPORTED;
6130 
6131 	put_fd(descriptor);
6132 	return status;
6133 }
6134 
6135 
6136 static status_t
6137 common_lock_node(int fd, bool kernel)
6138 {
6139 	struct file_descriptor* descriptor;
6140 	struct vnode* vnode;
6141 
6142 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6143 	if (descriptor == NULL)
6144 		return B_FILE_ERROR;
6145 
6146 	status_t status = B_OK;
6147 
6148 	// We need to set the lock atomically - someone else might set one at
6149 	// the same time; the test-and-set only succeeds if no FD holds it yet
6150 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6151 			(file_descriptor*)NULL) != NULL)
6152 		status = B_BUSY;
6153 
6154 	put_fd(descriptor);
6155 	return status;
6156 }
6157 
6158 
6159 static status_t
6160 common_unlock_node(int fd, bool kernel)
6161 {
6162 	struct file_descriptor* descriptor;
6163 	struct vnode* vnode;
6164 
6165 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6166 	if (descriptor == NULL)
6167 		return B_FILE_ERROR;
6168 
6169 	status_t status = B_OK;
6170 
6171 	// We need to clear the lock atomically - it is only cleared if this
6172 	// descriptor is the one currently holding it
6173 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6174 			(file_descriptor*)NULL, descriptor) != descriptor)
6175 		status = B_BAD_VALUE;
6176 
6177 	put_fd(descriptor);
6178 	return status;
6179 }
6180 
6181 
6182 static status_t
6183 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6184 	bool kernel)
6185 {
6186 	struct vnode* vnode;
6187 	status_t status;
6188 
6189 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6190 	if (status != B_OK)
6191 		return status;
6192 
6193 	if (HAS_FS_CALL(vnode, read_symlink)) {
6194 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6195 	} else
6196 		status = B_BAD_VALUE;
6197 
6198 	put_vnode(vnode);
6199 	return status;
6200 }
6201 
6202 
6203 static status_t
6204 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6205 	bool kernel)
6206 {
6207 	// path validity checks have to be in the calling function!
6208 	char name[B_FILE_NAME_LENGTH];
6209 	struct vnode* vnode;
6210 	status_t status;
6211 
6212 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6213 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6214 
6215 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6216 	if (status != B_OK)
6217 		return status;
6218 
6219 	if (HAS_FS_CALL(vnode, create_symlink))
6220 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6221 	else {
6222 		status = HAS_FS_CALL(vnode, write)
6223 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6224 	}
6225 
6226 	put_vnode(vnode);
6227 
6228 	return status;
6229 }
6230 
6231 
6232 static status_t
6233 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6234 	bool traverseLeafLink, bool kernel)
6235 {
6236 	// path validity checks have to be in the calling function!
6237 
6238 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6239 		toPath, kernel));
6240 
6241 	char name[B_FILE_NAME_LENGTH];
6242 	struct vnode* directory;
6243 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6244 		kernel);
6245 	if (status != B_OK)
6246 		return status;
6247 
6248 	struct vnode* vnode;
6249 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6250 		kernel);
6251 	if (status != B_OK)
6252 		goto err;
6253 
6254 	if (directory->mount != vnode->mount) {
6255 		status = B_CROSS_DEVICE_LINK;
6256 		goto err1;
6257 	}
6258 
6259 	if (HAS_FS_CALL(directory, link))
6260 		status = FS_CALL(directory, link, name, vnode);
6261 	else
6262 		status = B_READ_ONLY_DEVICE;
6263 
6264 err1:
6265 	put_vnode(vnode);
6266 err:
6267 	put_vnode(directory);
6268 
6269 	return status;
6270 }
6271 
6272 
6273 static status_t
6274 common_unlink(int fd, char* path, bool kernel)
6275 {
6276 	char filename[B_FILE_NAME_LENGTH];
6277 	struct vnode* vnode;
6278 	status_t status;
6279 
6280 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6281 		kernel));
6282 
6283 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6284 	if (status < 0)
6285 		return status;
6286 
6287 	if (HAS_FS_CALL(vnode, unlink))
6288 		status = FS_CALL(vnode, unlink, filename);
6289 	else
6290 		status = B_READ_ONLY_DEVICE;
6291 
6292 	put_vnode(vnode);
6293 
6294 	return status;
6295 }
6296 
6297 
6298 static status_t
6299 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6300 {
6301 	struct vnode* vnode;
6302 	status_t status;
6303 
6304 	// TODO: honor effectiveUserGroup argument
6305 
6306 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6307 	if (status != B_OK)
6308 		return status;
6309 
6310 	if (HAS_FS_CALL(vnode, access))
6311 		status = FS_CALL(vnode, access, mode);
6312 	else
6313 		status = B_OK;
6314 
6315 	put_vnode(vnode);
6316 
6317 	return status;
6318 }
6319 
6320 
6321 static status_t
6322 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6323 {
6324 	struct vnode* fromVnode;
6325 	struct vnode* toVnode;
6326 	char fromName[B_FILE_NAME_LENGTH];
6327 	char toName[B_FILE_NAME_LENGTH];
6328 	status_t status;
6329 
6330 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6331 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6332 
6333 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6334 	if (status != B_OK)
6335 		return status;
6336 
6337 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6338 	if (status != B_OK)
6339 		goto err1;
6340 
6341 	if (fromVnode->device != toVnode->device) {
6342 		status = B_CROSS_DEVICE_LINK;
6343 		goto err2;
6344 	}
6345 
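	// reject empty names, "." and "..", and renaming an entry onto itself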
6346 	if (fromName[0] == '\0' || toName[0] == '\0'
6347 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6348 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6349 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6350 		status = B_BAD_VALUE;
6351 		goto err2;
6352 	}
6353 
6354 	if (HAS_FS_CALL(fromVnode, rename))
6355 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6356 	else
6357 		status = B_READ_ONLY_DEVICE;
6358 
6359 err2:
6360 	put_vnode(toVnode);
6361 err1:
6362 	put_vnode(fromVnode);
6363 
6364 	return status;
6365 }
6366 
6367 
6368 static status_t
6369 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6370 {
6371 	struct vnode* vnode = descriptor->u.vnode;
6372 
6373 	FUNCTION(("common_read_stat: stat %p\n", stat));
6374 
6375 	// TODO: remove this once all file systems properly set them!
6376 	stat->st_crtim.tv_nsec = 0;
6377 	stat->st_ctim.tv_nsec = 0;
6378 	stat->st_mtim.tv_nsec = 0;
6379 	stat->st_atim.tv_nsec = 0;
6380 
6381 	status_t status = FS_CALL(vnode, read_stat, stat);
6382 
6383 	// fill in the st_dev and st_ino fields
6384 	if (status == B_OK) {
6385 		stat->st_dev = vnode->device;
6386 		stat->st_ino = vnode->id;
6387 		stat->st_rdev = -1;
6388 	}
6389 
6390 	return status;
6391 }
6392 
6393 
6394 static status_t
6395 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6396 	int statMask)
6397 {
6398 	struct vnode* vnode = descriptor->u.vnode;
6399 
6400 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6401 		vnode, stat, statMask));
6402 
6403 	if (!HAS_FS_CALL(vnode, write_stat))
6404 		return B_READ_ONLY_DEVICE;
6405 
6406 	return FS_CALL(vnode, write_stat, stat, statMask);
6407 }
6408 
6409 
6410 static status_t
6411 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6412 	struct stat* stat, bool kernel)
6413 {
6414 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6415 		stat));
6416 
6417 	struct vnode* vnode;
6418 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6419 		NULL, kernel);
6420 	if (status != B_OK)
6421 		return status;
6422 
6423 	status = FS_CALL(vnode, read_stat, stat);
6424 
6425 	// fill in the st_dev and st_ino fields
6426 	if (status == B_OK) {
6427 		stat->st_dev = vnode->device;
6428 		stat->st_ino = vnode->id;
6429 		stat->st_rdev = -1;
6430 	}
6431 
6432 	put_vnode(vnode);
6433 	return status;
6434 }
6435 
6436 
6437 static status_t
6438 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6439 	const struct stat* stat, int statMask, bool kernel)
6440 {
6441 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, "
6442 		"stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6443 
6444 	struct vnode* vnode;
6445 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6446 		NULL, kernel);
6447 	if (status != B_OK)
6448 		return status;
6449 
6450 	if (HAS_FS_CALL(vnode, write_stat))
6451 		status = FS_CALL(vnode, write_stat, stat, statMask);
6452 	else
6453 		status = B_READ_ONLY_DEVICE;
6454 
6455 	put_vnode(vnode);
6456 
6457 	return status;
6458 }
6459 
6460 
6461 static int
6462 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6463 {
6464 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6465 		kernel));
6466 
6467 	struct vnode* vnode;
6468 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6469 		NULL, kernel);
6470 	if (status != B_OK)
6471 		return status;
6472 
6473 	status = open_attr_dir_vnode(vnode, kernel);
6474 	if (status < 0)
6475 		put_vnode(vnode);
6476 
6477 	return status;
6478 }
6479 
6480 
6481 static status_t
6482 attr_dir_close(struct file_descriptor* descriptor)
6483 {
6484 	struct vnode* vnode = descriptor->u.vnode;
6485 
6486 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6487 
6488 	if (HAS_FS_CALL(vnode, close_attr_dir))
6489 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6490 
6491 	return B_OK;
6492 }
6493 
6494 
6495 static void
6496 attr_dir_free_fd(struct file_descriptor* descriptor)
6497 {
6498 	struct vnode* vnode = descriptor->u.vnode;
6499 
6500 	if (vnode != NULL) {
6501 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6502 		put_vnode(vnode);
6503 	}
6504 }
6505 
6506 
6507 static status_t
6508 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6509 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6510 {
6511 	struct vnode* vnode = descriptor->u.vnode;
6512 
6513 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6514 
6515 	if (HAS_FS_CALL(vnode, read_attr_dir))
6516 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6517 			bufferSize, _count);
6518 
6519 	return B_UNSUPPORTED;
6520 }
6521 
6522 
6523 static status_t
6524 attr_dir_rewind(struct file_descriptor* descriptor)
6525 {
6526 	struct vnode* vnode = descriptor->u.vnode;
6527 
6528 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6529 
6530 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6531 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6532 
6533 	return B_UNSUPPORTED;
6534 }
6535 
6536 
6537 static int
6538 attr_create(int fd, char* path, const char* name, uint32 type,
6539 	int openMode, bool kernel)
6540 {
6541 	if (name == NULL || *name == '\0')
6542 		return B_BAD_VALUE;
6543 
6544 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6545 	struct vnode* vnode;
6546 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6547 		kernel);
6548 	if (status != B_OK)
6549 		return status;
6550 
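	// with O_NOFOLLOW the path must not name a symlink itself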
6551 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6552 		status = B_LINK_LIMIT;
6553 		goto err;
6554 	}
6555 
6556 	if (!HAS_FS_CALL(vnode, create_attr)) {
6557 		status = B_READ_ONLY_DEVICE;
6558 		goto err;
6559 	}
6560 
6561 	void* cookie;
6562 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6563 	if (status != B_OK)
6564 		goto err;
6565 
6566 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6567 	if (fd >= 0)
6568 		return fd;
6569 
6570 	status = fd;
6571 
6572 	FS_CALL(vnode, close_attr, cookie);
6573 	FS_CALL(vnode, free_attr_cookie, cookie);
6574 
6575 	FS_CALL(vnode, remove_attr, name);
6576 
6577 err:
6578 	put_vnode(vnode);
6579 
6580 	return status;
6581 }
6582 
6583 
6584 static int
6585 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6586 {
6587 	if (name == NULL || *name == '\0')
6588 		return B_BAD_VALUE;
6589 
6590 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6591 	struct vnode* vnode;
6592 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6593 		kernel);
6594 	if (status != B_OK)
6595 		return status;
6596 
6597 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6598 		status = B_LINK_LIMIT;
6599 		goto err;
6600 	}
6601 
6602 	if (!HAS_FS_CALL(vnode, open_attr)) {
6603 		status = B_UNSUPPORTED;
6604 		goto err;
6605 	}
6606 
6607 	void* cookie;
6608 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6609 	if (status != B_OK)
6610 		goto err;
6611 
6612 	// now we only need a file descriptor for this attribute and we're done
6613 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6614 	if (fd >= 0)
6615 		return fd;
6616 
6617 	status = fd;
6618 
6619 	FS_CALL(vnode, close_attr, cookie);
6620 	FS_CALL(vnode, free_attr_cookie, cookie);
6621 
6622 err:
6623 	put_vnode(vnode);
6624 
6625 	return status;
6626 }
6627 
6628 
6629 static status_t
6630 attr_close(struct file_descriptor* descriptor)
6631 {
6632 	struct vnode* vnode = descriptor->u.vnode;
6633 
6634 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6635 
6636 	if (HAS_FS_CALL(vnode, close_attr))
6637 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6638 
6639 	return B_OK;
6640 }
6641 
6642 
6643 static void
6644 attr_free_fd(struct file_descriptor* descriptor)
6645 {
6646 	struct vnode* vnode = descriptor->u.vnode;
6647 
6648 	if (vnode != NULL) {
6649 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6650 		put_vnode(vnode);
6651 	}
6652 }
6653 
6654 
6655 static status_t
6656 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6657 	size_t* length)
6658 {
6659 	struct vnode* vnode = descriptor->u.vnode;
6660 
6661 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6662 		pos, length, *length));
6663 
6664 	if (!HAS_FS_CALL(vnode, read_attr))
6665 		return B_UNSUPPORTED;
6666 
6667 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6668 }
6669 
6670 
6671 static status_t
6672 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6673 	size_t* length)
6674 {
6675 	struct vnode* vnode = descriptor->u.vnode;
6676 
6677 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6678 		length));
6679 
6680 	if (!HAS_FS_CALL(vnode, write_attr))
6681 		return B_UNSUPPORTED;
6682 
6683 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6684 }
6685 
6686 
6687 static off_t
6688 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6689 {
6690 	off_t offset;
6691 
6692 	switch (seekType) {
6693 		case SEEK_SET:
6694 			offset = 0;
6695 			break;
6696 		case SEEK_CUR:
6697 			offset = descriptor->pos;
6698 			break;
6699 		case SEEK_END:
6700 		{
6701 			struct vnode* vnode = descriptor->u.vnode;
6702 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6703 				return B_UNSUPPORTED;
6704 
6705 			struct stat stat;
6706 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6707 				&stat);
6708 			if (status != B_OK)
6709 				return status;
6710 
6711 			offset = stat.st_size;
6712 			break;
6713 		}
6714 		default:
6715 			return B_BAD_VALUE;
6716 	}
6717 
6718 	// assumes off_t is 64 bits wide
6719 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6720 		return B_BUFFER_OVERFLOW;
6721 
6722 	pos += offset;
6723 	if (pos < 0)
6724 		return B_BAD_VALUE;
6725 
6726 	return descriptor->pos = pos;
6727 }
6728 
6729 
6730 static status_t
6731 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6732 {
6733 	struct vnode* vnode = descriptor->u.vnode;
6734 
6735 	FUNCTION(("attr_read_stat: stat %p\n", stat));
6736 
6737 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6738 		return B_UNSUPPORTED;
6739 
6740 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6741 }
6742 
6743 
6744 static status_t
6745 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6746 	int statMask)
6747 {
6748 	struct vnode* vnode = descriptor->u.vnode;
6749 
6750 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6751 
6752 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6753 		return B_READ_ONLY_DEVICE;
6754 
6755 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6756 }
6757 
6758 
6759 static status_t
6760 attr_remove(int fd, const char* name, bool kernel)
6761 {
6762 	struct file_descriptor* descriptor;
6763 	struct vnode* vnode;
6764 	status_t status;
6765 
6766 	if (name == NULL || *name == '\0')
6767 		return B_BAD_VALUE;
6768 
6769 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6770 		kernel));
6771 
6772 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6773 	if (descriptor == NULL)
6774 		return B_FILE_ERROR;
6775 
6776 	if (HAS_FS_CALL(vnode, remove_attr))
6777 		status = FS_CALL(vnode, remove_attr, name);
6778 	else
6779 		status = B_READ_ONLY_DEVICE;
6780 
6781 	put_fd(descriptor);
6782 
6783 	return status;
6784 }
6785 
6786 
6787 static status_t
6788 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6789 	bool kernel)
6790 {
6791 	struct file_descriptor* fromDescriptor;
6792 	struct file_descriptor* toDescriptor;
6793 	struct vnode* fromVnode;
6794 	struct vnode* toVnode;
6795 	status_t status;
6796 
6797 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6798 		|| *toName == '\0')
6799 		return B_BAD_VALUE;
6800 
6801 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6802 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6803 
6804 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6805 	if (fromDescriptor == NULL)
6806 		return B_FILE_ERROR;
6807 
6808 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6809 	if (toDescriptor == NULL) {
6810 		status = B_FILE_ERROR;
6811 		goto err;
6812 	}
6813 
6814 	// are the files on the same volume?
6815 	if (fromVnode->device != toVnode->device) {
6816 		status = B_CROSS_DEVICE_LINK;
6817 		goto err1;
6818 	}
6819 
6820 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6821 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6822 	} else
6823 		status = B_READ_ONLY_DEVICE;
6824 
6825 err1:
6826 	put_fd(toDescriptor);
6827 err:
6828 	put_fd(fromDescriptor);
6829 
6830 	return status;
6831 }
6832 
6833 
6834 static int
6835 index_dir_open(dev_t mountID, bool kernel)
6836 {
6837 	struct fs_mount* mount;
6838 	void* cookie;
6839 
6840 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6841 		kernel));
6842 
6843 	status_t status = get_mount(mountID, &mount);
6844 	if (status != B_OK)
6845 		return status;
6846 
6847 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6848 		status = B_UNSUPPORTED;
6849 		goto error;
6850 	}
6851 
6852 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6853 	if (status != B_OK)
6854 		goto error;
6855 
6856 	// get fd for the index directory
6857 	int fd;
6858 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6859 	if (fd >= 0)
6860 		return fd;
6861 
6862 	// something went wrong
6863 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6864 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6865 
6866 	status = fd;
6867 
6868 error:
6869 	put_mount(mount);
6870 	return status;
6871 }
6872 
6873 
6874 static status_t
6875 index_dir_close(struct file_descriptor* descriptor)
6876 {
6877 	struct fs_mount* mount = descriptor->u.mount;
6878 
6879 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6880 
6881 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6882 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6883 
6884 	return B_OK;
6885 }
6886 
6887 
6888 static void
6889 index_dir_free_fd(struct file_descriptor* descriptor)
6890 {
6891 	struct fs_mount* mount = descriptor->u.mount;
6892 
6893 	if (mount != NULL) {
6894 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6895 		put_mount(mount);
6896 	}
6897 }
6898 
6899 
6900 static status_t
6901 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6902 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6903 {
6904 	struct fs_mount* mount = descriptor->u.mount;
6905 
6906 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6907 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6908 			bufferSize, _count);
6909 	}
6910 
6911 	return B_UNSUPPORTED;
6912 }
6913 
6914 
6915 static status_t
6916 index_dir_rewind(struct file_descriptor* descriptor)
6917 {
6918 	struct fs_mount* mount = descriptor->u.mount;
6919 
6920 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6921 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6922 
6923 	return B_UNSUPPORTED;
6924 }
6925 
6926 
6927 static status_t
6928 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6929 	bool kernel)
6930 {
6931 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6932 		mountID, name, kernel));
6933 
6934 	struct fs_mount* mount;
6935 	status_t status = get_mount(mountID, &mount);
6936 	if (status != B_OK)
6937 		return status;
6938 
6939 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6940 		status = B_READ_ONLY_DEVICE;
6941 		goto out;
6942 	}
6943 
6944 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6945 
6946 out:
6947 	put_mount(mount);
6948 	return status;
6949 }
6950 
6951 
6952 #if 0
6953 static status_t
6954 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6955 {
6956 	struct vnode* vnode = descriptor->u.vnode;
6957 
6958 	// TODO: currently unused!
6959 	FUNCTION(("index_read_stat: stat %p\n", stat));
6960 	if (!HAS_FS_CALL(vnode, read_index_stat))
6961 		return B_UNSUPPORTED;
6962 
6963 	return B_UNSUPPORTED;
6964 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6965 }
6966 
6967 
6968 static void
6969 index_free_fd(struct file_descriptor* descriptor)
6970 {
6971 	struct vnode* vnode = descriptor->u.vnode;
6972 
6973 	if (vnode != NULL) {
6974 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6975 		put_vnode(vnode);
6976 	}
6977 }
6978 #endif
6979 
6980 
6981 static status_t
6982 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6983 	bool kernel)
6984 {
6985 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
6986 		"kernel = %d)\n", mountID, name, kernel));
6987 
6988 	struct fs_mount* mount;
6989 	status_t status = get_mount(mountID, &mount);
6990 	if (status != B_OK)
6991 		return status;
6992 
6993 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6994 		status = B_UNSUPPORTED;
6995 		goto out;
6996 	}
6997 
6998 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6999 
7000 out:
7001 	put_mount(mount);
7002 	return status;
7003 }
7004 
7005 
7006 static status_t
7007 index_remove(dev_t mountID, const char* name, bool kernel)
7008 {
7009 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7010 		mountID, name, kernel));
7011 
7012 	struct fs_mount* mount;
7013 	status_t status = get_mount(mountID, &mount);
7014 	if (status != B_OK)
7015 		return status;
7016 
7017 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7018 		status = B_READ_ONLY_DEVICE;
7019 		goto out;
7020 	}
7021 
7022 	status = FS_MOUNT_CALL(mount, remove_index, name);
7023 
7024 out:
7025 	put_mount(mount);
7026 	return status;
7027 }
7028 
7029 
7030 /*!	TODO: the query FS API is still pretty much the same as in R5.
7031 		It would be nice if the file systems would get some more kernel
7032 		support for queries.
7033 		For example, query parsing should be moved into the kernel.
7034 */
7035 static int
7036 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7037 	int32 token, bool kernel)
7038 {
7039 	struct fs_mount* mount;
7040 	void* cookie;
7041 
7042 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7043 		device, query, kernel));
7044 
7045 	status_t status = get_mount(device, &mount);
7046 	if (status != B_OK)
7047 		return status;
7048 
7049 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7050 		status = B_UNSUPPORTED;
7051 		goto error;
7052 	}
7053 
7054 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7055 		&cookie);
7056 	if (status != B_OK)
7057 		goto error;
7058 
7059 	// get fd for the query
7060 	int fd;
7061 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7062 	if (fd >= 0)
7063 		return fd;
7064 
7065 	status = fd;
7066 
7067 	// something went wrong
7068 	FS_MOUNT_CALL(mount, close_query, cookie);
7069 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7070 
7071 error:
7072 	put_mount(mount);
7073 	return status;
7074 }
7075 
7076 
7077 static status_t
7078 query_close(struct file_descriptor* descriptor)
7079 {
7080 	struct fs_mount* mount = descriptor->u.mount;
7081 
7082 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7083 
7084 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7085 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7086 
7087 	return B_OK;
7088 }
7089 
7090 
7091 static void
7092 query_free_fd(struct file_descriptor* descriptor)
7093 {
7094 	struct fs_mount* mount = descriptor->u.mount;
7095 
7096 	if (mount != NULL) {
7097 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7098 		put_mount(mount);
7099 	}
7100 }
7101 
7102 
7103 static status_t
7104 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7105 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7106 {
7107 	struct fs_mount* mount = descriptor->u.mount;
7108 
7109 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7110 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7111 			bufferSize, _count);
7112 	}
7113 
7114 	return B_UNSUPPORTED;
7115 }
7116 
7117 
7118 static status_t
7119 query_rewind(struct file_descriptor* descriptor)
7120 {
7121 	struct fs_mount* mount = descriptor->u.mount;
7122 
7123 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7124 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7125 
7126 	return B_UNSUPPORTED;
7127 }
7128 
7129 
7130 //	#pragma mark - General File System functions
7131 
7132 
7133 static dev_t
7134 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7135 	const char* args, bool kernel)
7136 {
7137 	struct ::fs_mount* mount;
7138 	status_t status = B_OK;
7139 	fs_volume* volume = NULL;
7140 	int32 layer = 0;
7141 	Vnode* coveredNode = NULL;
7142 
7143 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7144 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7145 
7146 	// The path is always safe; we just have to make sure that fsName is at
7147 	// least superficially valid - we can't make any assumptions about args.
7148 	// A NULL fsName is OK if a device was given and the FS is not virtual;
7149 	// we'll get the name from the DDM later.
7150 	if (fsName == NULL) {
7151 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7152 			return B_BAD_VALUE;
7153 	} else if (fsName[0] == '\0')
7154 		return B_BAD_VALUE;
7155 
7156 	RecursiveLocker mountOpLocker(sMountOpLock);
7157 
7158 	// Helper to delete a newly created file device on failure.
7159 	// Not exactly beautiful, but helps to keep the code below cleaner.
7160 	struct FileDeviceDeleter {
7161 		FileDeviceDeleter() : id(-1) {}
7162 		~FileDeviceDeleter()
7163 		{
7164 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7165 		}
7166 
7167 		partition_id id;
7168 	} fileDeviceDeleter;
7169 
7170 	// If the file system is not a "virtual" one, the device argument should
7171 	// point to a real file/device (if given at all).
7172 	// get the partition
7173 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7174 	KPartition* partition = NULL;
7175 	KPath normalizedDevice;
7176 	bool newlyCreatedFileDevice = false;
7177 
7178 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7179 		// normalize the device path
7180 		status = normalizedDevice.SetTo(device, true);
7181 		if (status != B_OK)
7182 			return status;
7183 
7184 		// get a corresponding partition from the DDM
7185 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7186 		if (partition == NULL) {
7187 			// Partition not found: This either means the user supplied
7188 			// an invalid path, or the path refers to an image file. We try
7189 			// to let the DDM create a file device for the path.
7190 			partition_id deviceID = ddm->CreateFileDevice(
7191 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7192 			if (deviceID >= 0) {
7193 				partition = ddm->RegisterPartition(deviceID);
7194 				if (newlyCreatedFileDevice)
7195 					fileDeviceDeleter.id = deviceID;
7196 			}
7197 		}
7198 
7199 		if (!partition) {
7200 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7201 				normalizedDevice.Path()));
7202 			return B_ENTRY_NOT_FOUND;
7203 		}
7204 
7205 		device = normalizedDevice.Path();
7206 			// correct path to file device
7207 	}
7208 	PartitionRegistrar partitionRegistrar(partition, true);
7209 
7210 	// Write lock the partition's device. For the time being, we keep the lock
7211 	// until we're done mounting -- not nice, but it ensures that no one
7212 	// is interfering.
7213 	// TODO: Just mark the partition busy while mounting!
7214 	KDiskDevice* diskDevice = NULL;
7215 	if (partition) {
7216 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7217 		if (!diskDevice) {
7218 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7219 			return B_ERROR;
7220 		}
7221 	}
7222 
7223 	DeviceWriteLocker writeLocker(diskDevice, true);
7224 		// this takes over the write lock acquired before
7225 
7226 	if (partition != NULL) {
7227 		// make sure that the partition is not busy
7228 		if (partition->IsBusy()) {
7229 			TRACE(("fs_mount(): Partition is busy.\n"));
7230 			return B_BUSY;
7231 		}
7232 
7233 		// if no FS name had been supplied, we get it from the partition
7234 		if (fsName == NULL) {
7235 			KDiskSystem* diskSystem = partition->DiskSystem();
7236 			if (!diskSystem) {
7237 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7238 					"recognize it.\n"));
7239 				return B_BAD_VALUE;
7240 			}
7241 
7242 			if (!diskSystem->IsFileSystem()) {
7243 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7244 					"partitioning system.\n"));
7245 				return B_BAD_VALUE;
7246 			}
7247 
7248 			// The disk system name will not change, and the KDiskSystem
7249 			// object will not go away while the disk device is locked (and
7250 			// the partition has a reference to it), so this is safe.
7251 			fsName = diskSystem->Name();
7252 		}
7253 	}
7254 
7255 	mount = new(std::nothrow) (struct ::fs_mount);
7256 	if (mount == NULL)
7257 		return B_NO_MEMORY;
7258 
7259 	mount->device_name = strdup(device);
7260 		// "device" can be NULL
7261 
7262 	status = mount->entry_cache.Init();
7263 	if (status != B_OK)
7264 		goto err1;
7265 
7266 	// initialize structure
7267 	mount->id = sNextMountID++;
7268 	mount->partition = NULL;
7269 	mount->root_vnode = NULL;
7270 	mount->covers_vnode = NULL;
7271 	mount->unmounting = false;
7272 	mount->owns_file_device = false;
7273 	mount->volume = NULL;
7274 
7275 	// build up the volume(s)
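	// Each layer gets its own fs_volume. mount->volume always points to the
	// most recently created (top) layer; each layer's super_volume refers to
	// the layer created before it.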
7276 	while (true) {
7277 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7278 		if (layerFSName == NULL) {
7279 			if (layer == 0) {
7280 				status = B_NO_MEMORY;
7281 				goto err1;
7282 			}
7283 
7284 			break;
7285 		}
7286 
7287 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7288 		if (volume == NULL) {
7289 			status = B_NO_MEMORY;
7290 			free(layerFSName);
7291 			goto err1;
7292 		}
7293 
7294 		volume->id = mount->id;
7295 		volume->partition = partition != NULL ? partition->ID() : -1;
7296 		volume->layer = layer++;
7297 		volume->private_volume = NULL;
7298 		volume->ops = NULL;
7299 		volume->sub_volume = NULL;
7300 		volume->super_volume = NULL;
7301 		volume->file_system = NULL;
7302 		volume->file_system_name = NULL;
7303 
7304 		volume->file_system_name = get_file_system_name(layerFSName);
7305 		if (volume->file_system_name == NULL) {
7306 			status = B_NO_MEMORY;
7307 			free(layerFSName);
7308 			free(volume);
7309 			goto err1;
7310 		}
7311 
7312 		volume->file_system = get_file_system(layerFSName);
7313 		if (volume->file_system == NULL) {
7314 			status = B_DEVICE_NOT_FOUND;
7315 			free(layerFSName);
7316 			free(volume->file_system_name);
7317 			free(volume);
7318 			goto err1;
7319 		}
7320 
7321 		if (mount->volume == NULL)
7322 			mount->volume = volume;
7323 		else {
7324 			volume->super_volume = mount->volume;
7325 			mount->volume->sub_volume = volume;
7326 			mount->volume = volume;
7327 		}
7328 	}
7329 
7330 	// insert mount struct into list before we call FS's mount() function
7331 	// so that vnodes can be created for this mount
7332 	mutex_lock(&sMountMutex);
7333 	hash_insert(sMountsTable, mount);
7334 	mutex_unlock(&sMountMutex);
7335 
7336 	ino_t rootID;
7337 
7338 	if (!sRoot) {
7339 		// we haven't mounted anything yet
7340 		if (strcmp(path, "/") != 0) {
7341 			status = B_ERROR;
7342 			goto err2;
7343 		}
7344 
7345 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7346 			args, &rootID);
7347 		if (status != B_OK)
7348 			goto err2;
7349 	} else {
7350 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7351 		if (status != B_OK)
7352 			goto err2;
7353 
7354 		mount->covers_vnode = coveredNode;
7355 
7356 		// make sure coveredNode is a directory
7357 		if (!S_ISDIR(coveredNode->Type())) {
7358 			status = B_NOT_A_DIRECTORY;
7359 			goto err3;
7360 		}
7361 
7362 		if (coveredNode->IsCovered()) {
7363 			// this is already a covered vnode
7364 			status = B_BUSY;
7365 			goto err3;
7366 		}
7367 
7368 		// mount it/them
7369 		fs_volume* volume = mount->volume;
7370 		while (volume) {
7371 			status = volume->file_system->mount(volume, device, flags, args,
7372 				&rootID);
7373 			if (status != B_OK) {
7374 				if (volume->sub_volume)
7375 					goto err4;
7376 				goto err3;
7377 			}
7378 
7379 			volume = volume->super_volume;
7380 		}
7381 
7382 		volume = mount->volume;
7383 		while (volume) {
7384 			if (volume->ops->all_layers_mounted != NULL)
7385 				volume->ops->all_layers_mounted(volume);
7386 			volume = volume->super_volume;
7387 		}
7388 	}
7389 
7390 	// the root node is supposed to be owned by the file system - it must
7391 	// exist at this point
7392 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7393 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7394 		panic("fs_mount: file system does not own its root node!\n");
7395 		status = B_ERROR;
7396 		goto err4;
7397 	}
7398 
7399 	// set up the links between the root vnode and the vnode it covers
7400 	rw_lock_write_lock(&sVnodeLock);
7401 	if (coveredNode != NULL) {
7402 		if (coveredNode->IsCovered()) {
7403 			// someone covered the vnode while we were not holding the lock
7404 			status = B_BUSY;
7405 			rw_lock_write_unlock(&sVnodeLock);
7406 			goto err4;
7407 		}
7408 
7409 		mount->root_vnode->covers = coveredNode;
7410 		mount->root_vnode->SetCovering(true);
7411 
7412 		coveredNode->covered_by = mount->root_vnode;
7413 		coveredNode->SetCovered(true);
7414 	}
7415 	rw_lock_write_unlock(&sVnodeLock);
7416 
7417 	if (!sRoot) {
7418 		sRoot = mount->root_vnode;
7419 		mutex_lock(&sIOContextRootLock);
7420 		get_current_io_context(true)->root = sRoot;
7421 		mutex_unlock(&sIOContextRootLock);
7422 		inc_vnode_ref_count(sRoot);
7423 	}
7424 
7425 	// supply the partition (if any) with the mount cookie and mark it mounted
7426 	if (partition) {
7427 		partition->SetMountCookie(mount->volume->private_volume);
7428 		partition->SetVolumeID(mount->id);
7429 
7430 		// keep a partition reference as long as the partition is mounted
7431 		partitionRegistrar.Detach();
7432 		mount->partition = partition;
7433 		mount->owns_file_device = newlyCreatedFileDevice;
7434 		fileDeviceDeleter.id = -1;
7435 	}
7436 
7437 	notify_mount(mount->id,
7438 		coveredNode != NULL ? coveredNode->device : -1,
7439 		coveredNode != NULL ? coveredNode->id : -1);
7440 
7441 	return mount->id;
7442 
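// error unwinding: undo the steps above in reverse order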
7443 err4:
7444 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7445 err3:
7446 	if (coveredNode != NULL)
7447 		put_vnode(coveredNode);
7448 err2:
7449 	mutex_lock(&sMountMutex);
7450 	hash_remove(sMountsTable, mount);
7451 	mutex_unlock(&sMountMutex);
7452 err1:
7453 	delete mount;
7454 
7455 	return status;
7456 }
7457 
7458 
7459 static status_t
7460 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7461 {
7462 	struct fs_mount* mount;
7463 	status_t err;
7464 
7465 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7466 		mountID, kernel));
7467 
7468 	struct vnode* pathVnode = NULL;
7469 	if (path != NULL) {
7470 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7471 		if (err != B_OK)
7472 			return B_ENTRY_NOT_FOUND;
7473 	}
7474 
7475 	RecursiveLocker mountOpLocker(sMountOpLock);
7476 
7477 	// This lock is not strictly necessary, but is held in the KDEBUG case
7478 	// to keep the ASSERT in find_mount() working.
7479 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7480 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7481 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7482 	if (mount == NULL) {
7483 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7484 			pathVnode);
7485 	}
7486 
7487 	if (path != NULL) {
7488 		put_vnode(pathVnode);
7489 
7490 		if (mount->root_vnode != pathVnode) {
7491 			// not a mount point
7492 			return B_BAD_VALUE;
7493 		}
7494 	}
7495 
7496 	// if the volume is associated with a partition, lock the device of the
7497 	// partition as long as we are unmounting
7498 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7499 	KPartition* partition = mount->partition;
7500 	KDiskDevice* diskDevice = NULL;
7501 	if (partition != NULL) {
7502 		if (partition->Device() == NULL) {
7503 			dprintf("fs_unmount(): There is no device!\n");
7504 			return B_ERROR;
7505 		}
7506 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7507 		if (!diskDevice) {
7508 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7509 			return B_ERROR;
7510 		}
7511 	}
7512 	DeviceWriteLocker writeLocker(diskDevice, true);
7513 
7514 	// make sure that the partition is not busy
7515 	if (partition != NULL) {
7516 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7517 			TRACE(("fs_unmount(): Partition is busy.\n"));
7518 			return B_BUSY;
7519 		}
7520 	}
7521 
7522 	// grab the vnode write lock to keep anyone from creating
7523 	// a vnode while we're figuring out if we can continue
7524 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7525 
7526 	bool disconnectedDescriptors = false;
7527 
7528 	while (true) {
7529 		bool busy = false;
7530 
7531 		// cycle through the list of vnodes associated with this mount and
7532 		// make sure none of them is busy or still referenced
7533 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7534 		while (struct vnode* vnode = iterator.Next()) {
7535 			if (vnode->IsBusy()) {
7536 				busy = true;
7537 				break;
7538 			}
7539 
7540 			// check the vnode's ref count -- subtract additional references for
7541 			// covering
7542 			int32 refCount = vnode->ref_count;
7543 			if (vnode->covers != NULL)
7544 				refCount--;
7545 			if (vnode->covered_by != NULL)
7546 				refCount--;
7547 
7548 			if (refCount != 0) {
7549 				// there are still vnodes in use on this mount, so we cannot
7550 				// unmount yet
7551 				busy = true;
7552 				break;
7553 			}
7554 		}
7555 
7556 		if (!busy)
7557 			break;
7558 
7559 		if ((flags & B_FORCE_UNMOUNT) == 0)
7560 			return B_BUSY;
7561 
7562 		if (disconnectedDescriptors) {
7563 			// wait a bit until the last access is finished, and then try again
7564 			vnodesWriteLocker.Unlock();
7565 			snooze(100000);
7566 			// TODO: if there is some kind of bug that prevents the ref counts
7567 			// from getting back to zero, this will fall into an endless loop...
7568 			vnodesWriteLocker.Lock();
7569 			continue;
7570 		}
7571 
7572 		// the file system is still busy - but we're forced to unmount it,
7573 		// so let's disconnect all open file descriptors
7574 
7575 		mount->unmounting = true;
7576 			// prevent new vnodes from being created
7577 
7578 		vnodesWriteLocker.Unlock();
7579 
7580 		disconnect_mount_or_vnode_fds(mount, NULL);
7581 		disconnectedDescriptors = true;
7582 
7583 		vnodesWriteLocker.Lock();
7584 	}
7585 
7586 	// We can safely continue. Mark all of the vnodes busy and this mount
7587 	// structure in unmounting state. Also undo the vnode covers/covered_by
7588 	// links.
7589 	mount->unmounting = true;
7590 
7591 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7592 	while (struct vnode* vnode = iterator.Next()) {
7593 		// Remove all covers/covered_by links from other mounts' nodes to this
7594 		// vnode and adjust the node ref count accordingly. We will release the
7595 		// references to the external vnodes below.
7596 		if (Vnode* coveredNode = vnode->covers) {
7597 			if (Vnode* coveringNode = vnode->covered_by) {
7598 				// We have both covered and covering vnodes, so just remove us
7599 				// from the chain.
7600 				coveredNode->covered_by = coveringNode;
7601 				coveringNode->covers = coveredNode;
7602 				vnode->ref_count -= 2;
7603 
7604 				vnode->covered_by = NULL;
7605 				vnode->covers = NULL;
7606 				vnode->SetCovering(false);
7607 				vnode->SetCovered(false);
7608 			} else {
7609 				// We only have a covered vnode. Remove its link to us.
7610 				coveredNode->covered_by = NULL;
7611 				coveredNode->SetCovered(false);
7612 				vnode->ref_count--;
7613 
7614 				// If the other node is an external vnode, we keep its link
7615 				// around, so we can put the reference later on. Otherwise
7616 				// we get rid of it right now.
7617 				if (coveredNode->mount == mount) {
7618 					vnode->covers = NULL;
7619 					coveredNode->ref_count--;
7620 				}
7621 			}
7622 		} else if (Vnode* coveringNode = vnode->covered_by) {
7623 			// We only have a covering vnode. Remove its link to us.
7624 			coveringNode->covers = NULL;
7625 			coveringNode->SetCovering(false);
7626 			vnode->ref_count--;
7627 
7628 			// If the other node is an external vnode, we keep its link
7629 			// around, so we can put the reference later on. Otherwise
7630 			// we get rid of it right now.
7631 			if (coveringNode->mount == mount) {
7632 				vnode->covered_by = NULL;
7633 				coveringNode->ref_count--;
7634 			}
7635 		}
7636 
7637 		vnode->SetBusy(true);
7638 		vnode_to_be_freed(vnode);
7639 	}
7640 
7641 	vnodesWriteLocker.Unlock();
7642 
7643 	// Free all vnodes associated with this mount.
7644 	// They will be removed from the mount list by free_vnode(), so
7645 	// we don't have to do that ourselves here.
7646 	while (struct vnode* vnode = mount->vnodes.Head()) {
7647 		// Put the references to external covered/covering vnodes we kept above.
7648 		if (Vnode* coveredNode = vnode->covers)
7649 			put_vnode(coveredNode);
7650 		if (Vnode* coveringNode = vnode->covered_by)
7651 			put_vnode(coveringNode);
7652 
7653 		free_vnode(vnode, false);
7654 	}
7655 
7656 	// remove the mount structure from the hash table
7657 	mutex_lock(&sMountMutex);
7658 	hash_remove(sMountsTable, mount);
7659 	mutex_unlock(&sMountMutex);
7660 
7661 	mountOpLocker.Unlock();
7662 
7663 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7664 	notify_unmount(mount->id);
7665 
7666 	// dereference the partition and mark it unmounted
7667 	if (partition) {
7668 		partition->SetVolumeID(-1);
7669 		partition->SetMountCookie(NULL);
7670 
7671 		if (mount->owns_file_device)
7672 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7673 		partition->Unregister();
7674 	}
7675 
7676 	delete mount;
7677 	return B_OK;
7678 }
7679 
7680 
7681 static status_t
7682 fs_sync(dev_t device)
7683 {
7684 	struct fs_mount* mount;
7685 	status_t status = get_mount(device, &mount);
7686 	if (status != B_OK)
7687 		return status;
7688 
7689 	struct vnode marker;
7690 	memset(&marker, 0, sizeof(marker));
7691 	marker.SetBusy(true);
7692 	marker.SetRemoved(true);
7693 
7694 	// First, synchronize all file caches
7695 
7696 	while (true) {
7697 		WriteLocker locker(sVnodeLock);
7698 			// Note: That's the easy way, which is probably OK for sync(),
7699 			// since it's a relatively rare call and doesn't need to allow for
7700 			// a lot of concurrency. Using a read lock would be possible, but
7701 			// also more involved, since we would have to lock the individual
7702 			// nodes and take care of the locking order, which we might not
7703 			// want to do while holding fs_mount::rlock.
7704 
7705 		// synchronize access to vnode list
7706 		recursive_lock_lock(&mount->rlock);
7707 
7708 		struct vnode* vnode;
7709 		if (!marker.IsRemoved()) {
7710 			vnode = mount->vnodes.GetNext(&marker);
7711 			mount->vnodes.Remove(&marker);
7712 			marker.SetRemoved(true);
7713 		} else
7714 			vnode = mount->vnodes.First();
7715 
7716 		while (vnode != NULL && (vnode->cache == NULL
7717 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7718 			// TODO: we could track writes (and writable mapped vnodes)
7719 			//	and have a simple flag that we could test for here
7720 			vnode = mount->vnodes.GetNext(vnode);
7721 		}
7722 
7723 		if (vnode != NULL) {
7724 			// insert marker vnode again
7725 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7726 			marker.SetRemoved(false);
7727 		}
7728 
7729 		recursive_lock_unlock(&mount->rlock);
7730 
7731 		if (vnode == NULL)
7732 			break;
7733 
7734 		vnode = lookup_vnode(mount->id, vnode->id);
7735 		if (vnode == NULL || vnode->IsBusy())
7736 			continue;
7737 
7738 		if (vnode->ref_count == 0) {
7739 			// this vnode has been unused before
7740 			vnode_used(vnode);
7741 		}
7742 		inc_vnode_ref_count(vnode);
7743 
7744 		locker.Unlock();
7745 
7746 		if (vnode->cache != NULL && !vnode->IsRemoved())
7747 			vnode->cache->WriteModified();
7748 
7749 		put_vnode(vnode);
7750 	}
7751 
7752 	// And then, let the file systems do their synchronizing work
7753 
7754 	if (HAS_FS_MOUNT_CALL(mount, sync))
7755 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7756 
7757 	put_mount(mount);
7758 	return status;
7759 }
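

/*	fs_sync() above iterates the mount's vnode list with a "marker" vnode:
	because all locks are dropped while a node's cache is written back, the
	list may change under us, and the marker preserves our position. A
	simplified sketch of the pattern (not actual kernel code; List/Node are
	hypothetical stand-ins for the real structures):

	\code
	Node marker;
	marker.SetRemoved(true);

	while (true) {
		lock_list();

		// resume behind the marker, or start at the head on the first pass
		Node* node = marker.IsRemoved() ? list.First() : list.GetNext(&marker);
		if (!marker.IsRemoved()) {
			list.Remove(&marker);
			marker.SetRemoved(true);
		}

		if (node != NULL) {
			// remember our position: re-insert the marker behind 'node'
			list.Insert(list.GetNext(node), &marker);
			marker.SetRemoved(false);
		}

		unlock_list();

		if (node == NULL)
			break;

		process(node);
			// may block -- the marker keeps the iteration position valid
	}
	\endcode
*/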
7760 
7761 
7762 static status_t
7763 fs_read_info(dev_t device, struct fs_info* info)
7764 {
7765 	struct fs_mount* mount;
7766 	status_t status = get_mount(device, &mount);
7767 	if (status != B_OK)
7768 		return status;
7769 
7770 	memset(info, 0, sizeof(struct fs_info));
7771 
7772 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7773 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7774 
7775 	// fill in info the file system doesn't (have to) know about
7776 	if (status == B_OK) {
7777 		info->dev = mount->id;
7778 		info->root = mount->root_vnode->id;
7779 
7780 		fs_volume* volume = mount->volume;
7781 		while (volume->super_volume != NULL)
7782 			volume = volume->super_volume;
7783 
7784 		strlcpy(info->fsh_name, volume->file_system_name,
7785 			sizeof(info->fsh_name));
7786 		if (mount->device_name != NULL) {
7787 			strlcpy(info->device_name, mount->device_name,
7788 				sizeof(info->device_name));
7789 		}
7790 	}
7791 
7792 	// even if the call is not supported by the file system, the parts
7793 	// that we filled out ourselves above are still valid
7794 
7795 	put_mount(mount);
7796 	return status;
7797 }
7798 
7799 
7800 static status_t
7801 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7802 {
7803 	struct fs_mount* mount;
7804 	status_t status = get_mount(device, &mount);
7805 	if (status != B_OK)
7806 		return status;
7807 
7808 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7809 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7810 	else
7811 		status = B_READ_ONLY_DEVICE;
7812 
7813 	put_mount(mount);
7814 	return status;
7815 }
7816 
7817 
7818 static dev_t
7819 fs_next_device(int32* _cookie)
7820 {
7821 	struct fs_mount* mount = NULL;
7822 	dev_t device = *_cookie;
7823 
7824 	mutex_lock(&sMountMutex);
7825 
7826 	// Since device IDs are assigned sequentially, this algorithm
7827 	// works well enough. It makes sure that the device list
7828 	// returned is sorted, and that no device is skipped when an
7829 	// already visited device has been unmounted.
7830 
7831 	while (device < sNextMountID) {
7832 		mount = find_mount(device++);
7833 		if (mount != NULL && mount->volume->private_volume != NULL)
7834 			break;
7835 	}
7836 
7837 	*_cookie = device;
7838 
7839 	if (mount != NULL)
7840 		device = mount->id;
7841 	else
7842 		device = B_BAD_VALUE;
7843 
7844 	mutex_unlock(&sMountMutex);
7845 
7846 	return device;
7847 }
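

/*	Typical use of the cookie based iteration above (this mirrors what
	_kern_sync() does with next_dev()):

	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0) {
		// 'device' is the ID of the next mounted volume, in ascending order
	}
	\endcode
*/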
7848 
7849 
7850 ssize_t
7851 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7852 	void *buffer, size_t readBytes)
7853 {
7854 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7855 	if (attrFD < 0)
7856 		return attrFD;
7857 
7858 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7859 
7860 	_kern_close(attrFD);
7861 
7862 	return bytesRead;
7863 }
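

/*	Example: reading a small attribute into a stack buffer (a hypothetical
	caller; "MY:keywords" is a made-up attribute name -- note that the type
	argument is not used by fs_read_attr()):

	\code
	char buffer[256];
	ssize_t bytesRead = fs_read_attr(fd, "MY:keywords", B_STRING_TYPE, 0,
		buffer, sizeof(buffer));
	if (bytesRead < 0) {
		// the attribute is missing or not readable; bytesRead is the error
	}
	\endcode
*/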
7864 
7865 
7866 static status_t
7867 get_cwd(char* buffer, size_t size, bool kernel)
7868 {
7869 	// Get current working directory from io context
7870 	struct io_context* context = get_current_io_context(kernel);
7871 	status_t status;
7872 
7873 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7874 
7875 	mutex_lock(&context->io_mutex);
7876 
7877 	struct vnode* vnode = context->cwd;
7878 	if (vnode)
7879 		inc_vnode_ref_count(vnode);
7880 
7881 	mutex_unlock(&context->io_mutex);
7882 
7883 	if (vnode) {
7884 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7885 		put_vnode(vnode);
7886 	} else
7887 		status = B_ERROR;
7888 
7889 	return status;
7890 }
7891 
7892 
7893 static status_t
7894 set_cwd(int fd, char* path, bool kernel)
7895 {
7896 	struct io_context* context;
7897 	struct vnode* vnode = NULL;
7898 	struct vnode* oldDirectory;
7899 	status_t status;
7900 
7901 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7902 
7903 	// Get vnode for passed path, and bail if it failed
7904 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7905 	if (status < 0)
7906 		return status;
7907 
7908 	if (!S_ISDIR(vnode->Type())) {
7909 		// nope, can't cwd to here
7910 		status = B_NOT_A_DIRECTORY;
7911 		goto err;
7912 	}
7913 
7914 	// Get current io context and lock
7915 	context = get_current_io_context(kernel);
7916 	mutex_lock(&context->io_mutex);
7917 
7918 	// save the old current working directory first
7919 	oldDirectory = context->cwd;
7920 	context->cwd = vnode;
7921 
7922 	mutex_unlock(&context->io_mutex);
7923 
7924 	if (oldDirectory)
7925 		put_vnode(oldDirectory);
7926 
7927 	return B_NO_ERROR;
7928 
7929 err:
7930 	put_vnode(vnode);
7931 	return status;
7932 }
7933 
7934 
7935 //	#pragma mark - kernel mirrored syscalls
7936 
7937 
7938 dev_t
7939 _kern_mount(const char* path, const char* device, const char* fsName,
7940 	uint32 flags, const char* args, size_t argsLength)
7941 {
7942 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7943 	if (pathBuffer.InitCheck() != B_OK)
7944 		return B_NO_MEMORY;
7945 
7946 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7947 }
7948 
7949 
7950 status_t
7951 _kern_unmount(const char* path, uint32 flags)
7952 {
7953 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7954 	if (pathBuffer.InitCheck() != B_OK)
7955 		return B_NO_MEMORY;
7956 
7957 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7958 }
7959 
7960 
7961 status_t
7962 _kern_read_fs_info(dev_t device, struct fs_info* info)
7963 {
7964 	if (info == NULL)
7965 		return B_BAD_VALUE;
7966 
7967 	return fs_read_info(device, info);
7968 }
7969 
7970 
7971 status_t
7972 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7973 {
7974 	if (info == NULL)
7975 		return B_BAD_VALUE;
7976 
7977 	return fs_write_info(device, info, mask);
7978 }
7979 
7980 
7981 status_t
7982 _kern_sync(void)
7983 {
7984 	// Note: _kern_sync() is also called from _user_sync()
7985 	int32 cookie = 0;
7986 	dev_t device;
7987 	while ((device = next_dev(&cookie)) >= 0) {
7988 		status_t status = fs_sync(device);
7989 		if (status != B_OK && status != B_BAD_VALUE) {
7990 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
7991 				strerror(status));
7992 		}
7993 	}
7994 
7995 	return B_OK;
7996 }
7997 
7998 
7999 dev_t
8000 _kern_next_device(int32* _cookie)
8001 {
8002 	return fs_next_device(_cookie);
8003 }
8004 
8005 
8006 status_t
8007 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8008 	size_t infoSize)
8009 {
8010 	if (infoSize != sizeof(fd_info))
8011 		return B_BAD_VALUE;
8012 
8013 	// get the team
8014 	Team* team = Team::Get(teamID);
8015 	if (team == NULL)
8016 		return B_BAD_TEAM_ID;
8017 	BReference<Team> teamReference(team, true);
8018 
8019 	// now that we have a team reference, its I/O context won't go away
8020 	io_context* context = team->io_context;
8021 	MutexLocker contextLocker(context->io_mutex);
8022 
8023 	uint32 slot = *_cookie;
8024 
8025 	struct file_descriptor* descriptor;
8026 	while (slot < context->table_size
8027 		&& (descriptor = context->fds[slot]) == NULL) {
8028 		slot++;
8029 	}
8030 
8031 	if (slot >= context->table_size)
8032 		return B_ENTRY_NOT_FOUND;
8033 
8034 	info->number = slot;
8035 	info->open_mode = descriptor->open_mode;
8036 
8037 	struct vnode* vnode = fd_vnode(descriptor);
8038 	if (vnode != NULL) {
8039 		info->device = vnode->device;
8040 		info->node = vnode->id;
8041 	} else if (descriptor->u.mount != NULL) {
8042 		info->device = descriptor->u.mount->id;
8043 		info->node = -1;
8044 	}
8045 
8046 	*_cookie = slot + 1;
8047 	return B_OK;
8048 }
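

/*	Example: enumerating all open descriptors of a team (a hypothetical
	caller; 'teamID' stands in for a valid team ID):

	\code
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %" B_PRId32 ": device %" B_PRIdDEV ", node %" B_PRIdINO
			"\n", info.number, info.device, info.node);
	}
	\endcode
*/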
8049 
8050 
8051 int
8052 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8053 	int perms)
8054 {
8055 	if ((openMode & O_CREAT) != 0) {
8056 		return file_create_entry_ref(device, inode, name, openMode, perms,
8057 			true);
8058 	}
8059 
8060 	return file_open_entry_ref(device, inode, name, openMode, true);
8061 }
8062 
8063 
8064 /*!	\brief Opens a node specified by a FD + path pair.
8065 
8066 	At least one of \a fd and \a path must be specified.
8067 	If only \a fd is given, the function opens the node identified by this
8068 	FD. If only a path is given, this path is opened. If both are given and
8069 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8070 	of the directory (!) identified by \a fd.
8071 
8072 	\param fd The FD. May be < 0.
8073 	\param path The absolute or relative path. May be \c NULL.
8074 	\param openMode The open mode.
	\param perms The access permissions the node shall have, if it is created
		   (i.e. if \c O_CREAT is set in \a openMode).
8075 	\return A FD referring to the newly opened node, or an error code,
8076 			if an error occurs.
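
	A hypothetical usage, opening a file relative to an already opened
	directory:
	\code
	int dirFD = _kern_open_dir(-1, "/boot/home");
	int fd = _kern_open(dirFD, "todo.txt", O_RDWR | O_CREAT, 0644);
	// ... use fd ...
	_kern_close(fd);
	_kern_close(dirFD);
	\endcode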
8077 */
8078 int
8079 _kern_open(int fd, const char* path, int openMode, int perms)
8080 {
8081 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8082 	if (pathBuffer.InitCheck() != B_OK)
8083 		return B_NO_MEMORY;
8084 
8085 	if (openMode & O_CREAT)
8086 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8087 
8088 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8089 }
8090 
8091 
8092 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8093 
8094 	The supplied name may be \c NULL, in which case directory identified
8095 	by \a device and \a inode will be opened. Otherwise \a device and
8096 	\a inode identify the parent directory of the directory to be opened
8097 	and \a name its entry name.
8098 
8099 	\param device If \a name is specified the ID of the device the parent
8100 		   directory of the directory to be opened resides on, otherwise
8101 		   the device of the directory itself.
8102 	\param inode If \a name is specified the node ID of the parent
8103 		   directory of the directory to be opened, otherwise the node ID of
		   the directory itself.
8104 		   directory itself.
8105 	\param name The entry name of the directory to be opened. If \c NULL,
8106 		   the \a device + \a inode pair identify the node to be opened.
8107 	\return The FD of the newly opened directory or an error code, if
8108 			something went wrong.
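
	For example (with hypothetical \a device and \a inode values):
	\code
	// open the directory identified by device + inode directly
	int fd1 = _kern_open_dir_entry_ref(device, inode, NULL);
	// open its subdirectory named "config"
	int fd2 = _kern_open_dir_entry_ref(device, inode, "config");
	\endcode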
8109 */
8110 int
8111 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8112 {
8113 	return dir_open_entry_ref(device, inode, name, true);
8114 }
8115 
8116 
8117 /*!	\brief Opens a directory specified by a FD + path pair.
8118 
8119 	At least one of \a fd and \a path must be specified.
8120 	If only \a fd is given, the function opens the directory identified by this
8121 	FD. If only a path is given, this path is opened. If both are given and
8122 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8123 	of the directory (!) identified by \a fd.
8124 
8125 	\param fd The FD. May be < 0.
8126 	\param path The absolute or relative path. May be \c NULL.
8127 	\return A FD referring to the newly opened directory, or an error code,
8128 			if an error occurs.
8129 */
8130 int
8131 _kern_open_dir(int fd, const char* path)
8132 {
8133 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8134 	if (pathBuffer.InitCheck() != B_OK)
8135 		return B_NO_MEMORY;
8136 
8137 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8138 }
8139 
8140 
8141 status_t
8142 _kern_fcntl(int fd, int op, size_t argument)
8143 {
8144 	return common_fcntl(fd, op, argument, true);
8145 }
8146 
8147 
8148 status_t
8149 _kern_fsync(int fd)
8150 {
8151 	return common_sync(fd, true);
8152 }
8153 
8154 
8155 status_t
8156 _kern_lock_node(int fd)
8157 {
8158 	return common_lock_node(fd, true);
8159 }
8160 
8161 
8162 status_t
8163 _kern_unlock_node(int fd)
8164 {
8165 	return common_unlock_node(fd, true);
8166 }
8167 
8168 
8169 status_t
8170 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8171 	int perms)
8172 {
8173 	return dir_create_entry_ref(device, inode, name, perms, true);
8174 }
8175 
8176 
8177 /*!	\brief Creates a directory specified by a FD + path pair.
8178 
8179 	\a path must always be specified (it contains the name of the new directory
8180 	at least). If only a path is given, this path identifies the location at
8181 	which the directory shall be created. If both \a fd and \a path are given
8182 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8183 	of the directory (!) identified by \a fd.
8184 
8185 	\param fd The FD. May be < 0.
8186 	\param path The absolute or relative path. Must not be \c NULL.
8187 	\param perms The access permissions the new directory shall have.
8188 	\return \c B_OK, if the directory has been created successfully, another
8189 			error code otherwise.
8190 */
8191 status_t
8192 _kern_create_dir(int fd, const char* path, int perms)
8193 {
8194 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8195 	if (pathBuffer.InitCheck() != B_OK)
8196 		return B_NO_MEMORY;
8197 
8198 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8199 }
8200 
8201 
8202 status_t
8203 _kern_remove_dir(int fd, const char* path)
8204 {
8205 	if (path) {
8206 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8207 		if (pathBuffer.InitCheck() != B_OK)
8208 			return B_NO_MEMORY;
8209 
8210 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8211 	}
8212 
8213 	return dir_remove(fd, NULL, true);
8214 }
8215 
8216 
8217 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8218 
8219 	At least one of \a fd and \a path must be specified.
8220 	If only \a fd is given, the symlink to be read is the node
8221 	identified by this FD. If only a path is given, this path identifies the
8222 	symlink to be read. If both are given and the path is absolute, \a fd is
8223 	ignored; a relative path is reckoned off of the directory (!) identified
8224 	by \a fd.
8225 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8226 	will still be updated to reflect the required buffer size.
8227 
8228 	\param fd The FD. May be < 0.
8229 	\param path The absolute or relative path. May be \c NULL.
8230 	\param buffer The buffer into which the contents of the symlink shall be
8231 		   written.
8232 	\param _bufferSize A pointer to the size of the supplied buffer.
8233 	\return \c B_OK on success, or an appropriate error code otherwise.
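
	A hypothetical caller:
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t bufferSize = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/boot/home/link", buffer,
		&bufferSize);
	// on B_BUFFER_OVERFLOW, bufferSize now holds the required size
	\endcode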
8234 */
8235 status_t
8236 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8237 {
8238 	if (path) {
8239 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8240 		if (pathBuffer.InitCheck() != B_OK)
8241 			return B_NO_MEMORY;
8242 
8243 		return common_read_link(fd, pathBuffer.LockBuffer(),
8244 			buffer, _bufferSize, true);
8245 	}
8246 
8247 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8248 }
8249 
8250 
8251 /*!	\brief Creates a symlink specified by a FD + path pair.
8252 
8253 	\a path must always be specified (it contains the name of the new symlink
8254 	at least). If only a path is given, this path identifies the location at
8255 	which the symlink shall be created. If both \a fd and \a path are given and
8256 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8257 	of the directory (!) identified by \a fd.
8258 
8259 	\param fd The FD. May be < 0.
8260 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the symlink shall point to.
8261 	\param mode The access permissions the new symlink shall have.
8262 	\return \c B_OK, if the symlink has been created successfully, another
8263 			error code otherwise.
8264 */
8265 status_t
8266 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8267 {
8268 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8269 	if (pathBuffer.InitCheck() != B_OK)
8270 		return B_NO_MEMORY;
8271 
8272 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8273 		toPath, mode, true);
8274 }
8275 
8276 
8277 status_t
8278 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8279 	bool traverseLeafLink)
8280 {
8281 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8282 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8283 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8284 		return B_NO_MEMORY;
8285 
8286 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8287 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8288 }
8289 
8290 
8291 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8292 
8293 	\a path must always be specified (it contains at least the name of the entry
8294 	to be deleted). If only a path is given, this path identifies the entry
8295 	directly. If both \a fd and \a path are given and the path is absolute,
8296 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8297 	identified by \a fd.
8298 
8299 	\param fd The FD. May be < 0.
8300 	\param path The absolute or relative path. Must not be \c NULL.
8301 	\return \c B_OK, if the entry has been removed successfully, another
8302 			error code otherwise.
8303 */
8304 status_t
8305 _kern_unlink(int fd, const char* path)
8306 {
8307 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8308 	if (pathBuffer.InitCheck() != B_OK)
8309 		return B_NO_MEMORY;
8310 
8311 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8312 }
8313 
8314 
8315 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8316 		   by another FD + path pair.
8317 
8318 	\a oldPath and \a newPath must always be specified (they contain at least
8319 	the name of the entry). If only a path is given, this path identifies the
8320 	entry directly. If both a FD and a path are given and the path is absolute,
8321 	the FD is ignored; a relative path is reckoned off of the directory (!)
8322 	identified by the respective FD.
8323 
8324 	\param oldFD The FD of the old location. May be < 0.
8325 	\param oldPath The absolute or relative path of the old location. Must not
8326 		   be \c NULL.
8327 	\param newFD The FD of the new location. May be < 0.
8328 	\param newPath The absolute or relative path of the new location. Must not
8329 		   be \c NULL.
8330 	\return \c B_OK, if the entry has been moved successfully, another
8331 			error code otherwise.
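
	A hypothetical caller, moving an entry between two directories:
	\code
	status_t error = _kern_rename(srcDirFD, "draft.txt", destDirFD,
		"final.txt");
	\endcode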
8332 */
8333 status_t
8334 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8335 {
8336 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8337 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8338 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8339 		return B_NO_MEMORY;
8340 
8341 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8342 		newFD, newPathBuffer.LockBuffer(), true);
8343 }
8344 
8345 
8346 status_t
8347 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8348 {
8349 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8350 	if (pathBuffer.InitCheck() != B_OK)
8351 		return B_NO_MEMORY;
8352 
8353 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8354 		true);
8355 }
8356 
8357 
8358 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8359 
8360 	If only \a fd is given, the stat operation associated with the type
8361 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8362 	given, this path identifies the entry for whose node to retrieve the
8363 	stat data. If both \a fd and \a path are given and the path is absolute,
8364 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8365 	identified by \a fd and specifies the entry whose stat data shall be
8366 	retrieved.
8367 
8368 	\param fd The FD. May be < 0.
8369 	\param path The absolute or relative path. Must not be \c NULL.
8370 	\param traverseLeafLink If \a path is given, \c true specifies that the
8371 		   function shall not stick to symlinks, but traverse them.
8372 	\param stat The buffer the stat data shall be written into.
8373 	\param statSize The size of the supplied stat buffer.
8374 	\return \c B_OK, if the stat data have been read successfully, another
8375 			error code otherwise.
8376 */
8377 status_t
8378 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8379 	struct stat* stat, size_t statSize)
8380 {
8381 	struct stat completeStat;
8382 	struct stat* originalStat = NULL;
8383 	status_t status;
8384 
8385 	if (statSize > sizeof(struct stat))
8386 		return B_BAD_VALUE;
8387 
8388 	// This supports smaller (older) versions of struct stat: read into a
	// complete structure and copy only the requested part back to the caller.
8389 	if (statSize < sizeof(struct stat)) {
8390 		originalStat = stat;
8391 		stat = &completeStat;
8392 	}
8393 
8394 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8395 
8396 	if (status == B_OK && originalStat != NULL)
8397 		memcpy(originalStat, stat, statSize);
8398 
8399 	return status;
8400 }
8401 
8402 
8403 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8404 
8405 	If only \a fd is given, the stat operation associated with the type
8406 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8407 	given, this path identifies the entry for whose node to write the
8408 	stat data. If both \a fd and \a path are given and the path is absolute,
8409 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8410 	identified by \a fd and specifies the entry whose stat data shall be
8411 	written.
8412 
8413 	\param fd The FD. May be < 0.
8414 	\param path The absolute or relative path. Must not be \c NULL.
8415 	\param traverseLeafLink If \a path is given, \c true specifies that the
8416 		   function shall not stick to symlinks, but traverse them.
8417 	\param stat The buffer containing the stat data to be written.
8418 	\param statSize The size of the supplied stat buffer.
8419 	\param statMask A mask specifying which parts of the stat data shall be
8420 		   written.
8421 	\return \c B_OK, if the stat data have been written successfully,
8422 			another error code otherwise.
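
	For example, to change only the permissions of the node an FD refers to
	(\c B_STAT_MODE selects the \c st_mode field):
	\code
	struct stat stat;
	stat.st_mode = S_IRUSR | S_IWUSR;
	status_t error = _kern_write_stat(fd, NULL, false, &stat, sizeof(stat),
		B_STAT_MODE);
	\endcode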
8423 */
8424 status_t
8425 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8426 	const struct stat* stat, size_t statSize, int statMask)
8427 {
8428 	struct stat completeStat;
8429 
8430 	if (statSize > sizeof(struct stat))
8431 		return B_BAD_VALUE;
8432 
8433 	// This supports smaller (older) versions of struct stat: zero the
	// missing fields and continue with a complete structure.
8434 	if (statSize < sizeof(struct stat)) {
8435 		memset((uint8*)&completeStat + statSize, 0,
8436 			sizeof(struct stat) - statSize);
8437 		memcpy(&completeStat, stat, statSize);
8438 		stat = &completeStat;
8439 	}
8440 
8441 	status_t status;
8442 
8443 	if (path) {
8444 		// path given: write the stat of the node referred to by (fd, path)
8445 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8446 		if (pathBuffer.InitCheck() != B_OK)
8447 			return B_NO_MEMORY;
8448 
8449 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8450 			traverseLeafLink, stat, statMask, true);
8451 	} else {
8452 		// no path given: get the FD and use the FD operation
8453 		struct file_descriptor* descriptor
8454 			= get_fd(get_current_io_context(true), fd);
8455 		if (descriptor == NULL)
8456 			return B_FILE_ERROR;
8457 
8458 		if (descriptor->ops->fd_write_stat)
8459 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8460 		else
8461 			status = B_UNSUPPORTED;
8462 
8463 		put_fd(descriptor);
8464 	}
8465 
8466 	return status;
8467 }
8468 
8469 
8470 int
8471 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8472 {
8473 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8474 	if (pathBuffer.InitCheck() != B_OK)
8475 		return B_NO_MEMORY;
8476 
8477 	if (path != NULL)
8478 		pathBuffer.SetTo(path);
8479 
8480 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8481 		traverseLeafLink, true);
8482 }
8483 
8484 
8485 int
8486 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8487 	int openMode)
8488 {
8489 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8490 	if (pathBuffer.InitCheck() != B_OK)
8491 		return B_NO_MEMORY;
8492 
8493 	if ((openMode & O_CREAT) != 0) {
8494 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8495 			true);
8496 	}
8497 
8498 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8499 }
8500 
8501 
8502 status_t
8503 _kern_remove_attr(int fd, const char* name)
8504 {
8505 	return attr_remove(fd, name, true);
8506 }
8507 
8508 
8509 status_t
8510 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8511 	const char* toName)
8512 {
8513 	return attr_rename(fromFile, fromName, toFile, toName, true);
8514 }
8515 
8516 
8517 int
8518 _kern_open_index_dir(dev_t device)
8519 {
8520 	return index_dir_open(device, true);
8521 }
8522 
8523 
8524 status_t
8525 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8526 {
8527 	return index_create(device, name, type, flags, true);
8528 }
8529 
8530 
8531 status_t
8532 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8533 {
8534 	return index_name_read_stat(device, name, stat, true);
8535 }
8536 
8537 
8538 status_t
8539 _kern_remove_index(dev_t device, const char* name)
8540 {
8541 	return index_remove(device, name, true);
8542 }
8543 
8544 
8545 status_t
8546 _kern_getcwd(char* buffer, size_t size)
8547 {
8548 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8549 
8550 	// Call vfs to get current working directory
8551 	return get_cwd(buffer, size, true);
8552 }
8553 
8554 
8555 status_t
8556 _kern_setcwd(int fd, const char* path)
8557 {
8558 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8559 	if (pathBuffer.InitCheck() != B_OK)
8560 		return B_NO_MEMORY;
8561 
8562 	if (path != NULL)
8563 		pathBuffer.SetTo(path);
8564 
8565 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8566 }
8567 
8568 
8569 //	#pragma mark - userland syscalls
8570 
8571 
8572 dev_t
8573 _user_mount(const char* userPath, const char* userDevice,
8574 	const char* userFileSystem, uint32 flags, const char* userArgs,
8575 	size_t argsLength)
8576 {
8577 	char fileSystem[B_FILE_NAME_LENGTH];
8578 	KPath path, device;
8579 	char* args = NULL;
8580 	status_t status;
8581 
8582 	if (!IS_USER_ADDRESS(userPath)
8583 		|| !IS_USER_ADDRESS(userFileSystem)
8584 		|| !IS_USER_ADDRESS(userDevice))
8585 		return B_BAD_ADDRESS;
8586 
8587 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8588 		return B_NO_MEMORY;
8589 
8590 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8591 		return B_BAD_ADDRESS;
8592 
8593 	if (userFileSystem != NULL
8594 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8595 		return B_BAD_ADDRESS;
8596 
8597 	if (userDevice != NULL
8598 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8599 			< B_OK)
8600 		return B_BAD_ADDRESS;
8601 
8602 	if (userArgs != NULL && argsLength > 0) {
8603 		// this is a safety restriction
8604 		if (argsLength >= 65536)
8605 			return B_NAME_TOO_LONG;
8606 
8607 		args = (char*)malloc(argsLength + 1);
8608 		if (args == NULL)
8609 			return B_NO_MEMORY;
8610 
8611 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8612 			free(args);
8613 			return B_BAD_ADDRESS;
8614 		}
8615 	}
8616 	path.UnlockBuffer();
8617 	device.UnlockBuffer();
8618 
8619 	status = fs_mount(path.LockBuffer(),
8620 		userDevice != NULL ? device.Path() : NULL,
8621 		userFileSystem ? fileSystem : NULL, flags, args, false);
8622 
8623 	free(args);
8624 	return status;
8625 }
8626 
8627 
8628 status_t
8629 _user_unmount(const char* userPath, uint32 flags)
8630 {
8631 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8632 	if (pathBuffer.InitCheck() != B_OK)
8633 		return B_NO_MEMORY;
8634 
8635 	char* path = pathBuffer.LockBuffer();
8636 
8637 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8638 		return B_BAD_ADDRESS;
8639 
8640 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8641 }
8642 
8643 
8644 status_t
8645 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8646 {
8647 	struct fs_info info;
8648 	status_t status;
8649 
8650 	if (userInfo == NULL)
8651 		return B_BAD_VALUE;
8652 
8653 	if (!IS_USER_ADDRESS(userInfo))
8654 		return B_BAD_ADDRESS;
8655 
8656 	status = fs_read_info(device, &info);
8657 	if (status != B_OK)
8658 		return status;
8659 
8660 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8661 		return B_BAD_ADDRESS;
8662 
8663 	return B_OK;
8664 }
8665 
8666 
8667 status_t
8668 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8669 {
8670 	struct fs_info info;
8671 
8672 	if (userInfo == NULL)
8673 		return B_BAD_VALUE;
8674 
8675 	if (!IS_USER_ADDRESS(userInfo)
8676 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8677 		return B_BAD_ADDRESS;
8678 
8679 	return fs_write_info(device, &info, mask);
8680 }
8681 
8682 
8683 dev_t
8684 _user_next_device(int32* _userCookie)
8685 {
8686 	int32 cookie;
8687 	dev_t device;
8688 
8689 	if (!IS_USER_ADDRESS(_userCookie)
8690 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8691 		return B_BAD_ADDRESS;
8692 
8693 	device = fs_next_device(&cookie);
8694 
8695 	if (device >= B_OK) {
8696 		// update user cookie
8697 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8698 			return B_BAD_ADDRESS;
8699 	}
8700 
8701 	return device;
8702 }
8703 
8704 
8705 status_t
8706 _user_sync(void)
8707 {
8708 	return _kern_sync();
8709 }
8710 
8711 
8712 status_t
8713 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8714 	size_t infoSize)
8715 {
8716 	struct fd_info info;
8717 	uint32 cookie;
8718 
8719 	// only root can do this (or should root's group be enough?)
8720 	if (geteuid() != 0)
8721 		return B_NOT_ALLOWED;
8722 
8723 	if (infoSize != sizeof(fd_info))
8724 		return B_BAD_VALUE;
8725 
8726 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8727 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8728 		return B_BAD_ADDRESS;
8729 
8730 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8731 	if (status != B_OK)
8732 		return status;
8733 
8734 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8735 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8736 		return B_BAD_ADDRESS;
8737 
8738 	return status;
8739 }
8740 
8741 
8742 status_t
8743 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8744 	char* userPath, size_t pathLength)
8745 {
8746 	if (!IS_USER_ADDRESS(userPath))
8747 		return B_BAD_ADDRESS;
8748 
8749 	KPath path(B_PATH_NAME_LENGTH + 1);
8750 	if (path.InitCheck() != B_OK)
8751 		return B_NO_MEMORY;
8752 
8753 	// copy the leaf name onto the stack
8754 	char stackLeaf[B_FILE_NAME_LENGTH];
8755 	if (leaf) {
8756 		if (!IS_USER_ADDRESS(leaf))
8757 			return B_BAD_ADDRESS;
8758 
8759 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8760 		if (length < 0)
8761 			return length;
8762 		if (length >= B_FILE_NAME_LENGTH)
8763 			return B_NAME_TOO_LONG;
8764 
8765 		leaf = stackLeaf;
8766 	}
8767 
8768 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8769 		path.LockBuffer(), path.BufferSize());
8770 	if (status != B_OK)
8771 		return status;
8772 
8773 	path.UnlockBuffer();
8774 
8775 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8776 	if (length < 0)
8777 		return length;
8778 	if (length >= (int)pathLength)
8779 		return B_BUFFER_OVERFLOW;
8780 
8781 	return B_OK;
8782 }
8783 
8784 
8785 status_t
8786 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8787 {
8788 	if (userPath == NULL || buffer == NULL)
8789 		return B_BAD_VALUE;
8790 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8791 		return B_BAD_ADDRESS;
8792 
8793 	// copy path from userland
8794 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8795 	if (pathBuffer.InitCheck() != B_OK)
8796 		return B_NO_MEMORY;
8797 	char* path = pathBuffer.LockBuffer();
8798 
8799 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8800 		return B_BAD_ADDRESS;
8801 
8802 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8803 		false);
8804 	if (error != B_OK)
8805 		return error;
8806 
8807 	// copy back to userland
8808 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8809 	if (len < 0)
8810 		return len;
8811 	if (len >= B_PATH_NAME_LENGTH)
8812 		return B_BUFFER_OVERFLOW;
8813 
8814 	return B_OK;
8815 }
8816 
8817 
8818 int
8819 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8820 	int openMode, int perms)
8821 {
8822 	char name[B_FILE_NAME_LENGTH];
8823 
8824 	if (userName == NULL || device < 0 || inode < 0)
8825 		return B_BAD_VALUE;
8826 	if (!IS_USER_ADDRESS(userName)
8827 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8828 		return B_BAD_ADDRESS;
8829 
8830 	if ((openMode & O_CREAT) != 0) {
8831 		return file_create_entry_ref(device, inode, name, openMode, perms,
8832 			false);
8833 	}
8834 
8835 	return file_open_entry_ref(device, inode, name, openMode, false);
8836 }
8837 
8838 
8839 int
8840 _user_open(int fd, const char* userPath, int openMode, int perms)
8841 {
8842 	KPath path(B_PATH_NAME_LENGTH + 1);
8843 	if (path.InitCheck() != B_OK)
8844 		return B_NO_MEMORY;
8845 
8846 	char* buffer = path.LockBuffer();
8847 
8848 	if (!IS_USER_ADDRESS(userPath)
8849 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8850 		return B_BAD_ADDRESS;
8851 
8852 	if ((openMode & O_CREAT) != 0)
8853 		return file_create(fd, buffer, openMode, perms, false);
8854 
8855 	return file_open(fd, buffer, openMode, false);
8856 }
8857 
8858 
8859 int
8860 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8861 {
8862 	if (userName != NULL) {
8863 		char name[B_FILE_NAME_LENGTH];
8864 
8865 		if (!IS_USER_ADDRESS(userName)
8866 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8867 			return B_BAD_ADDRESS;
8868 
8869 		return dir_open_entry_ref(device, inode, name, false);
8870 	}
8871 	return dir_open_entry_ref(device, inode, NULL, false);
8872 }
8873 
8874 
8875 int
8876 _user_open_dir(int fd, const char* userPath)
8877 {
8878 	if (userPath == NULL)
8879 		return dir_open(fd, NULL, false);
8880 
8881 	KPath path(B_PATH_NAME_LENGTH + 1);
8882 	if (path.InitCheck() != B_OK)
8883 		return B_NO_MEMORY;
8884 
8885 	char* buffer = path.LockBuffer();
8886 
8887 	if (!IS_USER_ADDRESS(userPath)
8888 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8889 		return B_BAD_ADDRESS;
8890 
8891 	return dir_open(fd, buffer, false);
8892 }
8893 
8894 
8895 /*!	\brief Opens a directory's parent directory and returns the entry name
8896 		   of the former.
8897 
8898 	Aside from also returning the directory's entry name, this method is
8899 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8900 	equivalent, if \a userName is \c NULL.
8901 
8902 	If a name buffer is supplied and the name does not fit the buffer, the
8903 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8904 
8905 	\param fd A FD referring to a directory.
8906 	\param userName Buffer the directory's entry name shall be written into.
8907 		   May be \c NULL.
8908 	\param nameLength Size of the name buffer.
8909 	\return The file descriptor of the opened parent directory, if everything
8910 			went fine, an error code otherwise.
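
	Hypothetical usage, as performed on behalf of a userland caller (note
	that \a userName must be a userland address):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(fd, name, sizeof(name));
	// on success, 'name' holds the entry name of the directory 'fd'
	// refers to within the newly opened parent directory
	\endcode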
8911 */
8912 int
8913 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8914 {
8915 	bool kernel = false;
8916 
8917 	if (userName && !IS_USER_ADDRESS(userName))
8918 		return B_BAD_ADDRESS;
8919 
8920 	// open the parent dir
8921 	int parentFD = dir_open(fd, (char*)"..", kernel);
8922 	if (parentFD < 0)
8923 		return parentFD;
8924 	FDCloser fdCloser(parentFD, kernel);
8925 
8926 	if (userName) {
8927 		// get the vnodes
8928 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8929 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8930 		VNodePutter parentVNodePutter(parentVNode);
8931 		VNodePutter dirVNodePutter(dirVNode);
8932 		if (!parentVNode || !dirVNode)
8933 			return B_FILE_ERROR;
8934 
8935 		// get the vnode name
8936 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8937 		struct dirent* buffer = (struct dirent*)_buffer;
8938 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8939 			sizeof(_buffer), get_current_io_context(false));
8940 		if (status != B_OK)
8941 			return status;
8942 
8943 		// copy the name to the userland buffer
8944 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8945 		if (len < 0)
8946 			return len;
8947 		if (len >= (int)nameLength)
8948 			return B_BUFFER_OVERFLOW;
8949 	}
8950 
8951 	return fdCloser.Detach();
8952 }
8953 
8954 
8955 status_t
8956 _user_fcntl(int fd, int op, size_t argument)
8957 {
8958 	status_t status = common_fcntl(fd, op, argument, false);
8959 	if (op == F_SETLKW)
8960 		syscall_restart_handle_post(status);
8961 
8962 	return status;
8963 }
8964 
8965 
8966 status_t
8967 _user_fsync(int fd)
8968 {
8969 	return common_sync(fd, false);
8970 }
8971 
8972 
8973 status_t
8974 _user_flock(int fd, int operation)
8975 {
8976 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8977 
8978 	// Check if the operation is valid
8979 	switch (operation & ~LOCK_NB) {
8980 		case LOCK_UN:
8981 		case LOCK_SH:
8982 		case LOCK_EX:
8983 			break;
8984 
8985 		default:
8986 			return B_BAD_VALUE;
8987 	}
8988 
8989 	struct file_descriptor* descriptor;
8990 	struct vnode* vnode;
8991 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8992 	if (descriptor == NULL)
8993 		return B_FILE_ERROR;
8994 
8995 	if (descriptor->type != FDTYPE_FILE) {
8996 		put_fd(descriptor);
8997 		return B_BAD_VALUE;
8998 	}
8999 
9000 	struct flock flock;
9001 	flock.l_start = 0;
9002 	flock.l_len = OFF_MAX;
9003 	flock.l_whence = 0;
9004 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9005 
9006 	status_t status;
9007 	if ((operation & LOCK_UN) != 0)
9008 		status = release_advisory_lock(vnode, &flock);
9009 	else {
9010 		status = acquire_advisory_lock(vnode,
9011 			thread_get_current_thread()->team->session_id, &flock,
9012 			(operation & LOCK_NB) == 0);
9013 	}
9014 
9015 	syscall_restart_handle_post(status);
9016 
9017 	put_fd(descriptor);
9018 	return status;
9019 }
9020 
9021 
9022 status_t
9023 _user_lock_node(int fd)
9024 {
9025 	return common_lock_node(fd, false);
9026 }
9027 
9028 
9029 status_t
9030 _user_unlock_node(int fd)
9031 {
9032 	return common_unlock_node(fd, false);
9033 }
9034 
9035 
9036 status_t
9037 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9038 	int perms)
9039 {
9040 	char name[B_FILE_NAME_LENGTH];
9041 	status_t status;
9042 
9043 	if (!IS_USER_ADDRESS(userName))
9044 		return B_BAD_ADDRESS;
9045 
9046 	status = user_strlcpy(name, userName, sizeof(name));
9047 	if (status < 0)
9048 		return status;
9049 
9050 	return dir_create_entry_ref(device, inode, name, perms, false);
9051 }
9052 
9053 
9054 status_t
9055 _user_create_dir(int fd, const char* userPath, int perms)
9056 {
9057 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9058 	if (pathBuffer.InitCheck() != B_OK)
9059 		return B_NO_MEMORY;
9060 
9061 	char* path = pathBuffer.LockBuffer();
9062 
9063 	if (!IS_USER_ADDRESS(userPath)
9064 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9065 		return B_BAD_ADDRESS;
9066 
9067 	return dir_create(fd, path, perms, false);
9068 }
9069 
9070 
9071 status_t
9072 _user_remove_dir(int fd, const char* userPath)
9073 {
9074 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9075 	if (pathBuffer.InitCheck() != B_OK)
9076 		return B_NO_MEMORY;
9077 
9078 	char* path = pathBuffer.LockBuffer();
9079 
9080 	if (userPath != NULL) {
9081 		if (!IS_USER_ADDRESS(userPath)
9082 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9083 			return B_BAD_ADDRESS;
9084 	}
9085 
9086 	return dir_remove(fd, userPath ? path : NULL, false);
9087 }
9088 
9089 
9090 status_t
9091 _user_read_link(int fd, const char* userPath, char* userBuffer,
9092 	size_t* userBufferSize)
9093 {
9094 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9095 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9096 		return B_NO_MEMORY;
9097 
9098 	size_t bufferSize;
9099 
9100 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9101 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9102 		return B_BAD_ADDRESS;
9103 
9104 	char* path = pathBuffer.LockBuffer();
9105 	char* buffer = linkBuffer.LockBuffer();
9106 
9107 	if (userPath) {
9108 		if (!IS_USER_ADDRESS(userPath)
9109 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9110 			return B_BAD_ADDRESS;
9111 
9112 		if (bufferSize > B_PATH_NAME_LENGTH)
9113 			bufferSize = B_PATH_NAME_LENGTH;
9114 	}
9115 
9116 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9117 		&bufferSize, false);
9118 
9119 	// we also update the bufferSize in case of errors
9120 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9121 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9122 		return B_BAD_ADDRESS;
9123 
9124 	if (status != B_OK)
9125 		return status;
9126 
9127 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9128 		return B_BAD_ADDRESS;
9129 
9130 	return B_OK;
9131 }
9132 
9133 
9134 status_t
9135 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9136 	int mode)
9137 {
9138 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9139 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9140 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9141 		return B_NO_MEMORY;
9142 
9143 	char* path = pathBuffer.LockBuffer();
9144 	char* toPath = toPathBuffer.LockBuffer();
9145 
9146 	if (!IS_USER_ADDRESS(userPath)
9147 		|| !IS_USER_ADDRESS(userToPath)
9148 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9149 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9150 		return B_BAD_ADDRESS;
9151 
9152 	return common_create_symlink(fd, path, toPath, mode, false);
9153 }
9154 
9155 
9156 status_t
9157 _user_create_link(int pathFD, const char* userPath, int toFD,
9158 	const char* userToPath, bool traverseLeafLink)
9159 {
9160 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9161 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9162 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9163 		return B_NO_MEMORY;
9164 
9165 	char* path = pathBuffer.LockBuffer();
9166 	char* toPath = toPathBuffer.LockBuffer();
9167 
9168 	if (!IS_USER_ADDRESS(userPath)
9169 		|| !IS_USER_ADDRESS(userToPath)
9170 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9171 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9172 		return B_BAD_ADDRESS;
9173 
9174 	status_t status = check_path(toPath);
9175 	if (status != B_OK)
9176 		return status;
9177 
9178 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9179 		false);
9180 }
9181 
9182 
9183 status_t
9184 _user_unlink(int fd, const char* userPath)
9185 {
9186 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9187 	if (pathBuffer.InitCheck() != B_OK)
9188 		return B_NO_MEMORY;
9189 
9190 	char* path = pathBuffer.LockBuffer();
9191 
9192 	if (!IS_USER_ADDRESS(userPath)
9193 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9194 		return B_BAD_ADDRESS;
9195 
9196 	return common_unlink(fd, path, false);
9197 }
9198 
9199 
9200 status_t
9201 _user_rename(int oldFD, const char* userOldPath, int newFD,
9202 	const char* userNewPath)
9203 {
9204 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9205 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9206 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9207 		return B_NO_MEMORY;
9208 
9209 	char* oldPath = oldPathBuffer.LockBuffer();
9210 	char* newPath = newPathBuffer.LockBuffer();
9211 
9212 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9213 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9214 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9215 		return B_BAD_ADDRESS;
9216 
9217 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9218 }
9219 
9220 
9221 status_t
9222 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9223 {
9224 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9225 	if (pathBuffer.InitCheck() != B_OK)
9226 		return B_NO_MEMORY;
9227 
9228 	char* path = pathBuffer.LockBuffer();
9229 
9230 	if (!IS_USER_ADDRESS(userPath)
9231 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9232 		return B_BAD_ADDRESS;
9233 	}
9234 
9235 	// split into directory vnode and filename path
9236 	char filename[B_FILE_NAME_LENGTH];
9237 	struct vnode* dir;
9238 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9239 	if (status != B_OK)
9240 		return status;
9241 
9242 	VNodePutter _(dir);
9243 
9244 	// the underlying FS needs to support creating FIFOs
9245 	if (!HAS_FS_CALL(dir, create_special_node))
9246 		return B_UNSUPPORTED;
9247 
9248 	// create the entry	-- the FIFO sub node is set up automatically
9249 	fs_vnode superVnode;
9250 	ino_t nodeID;
9251 	status = FS_CALL(dir, create_special_node, filename, NULL,
9252 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9253 
9254 	// create_special_node() acquired a reference for us that we don't need.
9255 	if (status == B_OK)
9256 		put_vnode(dir->mount->volume, nodeID);
9257 
9258 	return status;
9259 }
9260 
9261 
9262 status_t
9263 _user_create_pipe(int* userFDs)
9264 {
9265 	// rootfs should support creating FIFOs, but let's be sure
9266 	if (!HAS_FS_CALL(sRoot, create_special_node))
9267 		return B_UNSUPPORTED;
9268 
9269 	// create the node	-- the FIFO sub node is set up automatically
9270 	fs_vnode superVnode;
9271 	ino_t nodeID;
9272 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9273 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9274 	if (status != B_OK)
9275 		return status;
9276 
9277 	// We've got one reference to the node and need another one.
9278 	struct vnode* vnode;
9279 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9280 	if (status != B_OK) {
9281 		// that should not happen
9282 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9283 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9284 		return status;
9285 	}
9286 
9287 	// Everything looks good so far. Open two FDs for reading respectively
9288 	// writing.
9289 	int fds[2];
9290 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9291 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9292 
9293 	FDCloser closer0(fds[0], false);
9294 	FDCloser closer1(fds[1], false);
9295 
9296 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9297 
9298 	// copy FDs to userland
9299 	if (status == B_OK) {
9300 		if (!IS_USER_ADDRESS(userFDs)
9301 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9302 			status = B_BAD_ADDRESS;
9303 		}
9304 	}
9305 
9306 	// keep FDs, if everything went fine
9307 	if (status == B_OK) {
9308 		closer0.Detach();
9309 		closer1.Detach();
9310 	}
9311 
9312 	return status;
9313 }
9314 
9315 
9316 status_t
9317 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9318 {
9319 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9320 	if (pathBuffer.InitCheck() != B_OK)
9321 		return B_NO_MEMORY;
9322 
9323 	char* path = pathBuffer.LockBuffer();
9324 
9325 	if (!IS_USER_ADDRESS(userPath)
9326 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9327 		return B_BAD_ADDRESS;
9328 
9329 	return common_access(fd, path, mode, effectiveUserGroup, false);
9330 }
9331 
9332 
9333 status_t
9334 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9335 	struct stat* userStat, size_t statSize)
9336 {
9337 	struct stat stat;
9338 	status_t status;
9339 
9340 	if (statSize > sizeof(struct stat))
9341 		return B_BAD_VALUE;
9342 
9343 	if (!IS_USER_ADDRESS(userStat))
9344 		return B_BAD_ADDRESS;
9345 
9346 	if (userPath) {
9347 		// path given: get the stat of the node referred to by (fd, path)
9348 		if (!IS_USER_ADDRESS(userPath))
9349 			return B_BAD_ADDRESS;
9350 
9351 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9352 		if (pathBuffer.InitCheck() != B_OK)
9353 			return B_NO_MEMORY;
9354 
9355 		char* path = pathBuffer.LockBuffer();
9356 
9357 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9358 		if (length < B_OK)
9359 			return length;
9360 		if (length >= B_PATH_NAME_LENGTH)
9361 			return B_NAME_TOO_LONG;
9362 
9363 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9364 	} else {
9365 		// no path given: get the FD and use the FD operation
9366 		struct file_descriptor* descriptor
9367 			= get_fd(get_current_io_context(false), fd);
9368 		if (descriptor == NULL)
9369 			return B_FILE_ERROR;
9370 
9371 		if (descriptor->ops->fd_read_stat)
9372 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9373 		else
9374 			status = B_UNSUPPORTED;
9375 
9376 		put_fd(descriptor);
9377 	}
9378 
9379 	if (status != B_OK)
9380 		return status;
9381 
9382 	return user_memcpy(userStat, &stat, statSize);
9383 }
9384 
9385 
9386 status_t
9387 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9388 	const struct stat* userStat, size_t statSize, int statMask)
9389 {
9390 	if (statSize > sizeof(struct stat))
9391 		return B_BAD_VALUE;
9392 
9393 	struct stat stat;
9394 
9395 	if (!IS_USER_ADDRESS(userStat)
9396 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9397 		return B_BAD_ADDRESS;
9398 
9399 	// clear additional stat fields
9400 	if (statSize < sizeof(struct stat))
9401 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9402 
9403 	status_t status;
9404 
9405 	if (userPath) {
9406 		// path given: write the stat of the node referred to by (fd, path)
9407 		if (!IS_USER_ADDRESS(userPath))
9408 			return B_BAD_ADDRESS;
9409 
9410 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9411 		if (pathBuffer.InitCheck() != B_OK)
9412 			return B_NO_MEMORY;
9413 
9414 		char* path = pathBuffer.LockBuffer();
9415 
9416 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9417 		if (length < B_OK)
9418 			return length;
9419 		if (length >= B_PATH_NAME_LENGTH)
9420 			return B_NAME_TOO_LONG;
9421 
9422 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9423 			statMask, false);
9424 	} else {
9425 		// no path given: get the FD and use the FD operation
9426 		struct file_descriptor* descriptor
9427 			= get_fd(get_current_io_context(false), fd);
9428 		if (descriptor == NULL)
9429 			return B_FILE_ERROR;
9430 
9431 		if (descriptor->ops->fd_write_stat) {
9432 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9433 				statMask);
9434 		} else
9435 			status = B_UNSUPPORTED;
9436 
9437 		put_fd(descriptor);
9438 	}
9439 
9440 	return status;
9441 }
9442 
9443 
9444 int
9445 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9446 {
9447 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9448 	if (pathBuffer.InitCheck() != B_OK)
9449 		return B_NO_MEMORY;
9450 
9451 	char* path = pathBuffer.LockBuffer();
9452 
9453 	if (userPath != NULL) {
9454 		if (!IS_USER_ADDRESS(userPath)
9455 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9456 			return B_BAD_ADDRESS;
9457 	}
9458 
9459 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9460 }
9461 
9462 
ssize_t
_user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
	size_t readBytes)
{
	char attribute[B_FILE_NAME_LENGTH];

	// the attribute name is a userland address and has to be copied first
	if (!IS_USER_ADDRESS(userAttribute)
		|| user_strlcpy(attribute, userAttribute, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;

	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
	_user_close(attr);

	return bytes;
}


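/*!	Writes \a writeBytes from \a buffer to attribute \a userAttribute of the
	node referred to by \a fd. Following the BeOS semantics, the attribute is
	created if necessary and truncated unless a nonzero \a pos is given.
*/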
ssize_t
_user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
	const void* buffer, size_t writeBytes)
{
	char attribute[B_FILE_NAME_LENGTH];

	// the attribute name is a userland address and has to be copied first
	if (!IS_USER_ADDRESS(userAttribute)
		|| user_strlcpy(attribute, userAttribute, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	// Try to support the BeOS typical truncation as well as the position
	// argument
	int attr = attr_create(fd, NULL, attribute, type,
		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
	if (attr < 0)
		return attr;

	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
	_user_close(attr);

	return bytes;
}


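/*!	Retrieves type and size of attribute \a userAttribute of the node
	referred to by \a fd and copies them into \a userAttrInfo.
*/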
status_t
_user_stat_attr(int fd, const char* userAttribute,
	struct attr_info* userAttrInfo)
{
	char attribute[B_FILE_NAME_LENGTH];

	// both the attribute name and the info buffer are userland addresses
	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo)
		|| user_strlcpy(attribute, userAttribute, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;

	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(false), attr);
	if (descriptor == NULL) {
		_user_close(attr);
		return B_FILE_ERROR;
	}

	struct stat stat;
	status_t status;
	if (descriptor->ops->fd_read_stat)
		status = descriptor->ops->fd_read_stat(descriptor, &stat);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	_user_close(attr);

	if (status == B_OK) {
		attr_info info;
		info.type = stat.st_type;
		info.size = stat.st_size;

		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
			return B_BAD_ADDRESS;
	}

	return status;
}


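/*!	Opens attribute \a userName of the node referred to by \a fd and
	\a userPath and returns a file descriptor for it. If \a openMode contains
	O_CREAT, the attribute is created with the given \a type first.
*/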
int
_user_open_attr(int fd, const char* userPath, const char* userName,
	uint32 type, int openMode)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;
	}

	if ((openMode & O_CREAT) != 0) {
		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
			false);
	}

	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
}


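/*!	Removes attribute \a userName from the node referred to by \a fd. */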
status_t
_user_remove_attr(int fd, const char* userName)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return attr_remove(fd, name, false);
}


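/*!	Renames attribute \a userFromName of the node referred to by \a fromFile
	to \a userToName on the node referred to by \a toFile.
*/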
status_t
_user_rename_attr(int fromFile, const char* userFromName, int toFile,
	const char* userToName)
{
	if (!IS_USER_ADDRESS(userFromName)
		|| !IS_USER_ADDRESS(userToName))
		return B_BAD_ADDRESS;

	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
	KPath toNameBuffer(B_FILE_NAME_LENGTH);
	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* fromName = fromNameBuffer.LockBuffer();
	char* toName = toNameBuffer.LockBuffer();

	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return attr_rename(fromFile, fromName, toFile, toName, false);
}


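/*!	Opens the index directory of the volume specified by \a device and
	returns a file descriptor for it.
*/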
int
_user_open_index_dir(dev_t device)
{
	return index_dir_open(device, false);
}


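/*!	Creates an index named \a userName with the given \a type on the volume
	specified by \a device.
*/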
status_t
_user_create_index(dev_t device, const char* userName, uint32 type,
	uint32 flags)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return index_create(device, name, type, flags, false);
}


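/*!	Retrieves stat data for the index named \a userName on the volume
	specified by \a device and copies it back to \a userStat.
*/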
status_t
_user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
{
	char name[B_FILE_NAME_LENGTH];
	struct stat stat;
	status_t status;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userStat)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	status = index_name_read_stat(device, name, &stat, false);
	if (status == B_OK) {
		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
			return B_BAD_ADDRESS;
	}

	return status;
}


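/*!	Removes the index named \a userName from the volume specified by
	\a device.
*/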
status_t
_user_remove_index(dev_t device, const char* userName)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return index_remove(device, name, false);
}


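/*!	Copies the current working directory of the calling team into
	\a userBuffer. A \a size of 0 yields B_BAD_VALUE; larger requests are
	silently capped at kMaxPathLength.
*/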
status_t
_user_getcwd(char* userBuffer, size_t size)
{
	if (size == 0)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	if (size > kMaxPathLength)
		size = kMaxPathLength;

	KPath pathBuffer(size);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	TRACE(("user_getcwd: buf %p, %lu\n", userBuffer, size));

	char* path = pathBuffer.LockBuffer();

	status_t status = get_cwd(path, size, false);
	if (status != B_OK)
		return status;

	// Copy back the result
	if (user_strlcpy(userBuffer, path, size) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


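/*!	Sets the current working directory to the node referred to by \a fd and
	\a userPath, or by \a fd alone, if \a userPath is NULL.
*/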
status_t
_user_setcwd(int fd, const char* userPath)
{
	TRACE(("user_setcwd: path = %p\n", userPath));

	KPath pathBuffer(B_PATH_NAME_LENGTH);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;
	}

	return set_cwd(fd, userPath != NULL ? path : NULL, false);
}


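/*!	chroot() backend: resolves the given path and makes the resulting vnode
	the root of the calling team's I/O context. Restricted to the root user.
*/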
status_t
_user_change_root(const char* userPath)
{
	// only root is allowed to chroot()
	if (geteuid() != 0)
		return B_NOT_ALLOWED;

	// alloc path buffer
	KPath pathBuffer(B_PATH_NAME_LENGTH);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// copy userland path to kernel
	char* path = pathBuffer.LockBuffer();
	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;
	}

	// get the vnode
	struct vnode* vnode;
	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
	if (status != B_OK)
		return status;

	// set the new root
	struct io_context* context = get_current_io_context(false);
	mutex_lock(&sIOContextRootLock);
	struct vnode* oldRoot = context->root;
	context->root = vnode;
	mutex_unlock(&sIOContextRootLock);

	put_vnode(oldRoot);

	return B_OK;
}


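/*!	Opens a query on the volume specified by \a device and returns a file
	descriptor for reading the result set. The \a port / \a token pair
	identifies where updates for live queries (cf. \a flags) are sent.
*/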
int
_user_open_query(dev_t device, const char* userQuery, size_t queryLength,
	uint32 flags, port_id port, int32 token)
{
	char* query;

	if (device < 0 || userQuery == NULL || queryLength == 0)
		return B_BAD_VALUE;

	// this is a safety restriction
	if (queryLength >= 65536)
		return B_NAME_TOO_LONG;

	query = (char*)malloc(queryLength + 1);
	if (query == NULL)
		return B_NO_MEMORY;

	// the query string is a userland address and has to be copied first
	if (!IS_USER_ADDRESS(userQuery)
		|| user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
		free(query);
		return B_BAD_ADDRESS;
	}

	int fd = query_open(device, query, flags, port, token, false);

	free(query);
	return fd;
}


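// vfs_request_io.cpp is deliberately compiled as part of this translation
// unit, presumably so that it can use the static helpers defined above.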
#include "vfs_request_io.cpp"