xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 83b1a68c52ba3e0e8796282759f694b7fdddf06d)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2014, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
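
// Illustrative sketch (not part of the original source): FS_CALL dispatches
// an operation to the hook the file system registered in the vnode's ops
// table. Reading a node's stat data, for example, expands roughly to:
//
//   struct stat stat;
//   status_t status = FS_CALL(vnode, read_stat, &stat);
//   // i.e. vnode->ops->read_stat(vnode->mount->volume, vnode, &stat),
//   // with KDEBUG builds panicking instead when the hook is NULL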
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() -- this does not
112 	// depend on PATH_MAX)
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount has been mounted and it is ensured that it won't be
127 	unmounted (e.g. by holding a reference to a vnode of that mount), (read)
128 	access to those fields is always safe, even without additional locking.
129 	Moreover, while mounted the mount holds a reference to the
130 	root_vnode->covers vnode, thus making the access path
131 	vnode->mount->root_vnode->covers->mount->... safe if a reference to
132 	vnode is held (note that for the root mount root_vnode->covers is NULL).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
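
// Illustrative sketch (not part of the original source): per the note above,
// holding a reference to any vnode of a mount makes the chain through the
// mount's immutable fields safe to read without additional locking:
//
//   // a reference to "vnode" is held
//   struct vnode* root = vnode->mount->root_vnode;
//   struct vnode* covered = root->covers;	// NULL only for the root mount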
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is, locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified.
223 
224 	The thread trying to lock the lock must not hold sVnodeLock or
225 	sMountMutex.
226 */
227 static recursive_lock sMountOpLock;
228 
229 /*!	\brief Guards sVnodeTable.
230 
231 	The holder is allowed read/write access to sVnodeTable and to
232 	any unbusy vnode in that table, save for the immutable fields (device, id,
233 	private_node, mount), to which only read-only access is allowed.
234 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
235 	well as the busy, removed, unused flags, and the vnode's type can also be
236 	written when holding a read lock on sVnodeLock *and* having the vnode
237 	locked. Write access to covered_by and covers requires write locking
238 	sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not hold this lock when calling create_sem(), as this might call
242 	vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
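
// Illustrative sketch (not part of the original source): the protocol
// described above for writing a vnode's mutable flags is a read lock on
// sVnodeLock combined with the per-vnode lock:
//
//   ReadLocker locker(sVnodeLock);
//   AutoLocker<Vnode> nodeLocker(vnode);
//   vnode->SetRemoved(true);	// flag updates are safe here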
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
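
// Illustrative usage sketch (not part of the original source): VNodePutter
// ties a vnode reference to a scope so that early returns cannot leak it;
// Detach() hands the reference back to the caller instead. The error
// condition below is hypothetical:
//
//   struct vnode* vnode;
//   if (get_vnode(mountID, vnodeID, &vnode, true, 0) != B_OK)
//       return B_ENTRY_NOT_FOUND;
//   VNodePutter putter(vnode);
//   if (someErrorCondition)
//       return B_ERROR;	// putter releases the reference
//   return B_OK;			// likewise here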
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
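
// Illustrative usage sketch (not part of the original source): FDCloser plays
// the same role for file descriptors, typically wrapping a freshly allocated
// FD until it can safely be handed to the caller:
//
//   int fd = open_vnode(vnode, openMode, kernel);
//   if (fd < 0)
//       return fd;
//   FDCloser fdCloser(fd, kernel);
//   // ... further setup that may fail and return early ...
//   return fdCloser.Detach();	// success: the caller now owns the FD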
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags,
549 		generic_size_t bytesRequested, status_t status,
550 		generic_size_t bytesTransferred)
551 		:
552 		fVnode(vnode),
553 		fMountID(vnode->mount->id),
554 		fNodeID(vnode->id),
555 		fCookie(cookie),
556 		fPos(pos),
557 		fCount(count),
558 		fFlags(flags),
559 		fBytesRequested(bytesRequested),
560 		fStatus(status),
561 		fBytesTransferred(bytesTransferred)
562 	{
563 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
564 			sizeof(generic_io_vec) * count, false);
565 	}
566 
567 	void AddDump(TraceOutput& out, const char* mode)
568 	{
569 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
570 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
571 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
572 			(uint64)fBytesRequested);
573 
574 		if (fVecs != NULL) {
575 			for (uint32 i = 0; i < fCount; i++) {
576 				if (i > 0)
577 					out.Print(", ");
578 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
579 					(uint64)fVecs[i].length);
580 			}
581 		}
582 
583 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
584 			"transferred: %" B_PRIu64, fFlags, fStatus,
585 			(uint64)fBytesTransferred);
586 	}
587 
588 protected:
589 	struct vnode*	fVnode;
590 	dev_t			fMountID;
591 	ino_t			fNodeID;
592 	void*			fCookie;
593 	off_t			fPos;
594 	generic_io_vec*	fVecs;
595 	uint32			fCount;
596 	uint32			fFlags;
597 	generic_size_t	fBytesRequested;
598 	status_t		fStatus;
599 	generic_size_t	fBytesTransferred;
600 };
601 
602 
603 class ReadPages : public PagesIOTraceEntry {
604 public:
605 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
606 		const generic_io_vec* vecs, uint32 count, uint32 flags,
607 		generic_size_t bytesRequested, status_t status,
608 		generic_size_t bytesTransferred)
609 		:
610 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
611 			bytesRequested, status, bytesTransferred)
612 	{
613 		Initialized();
614 	}
615 
616 	virtual void AddDump(TraceOutput& out)
617 	{
618 		PagesIOTraceEntry::AddDump(out, "read");
619 	}
620 };
621 
622 
623 class WritePages : public PagesIOTraceEntry {
624 public:
625 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
626 		const generic_io_vec* vecs, uint32 count, uint32 flags,
627 		generic_size_t bytesRequested, status_t status,
628 		generic_size_t bytesTransferred)
629 		:
630 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
631 			bytesRequested, status, bytesTransferred)
632 	{
633 		Initialized();
634 	}
635 
636 	virtual void AddDump(TraceOutput& out)
637 	{
638 		PagesIOTraceEntry::AddDump(out, "write");
639 	}
640 };
641 
642 }	// namespace VFSPagesIOTracing
643 
644 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
645 #else
646 #	define TPIO(x) ;
647 #endif	// VFS_PAGES_IO_TRACING
648 
649 
650 static int
651 mount_compare(void* _m, const void* _key)
652 {
653 	struct fs_mount* mount = (fs_mount*)_m;
654 	const dev_t* id = (dev_t*)_key;
655 
656 	if (mount->id == *id)
657 		return 0;
658 
659 	return -1;
660 }
661 
662 
663 static uint32
664 mount_hash(void* _m, const void* _key, uint32 range)
665 {
666 	struct fs_mount* mount = (fs_mount*)_m;
667 	const dev_t* id = (dev_t*)_key;
668 
669 	if (mount)
670 		return mount->id % range;
671 
672 	return (uint32)*id % range;
673 }
674 
675 
676 /*! Finds the mounted device (the fs_mount structure) with the given ID.
677 	Note, you must hold the sMountMutex lock when you call this function.
678 */
679 static struct fs_mount*
680 find_mount(dev_t id)
681 {
682 	ASSERT_LOCKED_MUTEX(&sMountMutex);
683 
684 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
685 }
686 
687 
688 static status_t
689 get_mount(dev_t id, struct fs_mount** _mount)
690 {
691 	struct fs_mount* mount;
692 
693 	ReadLocker nodeLocker(sVnodeLock);
694 	MutexLocker mountLocker(sMountMutex);
695 
696 	mount = find_mount(id);
697 	if (mount == NULL)
698 		return B_BAD_VALUE;
699 
700 	struct vnode* rootNode = mount->root_vnode;
701 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
702 		|| rootNode->ref_count == 0) {
703 		// might have been called during a mount/unmount operation
704 		return B_BUSY;
705 	}
706 
707 	inc_vnode_ref_count(rootNode);
708 	*_mount = mount;
709 	return B_OK;
710 }
711 
712 
713 static void
714 put_mount(struct fs_mount* mount)
715 {
716 	if (mount)
717 		put_vnode(mount->root_vnode);
718 }
719 
720 
721 /*!	Tries to open the specified file system module.
722 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
723 	Returns a pointer to the file system module interface, or NULL if it
724 	could not open the module.
725 */
726 static file_system_module_info*
727 get_file_system(const char* fsName)
728 {
729 	char name[B_FILE_NAME_LENGTH];
730 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
731 		// construct module name if we didn't get one
732 		// (we currently support only one API)
733 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
734 		fsName = NULL;
735 	}
736 
737 	file_system_module_info* info;
738 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
739 		return NULL;
740 
741 	return info;
742 }
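
// Illustrative examples (not part of the original source) of the two name
// forms get_file_system() accepts:
//
//   get_file_system("bfs");					// loads "file_systems/bfs/v1"
//   get_file_system("file_systems/bfs/v1");	// module name used verbatim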
743 
744 
745 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
746 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
747 	The name is allocated for you, and you have to free() it when you're
748 	done with it.
749 	Returns NULL if the required memory is not available.
750 */
751 static char*
752 get_file_system_name(const char* fsName)
753 {
754 	const size_t length = strlen("file_systems/");
755 
756 	if (strncmp(fsName, "file_systems/", length)) {
757 		// the name already seems to be the module's file name
758 		return strdup(fsName);
759 	}
760 
761 	fsName += length;
762 	const char* end = strchr(fsName, '/');
763 	if (end == NULL) {
764 		// this doesn't seem to be a valid name, but well...
765 		return strdup(fsName);
766 	}
767 
768 	// cut off the trailing /v1
769 
770 	char* name = (char*)malloc(end + 1 - fsName);
771 	if (name == NULL)
772 		return NULL;
773 
774 	strlcpy(name, fsName, end + 1 - fsName);
775 	return name;
776 }
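
// Illustrative examples (not part of the original source):
//
//   get_file_system_name("file_systems/bfs/v1");	// -> "bfs" (malloc'ed)
//   get_file_system_name("bfs");					// -> "bfs" (strdup'ed)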
777 
778 
779 /*!	Accepts a list of file system names separated by colons, one for each
780 	layer, and returns the file system name for the specified layer.
781 	The name is allocated for you, and you have to free() it when you're
782 	done with it.
783 	Returns NULL if the required memory is not available or if there is no
784 	name for the specified layer.
785 */
786 static char*
787 get_file_system_name_for_layer(const char* fsNames, int32 layer)
788 {
789 	while (layer >= 0) {
790 		const char* end = strchr(fsNames, ':');
791 		if (end == NULL) {
792 			if (layer == 0)
793 				return strdup(fsNames);
794 			return NULL;
795 		}
796 
797 		if (layer == 0) {
798 			size_t length = end - fsNames + 1;
799 			char* result = (char*)malloc(length);
800 			if (result == NULL)
801 				return NULL;
802 			strlcpy(result, fsNames, length);
803 			return result;
802 		}
803 
804 		fsNames = end + 1;
805 		layer--;
806 	}
807 
808 	return NULL;
809 }
810 
811 
812 static int
813 vnode_compare(void* _vnode, const void* _key)
814 {
815 	struct vnode* vnode = (struct vnode*)_vnode;
816 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
817 
818 	if (vnode->device == key->device && vnode->id == key->vnode)
819 		return 0;
820 
821 	return -1;
822 }
823 
824 
825 static uint32
826 vnode_hash(void* _vnode, const void* _key, uint32 range)
827 {
828 	struct vnode* vnode = (struct vnode*)_vnode;
829 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
830 
831 #define VHASH(mountid, vnodeid) \
832 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
833 
834 	if (vnode != NULL)
835 		return VHASH(vnode->device, vnode->id) % range;
836 
837 	return VHASH(key->device, key->vnode) % range;
838 
839 #undef VHASH
840 }
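
// Illustrative example (not part of the original source) of the VHASH
// computation above: for device 5 and node ID 0x100000002 the high and low
// 32 bits of the ID are summed and XOR'ed with the device,
// ((1 + 2) ^ 5) = 6, which is then taken modulo the table size.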
841 
842 
843 static void
844 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
845 {
846 	RecursiveLocker _(mount->rlock);
847 	mount->vnodes.Add(vnode);
848 }
849 
850 
851 static void
852 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
853 {
854 	RecursiveLocker _(mount->rlock);
855 	mount->vnodes.Remove(vnode);
856 }
857 
858 
859 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
860 
861 	The caller must hold the sVnodeLock (read lock at least).
862 
863 	\param mountID the mount ID.
864 	\param vnodeID the node ID.
865 
866 	\return The vnode structure, if it was found in the hash table, \c NULL
867 			otherwise.
868 */
869 static struct vnode*
870 lookup_vnode(dev_t mountID, ino_t vnodeID)
871 {
872 	struct vnode_hash_key key;
873 
874 	key.device = mountID;
875 	key.vnode = vnodeID;
876 
877 	return (vnode*)hash_lookup(sVnodeTable, &key);
878 }
879 
880 
881 /*!	Creates a new vnode with the given mount and node ID.
882 	If the node already exists, it is returned instead and no new node is
883 	created. In either case -- but not if an error occurs -- the function write
884 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
885 	error the lock is not held on return.
886 
887 	\param mountID The mount ID.
888 	\param vnodeID The vnode ID.
889 	\param _vnode Will be set to the new vnode on success.
890 	\param _nodeCreated Will be set to \c true when the returned vnode has
891 		been newly created, \c false when it already existed. Will not be
892 		changed on error.
893 	\return \c B_OK, when the vnode was successfully created and inserted or
894 		a node with the given ID was found, \c B_NO_MEMORY or
895 		\c B_ENTRY_NOT_FOUND on error.
896 */
897 static status_t
898 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
899 	bool& _nodeCreated)
900 {
901 	FUNCTION(("create_new_vnode_and_lock()\n"));
902 
903 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
904 	if (vnode == NULL)
905 		return B_NO_MEMORY;
906 
907 	// initialize basic values
908 	memset(vnode, 0, sizeof(struct vnode));
909 	vnode->device = mountID;
910 	vnode->id = vnodeID;
911 	vnode->ref_count = 1;
912 	vnode->SetBusy(true);
913 
914 	// look up the node -- it might have been added by someone else in the
915 	// meantime
916 	rw_lock_write_lock(&sVnodeLock);
917 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
918 	if (existingVnode != NULL) {
919 		free(vnode);
920 		_vnode = existingVnode;
921 		_nodeCreated = false;
922 		return B_OK;
923 	}
924 
925 	// get the mount structure
926 	mutex_lock(&sMountMutex);
927 	vnode->mount = find_mount(mountID);
928 	if (!vnode->mount || vnode->mount->unmounting) {
929 		mutex_unlock(&sMountMutex);
930 		rw_lock_write_unlock(&sVnodeLock);
931 		free(vnode);
932 		return B_ENTRY_NOT_FOUND;
933 	}
934 
935 	// add the vnode to the mount's node list and the hash table
936 	hash_insert(sVnodeTable, vnode);
937 	add_vnode_to_mount_list(vnode, vnode->mount);
938 
939 	mutex_unlock(&sMountMutex);
940 
941 	_vnode = vnode;
942 	_nodeCreated = true;
943 
944 	// keep the vnode lock locked
945 	return B_OK;
946 }
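
// Illustrative caller-side sketch (not part of the original source): on
// success create_new_vnode_and_lock() returns with sVnodeLock write-locked,
// so the caller must unlock in both the "created" and "found" cases:
//
//   struct vnode* vnode;
//   bool nodeCreated;
//   if (create_new_vnode_and_lock(mountID, vnodeID, vnode, nodeCreated)
//           == B_OK) {
//       // ... vnode is busy if newly created, so it is safe to set up ...
//       rw_lock_write_unlock(&sVnodeLock);
//   }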
947 
948 
949 /*!	Frees the vnode and all resources it has acquired, and removes
950 	it from the vnode hash as well as from its mount structure.
951 	Will also make sure that any cache modifications are written back.
952 */
953 static void
954 free_vnode(struct vnode* vnode, bool reenter)
955 {
956 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
957 		vnode);
958 
959 	// write back any changes in this vnode's cache -- but only
960 	// if the vnode won't be deleted, in which case the changes
961 	// will be discarded
962 
963 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
964 		FS_CALL_NO_PARAMS(vnode, fsync);
965 
966 	// Note: If this vnode has a cache attached, there will still be two
967 	// references to that cache at this point. The last one belongs to the vnode
968 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
969 	// cache. Each but the last reference to a cache also includes a reference
970 	// to the vnode. The file cache, however, released its reference (cf.
971 	// file_cache_create()), so that this vnode's ref count has the chance to
972 	// ever drop to 0. Deleting the file cache now, will cause the next to last
973 	// cache reference to be released, which will also release a (no longer
974 	// existing) vnode reference. To avoid problems, we set the vnode's ref
975 	// count, so that it will neither become negative nor 0.
976 	vnode->ref_count = 2;
977 
978 	if (!vnode->IsUnpublished()) {
979 		if (vnode->IsRemoved())
980 			FS_CALL(vnode, remove_vnode, reenter);
981 		else
982 			FS_CALL(vnode, put_vnode, reenter);
983 	}
984 
985 	// If the vnode has a VMCache attached, make sure that it won't try to get
986 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
987 	// long as the vnode is busy and in the hash, that won't happen, but as
988 	// soon as we've removed it from the hash, it could reload the vnode -- with
989 	// a new cache attached!
990 	if (vnode->cache != NULL)
991 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
992 
993 	// The file system has removed the resources of the vnode now, so we can
994 	// make it available again (by removing the busy vnode from the hash).
995 	rw_lock_write_lock(&sVnodeLock);
996 	hash_remove(sVnodeTable, vnode);
997 	rw_lock_write_unlock(&sVnodeLock);
998 
999 	// if we have a VMCache attached, remove it
1000 	if (vnode->cache)
1001 		vnode->cache->ReleaseRef();
1002 
1003 	vnode->cache = NULL;
1004 
1005 	remove_vnode_from_mount_list(vnode, vnode->mount);
1006 
1007 	free(vnode);
1008 }
1009 
1010 
1011 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1012 	if the counter dropped to 0.
1013 
1014 	The caller must, of course, own a reference to the vnode to call this
1015 	function.
1016 	The caller must not hold the sVnodeLock or the sMountMutex.
1017 
1018 	\param vnode the vnode.
1019 	\param alwaysFree don't move this vnode into the unused list, but really
1020 		   delete it if possible.
1021 	\param reenter \c true, if this function is called (indirectly) from within
1022 		   a file system. This will be passed to file system hooks only.
1023 	\return \c B_OK, if everything went fine, an error code otherwise.
1024 */
1025 static status_t
1026 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1027 {
1028 	ReadLocker locker(sVnodeLock);
1029 	AutoLocker<Vnode> nodeLocker(vnode);
1030 
1031 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1032 
1033 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1034 
1035 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1036 		vnode->ref_count));
1037 
1038 	if (oldRefCount != 1)
1039 		return B_OK;
1040 
1041 	if (vnode->IsBusy())
1042 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1043 
1044 	bool freeNode = false;
1045 	bool freeUnusedNodes = false;
1046 
1047 	// Just insert the vnode into an unused list if we don't need
1048 	// to delete it
1049 	if (vnode->IsRemoved() || alwaysFree) {
1050 		vnode_to_be_freed(vnode);
1051 		vnode->SetBusy(true);
1052 		freeNode = true;
1053 	} else
1054 		freeUnusedNodes = vnode_unused(vnode);
1055 
1056 	nodeLocker.Unlock();
1057 	locker.Unlock();
1058 
1059 	if (freeNode)
1060 		free_vnode(vnode, reenter);
1061 	else if (freeUnusedNodes)
1062 		free_unused_vnodes();
1063 
1064 	return B_OK;
1065 }
1066 
1067 
1068 /*!	\brief Increments the reference counter of the given vnode.
1069 
1070 	The caller must make sure that the node isn't deleted while this function
1071 	is called. This can be done either:
1072 	- by ensuring that a reference to the node exists and remains in existence,
1073 	  or
1074 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1075 	  or by holding sVnodeLock write locked.
1076 
1077 	In the second case the caller is responsible for dealing with the ref count
1078 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1079 	node is busy in the first place and 2. vnode_used() must be called for the
1080 	node.
1081 
1082 	\param vnode the vnode.
1083 */
1084 static void
1085 inc_vnode_ref_count(struct vnode* vnode)
1086 {
1087 	atomic_add(&vnode->ref_count, 1);
1088 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1089 		vnode->ref_count));
1090 }
1091 
1092 
1093 static bool
1094 is_special_node_type(int type)
1095 {
1096 	// at the moment only FIFOs are supported
1097 	return S_ISFIFO(type);
1098 }
1099 
1100 
1101 static status_t
1102 create_special_sub_node(struct vnode* vnode, uint32 flags)
1103 {
1104 	if (S_ISFIFO(vnode->Type()))
1105 		return create_fifo_vnode(vnode->mount->volume, vnode);
1106 
1107 	return B_BAD_VALUE;
1108 }
1109 
1110 
1111 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1112 
1113 	If the node is not yet in memory, it will be loaded.
1114 
1115 	The caller must not hold the sVnodeLock or the sMountMutex.
1116 
1117 	\param mountID the mount ID.
1118 	\param vnodeID the node ID.
1119 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1120 		   retrieved vnode structure shall be written.
1121 	\param canWait \c true, if the function shall wait for a busy vnode to
1122 		   become unbusy instead of failing with \c B_BUSY.
1123 	\param reenter \c true, if this function is called (indirectly) from within
1124 		   a file system.
1125 	\return \c B_OK, if everything went fine, an error code otherwise.
1124 */
1125 static status_t
1126 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1127 	int reenter)
1128 {
1129 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1130 		mountID, vnodeID, _vnode));
1131 
1132 	rw_lock_read_lock(&sVnodeLock);
1133 
1134 	int32 tries = 2000;
1135 		// try for 10 secs
1136 restart:
1137 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1138 	AutoLocker<Vnode> nodeLocker(vnode);
1139 
1140 	if (vnode && vnode->IsBusy()) {
1141 		nodeLocker.Unlock();
1142 		rw_lock_read_unlock(&sVnodeLock);
1143 		if (!canWait || --tries < 0) {
1144 			// vnode doesn't seem to become unbusy
1145 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is not becoming unbusy!\n",
1146 				mountID, vnodeID);
1147 			return B_BUSY;
1148 		}
1149 		snooze(5000); // 5 ms
1150 		rw_lock_read_lock(&sVnodeLock);
1151 		goto restart;
1152 	}
1153 
1154 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1155 
1156 	status_t status;
1157 
1158 	if (vnode) {
1159 		if (vnode->ref_count == 0) {
1160 			// this vnode has been unused before
1161 			vnode_used(vnode);
1162 		}
1163 		inc_vnode_ref_count(vnode);
1164 
1165 		nodeLocker.Unlock();
1166 		rw_lock_read_unlock(&sVnodeLock);
1167 	} else {
1168 		// we need to create a new vnode and read it in
1169 		rw_lock_read_unlock(&sVnodeLock);
1170 			// unlock -- create_new_vnode_and_lock() write-locks on success
1171 		bool nodeCreated;
1172 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1173 			nodeCreated);
1174 		if (status != B_OK)
1175 			return status;
1176 
1177 		if (!nodeCreated) {
1178 			rw_lock_read_lock(&sVnodeLock);
1179 			rw_lock_write_unlock(&sVnodeLock);
1180 			goto restart;
1181 		}
1182 
1183 		rw_lock_write_unlock(&sVnodeLock);
1184 
1185 		int type;
1186 		uint32 flags;
1187 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1188 			&flags, reenter);
1189 		if (status == B_OK && vnode->private_node == NULL)
1190 			status = B_BAD_VALUE;
1191 
1192 		bool gotNode = status == B_OK;
1193 		bool publishSpecialSubNode = false;
1194 		if (gotNode) {
1195 			vnode->SetType(type);
1196 			publishSpecialSubNode = is_special_node_type(type)
1197 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1198 		}
1199 
1200 		if (gotNode && publishSpecialSubNode)
1201 			status = create_special_sub_node(vnode, flags);
1202 
1203 		if (status != B_OK) {
1204 			if (gotNode)
1205 				FS_CALL(vnode, put_vnode, reenter);
1206 
1207 			rw_lock_write_lock(&sVnodeLock);
1208 			hash_remove(sVnodeTable, vnode);
1209 			remove_vnode_from_mount_list(vnode, vnode->mount);
1210 			rw_lock_write_unlock(&sVnodeLock);
1211 
1212 			free(vnode);
1213 			return status;
1214 		}
1215 
1216 		rw_lock_read_lock(&sVnodeLock);
1217 		vnode->Lock();
1218 
1219 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1220 		vnode->SetBusy(false);
1221 
1222 		vnode->Unlock();
1223 		rw_lock_read_unlock(&sVnodeLock);
1224 	}
1225 
1226 	TRACE(("get_vnode: returning %p\n", vnode));
1227 
1228 	*_vnode = vnode;
1229 	return B_OK;
1230 }
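
// Illustrative usage sketch (not part of the original source): every
// successful get_vnode() must eventually be balanced by a put_vnode().
// With canWait set to false the function fails with B_BUSY instead of
// retrying while the node is busy:
//
//   struct vnode* vnode;
//   status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
//   if (status == B_OK) {
//       // ... use the node ...
//       put_vnode(vnode);
//   }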
1231 
1232 
1233 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1234 	if the counter dropped to 0.
1235 
1236 	The caller must, of course, own a reference to the vnode to call this
1237 	function.
1238 	The caller must not hold the sVnodeLock or the sMountMutex.
1239 
1240 	\param vnode the vnode.
1241 */
1242 static inline void
1243 put_vnode(struct vnode* vnode)
1244 {
1245 	dec_vnode_ref_count(vnode, false, false);
1246 }
1247 
1248 
1249 static void
1250 free_unused_vnodes(int32 level)
1251 {
1252 	unused_vnodes_check_started();
1253 
1254 	if (level == B_NO_LOW_RESOURCE) {
1255 		unused_vnodes_check_done();
1256 		return;
1257 	}
1258 
1259 	flush_hot_vnodes();
1260 
1261 	// determine how many nodes to free
1262 	uint32 count = 1;
1263 	{
1264 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1265 
1266 		switch (level) {
1267 			case B_LOW_RESOURCE_NOTE:
1268 				count = sUnusedVnodes / 100;
1269 				break;
1270 			case B_LOW_RESOURCE_WARNING:
1271 				count = sUnusedVnodes / 10;
1272 				break;
1273 			case B_LOW_RESOURCE_CRITICAL:
1274 				count = sUnusedVnodes;
1275 				break;
1276 		}
1277 
1278 		if (count > sUnusedVnodes)
1279 			count = sUnusedVnodes;
1280 	}
1281 
1282 	// Write back the modified pages of some unused vnodes and free them.
1283 
1284 	for (uint32 i = 0; i < count; i++) {
1285 		ReadLocker vnodesReadLocker(sVnodeLock);
1286 
1287 		// get the first node
1288 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1289 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1290 			&sUnusedVnodeList);
1291 		unusedVnodesLocker.Unlock();
1292 
1293 		if (vnode == NULL)
1294 			break;
1295 
1296 		// lock the node
1297 		AutoLocker<Vnode> nodeLocker(vnode);
1298 
1299 		// Check whether the node is still unused -- since we only append to the
1300 		// tail of the unused queue, the vnode should still be at its head.
1301 		// Alternatively we could check its ref count for 0 and its busy flag,
1302 		// but if the node is no longer at the head of the queue, it means it
1303 		// has been touched in the meantime, i.e. it is no longer the least
1304 		// recently used unused vnode and we rather don't free it.
1305 		unusedVnodesLocker.Lock();
1306 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1307 			continue;
1308 		unusedVnodesLocker.Unlock();
1309 
1310 		ASSERT(!vnode->IsBusy());
1311 
1312 		// grab a reference
1313 		inc_vnode_ref_count(vnode);
1314 		vnode_used(vnode);
1315 
1316 		// write back changes and free the node
1317 		nodeLocker.Unlock();
1318 		vnodesReadLocker.Unlock();
1319 
1320 		if (vnode->cache != NULL)
1321 			vnode->cache->WriteModified();
1322 
1323 		dec_vnode_ref_count(vnode, true, false);
1324 			// this should free the vnode when it's still unused
1325 	}
1326 
1327 	unused_vnodes_check_done();
1328 }
1329 
1330 
1331 /*!	Gets the vnode the given vnode is covering.
1332 
1333 	The caller must have \c sVnodeLock read-locked at least.
1334 
1335 	The function returns a reference to the retrieved vnode (if any), which
1336 	the caller is responsible for releasing.
1337 
1338 	\param vnode The vnode whose covered node shall be returned.
1339 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1340 		vnode.
1341 */
1342 static inline Vnode*
1343 get_covered_vnode_locked(Vnode* vnode)
1344 {
1345 	if (Vnode* coveredNode = vnode->covers) {
1346 		while (coveredNode->covers != NULL)
1347 			coveredNode = coveredNode->covers;
1348 
1349 		inc_vnode_ref_count(coveredNode);
1350 		return coveredNode;
1351 	}
1352 
1353 	return NULL;
1354 }
1355 
1356 
1357 /*!	Gets the vnode the given vnode is covering.
1358 
1359 	The caller must not hold \c sVnodeLock. Note that this implies a race
1360 	condition, since the situation can change at any time.
1361 
1362 	The function returns a reference to the retrieved vnode (if any), which
1363 	the caller is responsible for releasing.
1364 
1365 	\param vnode The vnode whose covered node shall be returned.
1366 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1367 		vnode.
1368 */
1369 static inline Vnode*
1370 get_covered_vnode(Vnode* vnode)
1371 {
1372 	if (!vnode->IsCovering())
1373 		return NULL;
1374 
1375 	ReadLocker vnodeReadLocker(sVnodeLock);
1376 	return get_covered_vnode_locked(vnode);
1377 }
1378 
1379 
1380 /*!	Gets the vnode the given vnode is covered by.
1381 
1382 	The caller must have \c sVnodeLock read-locked at least.
1383 
1384 	The function returns a reference to the retrieved vnode (if any), which
1385 	the caller is responsible for releasing.
1386 
1387 	\param vnode The vnode whose covering node shall be returned.
1388 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1389 		any vnode.
1390 */
1391 static Vnode*
1392 get_covering_vnode_locked(Vnode* vnode)
1393 {
1394 	if (Vnode* coveringNode = vnode->covered_by) {
1395 		while (coveringNode->covered_by != NULL)
1396 			coveringNode = coveringNode->covered_by;
1397 
1398 		inc_vnode_ref_count(coveringNode);
1399 		return coveringNode;
1400 	}
1401 
1402 	return NULL;
1403 }
1404 
1405 
1406 /*!	Gets the vnode the given vnode is covered by.
1407 
1408 	The caller must not hold \c sVnodeLock. Note that this implies a race
1409 	condition, since the situation can change at any time.
1410 
1411 	The function returns a reference to the retrieved vnode (if any), which
1412 	the caller is responsible for releasing.
1413 
1414 	\param vnode The vnode whose covering node shall be returned.
1415 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1416 		any vnode.
1417 */
1418 static inline Vnode*
1419 get_covering_vnode(Vnode* vnode)
1420 {
1421 	if (!vnode->IsCovered())
1422 		return NULL;
1423 
1424 	ReadLocker vnodeReadLocker(sVnodeLock);
1425 	return get_covering_vnode_locked(vnode);
1426 }
1427 
1428 
1429 static void
1430 free_unused_vnodes()
1431 {
1432 	free_unused_vnodes(
1433 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1434 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1435 }
1436 
1437 
1438 static void
1439 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1440 {
1441 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1442 
1443 	free_unused_vnodes(level);
1444 }
1445 
1446 
1447 static inline void
1448 put_advisory_locking(struct advisory_locking* locking)
1449 {
1450 	release_sem(locking->lock);
1451 }
1452 
1453 
1454 /*!	Returns the advisory_locking object of the \a vnode in case it
1455 	has one, and locks it.
1456 	You have to call put_advisory_locking() when you're done with
1457 	it.
1458 	Note, you must not have the vnode mutex locked when calling
1459 	this function.
1460 */
1461 static struct advisory_locking*
1462 get_advisory_locking(struct vnode* vnode)
1463 {
1464 	rw_lock_read_lock(&sVnodeLock);
1465 	vnode->Lock();
1466 
1467 	struct advisory_locking* locking = vnode->advisory_locking;
1468 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1469 
1470 	vnode->Unlock();
1471 	rw_lock_read_unlock(&sVnodeLock);
1472 
1473 	if (lock >= 0)
1474 		lock = acquire_sem(lock);
1475 	if (lock < 0) {
1476 		// This means the locking has been deleted in the meantime
1477 		// or had never existed in the first place - otherwise, we
1478 		// would get the lock at some point.
1479 		return NULL;
1480 	}
1481 
1482 	return locking;
1483 }
1484 
1485 
1486 /*!	Creates a locked advisory_locking object, and attaches it to the
1487 	given \a vnode.
1488 	Returns B_OK in case of success -- even if the vnode got such an
1489 	object from someone else in the meantime; you'll get that one
1490 	locked then.
1491 */
1492 static status_t
1493 create_advisory_locking(struct vnode* vnode)
1494 {
1495 	if (vnode == NULL)
1496 		return B_FILE_ERROR;
1497 
1498 	ObjectDeleter<advisory_locking> lockingDeleter;
1499 	struct advisory_locking* locking = NULL;
1500 
1501 	while (get_advisory_locking(vnode) == NULL) {
1502 		// no locking object set on the vnode yet, create one
1503 		if (locking == NULL) {
1504 			locking = new(std::nothrow) advisory_locking;
1505 			if (locking == NULL)
1506 				return B_NO_MEMORY;
1507 			lockingDeleter.SetTo(locking);
1508 
1509 			locking->wait_sem = create_sem(0, "advisory lock");
1510 			if (locking->wait_sem < 0)
1511 				return locking->wait_sem;
1512 
1513 			locking->lock = create_sem(0, "advisory locking");
1514 			if (locking->lock < 0)
1515 				return locking->lock;
1516 		}
1517 
1518 		// set our newly created locking object
1519 		ReadLocker _(sVnodeLock);
1520 		AutoLocker<Vnode> nodeLocker(vnode);
1521 		if (vnode->advisory_locking == NULL) {
1522 			vnode->advisory_locking = locking;
1523 			lockingDeleter.Detach();
1524 			return B_OK;
1525 		}
1526 	}
1527 
1528 	// The vnode already had a locking object. That's just as well.
1529 
1530 	return B_OK;
1531 }
1532 
1533 
1534 /*! Returns \c true when either \a flock is \c NULL or \a flock intersects
1535 	with the advisory_lock \a lock.
1536 */
1537 static bool
1538 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1539 {
1540 	if (flock == NULL)
1541 		return true;
1542 
1543 	return lock->start <= flock->l_start - 1 + flock->l_len
1544 		&& lock->end >= flock->l_start;
1545 }
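
// Illustrative example (not part of the original source): a normalized flock
// with l_start = 100 and l_len = 50 covers the byte range [100, 149]. A lock
// spanning [140, 200] intersects it, since 140 <= 100 - 1 + 50 and
// 200 >= 100; a lock spanning [150, 200] does not.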
1546 
1547 
1548 /*!	Tests whether acquiring a lock would block.
1549 */
1550 static status_t
1551 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1552 {
1553 	flock->l_type = F_UNLCK;
1554 
1555 	struct advisory_locking* locking = get_advisory_locking(vnode);
1556 	if (locking == NULL)
1557 		return B_OK;
1558 
1559 	team_id team = team_get_current_team_id();
1560 
1561 	LockList::Iterator iterator = locking->locks.GetIterator();
1562 	while (iterator.HasNext()) {
1563 		struct advisory_lock* lock = iterator.Next();
1564 
1565 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1566 			// locks do overlap
1567 			if (flock->l_type != F_RDLCK || !lock->shared) {
1568 				// collision
1569 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1570 				flock->l_whence = SEEK_SET;
1571 				flock->l_start = lock->start;
1572 				flock->l_len = lock->end - lock->start + 1;
1573 				flock->l_pid = lock->team;
1574 				break;
1575 			}
1576 		}
1577 	}
1578 
1579 	put_advisory_locking(locking);
1580 	return B_OK;
1581 }
1582 
1583 
1584 /*!	Removes the specified lock, or all locks of the calling team
1585 	if \a flock is NULL.
1586 */
1587 static status_t
1588 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1589 {
1590 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1591 
1592 	struct advisory_locking* locking = get_advisory_locking(vnode);
1593 	if (locking == NULL)
1594 		return B_OK;
1595 
1596 	// TODO: use the thread ID instead??
1597 	team_id team = team_get_current_team_id();
1598 	pid_t session = thread_get_current_thread()->team->session_id;
1599 
1600 	// find matching lock entries
1601 
1602 	LockList::Iterator iterator = locking->locks.GetIterator();
1603 	while (iterator.HasNext()) {
1604 		struct advisory_lock* lock = iterator.Next();
1605 		bool removeLock = false;
1606 
1607 		if (lock->session == session)
1608 			removeLock = true;
1609 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1610 			bool endsBeyond = false;
1611 			bool startsBefore = false;
1612 			if (flock != NULL) {
1613 				startsBefore = lock->start < flock->l_start;
1614 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1615 			}
1616 
1617 			if (!startsBefore && !endsBeyond) {
1618 				// lock is completely contained in flock
1619 				removeLock = true;
1620 			} else if (startsBefore && !endsBeyond) {
1621 				// cut the end of the lock
1622 				lock->end = flock->l_start - 1;
1623 			} else if (!startsBefore && endsBeyond) {
1624 				// cut the start of the lock
1625 				lock->start = flock->l_start + flock->l_len;
1626 			} else {
1627 				// divide the lock into two locks
1628 				struct advisory_lock* secondLock = (struct advisory_lock*)
1629 					malloc(sizeof(struct advisory_lock));
1630 				if (secondLock == NULL) {
1631 					// TODO: we should probably revert the locks we already
1632 					// changed... (ie. allocate upfront)
1633 					put_advisory_locking(locking);
1634 					return B_NO_MEMORY;
1635 				}
1636 
1637 				// remember the original end, it is needed for the second lock
1638 				off_t oldEnd = lock->end;
1639 				lock->end = flock->l_start - 1;
1640 
1641 				secondLock->team = lock->team;
1642 				secondLock->session = lock->session;
1643 				// values must already be normalized when getting here
1644 				secondLock->start = flock->l_start + flock->l_len;
1645 				secondLock->end = oldEnd;
1646 				secondLock->shared = lock->shared;
1644 
1645 				locking->locks.Add(secondLock);
1646 			}
1647 		}
1648 
1649 		if (removeLock) {
1650 			// this lock is no longer used
1651 			iterator.Remove();
1652 			free(lock);
1653 		}
1654 	}
1655 
1656 	bool removeLocking = locking->locks.IsEmpty();
1657 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1658 
1659 	put_advisory_locking(locking);
1660 
1661 	if (removeLocking) {
1662 		// We can remove the whole advisory locking structure; it's no
1663 		// longer used
1664 		locking = get_advisory_locking(vnode);
1665 		if (locking != NULL) {
1666 			ReadLocker locker(sVnodeLock);
1667 			AutoLocker<Vnode> nodeLocker(vnode);
1668 
1669 			// the locking could have been changed in the mean time
1670 			if (locking->locks.IsEmpty()) {
1671 				vnode->advisory_locking = NULL;
1672 				nodeLocker.Unlock();
1673 				locker.Unlock();
1674 
1675 				// we've detached the locking from the vnode, so we can
1676 				// safely delete it
1677 				delete locking;
1678 			} else {
1679 				// the locking is in use again
1680 				nodeLocker.Unlock();
1681 				locker.Unlock();
1682 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1683 			}
1684 		}
1685 	}
1686 
1687 	return B_OK;
1688 }
1689 
1690 
1691 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1692 	will wait for the lock to become available, if there are any collisions
1693 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1694 
1695 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1696 	BSD flock() semantics are used, that is, all children can unlock the file
1697 	in question (we even allow parents to remove the lock, though, but that
1698 	seems to be in line to what the BSD's are doing).
1699 */
1700 static status_t
1701 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1702 	bool wait)
1703 {
1704 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1705 		vnode, flock, wait ? "yes" : "no"));
1706 
1707 	bool shared = flock->l_type == F_RDLCK;
1708 	status_t status = B_OK;
1709 
1710 	// TODO: do deadlock detection!
1711 
1712 	struct advisory_locking* locking;
1713 
1714 	while (true) {
1715 		// if this vnode has an advisory_locking structure attached,
1716 		// lock that one and search for any colliding file lock
1717 		status = create_advisory_locking(vnode);
1718 		if (status != B_OK)
1719 			return status;
1720 
1721 		locking = vnode->advisory_locking;
1722 		team_id team = team_get_current_team_id();
1723 		sem_id waitForLock = -1;
1724 
1725 		// test for collisions
1726 		LockList::Iterator iterator = locking->locks.GetIterator();
1727 		while (iterator.HasNext()) {
1728 			struct advisory_lock* lock = iterator.Next();
1729 
1730 			// TODO: locks from the same team might be joinable!
1731 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1732 				// locks do overlap
1733 				if (!shared || !lock->shared) {
1734 					// we need to wait
1735 					waitForLock = locking->wait_sem;
1736 					break;
1737 				}
1738 			}
1739 		}
1740 
1741 		if (waitForLock < 0)
1742 			break;
1743 
1744 		// We need to wait. Do that or fail now, if we've been asked not to.
1745 
1746 		if (!wait) {
1747 			put_advisory_locking(locking);
1748 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1749 		}
1750 
1751 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1752 			B_CAN_INTERRUPT, 0);
1753 		if (status != B_OK && status != B_BAD_SEM_ID)
1754 			return status;
1755 
1756 		// We have been notified, but we need to re-lock the locking object. So
1757 		// go another round...
1758 	}
1759 
1760 	// install new lock
1761 
1762 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1763 		sizeof(struct advisory_lock));
1764 	if (lock == NULL) {
1765 		put_advisory_locking(locking);
1766 		return B_NO_MEMORY;
1767 	}
1768 
1769 	lock->team = team_get_current_team_id();
1770 	lock->session = session;
1771 	// values must already be normalized when getting here
1772 	lock->start = flock->l_start;
1773 	lock->end = flock->l_start - 1 + flock->l_len;
1774 	lock->shared = shared;
1775 
1776 	locking->locks.Add(lock);
1777 	put_advisory_locking(locking);
1778 
1779 	return status;
1780 }
1781 
1782 
1783 /*!	Normalizes the \a flock structure to make it easier to compare the
1784 	structure with others. The l_start and l_len fields are set to absolute
1785 	values according to the l_whence field.
1786 */
1787 static status_t
1788 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1789 {
1790 	switch (flock->l_whence) {
1791 		case SEEK_SET:
1792 			break;
1793 		case SEEK_CUR:
1794 			flock->l_start += descriptor->pos;
1795 			break;
1796 		case SEEK_END:
1797 		{
1798 			struct vnode* vnode = descriptor->u.vnode;
1799 			struct stat stat;
1800 			status_t status;
1801 
1802 			if (!HAS_FS_CALL(vnode, read_stat))
1803 				return B_UNSUPPORTED;
1804 
1805 			status = FS_CALL(vnode, read_stat, &stat);
1806 			if (status != B_OK)
1807 				return status;
1808 
1809 			flock->l_start += stat.st_size;
1810 			break;
1811 		}
1812 		default:
1813 			return B_BAD_VALUE;
1814 	}
1815 
1816 	if (flock->l_start < 0)
1817 		flock->l_start = 0;
1818 	if (flock->l_len == 0)
1819 		flock->l_len = OFF_MAX;
1820 
1821 	// don't let the offset and length overflow
1822 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1823 		flock->l_len = OFF_MAX - flock->l_start;
1824 
1825 	if (flock->l_len < 0) {
1826 		// a negative length reverses the region
1827 		flock->l_start += flock->l_len;
1828 		flock->l_len = -flock->l_len;
1829 	}
1830 
1831 	return B_OK;
1832 }
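
// Illustrative example (not part of the original source): for a descriptor
// positioned at offset 100, a flock with l_whence = SEEK_CUR, l_start = -10
// and l_len = 0 is normalized to the absolute region l_start = 90,
// l_len = OFF_MAX - 90 -- i.e. "from offset 90 to the end of the file".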
1833 
1834 
1835 static void
1836 replace_vnode_if_disconnected(struct fs_mount* mount,
1837 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1838 	struct vnode* fallBack, bool lockRootLock)
1839 {
1840 	struct vnode* givenVnode = vnode;
1841 	bool vnodeReplaced = false;
1842 
1843 	ReadLocker vnodeReadLocker(sVnodeLock);
1844 
1845 	if (lockRootLock)
1846 		mutex_lock(&sIOContextRootLock);
1847 
1848 	while (vnode != NULL && vnode->mount == mount
1849 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1850 		if (vnode->covers != NULL) {
1851 			// redirect the vnode to the covered vnode
1852 			vnode = vnode->covers;
1853 		} else
1854 			vnode = fallBack;
1855 
1856 		vnodeReplaced = true;
1857 	}
1858 
1859 	// If we've replaced the node, grab a reference for the new one.
1860 	if (vnodeReplaced && vnode != NULL)
1861 		inc_vnode_ref_count(vnode);
1862 
1863 	if (lockRootLock)
1864 		mutex_unlock(&sIOContextRootLock);
1865 
1866 	vnodeReadLocker.Unlock();
1867 
1868 	if (vnodeReplaced)
1869 		put_vnode(givenVnode);
1870 }
1871 
1872 
1873 /*!	Disconnects all file descriptors that are associated with the
1874 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1875 	\a mount object.
1876 
1877 	Note, after you've called this function, there might still be ongoing
1878 	accesses - they won't be interrupted if they were already in progress.
1879 	However, any subsequent access will fail.
1880 
1881 	This is not a cheap function and should be used with care and rarely.
1882 	TODO: there is currently no means to stop a blocking read/write!
1883 */
1884 static void
1885 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1886 	struct vnode* vnodeToDisconnect)
1887 {
1888 	// iterate over all teams and peek into their file descriptors
1889 	TeamListIterator teamIterator;
1890 	while (Team* team = teamIterator.Next()) {
1891 		BReference<Team> teamReference(team, true);
1892 
1893 		// lock the I/O context
1894 		io_context* context = team->io_context;
1895 		MutexLocker contextLocker(context->io_mutex);
1896 
1897 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1898 			sRoot, true);
1899 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1900 			sRoot, false);
1901 
1902 		for (uint32 i = 0; i < context->table_size; i++) {
1903 			if (struct file_descriptor* descriptor = context->fds[i]) {
1904 				inc_fd_ref_count(descriptor);
1905 
1906 				// if this descriptor points at this mount, we
1907 				// need to disconnect it to be able to unmount
1908 				struct vnode* vnode = fd_vnode(descriptor);
1909 				if (vnodeToDisconnect != NULL) {
1910 					if (vnode == vnodeToDisconnect)
1911 						disconnect_fd(descriptor);
1912 				} else if ((vnode != NULL && vnode->mount == mount)
1913 					|| (vnode == NULL && descriptor->u.mount == mount))
1914 					disconnect_fd(descriptor);
1915 
1916 				put_fd(descriptor);
1917 			}
1918 		}
1919 	}
1920 }
1921 
1922 
1923 /*!	\brief Gets the root node of the current IO context.
1924 	If \a kernel is \c true, the kernel IO context will be used.
1925 	The caller obtains a reference to the returned node.
1926 */
1927 struct vnode*
1928 get_root_vnode(bool kernel)
1929 {
1930 	if (!kernel) {
1931 		// Get current working directory from io context
1932 		struct io_context* context = get_current_io_context(kernel);
1933 
1934 		mutex_lock(&sIOContextRootLock);
1935 
1936 		struct vnode* root = context->root;
1937 		if (root != NULL)
1938 			inc_vnode_ref_count(root);
1939 
1940 		mutex_unlock(&sIOContextRootLock);
1941 
1942 		if (root != NULL)
1943 			return root;
1944 
1945 		// That should never happen.
1946 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1947 			"have a root\n", team_get_current_team_id());
1948 	}
1949 
1950 	inc_vnode_ref_count(sRoot);
1951 	return sRoot;
1952 }
1953 
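/*!	Usage sketch for get_root_vnode() above: the returned node is
	referenced, so callers have to balance it with put_vnode():

		struct vnode* root = get_root_vnode(false);
		// ... use root ...
		put_vnode(root);
*/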
1954 
1955 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1956 
1957 	Given an arbitrary vnode (identified by mount and node ID), the function
1958 	checks whether the vnode is covered by another vnode. If it is, the
1959 	function returns the mount and node ID of the covering vnode. Otherwise
1960 	it simply returns the supplied mount and node ID.
1961 
1962 	In case of error (e.g. the supplied node could not be found) the variables
1963 	for storing the resolved mount and node ID remain untouched and an error
1964 	code is returned.
1965 
1966 	\param mountID The mount ID of the vnode in question.
1967 	\param nodeID The node ID of the vnode in question.
1968 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1969 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1970 	\return
1971 	- \c B_OK, if everything went fine,
1972 	- another error code, if something went wrong.
1973 */
1974 status_t
1975 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1976 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1977 {
1978 	// get the node
1979 	struct vnode* node;
1980 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1981 	if (error != B_OK)
1982 		return error;
1983 
1984 	// resolve the node
1985 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1986 		put_vnode(node);
1987 		node = coveringNode;
1988 	}
1989 
1990 	// set the return values
1991 	*resolvedMountID = node->device;
1992 	*resolvedNodeID = node->id;
1993 
1994 	put_vnode(node);
1995 
1996 	return B_OK;
1997 }
1998 
1999 
2000 /*!	\brief Gets the directory path and leaf name for a given path.
2001 
2002 	The supplied \a path is transformed to refer to the directory part of
2003 	the entry identified by the original path, and the leaf name of the
2004 	original entry is written into the buffer \a filename.
2005 	Neither the returned path nor the leaf name can be expected to be
2006 	canonical.
2007 
2008 	\param path The path to be analyzed. Must be able to store at least one
2009 		   additional character.
2010 	\param filename The buffer into which the leaf name will be written.
2011 		   Must be of size B_FILE_NAME_LENGTH at least.
2012 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2013 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2014 		   if the given path name is empty.
2015 */
2016 static status_t
2017 get_dir_path_and_leaf(char* path, char* filename)
2018 {
2019 	if (*path == '\0')
2020 		return B_ENTRY_NOT_FOUND;
2021 
2022 	char* last = strrchr(path, '/');
2023 		// '/' are not allowed in file names!
2024 
2025 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2026 
2027 	if (last == NULL) {
2028 		// this path is a single segment with no '/' in it
2029 		// ex. "foo"
2030 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2031 			return B_NAME_TOO_LONG;
2032 
2033 		strcpy(path, ".");
2034 	} else {
2035 		last++;
2036 		if (last[0] == '\0') {
2037 			// special case: the path ends in one or more '/' - remove them
2038 			while (*--last == '/' && last != path);
2039 			last[1] = '\0';
2040 
2041 			if (last == path && last[0] == '/') {
2042 				// This path points to the root of the file system
2043 				strcpy(filename, ".");
2044 				return B_OK;
2045 			}
2046 			for (; last != path && *(last - 1) != '/'; last--);
2047 				// rewind to the start of the leaf before the '/'
2048 		}
2049 
2050 		// normal leaf: replace the leaf portion of the path with a '.'
2051 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2052 			return B_NAME_TOO_LONG;
2053 
2054 		last[0] = '.';
2055 		last[1] = '\0';
2056 	}
2057 	return B_OK;
2058 }
2059 
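/*!	Illustrative examples for get_dir_path_and_leaf() above (made-up
	paths; buffer contents before -> after):

		"/boot/home/file" -> path "/boot/home/.", filename "file"
		"/boot/home/dir/" -> path "/boot/home/.", filename "dir"
		"foo"             -> path ".",            filename "foo"
		"/"               -> path "/",            filename "."
*/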
2060 
2061 static status_t
2062 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2063 	bool traverse, bool kernel, struct vnode** _vnode)
2064 {
2065 	char clonedName[B_FILE_NAME_LENGTH + 1];
2066 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2067 		return B_NAME_TOO_LONG;
2068 
2069 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2070 	struct vnode* directory;
2071 
2072 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2073 	if (status < 0)
2074 		return status;
2075 
2076 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2077 		_vnode, NULL);
2078 }
2079 
2080 
2081 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2082 	and returns the respective vnode.
2083 	On success a reference to the vnode is acquired for the caller.
2084 */
2085 static status_t
2086 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2087 {
2088 	ino_t id;
2089 
2090 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2091 		return get_vnode(dir->device, id, _vnode, true, false);
2092 
2093 	status_t status = FS_CALL(dir, lookup, name, &id);
2094 	if (status != B_OK)
2095 		return status;
2096 
2097 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2098 	// have a reference and just need to look the node up.
2099 	rw_lock_read_lock(&sVnodeLock);
2100 	*_vnode = lookup_vnode(dir->device, id);
2101 	rw_lock_read_unlock(&sVnodeLock);
2102 
2103 	if (*_vnode == NULL) {
2104 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2105 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2106 		return B_ENTRY_NOT_FOUND;
2107 	}
2108 
2109 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2110 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2111 //		(*_vnode)->mount->id, (*_vnode)->id);
2112 
2113 	return B_OK;
2114 }
2115 
2116 
2117 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2118 	\a path must not be NULL.
2119 	If it returns successfully, \a path contains the name of the last path
2120 	component. This function clobbers the buffer pointed to by \a path only
2121 	if it does contain more than one component.
2122 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2123 	it is successful or not!
2124 */
2125 static status_t
2126 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2127 	int count, struct io_context* ioContext, struct vnode** _vnode,
2128 	ino_t* _parentID)
2129 {
2130 	status_t status = B_OK;
2131 	ino_t lastParentID = vnode->id;
2132 
2133 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2134 
2135 	if (path == NULL) {
2136 		put_vnode(vnode);
2137 		return B_BAD_VALUE;
2138 	}
2139 
2140 	if (*path == '\0') {
2141 		put_vnode(vnode);
2142 		return B_ENTRY_NOT_FOUND;
2143 	}
2144 
2145 	while (true) {
2146 		struct vnode* nextVnode;
2147 		char* nextPath;
2148 
2149 		TRACE(("vnode_path_to_vnode: top of loop. path = %p ('%s')\n", path,
2150 			path));
2151 
2152 		// done?
2153 		if (path[0] == '\0')
2154 			break;
2155 
2156 		// walk to find the next path component ("path" will point to a single
2157 		// path component), and filter out multiple slashes
2158 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2159 				nextPath++);
2160 
2161 		if (*nextPath == '/') {
2162 			*nextPath = '\0';
2163 			do
2164 				nextPath++;
2165 			while (*nextPath == '/');
2166 		}
2167 
2168 		// If the '..' is at a covering vnode, move to the covered vnode,
2169 		// so that we pass the '..' path to the underlying file system.
2170 		// Also prevent breaking out of the root of the IO context.
2171 		if (strcmp("..", path) == 0) {
2172 			if (vnode == ioContext->root) {
2173 				// Attempted prison break! Keep it contained.
2174 				path = nextPath;
2175 				continue;
2176 			}
2177 
2178 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2179 				nextVnode = coveredVnode;
2180 				put_vnode(vnode);
2181 				vnode = nextVnode;
2182 			}
2183 		}
2184 
2185 		// check if vnode is really a directory
2186 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2187 			status = B_NOT_A_DIRECTORY;
2188 
2189 		// Check if we have the right to search the current directory vnode.
2190 		// If a file system doesn't have the access() function, we assume that
2191 		// searching a directory is always allowed
2192 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2193 			status = FS_CALL(vnode, access, X_OK);
2194 
2195 		// Tell the filesystem to get the vnode of this path component (if we
2196 		// got the permission from the call above)
2197 		if (status == B_OK)
2198 			status = lookup_dir_entry(vnode, path, &nextVnode);
2199 
2200 		if (status != B_OK) {
2201 			put_vnode(vnode);
2202 			return status;
2203 		}
2204 
2205 		// If the new node is a symbolic link, resolve it (if we've been told
2206 		// to do it)
2207 		if (S_ISLNK(nextVnode->Type())
2208 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2209 			size_t bufferSize;
2210 			char* buffer;
2211 
2212 			TRACE(("traverse link\n"));
2213 
2214 			// it's not exactly nice style using goto in this way, but hey,
2215 			// it works :-/
2216 			if (count + 1 > B_MAX_SYMLINKS) {
2217 				status = B_LINK_LIMIT;
2218 				goto resolve_link_error;
2219 			}
2220 
2221 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2222 			if (buffer == NULL) {
2223 				status = B_NO_MEMORY;
2224 				goto resolve_link_error;
2225 			}
2226 
2227 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2228 				bufferSize--;
2229 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2230 				// null-terminate
2231 				if (status >= 0)
2232 					buffer[bufferSize] = '\0';
2233 			} else
2234 				status = B_BAD_VALUE;
2235 
2236 			if (status != B_OK) {
2237 				free(buffer);
2238 
2239 		resolve_link_error:
2240 				put_vnode(vnode);
2241 				put_vnode(nextVnode);
2242 
2243 				return status;
2244 			}
2245 			put_vnode(nextVnode);
2246 
2247 			// Check if we start from the root directory or the current
2248 			// directory ("vnode" still points to that one).
2249 			// Cut off all leading slashes if it's the root directory
2250 			path = buffer;
2251 			bool absoluteSymlink = false;
2252 			if (path[0] == '/') {
2253 				// we don't need the old directory anymore
2254 				put_vnode(vnode);
2255 
2256 				while (*++path == '/')
2257 					;
2258 
2259 				mutex_lock(&sIOContextRootLock);
2260 				vnode = ioContext->root;
2261 				inc_vnode_ref_count(vnode);
2262 				mutex_unlock(&sIOContextRootLock);
2263 
2264 				absoluteSymlink = true;
2265 			}
2266 
2267 			inc_vnode_ref_count(vnode);
2268 				// balance the next recursion - we will decrement the
2269 				// ref_count of the vnode, no matter if we succeeded or not
2270 
2271 			if (absoluteSymlink && *path == '\0') {
2272 				// symlink was just "/"
2273 				nextVnode = vnode;
2274 			} else {
2275 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2276 					ioContext, &nextVnode, &lastParentID);
2277 			}
2278 
2279 			free(buffer);
2280 
2281 			if (status != B_OK) {
2282 				put_vnode(vnode);
2283 				return status;
2284 			}
2285 		} else
2286 			lastParentID = vnode->id;
2287 
2288 		// decrease the ref count on the old dir we just looked up into
2289 		put_vnode(vnode);
2290 
2291 		path = nextPath;
2292 		vnode = nextVnode;
2293 
2294 		// see if we hit a covered node
2295 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2296 			put_vnode(vnode);
2297 			vnode = coveringNode;
2298 		}
2299 	}
2300 
2301 	*_vnode = vnode;
2302 	if (_parentID)
2303 		*_parentID = lastParentID;
2304 
2305 	return B_OK;
2306 }
2307 
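/*!	Sketch of the in-place component walk in vnode_path_to_vnode() above
	(the buffer contents are made up): given path = "a//b/c", each loop
	iteration terminates the current component and skips duplicate
	slashes:

		pass 1: path = "a", nextPath = "b/c"
		pass 2: path = "b", nextPath = "c"
		pass 3: path = "c", nextPath = ""

	Every pass looks the component up in the current directory vnode and
	continues with the result, resolving symlinks and mount covers on the
	way.
*/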
2308 
2309 static status_t
2310 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2311 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2312 {
2313 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2314 		get_current_io_context(kernel), _vnode, _parentID);
2315 }
2316 
2317 
2318 static status_t
2319 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2320 	ino_t* _parentID, bool kernel)
2321 {
2322 	struct vnode* start = NULL;
2323 
2324 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2325 
2326 	if (!path)
2327 		return B_BAD_VALUE;
2328 
2329 	if (*path == '\0')
2330 		return B_ENTRY_NOT_FOUND;
2331 
2332 	// figure out if we need to start at root or at cwd
2333 	if (*path == '/') {
2334 		if (sRoot == NULL) {
2335 			// we're a bit early, aren't we?
2336 			return B_ERROR;
2337 		}
2338 
2339 		while (*++path == '/')
2340 			;
2341 		start = get_root_vnode(kernel);
2342 
2343 		if (*path == '\0') {
2344 			*_vnode = start;
2345 			return B_OK;
2346 		}
2347 
2348 	} else {
2349 		struct io_context* context = get_current_io_context(kernel);
2350 
2351 		mutex_lock(&context->io_mutex);
2352 		start = context->cwd;
2353 		if (start != NULL)
2354 			inc_vnode_ref_count(start);
2355 		mutex_unlock(&context->io_mutex);
2356 
2357 		if (start == NULL)
2358 			return B_ERROR;
2359 	}
2360 
2361 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2362 		_parentID);
2363 }
2364 
2365 
2366 /*! Returns the vnode of the next-to-last path segment (i.e. the parent
2367 	directory), and returns the last path component in \a filename.
2368 	The path buffer must be able to store at least one additional character.
2369 */
2370 static status_t
2371 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2372 	bool kernel)
2373 {
2374 	status_t status = get_dir_path_and_leaf(path, filename);
2375 	if (status != B_OK)
2376 		return status;
2377 
2378 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2379 }
2380 
2381 
2382 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2383 		   to by a FD + path pair.
2384 
2385 	\a path must be given in either case. \a fd might be omitted, in which
2386 	case \a path is either an absolute path or one relative to the current
2387 	directory. If both are supplied and \a path is relative, it is reckoned
2388 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2389 	is ignored.
2390 
2391 	The caller has the responsibility to call put_vnode() on the returned
2392 	directory vnode.
2393 
2394 	\param fd The FD. May be < 0.
2395 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2396 	       is modified by this function. It must have at least room for a
2397 	       string one character longer than the path it contains.
2398 	\param _vnode A pointer to a variable the directory vnode shall be written
2399 		   into.
2400 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2401 		   the leaf name of the specified entry will be written.
2402 	\param kernel \c true, if invoked from inside the kernel, \c false if
2403 		   invoked from userland.
2404 	\return \c B_OK, if everything went fine, another error code otherwise.
2405 */
2406 static status_t
2407 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2408 	char* filename, bool kernel)
2409 {
2410 	if (!path)
2411 		return B_BAD_VALUE;
2412 	if (*path == '\0')
2413 		return B_ENTRY_NOT_FOUND;
2414 	if (fd < 0)
2415 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2416 
2417 	status_t status = get_dir_path_and_leaf(path, filename);
2418 	if (status != B_OK)
2419 		return status;
2420 
2421 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2422 }
2423 
2424 
2425 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2426 		   to by a vnode + path pair.
2427 
2428 	\a path must be given in either case. \a vnode might be omitted, in which
2429 	case \a path is either an absolute path or one relative to the current
2430 	directory. If both are supplied and \a path is relative, it is reckoned
2431 	off of the directory referred to by \a vnode. If \a path is absolute,
2432 	\a vnode is ignored.
2433 
2434 	The caller has the responsibility to call put_vnode() on the returned
2435 	directory vnode.
2436 
2437 	\param vnode The vnode. May be \c NULL.
2438 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2439 	       is modified by this function. It must have at least room for a
2440 	       string one character longer than the path it contains.
2441 	\param _vnode A pointer to a variable the directory vnode shall be written
2442 		   into.
2443 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2444 		   the leaf name of the specified entry will be written.
2445 	\param kernel \c true, if invoked from inside the kernel, \c false if
2446 		   invoked from userland.
2447 	\return \c B_OK, if everything went fine, another error code otherwise.
2448 */
2449 static status_t
2450 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2451 	struct vnode** _vnode, char* filename, bool kernel)
2452 {
2453 	if (!path)
2454 		return B_BAD_VALUE;
2455 	if (*path == '\0')
2456 		return B_ENTRY_NOT_FOUND;
2457 	if (vnode == NULL || path[0] == '/')
2458 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2459 
2460 	status_t status = get_dir_path_and_leaf(path, filename);
2461 	if (status != B_OK)
2462 		return status;
2463 
2464 	inc_vnode_ref_count(vnode);
2465 		// vnode_path_to_vnode() always decrements the ref count
2466 
2467 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2468 }
2469 
2470 
2471 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2472 */
2473 static status_t
2474 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2475 	size_t bufferSize, struct io_context* ioContext)
2476 {
2477 	if (bufferSize < sizeof(struct dirent))
2478 		return B_BAD_VALUE;
2479 
2480 	// See if the vnode is covering another vnode and move to the covered
2481 	// vnode, so we get to the underlying file system
2482 	VNodePutter vnodePutter;
2483 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2484 		vnode = coveredVnode;
2485 		vnodePutter.SetTo(vnode);
2486 	}
2487 
2488 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2489 		// The FS supports getting the name of a vnode.
2490 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2491 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2492 			return B_OK;
2493 	}
2494 
2495 	// The FS doesn't support getting the name of a vnode. So we search the
2496 	// parent directory for the vnode, if the caller supplied one.
2497 
2498 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2499 		return B_UNSUPPORTED;
2500 
2501 	void* cookie;
2502 
2503 	status_t status = FS_CALL(parent, open_dir, &cookie);
2504 	if (status >= B_OK) {
2505 		while (true) {
2506 			uint32 num = 1;
2507 			// We use the FS hook directly instead of dir_read(), since we don't
2508 			// want the entries to be fixed. We have already resolved vnode to
2509 			// the covered node.
2510 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2511 				&num);
2512 			if (status != B_OK)
2513 				break;
2514 			if (num == 0) {
2515 				status = B_ENTRY_NOT_FOUND;
2516 				break;
2517 			}
2518 
2519 			if (vnode->id == buffer->d_ino) {
2520 				// found correct entry!
2521 				break;
2522 			}
2523 		}
2524 
2525 		FS_CALL(parent, close_dir, cookie);
2526 		FS_CALL(parent, free_dir_cookie, cookie);
2527 	}
2528 	return status;
2529 }
2530 
2531 
2532 static status_t
2533 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2534 	size_t nameSize, bool kernel)
2535 {
2536 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2537 	struct dirent* dirent = (struct dirent*)buffer;
2538 
2539 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2540 		get_current_io_context(kernel));
2541 	if (status != B_OK)
2542 		return status;
2543 
2544 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2545 		return B_BUFFER_OVERFLOW;
2546 
2547 	return B_OK;
2548 }
2549 
2550 
2551 /*!	Gets the full path to a given directory vnode.
2552 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2553 	file system doesn't support this call, it will fall back to iterating
2554 	through the parent directory to get the name of the child.
2555 
2556 	To protect against circular loops, it supports a maximum tree depth
2557 	of 256 levels.
2558 
2559 	Note that the path may no longer be correct by the time this function
2560 	returns! It doesn't use any locking to ensure that the returned path is
2561 	still valid, as paths aren't safe anyway: they can change at any time.
2562 
2563 	It might be a good idea, though, to check in the calling function
2564 	whether the returned path still exists (not done here for efficiency).
2565 */
2566 static status_t
2567 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2568 	bool kernel)
2569 {
2570 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2571 
2572 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2573 		return B_BAD_VALUE;
2574 
2575 	if (!S_ISDIR(vnode->Type()))
2576 		return B_NOT_A_DIRECTORY;
2577 
2578 	char* path = buffer;
2579 	int32 insert = bufferSize;
2580 	int32 maxLevel = 256;
2581 	int32 length;
2582 	status_t status = B_OK;
2583 	struct io_context* ioContext = get_current_io_context(kernel);
2584 
2585 	// we don't use get_vnode() here because this call is more
2586 	// efficient and does all we need from get_vnode()
2587 	inc_vnode_ref_count(vnode);
2588 
2589 	path[--insert] = '\0';
2590 		// the path is filled right to left
2591 
2592 	while (true) {
2593 		// If the node is the context's root, bail out. Otherwise resolve mount
2594 		// points.
2595 		if (vnode == ioContext->root)
2596 			break;
2597 
2598 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2599 			put_vnode(vnode);
2600 			vnode = coveredVnode;
2601 		}
2602 
2603 		// lookup the parent vnode
2604 		struct vnode* parentVnode;
2605 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2606 		if (status != B_OK)
2607 			goto out;
2608 
2609 		if (parentVnode == vnode) {
2610 			// The caller apparently got their hands on a node outside of their
2611 			// context's root. Now we've hit the global root.
2612 			put_vnode(parentVnode);
2613 			break;
2614 		}
2615 
2616 		// get the node's name
2617 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2618 			// also used for fs_read_dir()
2619 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2620 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2621 			sizeof(nameBuffer), ioContext);
2622 
2623 		// release the current vnode, we only need its parent from now on
2624 		put_vnode(vnode);
2625 		vnode = parentVnode;
2626 
2627 		if (status != B_OK)
2628 			goto out;
2629 
2630 		// TODO: add an explicit check for loops in about 10 levels to do
2631 		// real loop detection
2632 
2633 	// don't go deeper than 'maxLevel' to prevent circular loops
2634 		if (maxLevel-- < 0) {
2635 			status = B_LINK_LIMIT;
2636 			goto out;
2637 		}
2638 
2639 		// add the name in front of the current path
2640 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2641 		length = strlen(name);
2642 		insert -= length;
2643 		if (insert <= 0) {
2644 			status = B_RESULT_NOT_REPRESENTABLE;
2645 			goto out;
2646 		}
2647 		memcpy(path + insert, name, length);
2648 		path[--insert] = '/';
2649 	}
2650 
2651 	// the root dir will result in an empty path: fix it
2652 	if (path[insert] == '\0')
2653 		path[--insert] = '/';
2654 
2655 	TRACE(("  path is: %s\n", path + insert));
2656 
2657 	// move the path to the start of the buffer
2658 	length = bufferSize - insert;
2659 	memmove(buffer, path + insert, length);
2660 
2661 out:
2662 	put_vnode(vnode);
2663 	return status;
2664 }
2665 
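/*!	Sketch of the right-to-left buffer fill used by dir_vnode_to_path()
	above, with a made-up bufferSize of 16 and the components "home" and
	"boot" being prepended ('.' marks unused bytes):

		"...............\0"	insert == 15
		".........../home\0" becomes the tail at insert == 10
		"...../boot/home\0"	insert ==  5, after prepending "boot"

	The final memmove() then shifts "/boot/home" to the buffer start.
*/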
2666 
2667 /*!	Checks the length of every path component, and adds a '.'
2668 	if the path ends in a slash.
2669 	The given path buffer must be able to store at least one
2670 	additional character.
2671 */
2672 static status_t
2673 check_path(char* to)
2674 {
2675 	int32 length = 0;
2676 
2677 	// check length of every path component
2678 
2679 	while (*to) {
2680 		char* begin;
2681 		if (*to == '/')
2682 			to++, length++;
2683 
2684 		begin = to;
2685 		while (*to != '/' && *to)
2686 			to++, length++;
2687 
2688 		if (to - begin > B_FILE_NAME_LENGTH)
2689 			return B_NAME_TOO_LONG;
2690 	}
2691 
2692 	if (length == 0)
2693 		return B_ENTRY_NOT_FOUND;
2694 
2695 	// complete path if there is a slash at the end
2696 
2697 	if (*(to - 1) == '/') {
2698 		if (length > B_PATH_NAME_LENGTH - 2)
2699 			return B_NAME_TOO_LONG;
2700 
2701 		to[0] = '.';
2702 		to[1] = '\0';
2703 	}
2704 
2705 	return B_OK;
2706 }
2707 
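/*!	Illustrative examples for check_path() above (made-up paths):

		"foo/bar"  -> unchanged, returns B_OK
		"foo/bar/" -> completed to "foo/bar/.", returns B_OK
		""         -> returns B_ENTRY_NOT_FOUND
		any component longer than B_FILE_NAME_LENGTH -> B_NAME_TOO_LONG
*/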
2708 
2709 static struct file_descriptor*
2710 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2711 {
2712 	struct file_descriptor* descriptor
2713 		= get_fd(get_current_io_context(kernel), fd);
2714 	if (descriptor == NULL)
2715 		return NULL;
2716 
2717 	struct vnode* vnode = fd_vnode(descriptor);
2718 	if (vnode == NULL) {
2719 		put_fd(descriptor);
2720 		return NULL;
2721 	}
2722 
2723 	// TODO: when we can close a file descriptor at any point, investigate
2724 	//	if this is still valid to do (accessing the vnode without ref_count
2725 	//	or locking)
2726 	*_vnode = vnode;
2727 	return descriptor;
2728 }
2729 
2730 
2731 static struct vnode*
2732 get_vnode_from_fd(int fd, bool kernel)
2733 {
2734 	struct file_descriptor* descriptor;
2735 	struct vnode* vnode;
2736 
2737 	descriptor = get_fd(get_current_io_context(kernel), fd);
2738 	if (descriptor == NULL)
2739 		return NULL;
2740 
2741 	vnode = fd_vnode(descriptor);
2742 	if (vnode != NULL)
2743 		inc_vnode_ref_count(vnode);
2744 
2745 	put_fd(descriptor);
2746 	return vnode;
2747 }
2748 
2749 
2750 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2751 	only the path will be considered. In this case, the \a path must not be
2752 	NULL.
2753 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2754 	and should be NULL for files.
2755 */
2756 static status_t
2757 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2758 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2759 {
2760 	if (fd < 0 && !path)
2761 		return B_BAD_VALUE;
2762 
2763 	if (path != NULL && *path == '\0')
2764 		return B_ENTRY_NOT_FOUND;
2765 
2766 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2767 		// no FD or absolute path
2768 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2769 	}
2770 
2771 	// FD only, or FD + relative path
2772 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2773 	if (!vnode)
2774 		return B_FILE_ERROR;
2775 
2776 	if (path != NULL) {
2777 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2778 			_vnode, _parentID);
2779 	}
2780 
2781 	// there is no relative path to take into account
2782 
2783 	*_vnode = vnode;
2784 	if (_parentID)
2785 		*_parentID = -1;
2786 
2787 	return B_OK;
2788 }
2789 
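/*!	Illustrative decision table for fd_and_path_to_vnode() above (made-up
	arguments):

		fd < 0,  path "/a/b" -> resolved from the root
		fd < 0,  path "a/b"  -> resolved relative to the cwd
		fd >= 0, path "/a/b" -> resolved from the root, \a fd is ignored
		fd >= 0, path "a/b"  -> resolved relative to the FD's vnode
		fd >= 0, path NULL   -> the FD's own vnode is returned
*/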
2790 
2791 static int
2792 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2793 	void* cookie, int openMode, bool kernel)
2794 {
2795 	struct file_descriptor* descriptor;
2796 	int fd;
2797 
2798 	// If the vnode is locked, we don't allow creating a new file/directory
2799 	// file_descriptor for it
2800 	if (vnode && vnode->mandatory_locked_by != NULL
2801 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2802 		return B_BUSY;
2803 
2804 	descriptor = alloc_fd();
2805 	if (!descriptor)
2806 		return B_NO_MEMORY;
2807 
2808 	if (vnode)
2809 		descriptor->u.vnode = vnode;
2810 	else
2811 		descriptor->u.mount = mount;
2812 	descriptor->cookie = cookie;
2813 
2814 	switch (type) {
2815 		// vnode types
2816 		case FDTYPE_FILE:
2817 			descriptor->ops = &sFileOps;
2818 			break;
2819 		case FDTYPE_DIR:
2820 			descriptor->ops = &sDirectoryOps;
2821 			break;
2822 		case FDTYPE_ATTR:
2823 			descriptor->ops = &sAttributeOps;
2824 			break;
2825 		case FDTYPE_ATTR_DIR:
2826 			descriptor->ops = &sAttributeDirectoryOps;
2827 			break;
2828 
2829 		// mount types
2830 		case FDTYPE_INDEX_DIR:
2831 			descriptor->ops = &sIndexDirectoryOps;
2832 			break;
2833 		case FDTYPE_QUERY:
2834 			descriptor->ops = &sQueryOps;
2835 			break;
2836 
2837 		default:
2838 			panic("get_new_fd() called with unknown type %d\n", type);
2839 			break;
2840 	}
2841 	descriptor->type = type;
2842 	descriptor->open_mode = openMode;
2843 
2844 	io_context* context = get_current_io_context(kernel);
2845 	fd = new_fd(context, descriptor);
2846 	if (fd < 0) {
2847 		free(descriptor);
2848 		return B_NO_MORE_FDS;
2849 	}
2850 
2851 	mutex_lock(&context->io_mutex);
2852 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2853 	mutex_unlock(&context->io_mutex);
2854 
2855 	return fd;
2856 }
2857 
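/*!	Usage sketch for get_new_fd() above (simplified, error paths elided):
	after an FS open hook has produced a cookie, it is wrapped into a new
	FD. If get_new_fd() fails, the caller still owns the cookie and the
	vnode reference and has to release both itself.

		void* cookie;
		if (FS_CALL(vnode, open, openMode, &cookie) == B_OK) {
			int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie,
				openMode, kernel);
			// fd < 0 is an error code; close the cookie and put the
			// vnode in that case
		}
*/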
2858 
2859 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2860 	vfs_normalize_path(). See there for more documentation.
2861 */
2862 static status_t
2863 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2864 {
2865 	VNodePutter dirPutter;
2866 	struct vnode* dir = NULL;
2867 	status_t error;
2868 
2869 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2870 		// get dir vnode + leaf name
2871 		struct vnode* nextDir;
2872 		char leaf[B_FILE_NAME_LENGTH];
2873 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2874 		if (error != B_OK)
2875 			return error;
2876 
2877 		dir = nextDir;
2878 		strcpy(path, leaf);
2879 		dirPutter.SetTo(dir);
2880 
2881 		// get file vnode, if we shall resolve links
2882 		bool fileExists = false;
2883 		struct vnode* fileVnode;
2884 		VNodePutter fileVnodePutter;
2885 		if (traverseLink) {
2886 			inc_vnode_ref_count(dir);
2887 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2888 					NULL) == B_OK) {
2889 				fileVnodePutter.SetTo(fileVnode);
2890 				fileExists = true;
2891 			}
2892 		}
2893 
2894 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2895 			// we're done -- construct the path
2896 			bool hasLeaf = true;
2897 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2898 				// special cases "." and ".." -- get the dir, forget the leaf
2899 				inc_vnode_ref_count(dir);
2900 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2901 					&nextDir, NULL);
2902 				if (error != B_OK)
2903 					return error;
2904 				dir = nextDir;
2905 				dirPutter.SetTo(dir);
2906 				hasLeaf = false;
2907 			}
2908 
2909 			// get the directory path
2910 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2911 			if (error != B_OK)
2912 				return error;
2913 
2914 			// append the leaf name
2915 			if (hasLeaf) {
2916 				// insert a directory separator if this is not the file system
2917 				// root
2918 				if ((strcmp(path, "/") != 0
2919 					&& strlcat(path, "/", pathSize) >= pathSize)
2920 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2921 					return B_NAME_TOO_LONG;
2922 				}
2923 			}
2924 
2925 			return B_OK;
2926 		}
2927 
2928 		// read link
2929 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2930 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2931 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2932 			if (error != B_OK)
2933 				return error;
2934 			path[bufferSize] = '\0';
2935 		} else
2936 			return B_BAD_VALUE;
2937 	}
2938 
2939 	return B_LINK_LIMIT;
2940 }
2941 
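/*!	Illustrative example for normalize_path() above (made-up path,
	assuming no symlinks are involved): the buffer is rewritten in place
	to the canonical absolute path.

		char path[B_PATH_NAME_LENGTH] = "/boot/home//Desktop/../file";
		if (normalize_path(path, sizeof(path), true, true) == B_OK) {
			// path is now "/boot/home/file"
		}
*/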
2942 
2943 #ifdef ADD_DEBUGGER_COMMANDS
2944 
2945 
2946 static void
2947 _dump_advisory_locking(advisory_locking* locking)
2948 {
2949 	if (locking == NULL)
2950 		return;
2951 
2952 	kprintf("   lock:        %" B_PRId32, locking->lock);
2953 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2954 
2955 	int32 index = 0;
2956 	LockList::Iterator iterator = locking->locks.GetIterator();
2957 	while (iterator.HasNext()) {
2958 		struct advisory_lock* lock = iterator.Next();
2959 
2960 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2961 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2962 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2963 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2964 	}
2965 }
2966 
2967 
2968 static void
2969 _dump_mount(struct fs_mount* mount)
2970 {
2971 	kprintf("MOUNT: %p\n", mount);
2972 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
2973 	kprintf(" device_name:   %s\n", mount->device_name);
2974 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2975 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2976 	kprintf(" partition:     %p\n", mount->partition);
2977 	kprintf(" lock:          %p\n", &mount->rlock);
2978 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2979 		mount->owns_file_device ? " owns_file_device" : "");
2980 
2981 	fs_volume* volume = mount->volume;
2982 	while (volume != NULL) {
2983 		kprintf(" volume %p:\n", volume);
2984 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
2985 		kprintf("  private_volume:   %p\n", volume->private_volume);
2986 		kprintf("  ops:              %p\n", volume->ops);
2987 		kprintf("  file_system:      %p\n", volume->file_system);
2988 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2989 		volume = volume->super_volume;
2990 	}
2991 
2992 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2993 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2994 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
2995 	set_debug_variable("_partition", (addr_t)mount->partition);
2996 }
2997 
2998 
2999 static bool
3000 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3001 	const char* name)
3002 {
3003 	bool insertSlash = buffer[bufferSize] != '\0';
3004 	size_t nameLength = strlen(name);
3005 
3006 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3007 		return false;
3008 
3009 	if (insertSlash)
3010 		buffer[--bufferSize] = '/';
3011 
3012 	bufferSize -= nameLength;
3013 	memcpy(buffer + bufferSize, name, nameLength);
3014 
3015 	return true;
3016 }
3017 
3018 
3019 static bool
3020 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3021 	ino_t nodeID)
3022 {
3023 	if (bufferSize == 0)
3024 		return false;
3025 
3026 	bool insertSlash = buffer[bufferSize] != '\0';
3027 	if (insertSlash)
3028 		buffer[--bufferSize] = '/';
3029 
3030 	size_t size = snprintf(buffer, bufferSize,
3031 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3032 	if (size >= bufferSize) {
3033 		if (insertSlash)
3034 			bufferSize++;
3035 		return false;
3036 	}
3037 
3038 	if (size < bufferSize)
3039 		memmove(buffer + bufferSize - size, buffer, size);
3040 
3041 	bufferSize -= size;
3042 	return true;
3043 }
3044 
3045 
3046 static char*
3047 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3048 	bool& _truncated)
3049 {
3050 	// null-terminate the path
3051 	buffer[--bufferSize] = '\0';
3052 
3053 	while (true) {
3054 		while (vnode->covers != NULL)
3055 			vnode = vnode->covers;
3056 
3057 		if (vnode == sRoot) {
3058 			_truncated = bufferSize == 0;
3059 			if (!_truncated)
3060 				buffer[--bufferSize] = '/';
3061 			return buffer + bufferSize;
3062 		}
3063 
3064 		// resolve the name
3065 		ino_t dirID;
3066 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3067 			vnode->id, dirID);
3068 		if (name == NULL) {
3069 			// Failed to resolve the name -- prepend "<dev,node>/".
3070 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3071 				vnode->mount->id, vnode->id);
3072 			return buffer + bufferSize;
3073 		}
3074 
3075 		// prepend the name
3076 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3077 			_truncated = true;
3078 			return buffer + bufferSize;
3079 		}
3080 
3081 		// resolve the directory node
3082 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3083 		if (nextVnode == NULL) {
3084 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3085 				vnode->mount->id, dirID);
3086 			return buffer + bufferSize;
3087 		}
3088 
3089 		vnode = nextVnode;
3090 	}
3091 }
3092 
3093 
3094 static void
3095 _dump_vnode(struct vnode* vnode, bool printPath)
3096 {
3097 	kprintf("VNODE: %p\n", vnode);
3098 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3099 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3100 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3101 	kprintf(" private_node:  %p\n", vnode->private_node);
3102 	kprintf(" mount:         %p\n", vnode->mount);
3103 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3104 	kprintf(" covers:        %p\n", vnode->covers);
3105 	kprintf(" cache:         %p\n", vnode->cache);
3106 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3107 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3108 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3109 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3110 
3111 	_dump_advisory_locking(vnode->advisory_locking);
3112 
3113 	if (printPath) {
3114 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3115 		if (buffer != NULL) {
3116 			bool truncated;
3117 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3118 				B_PATH_NAME_LENGTH, truncated);
3119 			if (path != NULL) {
3120 				kprintf(" path:          ");
3121 				if (truncated)
3122 					kputs("<truncated>/");
3123 				kputs(path);
3124 				kputs("\n");
3125 			} else
3126 				kprintf("Failed to resolve vnode path.\n");
3127 
3128 			debug_free(buffer);
3129 		} else
3130 			kprintf("Failed to allocate memory for constructing the path.\n");
3131 	}
3132 
3133 	set_debug_variable("_node", (addr_t)vnode->private_node);
3134 	set_debug_variable("_mount", (addr_t)vnode->mount);
3135 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3136 	set_debug_variable("_covers", (addr_t)vnode->covers);
3137 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3138 }
3139 
3140 
3141 static int
3142 dump_mount(int argc, char** argv)
3143 {
3144 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3145 		kprintf("usage: %s [id|address]\n", argv[0]);
3146 		return 0;
3147 	}
3148 
3149 	ulong val = parse_expression(argv[1]);
3150 	uint32 id = val;
3151 
3152 	struct fs_mount* mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3153 	if (mount == NULL) {
3154 		if (IS_USER_ADDRESS(id)) {
3155 			kprintf("fs_mount not found\n");
3156 			return 0;
3157 		}
3158 		mount = (fs_mount*)val;
3159 	}
3160 
3161 	_dump_mount(mount);
3162 	return 0;
3163 }
3164 
3165 
3166 static int
3167 dump_mounts(int argc, char** argv)
3168 {
3169 	if (argc != 1) {
3170 		kprintf("usage: %s\n", argv[0]);
3171 		return 0;
3172 	}
3173 
3174 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3175 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3176 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3177 
3178 	struct hash_iterator iterator;
3179 	struct fs_mount* mount;
3180 
3181 	hash_open(sMountsTable, &iterator);
3182 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3183 			!= NULL) {
3184 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3185 			mount->root_vnode->covers, mount->volume->private_volume,
3186 			mount->volume->file_system_name);
3187 
3188 		fs_volume* volume = mount->volume;
3189 		while (volume->super_volume != NULL) {
3190 			volume = volume->super_volume;
3191 			kprintf("                                     %p %s\n",
3192 				volume->private_volume, volume->file_system_name);
3193 		}
3194 	}
3195 
3196 	hash_close(sMountsTable, &iterator, false);
3197 	return 0;
3198 }
3199 
3200 
3201 static int
3202 dump_vnode(int argc, char** argv)
3203 {
3204 	bool printPath = false;
3205 	int argi = 1;
3206 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3207 		printPath = true;
3208 		argi++;
3209 	}
3210 
3211 	if (argi >= argc || argi + 2 < argc) {
3212 		print_debugger_command_usage(argv[0]);
3213 		return 0;
3214 	}
3215 
3216 	struct vnode* vnode = NULL;
3217 
3218 	if (argi + 1 == argc) {
3219 		vnode = (struct vnode*)parse_expression(argv[argi]);
3220 		if (IS_USER_ADDRESS(vnode)) {
3221 			kprintf("invalid vnode address\n");
3222 			return 0;
3223 		}
3224 		_dump_vnode(vnode, printPath);
3225 		return 0;
3226 	}
3227 
3228 	struct hash_iterator iterator;
3229 	dev_t device = parse_expression(argv[argi]);
3230 	ino_t id = parse_expression(argv[argi + 1]);
3231 
3232 	hash_open(sVnodeTable, &iterator);
3233 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3234 		if (vnode->id != id || vnode->device != device)
3235 			continue;
3236 
3237 		_dump_vnode(vnode, printPath);
3238 	}
3239 
3240 	hash_close(sVnodeTable, &iterator, false);
3241 	return 0;
3242 }
3243 
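/*!	KDL usage sketch for the command above (assuming it is registered as
	"vnode" elsewhere in this file; the address and IDs are made up):

		vnode 0x82f3c000	dump the vnode at that address
		vnode -p 0x82f3c000	additionally resolve and print its path
		vnode 3 271		dump the vnode with device 3 and node ID 271
*/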
3244 
3245 static int
3246 dump_vnodes(int argc, char** argv)
3247 {
3248 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3249 		kprintf("usage: %s [device]\n", argv[0]);
3250 		return 0;
3251 	}
3252 
3253 	// restrict dumped nodes to a certain device if requested
3254 	dev_t device = parse_expression(argv[1]);
3255 
3256 	struct hash_iterator iterator;
3257 	struct vnode* vnode;
3258 
3259 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3260 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3261 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3262 
3263 	hash_open(sVnodeTable, &iterator);
3264 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3265 		if (vnode->device != device)
3266 			continue;
3267 
3268 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3269 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3270 			vnode->private_node, vnode->advisory_locking,
3271 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3272 			vnode->IsUnpublished() ? "u" : "-");
3273 	}
3274 
3275 	hash_close(sVnodeTable, &iterator, false);
3276 	return 0;
3277 }
3278 
3279 
3280 static int
3281 dump_vnode_caches(int argc, char** argv)
3282 {
3283 	struct hash_iterator iterator;
3284 	struct vnode* vnode;
3285 
3286 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3287 		kprintf("usage: %s [device]\n", argv[0]);
3288 		return 0;
3289 	}
3290 
3291 	// restrict dumped nodes to a certain device if requested
3292 	dev_t device = -1;
3293 	if (argc > 1)
3294 		device = parse_expression(argv[1]);
3295 
3296 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3297 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3298 
3299 	hash_open(sVnodeTable, &iterator);
3300 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3301 		if (vnode->cache == NULL)
3302 			continue;
3303 		if (device != -1 && vnode->device != device)
3304 			continue;
3305 
3306 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3307 			vnode, vnode->device, vnode->id, vnode->cache,
3308 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3309 			vnode->cache->page_count);
3310 	}
3311 
3312 	hash_close(sVnodeTable, &iterator, false);
3313 	return 0;
3314 }
3315 
3316 
3317 int
3318 dump_io_context(int argc, char** argv)
3319 {
3320 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3321 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3322 		return 0;
3323 	}
3324 
3325 	struct io_context* context = NULL;
3326 
3327 	if (argc > 1) {
3328 		ulong num = parse_expression(argv[1]);
3329 		if (IS_KERNEL_ADDRESS(num))
3330 			context = (struct io_context*)num;
3331 		else {
3332 			Team* team = team_get_team_struct_locked(num);
3333 			if (team == NULL) {
3334 				kprintf("could not find team with ID %lu\n", num);
3335 				return 0;
3336 			}
3337 			context = (struct io_context*)team->io_context;
3338 		}
3339 	} else
3340 		context = get_current_io_context(true);
3341 
3342 	kprintf("I/O CONTEXT: %p\n", context);
3343 	kprintf(" root vnode:\t%p\n", context->root);
3344 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3345 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3346 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3347 
3348 	if (context->num_used_fds) {
3349 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3350 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3351 	}
3352 
3353 	for (uint32 i = 0; i < context->table_size; i++) {
3354 		struct file_descriptor* fd = context->fds[i];
3355 		if (fd == NULL)
3356 			continue;
3357 
3358 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3359 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3360 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3361 			fd->pos, fd->cookie,
3362 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3363 				? "mount" : "vnode",
3364 			fd->u.vnode);
3365 	}
3366 
3367 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3368 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3369 
3370 	set_debug_variable("_cwd", (addr_t)context->cwd);
3371 
3372 	return 0;
3373 }
3374 
3375 
3376 int
3377 dump_vnode_usage(int argc, char** argv)
3378 {
3379 	if (argc != 1) {
3380 		kprintf("usage: %s\n", argv[0]);
3381 		return 0;
3382 	}
3383 
3384 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3385 		sUnusedVnodes, kMaxUnusedVnodes);
3386 
3387 	struct hash_iterator iterator;
3388 	hash_open(sVnodeTable, &iterator);
3389 
3390 	uint32 count = 0;
3391 	struct vnode* vnode;
3392 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3393 		count++;
3394 	}
3395 
3396 	hash_close(sVnodeTable, &iterator, false);
3397 
3398 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3399 		count - sUnusedVnodes);
3400 	return 0;
3401 }
3402 
3403 #endif	// ADD_DEBUGGER_COMMANDS
3404 
3405 
3406 /*!	Clears memory specified by an iovec array.
3407 */
3408 static void
3409 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3410 {
3411 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3412 		size_t length = std::min(vecs[i].iov_len, bytes);
3413 		memset(vecs[i].iov_base, 0, length);
3414 		bytes -= length;
3415 	}
3416 }
3417 
3418 
3419 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3420 	and calls the file system hooks to read/write the request to disk.
3421 */
3422 static status_t
3423 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3424 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3425 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3426 	bool doWrite)
3427 {
3428 	if (fileVecCount == 0) {
3429 		// There are no file vecs at this offset, so we're obviously trying
3430 		// to access the file outside of its bounds
3431 		return B_BAD_VALUE;
3432 	}
3433 
3434 	size_t numBytes = *_numBytes;
3435 	uint32 fileVecIndex;
3436 	size_t vecOffset = *_vecOffset;
3437 	uint32 vecIndex = *_vecIndex;
3438 	status_t status;
3439 	size_t size;
3440 
3441 	if (!doWrite && vecOffset == 0) {
3442 		// now directly read the data from the device
3443 		// the first file_io_vec can be read directly
3444 
3445 		if (fileVecs[0].length < (off_t)numBytes)
3446 			size = fileVecs[0].length;
3447 		else
3448 			size = numBytes;
3449 
3450 		if (fileVecs[0].offset >= 0) {
3451 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3452 				&vecs[vecIndex], vecCount - vecIndex, &size);
3453 		} else {
3454 			// sparse read
3455 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3456 			status = B_OK;
3457 		}
3458 		if (status != B_OK)
3459 			return status;
3460 
3461 		// TODO: this is a work-around for buggy device drivers!
3462 		//	When our own drivers honour the length, we can:
3463 		//	a) also use this direct I/O for writes (otherwise, it would
3464 		//	   overwrite precious data)
3465 		//	b) panic if the term below is true (at least for writes)
3466 		if ((off_t)size > fileVecs[0].length) {
3467 			//dprintf("warning: device driver %p doesn't respect total length "
3468 			//	"in read_pages() call!\n", ref->device);
3469 			size = fileVecs[0].length;
3470 		}
3471 
3472 		ASSERT((off_t)size <= fileVecs[0].length);
3473 
3474 		// If the file portion was contiguous, we're already done now
3475 		if (size == numBytes)
3476 			return B_OK;
3477 
3478 		// if we reached the end of the file, we can return as well
3479 		if ((off_t)size != fileVecs[0].length) {
3480 			*_numBytes = size;
3481 			return B_OK;
3482 		}
3483 
3484 		fileVecIndex = 1;
3485 
3486 		// first, find out where we have to continue in our iovecs
3487 		for (; vecIndex < vecCount; vecIndex++) {
3488 			if (size < vecs[vecIndex].iov_len)
3489 				break;
3490 
3491 			size -= vecs[vecIndex].iov_len;
3492 		}
3493 
3494 		vecOffset = size;
3495 	} else {
3496 		fileVecIndex = 0;
3497 		size = 0;
3498 	}
3499 
3500 	// Too bad, let's process the rest of the file_io_vecs
3501 
3502 	size_t totalSize = size;
3503 	size_t bytesLeft = numBytes - size;
3504 
3505 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3506 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3507 		off_t fileOffset = fileVec.offset;
3508 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3509 
3510 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3511 			fileLeft));
3512 
3513 		// process the complete fileVec
3514 		while (fileLeft > 0) {
3515 			iovec tempVecs[MAX_TEMP_IO_VECS];
3516 			uint32 tempCount = 0;
3517 
3518 			// size tracks how much of what is left of the current fileVec
3519 			// (fileLeft) has been assigned to tempVecs
3520 			size = 0;
3521 
3522 			// assign what is left of the current fileVec to the tempVecs
3523 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3524 					&& tempCount < MAX_TEMP_IO_VECS;) {
3525 				// try to satisfy one iovec per iteration (or as much as
3526 				// possible)
3527 
3528 				// bytes left of the current iovec
3529 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3530 				if (vecLeft == 0) {
3531 					vecOffset = 0;
3532 					vecIndex++;
3533 					continue;
3534 				}
3535 
3536 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3537 					vecIndex, vecOffset, size));
3538 
3539 				// actually available bytes
3540 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3541 
3542 				tempVecs[tempCount].iov_base
3543 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3544 				tempVecs[tempCount].iov_len = tempVecSize;
3545 				tempCount++;
3546 
3547 				size += tempVecSize;
3548 				vecOffset += tempVecSize;
3549 			}
3550 
3551 			size_t bytes = size;
3552 
3553 			if (fileOffset == -1) {
3554 				if (doWrite) {
3555 					panic("sparse write attempt: vnode %p", vnode);
3556 					status = B_IO_ERROR;
3557 				} else {
3558 					// sparse read
3559 					zero_iovecs(tempVecs, tempCount, bytes);
3560 					status = B_OK;
3561 				}
3562 			} else if (doWrite) {
3563 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3564 					tempVecs, tempCount, &bytes);
3565 			} else {
3566 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3567 					tempVecs, tempCount, &bytes);
3568 			}
3569 			if (status != B_OK)
3570 				return status;
3571 
3572 			totalSize += bytes;
3573 			bytesLeft -= size;
3574 			if (fileOffset >= 0)
3575 				fileOffset += size;
3576 			fileLeft -= size;
3577 			//dprintf("-> file left = %Lu\n", fileLeft);
3578 
3579 			if (size != bytes || vecIndex >= vecCount) {
3580 				// there are no more bytes or iovecs, let's bail out
3581 				*_numBytes = totalSize;
3582 				return B_OK;
3583 			}
3584 		}
3585 	}
3586 
3587 	*_vecIndex = vecIndex;
3588 	*_vecOffset = vecOffset;
3589 	*_numBytes = totalSize;
3590 	return B_OK;
3591 }
3592 
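/*!	Illustrative sketch for common_file_io_vec_pages() above: a
	file_io_vec array describing a file range that is partly sparse (the
	offsets and lengths are made up). The first run is transferred via
	the read_pages()/write_pages() hooks, while the sparse run (offset
	-1) reads back as zeros and triggers a panic when written to.

		file_io_vec fileVecs[] = {
			{ 1024, 4096 },	// on-disk run at device offset 1024
			{ -1, 8192 }	// sparse run: zero-filled on read
		};
*/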
3593 
3594 static bool
3595 is_user_in_group(gid_t gid)
3596 {
3597 	if (gid == getegid())
3598 		return true;
3599 
3600 	gid_t groups[NGROUPS_MAX];
3601 	int groupCount = getgroups(NGROUPS_MAX, groups);
3602 	for (int i = 0; i < groupCount; i++) {
3603 		if (gid == groups[i])
3604 			return true;
3605 	}
3606 
3607 	return false;
3608 }
3609 
3610 
3611 //	#pragma mark - public API for file systems
3612 
3613 
3614 extern "C" status_t
3615 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3616 	fs_vnode_ops* ops)
3617 {
3618 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3619 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3620 
3621 	if (privateNode == NULL)
3622 		return B_BAD_VALUE;
3623 
3624 	// create the node
3625 	bool nodeCreated;
3626 	struct vnode* vnode;
3627 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3628 		nodeCreated);
3629 	if (status != B_OK)
3630 		return status;
3631 
3632 	WriteLocker nodeLocker(sVnodeLock, true);
3633 		// create_new_vnode_and_lock() has locked for us
3634 
3635 	// file system integrity check:
3636 	// test if the vnode already exists and bail out if this is the case!
3637 	if (!nodeCreated) {
3638 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3639 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3640 			vnode->private_node);
3641 		return B_ERROR;
3642 	}
3643 
3644 	vnode->private_node = privateNode;
3645 	vnode->ops = ops;
3646 	vnode->SetUnpublished(true);
3647 
3648 	TRACE(("returns: %s\n", strerror(status)));
3649 
3650 	return status;
3651 }
3652 
3653 
3654 extern "C" status_t
3655 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3656 	fs_vnode_ops* ops, int type, uint32 flags)
3657 {
3658 	FUNCTION(("publish_vnode()\n"));
3659 
3660 	WriteLocker locker(sVnodeLock);
3661 
3662 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3663 
3664 	bool nodeCreated = false;
3665 	if (vnode == NULL) {
3666 		if (privateNode == NULL)
3667 			return B_BAD_VALUE;
3668 
3669 		// create the node
3670 		locker.Unlock();
3671 			// create_new_vnode_and_lock() will re-lock for us on success
3672 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3673 			nodeCreated);
3674 		if (status != B_OK)
3675 			return status;
3676 
3677 		locker.SetTo(sVnodeLock, true);
3678 	}
3679 
3680 	if (nodeCreated) {
3681 		vnode->private_node = privateNode;
3682 		vnode->ops = ops;
3683 		vnode->SetUnpublished(true);
3684 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3685 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3686 		// already known, but not published
3687 	} else
3688 		return B_BAD_VALUE;
3689 
3690 	bool publishSpecialSubNode = false;
3691 
3692 	vnode->SetType(type);
3693 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3694 	publishSpecialSubNode = is_special_node_type(type)
3695 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3696 
3697 	status_t status = B_OK;
3698 
3699 	// create sub vnodes, if necessary
3700 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3701 		locker.Unlock();
3702 
3703 		fs_volume* subVolume = volume;
3704 		if (volume->sub_volume != NULL) {
3705 			while (status == B_OK && subVolume->sub_volume != NULL) {
3706 				subVolume = subVolume->sub_volume;
3707 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3708 					vnode);
3709 			}
3710 		}
3711 
3712 		if (status == B_OK && publishSpecialSubNode)
3713 			status = create_special_sub_node(vnode, flags);
3714 
3715 		if (status != B_OK) {
3716 			// error -- clean up the created sub vnodes
3717 			while (subVolume->super_volume != volume) {
3718 				subVolume = subVolume->super_volume;
3719 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3720 			}
3721 		}
3722 
3723 		if (status == B_OK) {
3724 			ReadLocker vnodesReadLocker(sVnodeLock);
3725 			AutoLocker<Vnode> nodeLocker(vnode);
3726 			vnode->SetBusy(false);
3727 			vnode->SetUnpublished(false);
3728 		} else {
3729 			locker.Lock();
3730 			hash_remove(sVnodeTable, vnode);
3731 			remove_vnode_from_mount_list(vnode, vnode->mount);
3732 			free(vnode);
3733 		}
3734 	} else {
3735 		// we still hold the write lock -- mark the node unbusy and published
3736 		vnode->SetBusy(false);
3737 		vnode->SetUnpublished(false);
3738 	}
3739 
3740 	TRACE(("returns: %s\n", strerror(status)));
3741 
3742 	return status;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3748 {
3749 	struct vnode* vnode;
3750 
3751 	if (volume == NULL)
3752 		return B_BAD_VALUE;
3753 
3754 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3755 	if (status != B_OK)
3756 		return status;
3757 
3758 	// If this is a layered FS, we need to get the node cookie for the requested
3759 	// layer.
3760 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3761 		fs_vnode resolvedNode;
3762 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3763 			&resolvedNode);
3764 		if (status != B_OK) {
3765 			panic("get_vnode(): Failed to get super node for vnode %p, "
3766 				"volume: %p", vnode, volume);
3767 			put_vnode(vnode);
3768 			return status;
3769 		}
3770 
3771 		if (_privateNode != NULL)
3772 			*_privateNode = resolvedNode.private_node;
3773 	} else if (_privateNode != NULL)
3774 		*_privateNode = vnode->private_node;
3775 
3776 	return B_OK;
3777 }
3778 
3779 
3780 extern "C" status_t
3781 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3782 {
3783 	struct vnode* vnode;
3784 
3785 	rw_lock_read_lock(&sVnodeLock);
3786 	vnode = lookup_vnode(volume->id, vnodeID);
3787 	rw_lock_read_unlock(&sVnodeLock);
3788 
3789 	if (vnode == NULL)
3790 		return B_BAD_VALUE;
3791 
3792 	inc_vnode_ref_count(vnode);
3793 	return B_OK;
3794 }
3795 
3796 
3797 extern "C" status_t
3798 put_vnode(fs_volume* volume, ino_t vnodeID)
3799 {
3800 	struct vnode* vnode;
3801 
3802 	rw_lock_read_lock(&sVnodeLock);
3803 	vnode = lookup_vnode(volume->id, vnodeID);
3804 	rw_lock_read_unlock(&sVnodeLock);
3805 
3806 	if (vnode == NULL)
3807 		return B_BAD_VALUE;
3808 
3809 	dec_vnode_ref_count(vnode, false, true);
3810 	return B_OK;
3811 }
3812 
3813 
3814 extern "C" status_t
3815 remove_vnode(fs_volume* volume, ino_t vnodeID)
3816 {
3817 	ReadLocker locker(sVnodeLock);
3818 
3819 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3820 	if (vnode == NULL)
3821 		return B_ENTRY_NOT_FOUND;
3822 
3823 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3824 		// this vnode is in use
3825 		return B_BUSY;
3826 	}
3827 
3828 	vnode->Lock();
3829 
3830 	vnode->SetRemoved(true);
3831 	bool removeUnpublished = false;
3832 
3833 	if (vnode->IsUnpublished()) {
3834 		// prepare the vnode for deletion
3835 		removeUnpublished = true;
3836 		vnode->SetBusy(true);
3837 	}
3838 
3839 	vnode->Unlock();
3840 	locker.Unlock();
3841 
3842 	if (removeUnpublished) {
3843 		// If the vnode hasn't been published yet, we delete it here
3844 		atomic_add(&vnode->ref_count, -1);
3845 		free_vnode(vnode, true);
3846 	}
3847 
3848 	return B_OK;
3849 }
3850 
3851 
3852 extern "C" status_t
3853 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3854 {
3855 	struct vnode* vnode;
3856 
3857 	rw_lock_read_lock(&sVnodeLock);
3858 
3859 	vnode = lookup_vnode(volume->id, vnodeID);
3860 	if (vnode) {
3861 		AutoLocker<Vnode> nodeLocker(vnode);
3862 		vnode->SetRemoved(false);
3863 	}
3864 
3865 	rw_lock_read_unlock(&sVnodeLock);
3866 	return B_OK;
3867 }
3868 
3869 
3870 extern "C" status_t
3871 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3872 {
3873 	ReadLocker _(sVnodeLock);
3874 
3875 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3876 		if (_removed != NULL)
3877 			*_removed = vnode->IsRemoved();
3878 		return B_OK;
3879 	}
3880 
3881 	return B_BAD_VALUE;
3882 }
3883 
3884 
3885 extern "C" fs_volume*
3886 volume_for_vnode(fs_vnode* _vnode)
3887 {
3888 	if (_vnode == NULL)
3889 		return NULL;
3890 
3891 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3892 	return vnode->mount->volume;
3893 }
3894 
3895 
3896 extern "C" status_t
3897 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3898 	uid_t nodeUserID)
3899 {
3900 	// get node permissions
3901 	int userPermissions = (mode & S_IRWXU) >> 6;
3902 	int groupPermissions = (mode & S_IRWXG) >> 3;
3903 	int otherPermissions = mode & S_IRWXO;
3904 
3905 	// get the node permissions for this uid/gid
3906 	int permissions = 0;
3907 	uid_t uid = geteuid();
3908 
3909 	if (uid == 0) {
3910 		// user is root
3911 		// root always has read/write permission, but at least one of the
3912 		// X bits must be set for execute permission
3913 		permissions = userPermissions | groupPermissions | otherPermissions
3914 			| S_IROTH | S_IWOTH;
3915 		if (S_ISDIR(mode))
3916 			permissions |= S_IXOTH;
3917 	} else if (uid == nodeUserID) {
3918 		// user is node owner
3919 		permissions = userPermissions;
3920 	} else if (is_user_in_group(nodeGroupID)) {
3921 		// user is in owning group
3922 		permissions = groupPermissions;
3923 	} else {
3924 		// user is one of the others
3925 		permissions = otherPermissions;
3926 	}
3927 
3928 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
3929 }
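
/* Worked example (illustrative): for a file with mode 0640 owned by
 * uid 1000 / gid 100, a caller with euid 1000 gets the user bits (rw-):
 *
 *	check_access_permissions(R_OK | W_OK, 0640, 100, 1000);
 *		// B_OK -- (R_OK | W_OK) & ~(rw-) == 0
 *	check_access_permissions(X_OK, 0640, 100, 1000);
 *		// B_PERMISSION_DENIED -- X_OK & ~(rw-) != 0
 */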
3930 
3931 
3932 #if 0
3933 extern "C" status_t
3934 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3935 	size_t* _numBytes)
3936 {
3937 	struct file_descriptor* descriptor;
3938 	struct vnode* vnode;
3939 
3940 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3941 	if (descriptor == NULL)
3942 		return B_FILE_ERROR;
3943 
3944 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3945 		count, 0, _numBytes);
3946 
3947 	put_fd(descriptor);
3948 	return status;
3949 }
3950 
3951 
3952 extern "C" status_t
3953 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3954 	size_t* _numBytes)
3955 {
3956 	struct file_descriptor* descriptor;
3957 	struct vnode* vnode;
3958 
3959 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3960 	if (descriptor == NULL)
3961 		return B_FILE_ERROR;
3962 
3963 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3964 		count, 0, _numBytes);
3965 
3966 	put_fd(descriptor);
3967 	return status;
3968 }
3969 #endif
3970 
3971 
3972 extern "C" status_t
3973 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3974 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3975 	size_t* _bytes)
3976 {
3977 	struct file_descriptor* descriptor;
3978 	struct vnode* vnode;
3979 
3980 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3981 	if (descriptor == NULL)
3982 		return B_FILE_ERROR;
3983 
3984 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3985 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3986 		false);
3987 
3988 	put_fd(descriptor);
3989 	return status;
3990 }
3991 
3992 
3993 extern "C" status_t
3994 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3995 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3996 	size_t* _bytes)
3997 {
3998 	struct file_descriptor* descriptor;
3999 	struct vnode* vnode;
4000 
4001 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4002 	if (descriptor == NULL)
4003 		return B_FILE_ERROR;
4004 
4005 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4006 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4007 		true);
4008 
4009 	put_fd(descriptor);
4010 	return status;
4011 }
4012 
4013 
4014 extern "C" status_t
4015 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4016 {
4017 	// look up the mount -- the caller is required to make sure that the
4018 	// mount won't go away
4019 	MutexLocker locker(sMountMutex);
4020 	struct fs_mount* mount = find_mount(mountID);
4021 	if (mount == NULL)
4022 		return B_BAD_VALUE;
4023 	locker.Unlock();
4024 
4025 	return mount->entry_cache.Add(dirID, name, nodeID);
4026 }
4027 
4028 
4029 extern "C" status_t
4030 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4031 {
4032 	// look up the mount -- the caller is required to make sure that the
4033 	// mount won't go away
4034 	MutexLocker locker(sMountMutex);
4035 	struct fs_mount* mount = find_mount(mountID);
4036 	if (mount == NULL)
4037 		return B_BAD_VALUE;
4038 	locker.Unlock();
4039 
4040 	return mount->entry_cache.Remove(dirID, name);
4041 }
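
/* Illustrative sketch: a file system keeps the entry cache coherent by
 * adding entries it has resolved or created and removing them when the
 * entry disappears; directoryID and nodeID are hypothetical values:
 *
 *	entry_cache_add(volume->id, directoryID, "data.txt", nodeID);
 *	// ... later, when the entry is unlinked or renamed away ...
 *	entry_cache_remove(volume->id, directoryID, "data.txt");
 */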
4042 
4043 
4044 //	#pragma mark - private VFS API
4045 //	Functions the VFS exports for other parts of the kernel
4046 
4047 
4048 /*! Acquires another reference to the vnode that has to be released
4049 	by calling vfs_put_vnode().
4050 */
4051 void
4052 vfs_acquire_vnode(struct vnode* vnode)
4053 {
4054 	inc_vnode_ref_count(vnode);
4055 }
4056 
4057 
4058 /*! This is currently called from file_cache_create() only.
4059 	It's probably a temporary solution as long as devfs requires that
4060 	fs_read_pages()/fs_write_pages() are called with the standard
4061 	open cookie and not with a device cookie.
4062 	If that's done differently, remove this call; it has no other
4063 	purpose.
4064 */
4065 extern "C" status_t
4066 vfs_get_cookie_from_fd(int fd, void** _cookie)
4067 {
4068 	struct file_descriptor* descriptor;
4069 
4070 	descriptor = get_fd(get_current_io_context(true), fd);
4071 	if (descriptor == NULL)
4072 		return B_FILE_ERROR;
4073 
4074 	*_cookie = descriptor->cookie;
4075 	return B_OK;
4076 }
4077 
4078 
4079 extern "C" status_t
4080 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4081 {
4082 	*vnode = get_vnode_from_fd(fd, kernel);
4083 
4084 	if (*vnode == NULL)
4085 		return B_FILE_ERROR;
4086 
4087 	return B_OK;
4088 }
4089 
4090 
4091 extern "C" status_t
4092 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4093 {
4094 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4095 		path, kernel));
4096 
4097 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4098 	if (pathBuffer.InitCheck() != B_OK)
4099 		return B_NO_MEMORY;
4100 
4101 	char* buffer = pathBuffer.LockBuffer();
4102 	strlcpy(buffer, path, pathBuffer.BufferSize());
4103 
4104 	struct vnode* vnode;
4105 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4106 	if (status != B_OK)
4107 		return status;
4108 
4109 	*_vnode = vnode;
4110 	return B_OK;
4111 }
4112 
4113 
4114 extern "C" status_t
4115 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4116 {
4117 	struct vnode* vnode;
4118 
4119 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4120 	if (status != B_OK)
4121 		return status;
4122 
4123 	*_vnode = vnode;
4124 	return B_OK;
4125 }
4126 
4127 
4128 extern "C" status_t
4129 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4130 	const char* name, struct vnode** _vnode)
4131 {
4132 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4133 }
4134 
4135 
4136 extern "C" void
4137 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4138 {
4139 	*_mountID = vnode->device;
4140 	*_vnodeID = vnode->id;
4141 }
4142 
4143 
4144 /*!
4145 	Helper function abstracting the process of "converting" a given
4146 	vnode-pointer to a fs_vnode-pointer.
4147 	Currently only used in bindfs.
4148 */
4149 extern "C" fs_vnode*
4150 vfs_fsnode_for_vnode(struct vnode* vnode)
4151 {
4152 	return vnode;
4153 }
4154 
4155 
4156 /*!
4157 	Calls fs_open() on the given vnode and returns a new
4158 	file descriptor for it
4159 */
4160 int
4161 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4162 {
4163 	return open_vnode(vnode, openMode, kernel);
4164 }
4165 
4166 
4167 /*!	Looks up a vnode with the given mount and vnode ID.
4168 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4169 	to the node.
4170 	It's currently only used by file_cache_create().
4171 */
4172 extern "C" status_t
4173 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4174 {
4175 	rw_lock_read_lock(&sVnodeLock);
4176 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4177 	rw_lock_read_unlock(&sVnodeLock);
4178 
4179 	if (vnode == NULL)
4180 		return B_ERROR;
4181 
4182 	*_vnode = vnode;
4183 	return B_OK;
4184 }
4185 
4186 
4187 extern "C" status_t
4188 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4189 	bool traverseLeafLink, bool kernel, void** _node)
4190 {
4191 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4192 		volume, path, kernel));
4193 
4194 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4195 	if (pathBuffer.InitCheck() != B_OK)
4196 		return B_NO_MEMORY;
4197 
4198 	fs_mount* mount;
4199 	status_t status = get_mount(volume->id, &mount);
4200 	if (status != B_OK)
4201 		return status;
4202 
4203 	char* buffer = pathBuffer.LockBuffer();
4204 	strlcpy(buffer, path, pathBuffer.BufferSize());
4205 
4206 	struct vnode* vnode = mount->root_vnode;
4207 
4208 	if (buffer[0] == '/')
4209 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4210 	else {
4211 		inc_vnode_ref_count(vnode);
4212 			// vnode_path_to_vnode() releases a reference to the starting vnode
4213 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4214 			kernel, &vnode, NULL);
4215 	}
4216 
4217 	put_mount(mount);
4218 
4219 	if (status != B_OK)
4220 		return status;
4221 
4222 	if (vnode->device != volume->id) {
4223 		// wrong mount ID - must not gain access on foreign file system nodes
4224 		put_vnode(vnode);
4225 		return B_BAD_VALUE;
4226 	}
4227 
4228 	// Use get_vnode() to resolve the cookie for the right layer.
4229 	status = get_vnode(volume, vnode->id, _node);
4230 	put_vnode(vnode);
4231 
4232 	return status;
4233 }
4234 
4235 
4236 status_t
4237 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4238 	struct stat* stat, bool kernel)
4239 {
4240 	status_t status;
4241 
4242 	if (path) {
4243 		// path given: get the stat of the node referred to by (fd, path)
4244 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4245 		if (pathBuffer.InitCheck() != B_OK)
4246 			return B_NO_MEMORY;
4247 
4248 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4249 			traverseLeafLink, stat, kernel);
4250 	} else {
4251 		// no path given: get the FD and use the FD operation
4252 		struct file_descriptor* descriptor
4253 			= get_fd(get_current_io_context(kernel), fd);
4254 		if (descriptor == NULL)
4255 			return B_FILE_ERROR;
4256 
4257 		if (descriptor->ops->fd_read_stat)
4258 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4259 		else
4260 			status = B_UNSUPPORTED;
4261 
4262 		put_fd(descriptor);
4263 	}
4264 
4265 	return status;
4266 }
4267 
4268 
4269 /*!	Finds the full path to the file that contains the module \a moduleName,
4270 	puts it into \a pathBuffer, and returns B_OK for success.
4271 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4272 	\c B_ENTRY_NOT_FOUND if no file could be found.
4273 	\a pathBuffer is clobbered in any case and must not be relied on if this
4274 	function returns unsuccessfully.
4275 	\a basePath and \a pathBuffer must not point to the same space.
4276 */
4277 status_t
4278 vfs_get_module_path(const char* basePath, const char* moduleName,
4279 	char* pathBuffer, size_t bufferSize)
4280 {
4281 	struct vnode* dir;
4282 	struct vnode* file;
4283 	status_t status;
4284 	size_t length;
4285 	char* path;
4286 
4287 	if (bufferSize == 0
4288 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4289 		return B_BUFFER_OVERFLOW;
4290 
4291 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4292 	if (status != B_OK)
4293 		return status;
4294 
4295 	// the path buffer has been clobbered by the above call
4296 	length = strlcpy(pathBuffer, basePath, bufferSize);
4297 	if (pathBuffer[length - 1] != '/')
4298 		pathBuffer[length++] = '/';
4299 
4300 	path = pathBuffer + length;
4301 	bufferSize -= length;
4302 
4303 	while (moduleName) {
4304 		char* nextPath = strchr(moduleName, '/');
4305 		if (nextPath == NULL)
4306 			length = strlen(moduleName);
4307 		else {
4308 			length = nextPath - moduleName;
4309 			nextPath++;
4310 		}
4311 
4312 		if (length + 1 >= bufferSize) {
4313 			status = B_BUFFER_OVERFLOW;
4314 			goto err;
4315 		}
4316 
4317 		memcpy(path, moduleName, length);
4318 		path[length] = '\0';
4319 		moduleName = nextPath;
4320 
4321 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4322 		if (status != B_OK) {
4323 			// vnode_path_to_vnode() has already released the reference to dir
4324 			return status;
4325 		}
4326 
4327 		if (S_ISDIR(file->Type())) {
4328 			// go to the next directory
4329 			path[length] = '/';
4330 			path[length + 1] = '\0';
4331 			path += length + 1;
4332 			bufferSize -= length + 1;
4333 
4334 			dir = file;
4335 		} else if (S_ISREG(file->Type())) {
4336 			// it's a file so it should be what we've searched for
4337 			put_vnode(file);
4338 
4339 			return B_OK;
4340 		} else {
4341 			TRACE(("vfs_get_module_path(): something is strange here: "
4342 				"0x%08" B_PRIx32 "...\n", file->Type()));
4343 			status = B_ERROR;
4344 			dir = file;
4345 			goto err;
4346 		}
4347 	}
4348 
4349 	// if we got here, the moduleName just pointed to a directory, not to
4350 	// a real module - what should we do in this case?
4351 	status = B_ENTRY_NOT_FOUND;
4352 
4353 err:
4354 	put_vnode(dir);
4355 	return status;
4356 }
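
/* Worked example (illustrative paths): with basePath
 * "/boot/system/add-ons/kernel" and moduleName "bus_managers/pci/v1",
 * the loop above resolves "bus_managers" and then "pci"; if "pci" turns
 * out to be a regular file, pathBuffer ends up containing
 * "/boot/system/add-ons/kernel/bus_managers/pci" and B_OK is returned.
 */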
4357 
4358 
4359 /*!	\brief Normalizes a given path.
4360 
4361 	The path must refer to an existing or non-existing entry in an existing
4362 	directory; that is, after chopping off the leaf component, the remaining
4363 	path must refer to an existing directory.
4364 
4365 	The returned path will be canonical in that it will be absolute, will not
4366 	contain any "." or ".." components or duplicate occurrences of '/'s,
4367 	and none of the directory components will be symbolic links.
4368 
4369 	Any two paths referring to the same entry will result in the same
4370 	normalized path (well, that is pretty much the definition of `normalized',
4371 	isn't it :-).
4372 
4373 	\param path The path to be normalized.
4374 	\param buffer The buffer into which the normalized path will be written.
4375 		   May be the same one as \a path.
4376 	\param bufferSize The size of \a buffer.
4377 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4378 	\param kernel \c true, if the IO context of the kernel shall be used,
4379 		   otherwise that of the team this thread belongs to. Only relevant,
4380 		   if the path is relative (to get the CWD).
4381 	\return \c B_OK if everything went fine, another error code otherwise.
4382 */
4383 status_t
4384 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4385 	bool traverseLink, bool kernel)
4386 {
4387 	if (!path || !buffer || bufferSize < 1)
4388 		return B_BAD_VALUE;
4389 
4390 	if (path != buffer) {
4391 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4392 			return B_BUFFER_OVERFLOW;
4393 	}
4394 
4395 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4396 }
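
/* Usage sketch (illustrative, assuming no symlinks are involved):
 *
 *	char buffer[B_PATH_NAME_LENGTH];
 *	if (vfs_normalize_path("/boot/./home//config/../apps", buffer,
 *			sizeof(buffer), true, true) == B_OK) {
 *		// buffer now contains "/boot/home/apps"
 *	}
 */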
4397 
4398 
4399 /*!	\brief Creates a special node in the file system.
4400 
4401 	The caller gets a reference to the newly created node (which is passed
4402 	back through \a _createdVnode) and is responsible for releasing it.
4403 
4404 	\param path The path where to create the entry for the node. Can be \c NULL,
4405 		in which case the node is created without an entry in the root FS -- it
4406 		will automatically be deleted when the last reference has been released.
4407 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4408 		the target file system will just create the node with its standard
4409 		operations. Depending on the type of the node a subnode might be created
4410 		automatically, though.
4411 	\param mode The type and permissions for the node to be created.
4412 	\param flags Flags to be passed to the creating FS.
4413 	\param kernel \c true, if called in the kernel context (relevant only if
4414 		\a path is not \c NULL and not absolute).
4415 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4416 		file system creating the node, with the private data pointer and
4417 		operations for the super node. Can be \c NULL.
4418 	\param _createdVnode Pointer to pre-allocated storage in which to store the
4419 		pointer to the newly created node.
4420 	\return \c B_OK, if everything went fine, another error code otherwise.
4421 */
4422 status_t
4423 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4424 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4425 	struct vnode** _createdVnode)
4426 {
4427 	struct vnode* dirNode;
4428 	char _leaf[B_FILE_NAME_LENGTH];
4429 	char* leaf = NULL;
4430 
4431 	if (path) {
4432 		// We've got a path. Get the dir vnode and the leaf name.
4433 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4434 		if (tmpPathBuffer.InitCheck() != B_OK)
4435 			return B_NO_MEMORY;
4436 
4437 		char* tmpPath = tmpPathBuffer.LockBuffer();
4438 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4439 			return B_NAME_TOO_LONG;
4440 
4441 		// get the dir vnode and the leaf name
4442 		leaf = _leaf;
4443 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4444 		if (error != B_OK)
4445 			return error;
4446 	} else {
4447 		// No path. Create the node in the root FS.
4448 		dirNode = sRoot;
4449 		inc_vnode_ref_count(dirNode);
4450 	}
4451 
4452 	VNodePutter _(dirNode);
4453 
4454 	// check support for creating special nodes
4455 	if (!HAS_FS_CALL(dirNode, create_special_node))
4456 		return B_UNSUPPORTED;
4457 
4458 	// create the node
4459 	fs_vnode superVnode;
4460 	ino_t nodeID;
4461 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4462 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4463 	if (status != B_OK)
4464 		return status;
4465 
4466 	// lookup the node
4467 	rw_lock_read_lock(&sVnodeLock);
4468 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4469 	rw_lock_read_unlock(&sVnodeLock);
4470 
4471 	if (*_createdVnode == NULL) {
4472 		panic("vfs_create_special_node(): lookup of node failed");
4473 		return B_ERROR;
4474 	}
4475 
4476 	return B_OK;
4477 }
4478 
4479 
4480 extern "C" void
4481 vfs_put_vnode(struct vnode* vnode)
4482 {
4483 	put_vnode(vnode);
4484 }
4485 
4486 
4487 extern "C" status_t
4488 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4489 {
4490 	// Get current working directory from io context
4491 	struct io_context* context = get_current_io_context(false);
4492 	status_t status = B_OK;
4493 
4494 	mutex_lock(&context->io_mutex);
4495 
4496 	if (context->cwd != NULL) {
4497 		*_mountID = context->cwd->device;
4498 		*_vnodeID = context->cwd->id;
4499 	} else
4500 		status = B_ERROR;
4501 
4502 	mutex_unlock(&context->io_mutex);
4503 	return status;
4504 }
4505 
4506 
4507 status_t
4508 vfs_unmount(dev_t mountID, uint32 flags)
4509 {
4510 	return fs_unmount(NULL, mountID, flags, true);
4511 }
4512 
4513 
4514 extern "C" status_t
4515 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4516 {
4517 	struct vnode* vnode;
4518 
4519 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4520 	if (status != B_OK)
4521 		return status;
4522 
4523 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4524 	put_vnode(vnode);
4525 	return B_OK;
4526 }
4527 
4528 
4529 extern "C" void
4530 vfs_free_unused_vnodes(int32 level)
4531 {
4532 	vnode_low_resource_handler(NULL,
4533 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4534 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4535 		level);
4536 }
4537 
4538 
4539 extern "C" bool
4540 vfs_can_page(struct vnode* vnode, void* cookie)
4541 {
4542 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4543 
4544 	if (HAS_FS_CALL(vnode, can_page))
4545 		return FS_CALL(vnode, can_page, cookie);
4546 	return false;
4547 }
4548 
4549 
4550 extern "C" status_t
4551 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4552 	const generic_io_vec* vecs, size_t count, uint32 flags,
4553 	generic_size_t* _numBytes)
4554 {
4555 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4556 		vecs, pos));
4557 
4558 #if VFS_PAGES_IO_TRACING
4559 	generic_size_t bytesRequested = *_numBytes;
4560 #endif
4561 
4562 	IORequest request;
4563 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4564 	if (status == B_OK) {
4565 		status = vfs_vnode_io(vnode, cookie, &request);
4566 		if (status == B_OK)
4567 			status = request.Wait();
4568 		*_numBytes = request.TransferredBytes();
4569 	}
4570 
4571 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4572 		status, *_numBytes));
4573 
4574 	return status;
4575 }
4576 
4577 
4578 extern "C" status_t
4579 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4580 	const generic_io_vec* vecs, size_t count, uint32 flags,
4581 	generic_size_t* _numBytes)
4582 {
4583 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4584 		vecs, pos));
4585 
4586 #if VFS_PAGES_IO_TRACING
4587 	generic_size_t bytesRequested = *_numBytes;
4588 #endif
4589 
4590 	IORequest request;
4591 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4592 	if (status == B_OK) {
4593 		status = vfs_vnode_io(vnode, cookie, &request);
4594 		if (status == B_OK)
4595 			status = request.Wait();
4596 		*_numBytes = request.TransferredBytes();
4597 	}
4598 
4599 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4600 		status, *_numBytes));
4601 
4602 	return status;
4603 }
4604 
4605 
4606 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4607 	created if \a allocate is \c true.
4608 	In case it's successful, it will also grab a reference to the cache
4609 	it returns.
4610 */
4611 extern "C" status_t
4612 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4613 {
4614 	if (vnode->cache != NULL) {
4615 		vnode->cache->AcquireRef();
4616 		*_cache = vnode->cache;
4617 		return B_OK;
4618 	}
4619 
4620 	rw_lock_read_lock(&sVnodeLock);
4621 	vnode->Lock();
4622 
4623 	status_t status = B_OK;
4624 
4625 	// The cache could have been created in the meantime
4626 	if (vnode->cache == NULL) {
4627 		if (allocate) {
4628 			// TODO: actually the vnode needs to be busy already here, or
4629 			//	else this won't work...
4630 			bool wasBusy = vnode->IsBusy();
4631 			vnode->SetBusy(true);
4632 
4633 			vnode->Unlock();
4634 			rw_lock_read_unlock(&sVnodeLock);
4635 
4636 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4637 
4638 			rw_lock_read_lock(&sVnodeLock);
4639 			vnode->Lock();
4640 			vnode->SetBusy(wasBusy);
4641 		} else
4642 			status = B_BAD_VALUE;
4643 	}
4644 
4645 	vnode->Unlock();
4646 	rw_lock_read_unlock(&sVnodeLock);
4647 
4648 	if (status == B_OK) {
4649 		vnode->cache->AcquireRef();
4650 		*_cache = vnode->cache;
4651 	}
4652 
4653 	return status;
4654 }
4655 
4656 
4657 status_t
4658 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4659 	file_io_vec* vecs, size_t* _count)
4660 {
4661 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4662 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4663 
4664 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4665 }
4666 
4667 
4668 status_t
4669 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4670 {
4671 	status_t status = FS_CALL(vnode, read_stat, stat);
4672 
4673 	// fill in the st_dev and st_ino fields
4674 	if (status == B_OK) {
4675 		stat->st_dev = vnode->device;
4676 		stat->st_ino = vnode->id;
4677 		stat->st_rdev = -1;
4678 	}
4679 
4680 	return status;
4681 }
4682 
4683 
4684 status_t
4685 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4686 {
4687 	struct vnode* vnode;
4688 	status_t status = get_vnode(device, inode, &vnode, true, false);
4689 	if (status != B_OK)
4690 		return status;
4691 
4692 	status = FS_CALL(vnode, read_stat, stat);
4693 
4694 	// fill in the st_dev and st_ino fields
4695 	if (status == B_OK) {
4696 		stat->st_dev = vnode->device;
4697 		stat->st_ino = vnode->id;
4698 		stat->st_rdev = -1;
4699 	}
4700 
4701 	put_vnode(vnode);
4702 	return status;
4703 }
4704 
4705 
4706 status_t
4707 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4708 {
4709 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4710 }
4711 
4712 
4713 status_t
4714 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4715 	bool kernel, char* path, size_t pathLength)
4716 {
4717 	struct vnode* vnode;
4718 	status_t status;
4719 
4720 	// filter invalid leaf names
4721 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4722 		return B_BAD_VALUE;
4723 
4724 	// get the vnode matching the dir's node_ref
4725 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4726 		// special cases "." and "..": we can directly get the vnode of the
4727 		// referenced directory
4728 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4729 		leaf = NULL;
4730 	} else
4731 		status = get_vnode(device, inode, &vnode, true, false);
4732 	if (status != B_OK)
4733 		return status;
4734 
4735 	// get the directory path
4736 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4737 	put_vnode(vnode);
4738 		// we don't need the vnode anymore
4739 	if (status != B_OK)
4740 		return status;
4741 
4742 	// append the leaf name
4743 	if (leaf) {
4744 		// insert a directory separator if this is not the file system root
4745 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4746 				>= pathLength)
4747 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4748 			return B_NAME_TOO_LONG;
4749 		}
4750 	}
4751 
4752 	return B_OK;
4753 }
4754 
4755 
4756 /*!	If the given descriptor locked its vnode, that lock will be released. */
4757 void
4758 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4759 {
4760 	struct vnode* vnode = fd_vnode(descriptor);
4761 
4762 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4763 		vnode->mandatory_locked_by = NULL;
4764 }
4765 
4766 
4767 /*!	Closes all file descriptors of the specified I/O context that
4768 	have the O_CLOEXEC flag set.
4769 */
4770 void
4771 vfs_exec_io_context(io_context* context)
4772 {
4773 	uint32 i;
4774 
4775 	for (i = 0; i < context->table_size; i++) {
4776 		mutex_lock(&context->io_mutex);
4777 
4778 		struct file_descriptor* descriptor = context->fds[i];
4779 		bool remove = false;
4780 
4781 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4782 			context->fds[i] = NULL;
4783 			context->num_used_fds--;
4784 
4785 			remove = true;
4786 		}
4787 
4788 		mutex_unlock(&context->io_mutex);
4789 
4790 		if (remove) {
4791 			close_fd(descriptor);
4792 			put_fd(descriptor);
4793 		}
4794 	}
4795 }
4796 
4797 
4798 /*! Sets up a new io_context structure, and inherits the properties
4799 	of the parent io_context if it is given.
4800 */
4801 io_context*
4802 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4803 {
4804 	io_context* context = (io_context*)malloc(sizeof(io_context));
4805 	if (context == NULL)
4806 		return NULL;
4807 
4808 	TIOC(NewIOContext(context, parentContext));
4809 
4810 	memset(context, 0, sizeof(io_context));
4811 	context->ref_count = 1;
4812 
4813 	MutexLocker parentLocker;
4814 
4815 	size_t tableSize;
4816 	if (parentContext) {
4817 		parentLocker.SetTo(parentContext->io_mutex, false);
4818 		tableSize = parentContext->table_size;
4819 	} else
4820 		tableSize = DEFAULT_FD_TABLE_SIZE;
4821 
4822 	// allocate space for FDs and their close-on-exec flags
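	// The three tables share one allocation, laid out as:
	//   file_descriptor* fds[tableSize]
	//   select_info* select_infos[tableSize]
	//   close-on-exec bitmap, (tableSize + 7) / 8 bytes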
4823 	context->fds = (file_descriptor**)malloc(
4824 		sizeof(struct file_descriptor*) * tableSize
4825 		+ sizeof(struct select_sync*) * tableSize
4826 		+ (tableSize + 7) / 8);
4827 	if (context->fds == NULL) {
4828 		free(context);
4829 		return NULL;
4830 	}
4831 
4832 	context->select_infos = (select_info**)(context->fds + tableSize);
4833 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4834 
4835 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4836 		+ sizeof(struct select_sync*) * tableSize
4837 		+ (tableSize + 7) / 8);
4838 
4839 	mutex_init(&context->io_mutex, "I/O context");
4840 
4841 	// Copy all parent file descriptors
4842 
4843 	if (parentContext) {
4844 		size_t i;
4845 
4846 		mutex_lock(&sIOContextRootLock);
4847 		context->root = parentContext->root;
4848 		if (context->root)
4849 			inc_vnode_ref_count(context->root);
4850 		mutex_unlock(&sIOContextRootLock);
4851 
4852 		context->cwd = parentContext->cwd;
4853 		if (context->cwd)
4854 			inc_vnode_ref_count(context->cwd);
4855 
4856 		for (i = 0; i < tableSize; i++) {
4857 			struct file_descriptor* descriptor = parentContext->fds[i];
4858 
4859 			if (descriptor != NULL) {
4860 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4861 				if (closeOnExec && purgeCloseOnExec)
4862 					continue;
4863 
4864 				TFD(InheritFD(context, i, descriptor, parentContext));
4865 
4866 				context->fds[i] = descriptor;
4867 				context->num_used_fds++;
4868 				atomic_add(&descriptor->ref_count, 1);
4869 				atomic_add(&descriptor->open_count, 1);
4870 
4871 				if (closeOnExec)
4872 					fd_set_close_on_exec(context, i, true);
4873 			}
4874 		}
4875 
4876 		parentLocker.Unlock();
4877 	} else {
4878 		context->root = sRoot;
4879 		context->cwd = sRoot;
4880 
4881 		if (context->root)
4882 			inc_vnode_ref_count(context->root);
4883 
4884 		if (context->cwd)
4885 			inc_vnode_ref_count(context->cwd);
4886 	}
4887 
4888 	context->table_size = tableSize;
4889 
4890 	list_init(&context->node_monitors);
4891 	context->max_monitors = DEFAULT_NODE_MONITORS;
4892 
4893 	return context;
4894 }
4895 
4896 
4897 static status_t
4898 vfs_free_io_context(io_context* context)
4899 {
4900 	uint32 i;
4901 
4902 	TIOC(FreeIOContext(context));
4903 
4904 	if (context->root)
4905 		put_vnode(context->root);
4906 
4907 	if (context->cwd)
4908 		put_vnode(context->cwd);
4909 
4910 	mutex_lock(&context->io_mutex);
4911 
4912 	for (i = 0; i < context->table_size; i++) {
4913 		if (struct file_descriptor* descriptor = context->fds[i]) {
4914 			close_fd(descriptor);
4915 			put_fd(descriptor);
4916 		}
4917 	}
4918 
4919 	mutex_destroy(&context->io_mutex);
4920 
4921 	remove_node_monitors(context);
4922 	free(context->fds);
4923 	free(context);
4924 
4925 	return B_OK;
4926 }
4927 
4928 
4929 void
4930 vfs_get_io_context(io_context* context)
4931 {
4932 	atomic_add(&context->ref_count, 1);
4933 }
4934 
4935 
4936 void
4937 vfs_put_io_context(io_context* context)
4938 {
4939 	if (atomic_add(&context->ref_count, -1) == 1)
4940 		vfs_free_io_context(context);
4941 }
4942 
4943 
4944 status_t
4945 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
4946 {
4947 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
4948 		return B_BAD_VALUE;
4949 
4950 	TIOC(ResizeIOContext(context, newSize));
4951 
4952 	MutexLocker _(context->io_mutex);
4953 
4954 	uint32 oldSize = context->table_size;
4955 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
4956 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
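	// one close-on-exec bit per FD, rounded up to whole bytes
	// (e.g. a table of 12 FDs needs (12 + 7) / 8 = 2 bitmap bytes)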
4957 
4958 	// If the tables shrink, make sure none of the fds being dropped are in use.
4959 	if (newSize < oldSize) {
4960 		for (uint32 i = oldSize; i-- > newSize;) {
4961 			if (context->fds[i])
4962 				return B_BUSY;
4963 		}
4964 	}
4965 
4966 	// store pointers to the old tables
4967 	file_descriptor** oldFDs = context->fds;
4968 	select_info** oldSelectInfos = context->select_infos;
4969 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4970 
4971 	// allocate new tables
4972 	file_descriptor** newFDs = (file_descriptor**)malloc(
4973 		sizeof(struct file_descriptor*) * newSize
4974 		+ sizeof(struct select_sync*) * newSize
4975 		+ newCloseOnExecBitmapSize);
4976 	if (newFDs == NULL)
4977 		return B_NO_MEMORY;
4978 
4979 	context->fds = newFDs;
4980 	context->select_infos = (select_info**)(context->fds + newSize);
4981 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4982 	context->table_size = newSize;
4983 
4984 	// copy entries from old tables
4985 	uint32 toCopy = min_c(oldSize, newSize);
4986 
4987 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4988 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4989 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4990 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
4991 
4992 	// clear additional entries, if the tables grow
4993 	if (newSize > oldSize) {
4994 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4995 		memset(context->select_infos + oldSize, 0,
4996 			sizeof(void*) * (newSize - oldSize));
4997 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
4998 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
4999 	}
5000 
5001 	free(oldFDs);
5002 
5003 	return B_OK;
5004 }
5005 
5006 
5007 static status_t
5008 vfs_resize_monitor_table(struct io_context* context, const int newSize)
5009 {
5010 	int	status = B_OK;
5011 
5012 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
5013 		return B_BAD_VALUE;
5014 
5015 	mutex_lock(&context->io_mutex);
5016 
5017 	if ((size_t)newSize < context->num_monitors) {
5018 		status = B_BUSY;
5019 		goto out;
5020 	}
5021 	context->max_monitors = newSize;
5022 
5023 out:
5024 	mutex_unlock(&context->io_mutex);
5025 	return status;
5026 }
5027 
5028 
5029 status_t
5030 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5031 	ino_t* _mountPointNodeID)
5032 {
5033 	ReadLocker nodeLocker(sVnodeLock);
5034 	MutexLocker mountLocker(sMountMutex);
5035 
5036 	struct fs_mount* mount = find_mount(mountID);
5037 	if (mount == NULL)
5038 		return B_BAD_VALUE;
5039 
5040 	Vnode* mountPoint = mount->covers_vnode;
5041 
5042 	*_mountPointMountID = mountPoint->device;
5043 	*_mountPointNodeID = mountPoint->id;
5044 
5045 	return B_OK;
5046 }
5047 
5048 
5049 status_t
5050 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5051 	ino_t coveredNodeID)
5052 {
5053 	// get the vnodes
5054 	Vnode* vnode;
5055 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5056 	if (error != B_OK)
5057 		return B_BAD_VALUE;
5058 	VNodePutter vnodePutter(vnode);
5059 
5060 	Vnode* coveredVnode;
5061 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5062 		false);
5063 	if (error != B_OK)
5064 		return B_BAD_VALUE;
5065 	VNodePutter coveredVnodePutter(coveredVnode);
5066 
5067 	// establish the covered/covering links
5068 	WriteLocker locker(sVnodeLock);
5069 
5070 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5071 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5072 		return B_BUSY;
5073 	}
5074 
5075 	vnode->covers = coveredVnode;
5076 	vnode->SetCovering(true);
5077 
5078 	coveredVnode->covered_by = vnode;
5079 	coveredVnode->SetCovered(true);
5080 
5081 	// the vnodes do now reference each other
5082 	inc_vnode_ref_count(vnode);
5083 	inc_vnode_ref_count(coveredVnode);
5084 
5085 	return B_OK;
5086 }
5087 
5088 
5089 int
5090 vfs_getrlimit(int resource, struct rlimit* rlp)
5091 {
5092 	if (!rlp)
5093 		return B_BAD_ADDRESS;
5094 
5095 	switch (resource) {
5096 		case RLIMIT_NOFILE:
5097 		{
5098 			struct io_context* context = get_current_io_context(false);
5099 			MutexLocker _(context->io_mutex);
5100 
5101 			rlp->rlim_cur = context->table_size;
5102 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5103 			return 0;
5104 		}
5105 
5106 		case RLIMIT_NOVMON:
5107 		{
5108 			struct io_context* context = get_current_io_context(false);
5109 			MutexLocker _(context->io_mutex);
5110 
5111 			rlp->rlim_cur = context->max_monitors;
5112 			rlp->rlim_max = MAX_NODE_MONITORS;
5113 			return 0;
5114 		}
5115 
5116 		default:
5117 			return B_BAD_VALUE;
5118 	}
5119 }
5120 
5121 
5122 int
5123 vfs_setrlimit(int resource, const struct rlimit* rlp)
5124 {
5125 	if (!rlp)
5126 		return B_BAD_ADDRESS;
5127 
5128 	switch (resource) {
5129 		case RLIMIT_NOFILE:
5130 			/* TODO: check getuid() */
5131 			if (rlp->rlim_max != RLIM_SAVED_MAX
5132 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5133 				return B_NOT_ALLOWED;
5134 
5135 			return vfs_resize_fd_table(get_current_io_context(false),
5136 				rlp->rlim_cur);
5137 
5138 		case RLIMIT_NOVMON:
5139 			/* TODO: check getuid() */
5140 			if (rlp->rlim_max != RLIM_SAVED_MAX
5141 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5142 				return B_NOT_ALLOWED;
5143 
5144 			return vfs_resize_monitor_table(get_current_io_context(false),
5145 				rlp->rlim_cur);
5146 
5147 		default:
5148 			return B_BAD_VALUE;
5149 	}
5150 }
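
/* Usage sketch (illustrative, from userland): growing the FD table goes
 * through the standard rlimit interface, which the kernel routes to
 * vfs_setrlimit():
 *
 *	struct rlimit rl = { 1024, RLIM_SAVED_MAX };
 *	if (setrlimit(RLIMIT_NOFILE, &rl) == 0) {
 *		// the process may now have up to 1024 open FDs
 *	}
 */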
5151 
5152 
5153 status_t
5154 vfs_init(kernel_args* args)
5155 {
5156 	vnode::StaticInit();
5157 
5158 	struct vnode dummyVnode;
5159 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5160 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5161 	if (sVnodeTable == NULL)
5162 		panic("vfs_init: error creating vnode hash table\n");
5163 
5164 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5165 
5166 	struct fs_mount dummyMount;
5167 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5168 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5169 	if (sMountsTable == NULL)
5170 		panic("vfs_init: error creating mounts hash table\n");
5171 
5172 	node_monitor_init();
5173 
5174 	sRoot = NULL;
5175 
5176 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5177 
5178 	if (block_cache_init() != B_OK)
5179 		return B_ERROR;
5180 
5181 #ifdef ADD_DEBUGGER_COMMANDS
5182 	// add some debugger commands
5183 	add_debugger_command_etc("vnode", &dump_vnode,
5184 		"Print info about the specified vnode",
5185 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5186 		"Prints information about the vnode specified by address <vnode> or\n"
5187 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5188 		"constructed and printed. It might not be possible to construct a\n"
5189 		"complete path, though.\n",
5190 		0);
5191 	add_debugger_command("vnodes", &dump_vnodes,
5192 		"list all vnodes (from the specified device)");
5193 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5194 		"list all vnode caches");
5195 	add_debugger_command("mount", &dump_mount,
5196 		"info about the specified fs_mount");
5197 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5198 	add_debugger_command("io_context", &dump_io_context,
5199 		"info about the I/O context");
5200 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5201 		"info about vnode usage");
5202 #endif
5203 
5204 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5205 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5206 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5207 		0);
5208 
5209 	fifo_init();
5210 	file_map_init();
5211 
5212 	return file_cache_init();
5213 }
5214 
5215 
5216 //	#pragma mark - fd_ops implementations
5217 
5218 
5219 /*!
5220 	Calls fs_open() on the given vnode and returns a new
5221 	file descriptor for it
5222 */
5223 static int
5224 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5225 {
5226 	void* cookie;
5227 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5228 	if (status != B_OK)
5229 		return status;
5230 
5231 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5232 	if (fd < 0) {
5233 		FS_CALL(vnode, close, cookie);
5234 		FS_CALL(vnode, free_cookie, cookie);
5235 	}
5236 	return fd;
5237 }
5238 
5239 
5240 /*!
5241 	Calls fs_open() on the given vnode and returns a new
5242 	file descriptor for it
5243 */
5244 static int
5245 create_vnode(struct vnode* directory, const char* name, int openMode,
5246 	int perms, bool kernel)
5247 {
5248 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5249 	status_t status = B_ERROR;
5250 	struct vnode* vnode;
5251 	void* cookie;
5252 	ino_t newID;
5253 
5254 	// This is somewhat tricky: If the entry already exists, the FS responsible
5255 	// for the directory might not necessarily also be the one responsible for
5256 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5257 	// we can actually never call the create() hook without O_EXCL. Instead we
5258 	// try to look the entry up first. If it already exists, we just open the
5259 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5260 	// introduces a race condition, since someone else might have created the
5261 	// entry in the meantime. We hope the respective FS returns the correct
5262 	// error code, in which case we retry (up to 3 times).
5263 
5264 	for (int i = 0; i < 3 && status != B_OK; i++) {
5265 		// look the node up
5266 		status = lookup_dir_entry(directory, name, &vnode);
5267 		if (status == B_OK) {
5268 			VNodePutter putter(vnode);
5269 
5270 			if ((openMode & O_EXCL) != 0)
5271 				return B_FILE_EXISTS;
5272 
5273 			// If the node is a symlink, we have to follow it, unless
5274 			// O_NOTRAVERSE is set.
5275 			if (S_ISLNK(vnode->Type()) && traverse) {
5276 				putter.Put();
5277 				char clonedName[B_FILE_NAME_LENGTH + 1];
5278 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5279 						>= B_FILE_NAME_LENGTH) {
5280 					return B_NAME_TOO_LONG;
5281 				}
5282 
5283 				inc_vnode_ref_count(directory);
5284 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5285 					kernel, &vnode, NULL);
5286 				if (status != B_OK)
5287 					return status;
5288 
5289 				putter.SetTo(vnode);
5290 			}
5291 
5292 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5293 				return B_LINK_LIMIT;
5294 
5295 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5296 			// on success keep the vnode reference for the FD
5297 			if (fd >= 0)
5298 				putter.Detach();
5299 
5300 			return fd;
5301 		}
5302 
5303 		// it doesn't exist yet -- try to create it
5304 
5305 		if (!HAS_FS_CALL(directory, create))
5306 			return B_READ_ONLY_DEVICE;
5307 
5308 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5309 			&cookie, &newID);
5310 		if (status != B_OK
5311 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5312 			return status;
5313 		}
5314 	}
5315 
5316 	if (status != B_OK)
5317 		return status;
5318 
5319 	// the node has been created successfully
5320 
5321 	rw_lock_read_lock(&sVnodeLock);
5322 	vnode = lookup_vnode(directory->device, newID);
5323 	rw_lock_read_unlock(&sVnodeLock);
5324 
5325 	if (vnode == NULL) {
5326 		panic("vfs: fs_create() returned success but there is no vnode, "
5327 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5328 		return B_BAD_VALUE;
5329 	}
5330 
5331 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5332 	if (fd >= 0)
5333 		return fd;
5334 
5335 	status = fd;
5336 
5337 	// something went wrong, clean up
5338 
5339 	FS_CALL(vnode, close, cookie);
5340 	FS_CALL(vnode, free_cookie, cookie);
5341 	put_vnode(vnode);
5342 
5343 	FS_CALL(directory, unlink, name);
5344 
5345 	return status;
5346 }
5347 
5348 
5349 /*! Calls fs open_dir() on the given vnode and returns a new
5350 	file descriptor for it
5351 */
5352 static int
5353 open_dir_vnode(struct vnode* vnode, bool kernel)
5354 {
5355 	void* cookie;
5356 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5357 	if (status != B_OK)
5358 		return status;
5359 
5360 	// directory is opened, create a fd
5361 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5362 	if (status >= 0)
5363 		return status;
5364 
5365 	FS_CALL(vnode, close_dir, cookie);
5366 	FS_CALL(vnode, free_dir_cookie, cookie);
5367 
5368 	return status;
5369 }
5370 
5371 
5372 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5373 	file descriptor for it.
5374 	Used by attr_dir_open(), and attr_dir_open_fd().
5375 */
5376 static int
5377 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5378 {
5379 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5380 		return B_UNSUPPORTED;
5381 
5382 	void* cookie;
5383 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5384 	if (status != B_OK)
5385 		return status;
5386 
5387 	// directory is opened, create a fd
5388 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5389 		kernel);
5390 	if (status >= 0)
5391 		return status;
5392 
5393 	FS_CALL(vnode, close_attr_dir, cookie);
5394 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5395 
5396 	return status;
5397 }
5398 
5399 
5400 static int
5401 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5402 	int openMode, int perms, bool kernel)
5403 {
5404 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5405 		"kernel %d\n", name, openMode, perms, kernel));
5406 
5407 	// get directory to put the new file in
5408 	struct vnode* directory;
5409 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5410 	if (status != B_OK)
5411 		return status;
5412 
5413 	status = create_vnode(directory, name, openMode, perms, kernel);
5414 	put_vnode(directory);
5415 
5416 	return status;
5417 }
5418 
5419 
5420 static int
5421 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5422 {
5423 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5424 		openMode, perms, kernel));
5425 
5426 	// get directory to put the new file in
5427 	char name[B_FILE_NAME_LENGTH];
5428 	struct vnode* directory;
5429 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5430 		kernel);
5431 	if (status < 0)
5432 		return status;
5433 
5434 	status = create_vnode(directory, name, openMode, perms, kernel);
5435 
5436 	put_vnode(directory);
5437 	return status;
5438 }
5439 
5440 
5441 static int
5442 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5443 	int openMode, bool kernel)
5444 {
5445 	if (name == NULL || *name == '\0')
5446 		return B_BAD_VALUE;
5447 
5448 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5449 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5450 
5451 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5452 
5453 	// get the vnode matching the entry_ref
5454 	struct vnode* vnode;
5455 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5456 		kernel, &vnode);
5457 	if (status != B_OK)
5458 		return status;
5459 
5460 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5461 		put_vnode(vnode);
5462 		return B_LINK_LIMIT;
5463 	}
5464 
5465 	int newFD = open_vnode(vnode, openMode, kernel);
5466 	if (newFD >= 0) {
5467 		// The vnode reference has been transferred to the FD
5468 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5469 			directoryID, vnode->id, name);
5470 	} else
5471 		put_vnode(vnode);
5472 
5473 	return newFD;
5474 }
5475 
5476 
5477 static int
5478 file_open(int fd, char* path, int openMode, bool kernel)
5479 {
5480 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5481 
5482 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5483 		fd, path, openMode, kernel));
5484 
5485 	// get the vnode matching the vnode + path combination
5486 	struct vnode* vnode;
5487 	ino_t parentID;
5488 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5489 		&parentID, kernel);
5490 	if (status != B_OK)
5491 		return status;
5492 
5493 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5494 		put_vnode(vnode);
5495 		return B_LINK_LIMIT;
5496 	}
5497 
5498 	// open the vnode
5499 	int newFD = open_vnode(vnode, openMode, kernel);
5500 	if (newFD >= 0) {
5501 		// The vnode reference has been transferred to the FD
5502 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5503 			vnode->device, parentID, vnode->id, NULL);
5504 	} else
5505 		put_vnode(vnode);
5506 
5507 	return newFD;
5508 }
5509 
5510 
5511 static status_t
5512 file_close(struct file_descriptor* descriptor)
5513 {
5514 	struct vnode* vnode = descriptor->u.vnode;
5515 	status_t status = B_OK;
5516 
5517 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5518 
5519 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5520 		vnode->id);
5521 	if (HAS_FS_CALL(vnode, close)) {
5522 		status = FS_CALL(vnode, close, descriptor->cookie);
5523 	}
5524 
5525 	if (status == B_OK) {
5526 		// remove all outstanding locks for this team
5527 		if (HAS_FS_CALL(vnode, release_lock))
5528 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5529 		else
5530 			status = release_advisory_lock(vnode, NULL);
5531 	}
5532 	return status;
5533 }
5534 
5535 
5536 static void
5537 file_free_fd(struct file_descriptor* descriptor)
5538 {
5539 	struct vnode* vnode = descriptor->u.vnode;
5540 
5541 	if (vnode != NULL) {
5542 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5543 		put_vnode(vnode);
5544 	}
5545 }
5546 
5547 
5548 static status_t
5549 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5550 	size_t* length)
5551 {
5552 	struct vnode* vnode = descriptor->u.vnode;
5553 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5554 		pos, length, *length));
5555 
5556 	if (S_ISDIR(vnode->Type()))
5557 		return B_IS_A_DIRECTORY;
5558 
5559 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5560 }
5561 
5562 
5563 static status_t
5564 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5565 	size_t* length)
5566 {
5567 	struct vnode* vnode = descriptor->u.vnode;
5568 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5569 		length));
5570 
5571 	if (S_ISDIR(vnode->Type()))
5572 		return B_IS_A_DIRECTORY;
5573 	if (!HAS_FS_CALL(vnode, write))
5574 		return B_READ_ONLY_DEVICE;
5575 
5576 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5577 }
5578 
5579 
5580 static off_t
5581 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5582 {
5583 	struct vnode* vnode = descriptor->u.vnode;
5584 	off_t offset;
5585 	bool isDevice = false;
5586 
5587 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5588 		seekType));
5589 
5590 	// some kinds of files are not seekable
5591 	switch (vnode->Type() & S_IFMT) {
5592 		case S_IFIFO:
5593 		case S_IFSOCK:
5594 			return ESPIPE;
5595 
5596 		// drivers publish block devices as character devices, so handle both
5597 		case S_IFBLK:
5598 		case S_IFCHR:
5599 			isDevice = true;
5600 			break;
5601 		// The Open Group Base Specs don't treat any file types besides
5602 		// pipes, FIFOs, and sockets specially, so we allow seeking them.
5603 		case S_IFREG:
5604 		case S_IFDIR:
5605 		case S_IFLNK:
5606 			break;
5607 	}
5608 
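	// determine the base offset the seek position is relative to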
5609 	switch (seekType) {
5610 		case SEEK_SET:
5611 			offset = 0;
5612 			break;
5613 		case SEEK_CUR:
5614 			offset = descriptor->pos;
5615 			break;
5616 		case SEEK_END:
5617 		{
5618 			// stat() the node
5619 			if (!HAS_FS_CALL(vnode, read_stat))
5620 				return B_UNSUPPORTED;
5621 
5622 			struct stat stat;
5623 			status_t status = FS_CALL(vnode, read_stat, &stat);
5624 			if (status != B_OK)
5625 				return status;
5626 
5627 			offset = stat.st_size;
5628 
5629 			if (offset == 0 && isDevice) {
5630 				// stat() on regular drivers doesn't report size
5631 				device_geometry geometry;
5632 
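				// the device size is the product of its geometry values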
5633 				if (HAS_FS_CALL(vnode, ioctl)) {
5634 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5635 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5636 					if (status == B_OK)
5637 						offset = (off_t)geometry.bytes_per_sector
5638 							* geometry.sectors_per_track
5639 							* geometry.cylinder_count
5640 							* geometry.head_count;
5641 				}
5642 			}
5643 
5644 			break;
5645 		}
5646 		default:
5647 			return B_BAD_VALUE;
5648 	}
5649 
5650 	// assumes off_t is 64 bits wide
5651 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5652 		return B_BUFFER_OVERFLOW;
5653 
5654 	pos += offset;
5655 	if (pos < 0)
5656 		return B_BAD_VALUE;
5657 
5658 	return descriptor->pos = pos;
5659 }
5660 
5661 
5662 static status_t
5663 file_select(struct file_descriptor* descriptor, uint8 event,
5664 	struct selectsync* sync)
5665 {
5666 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5667 
5668 	struct vnode* vnode = descriptor->u.vnode;
5669 
5670 	// If the FS has no select() hook, notify select() now.
5671 	if (!HAS_FS_CALL(vnode, select))
5672 		return notify_select_event(sync, event);
5673 
5674 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5675 }
5676 
5677 
5678 static status_t
5679 file_deselect(struct file_descriptor* descriptor, uint8 event,
5680 	struct selectsync* sync)
5681 {
5682 	struct vnode* vnode = descriptor->u.vnode;
5683 
5684 	if (!HAS_FS_CALL(vnode, deselect))
5685 		return B_OK;
5686 
5687 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5688 }
5689 
5690 
5691 static status_t
5692 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5693 	bool kernel)
5694 {
5695 	struct vnode* vnode;
5696 	status_t status;
5697 
5698 	if (name == NULL || *name == '\0')
5699 		return B_BAD_VALUE;
5700 
5701 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5702 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5703 
5704 	status = get_vnode(mountID, parentID, &vnode, true, false);
5705 	if (status != B_OK)
5706 		return status;
5707 
5708 	if (HAS_FS_CALL(vnode, create_dir))
5709 		status = FS_CALL(vnode, create_dir, name, perms);
5710 	else
5711 		status = B_READ_ONLY_DEVICE;
5712 
5713 	put_vnode(vnode);
5714 	return status;
5715 }
5716 
5717 
5718 static status_t
5719 dir_create(int fd, char* path, int perms, bool kernel)
5720 {
5721 	char filename[B_FILE_NAME_LENGTH];
5722 	struct vnode* vnode;
5723 	status_t status;
5724 
5725 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5726 		kernel));
5727 
5728 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5729 	if (status < 0)
5730 		return status;
5731 
5732 	if (HAS_FS_CALL(vnode, create_dir)) {
5733 		status = FS_CALL(vnode, create_dir, filename, perms);
5734 	} else
5735 		status = B_READ_ONLY_DEVICE;
5736 
5737 	put_vnode(vnode);
5738 	return status;
5739 }
5740 
5741 
5742 static int
5743 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5744 {
5745 	FUNCTION(("dir_open_entry_ref()\n"));
5746 
5747 	if (name && name[0] == '\0')
5748 		return B_BAD_VALUE;
5749 
5750 	// get the vnode matching the entry_ref/node_ref
5751 	struct vnode* vnode;
5752 	status_t status;
5753 	if (name) {
5754 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5755 			&vnode);
5756 	} else
5757 		status = get_vnode(mountID, parentID, &vnode, true, false);
5758 	if (status != B_OK)
5759 		return status;
5760 
5761 	int newFD = open_dir_vnode(vnode, kernel);
5762 	if (newFD >= 0) {
5763 		// The vnode reference has been transferred to the FD
5764 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5765 			vnode->id, name);
5766 	} else
5767 		put_vnode(vnode);
5768 
5769 	return newFD;
5770 }
5771 
5772 
5773 static int
5774 dir_open(int fd, char* path, bool kernel)
5775 {
5776 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5777 		kernel));
5778 
5779 	// get the vnode matching the vnode + path combination
5780 	struct vnode* vnode = NULL;
5781 	ino_t parentID;
5782 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5783 		kernel);
5784 	if (status != B_OK)
5785 		return status;
5786 
5787 	// open the dir
5788 	int newFD = open_dir_vnode(vnode, kernel);
5789 	if (newFD >= 0) {
5790 		// The vnode reference has been transferred to the FD
5791 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5792 			parentID, vnode->id, NULL);
5793 	} else
5794 		put_vnode(vnode);
5795 
5796 	return newFD;
5797 }
5798 
5799 
5800 static status_t
5801 dir_close(struct file_descriptor* descriptor)
5802 {
5803 	struct vnode* vnode = descriptor->u.vnode;
5804 
5805 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5806 
5807 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5808 		vnode->id);
5809 	if (HAS_FS_CALL(vnode, close_dir))
5810 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5811 
5812 	return B_OK;
5813 }
5814 
5815 
5816 static void
5817 dir_free_fd(struct file_descriptor* descriptor)
5818 {
5819 	struct vnode* vnode = descriptor->u.vnode;
5820 
5821 	if (vnode != NULL) {
5822 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5823 		put_vnode(vnode);
5824 	}
5825 }
5826 
5827 
5828 static status_t
5829 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5830 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5831 {
5832 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5833 		bufferSize, _count);
5834 }
5835 
5836 
5837 static status_t
5838 fix_dirent(struct vnode* parent, struct dirent* entry,
5839 	struct io_context* ioContext)
5840 {
5841 	// set d_pdev and d_pino
5842 	entry->d_pdev = parent->device;
5843 	entry->d_pino = parent->id;
5844 
5845 	// If this is the ".." entry and the directory covers another vnode,
5846 	// we need to replace d_dev and d_ino with the actual values.
5847 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5848 		// Make sure the IO context root is not bypassed.
5849 		if (parent == ioContext->root) {
5850 			entry->d_dev = parent->device;
5851 			entry->d_ino = parent->id;
5852 		} else {
5853 			inc_vnode_ref_count(parent);
5854 				// vnode_path_to_vnode() puts the node
5855 
5856 			// ".." is guaranteed not to be clobbered by this call
5857 			struct vnode* vnode;
5858 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5859 				ioContext, &vnode, NULL);
5860 
5861 			if (status == B_OK) {
5862 				entry->d_dev = vnode->device;
5863 				entry->d_ino = vnode->id;
5864 				put_vnode(vnode);
5865 			}
5866 		}
5867 	} else {
5868 		// resolve covered vnodes
5869 		ReadLocker _(&sVnodeLock);
5870 
5871 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5872 		if (vnode != NULL && vnode->covered_by != NULL) {
5873 			do {
5874 				vnode = vnode->covered_by;
5875 			} while (vnode->covered_by != NULL);
5876 
5877 			entry->d_dev = vnode->device;
5878 			entry->d_ino = vnode->id;
5879 		}
5880 	}
5881 
5882 	return B_OK;
5883 }
5884 
5885 
5886 static status_t
5887 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5888 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5889 {
5890 	if (!HAS_FS_CALL(vnode, read_dir))
5891 		return B_UNSUPPORTED;
5892 
5893 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5894 		_count);
5895 	if (error != B_OK)
5896 		return error;
5897 
5898 	// we need to adjust the dirents we just read
5899 	uint32 count = *_count;
5900 	for (uint32 i = 0; i < count; i++) {
5901 		error = fix_dirent(vnode, buffer, ioContext);
5902 		if (error != B_OK)
5903 			return error;
5904 
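		// d_reclen is the size of the whole dirent, so it takes us to the next
		// one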
5905 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5906 	}
5907 
5908 	return error;
5909 }
5910 
5911 
5912 static status_t
5913 dir_rewind(struct file_descriptor* descriptor)
5914 {
5915 	struct vnode* vnode = descriptor->u.vnode;
5916 
5917 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5918 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5919 	}
5920 
5921 	return B_UNSUPPORTED;
5922 }
5923 
5924 
5925 static status_t
5926 dir_remove(int fd, char* path, bool kernel)
5927 {
5928 	char name[B_FILE_NAME_LENGTH];
5929 	struct vnode* directory;
5930 	status_t status;
5931 
5932 	if (path != NULL) {
5933 		// we need to make sure our path name doesn't end in "/", ".",
5934 		// or ".."
5935 		char* lastSlash;
5936 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5937 			char* leaf = lastSlash + 1;
5938 			if (!strcmp(leaf, ".."))
5939 				return B_NOT_ALLOWED;
5940 
5941 			// omit multiple slashes
5942 			while (lastSlash > path && lastSlash[-1] == '/')
5943 				lastSlash--;
5944 
5945 			if (leaf[0] != '\0'
5946 				&& strcmp(leaf, ".") != 0) {
5947 				break;
5948 			}
5949 			// "name/" -> "name", or "name/." -> "name"
5950 			lastSlash[0] = '\0';
5951 		}
5952 
5953 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5954 			return B_NOT_ALLOWED;
5955 	}
5956 
5957 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5958 	if (status != B_OK)
5959 		return status;
5960 
5961 	if (HAS_FS_CALL(directory, remove_dir))
5962 		status = FS_CALL(directory, remove_dir, name);
5963 	else
5964 		status = B_READ_ONLY_DEVICE;
5965 
5966 	put_vnode(directory);
5967 	return status;
5968 }
5969 
5970 
5971 static status_t
5972 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5973 	size_t length)
5974 {
5975 	struct vnode* vnode = descriptor->u.vnode;
5976 
5977 	if (HAS_FS_CALL(vnode, ioctl))
5978 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5979 
5980 	return B_DEV_INVALID_IOCTL;
5981 }
5982 
5983 
5984 static status_t
5985 common_fcntl(int fd, int op, size_t argument, bool kernel)
5986 {
5987 	struct flock flock;
5988 
5989 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5990 		fd, op, argument, kernel ? "kernel" : "user"));
5991 
5992 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5993 		fd);
5994 	if (descriptor == NULL)
5995 		return B_FILE_ERROR;
5996 
5997 	struct vnode* vnode = fd_vnode(descriptor);
5998 
5999 	status_t status = B_OK;
6000 
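	// the lock related ops take a struct flock argument that has to be
	// copied in from userland first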
6001 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6002 		if (descriptor->type != FDTYPE_FILE)
6003 			status = B_BAD_VALUE;
6004 		else if (user_memcpy(&flock, (struct flock*)argument,
6005 				sizeof(struct flock)) != B_OK)
6006 			status = B_BAD_ADDRESS;
6007 
6008 		if (status != B_OK) {
6009 			put_fd(descriptor);
6010 			return status;
6011 		}
6012 	}
6013 
6014 	switch (op) {
6015 		case F_SETFD:
6016 		{
6017 			struct io_context* context = get_current_io_context(kernel);
6018 			// Set file descriptor flags
6019 
6020 			// FD_CLOEXEC is the only flag available at this time
6021 			mutex_lock(&context->io_mutex);
6022 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6023 			mutex_unlock(&context->io_mutex);
6024 
6025 			status = B_OK;
6026 			break;
6027 		}
6028 
6029 		case F_GETFD:
6030 		{
6031 			struct io_context* context = get_current_io_context(kernel);
6032 
6033 			// Get file descriptor flags
6034 			mutex_lock(&context->io_mutex);
6035 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6036 			mutex_unlock(&context->io_mutex);
6037 			break;
6038 		}
6039 
6040 		case F_SETFL:
6041 			// Set file descriptor open mode
6042 
6043 			// we only accept changes to O_APPEND and O_NONBLOCK
6044 			argument &= O_APPEND | O_NONBLOCK;
6045 			if (descriptor->ops->fd_set_flags != NULL) {
6046 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6047 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6048 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6049 					(int)argument);
6050 			} else
6051 				status = B_UNSUPPORTED;
6052 
6053 			if (status == B_OK) {
6054 				// update this descriptor's open_mode field
6055 				descriptor->open_mode = (descriptor->open_mode
6056 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6057 			}
6058 
6059 			break;
6060 
6061 		case F_GETFL:
6062 			// Get file descriptor open mode
6063 			status = descriptor->open_mode;
6064 			break;
6065 
6066 		case F_DUPFD:
6067 		{
6068 			struct io_context* context = get_current_io_context(kernel);
6069 
6070 			status = new_fd_etc(context, descriptor, (int)argument);
6071 			if (status >= 0) {
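				// the new descriptor starts with the close-on-exec flag cleared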
6072 				mutex_lock(&context->io_mutex);
6073 				fd_set_close_on_exec(context, status, false);
6074 				mutex_unlock(&context->io_mutex);
6075 
6076 				atomic_add(&descriptor->ref_count, 1);
6077 			}
6078 			break;
6079 		}
6080 
6081 		case F_GETLK:
6082 			if (vnode != NULL) {
6083 				struct flock normalizedLock;
6084 
6085 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6086 				status = normalize_flock(descriptor, &normalizedLock);
6087 				if (status != B_OK)
6088 					break;
6089 
6090 				if (HAS_FS_CALL(vnode, test_lock)) {
6091 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6092 						&normalizedLock);
6093 				} else
6094 					status = test_advisory_lock(vnode, &normalizedLock);
6095 				if (status == B_OK) {
6096 					if (normalizedLock.l_type == F_UNLCK) {
6097 						// no conflicting lock found, copy back the same struct
6098 						// we were given except change type to F_UNLCK
6099 						flock.l_type = F_UNLCK;
6100 						status = user_memcpy((struct flock*)argument, &flock,
6101 							sizeof(struct flock));
6102 					} else {
6103 						// a conflicting lock was found, copy back its range and
6104 						// type
6105 						if (normalizedLock.l_len == OFF_MAX)
6106 							normalizedLock.l_len = 0;
6107 
6108 						status = user_memcpy((struct flock*)argument,
6109 							&normalizedLock, sizeof(struct flock));
6110 					}
6111 				}
6112 			} else
6113 				status = B_BAD_VALUE;
6114 			break;
6115 
6116 		case F_SETLK:
6117 		case F_SETLKW:
6118 			status = normalize_flock(descriptor, &flock);
6119 			if (status != B_OK)
6120 				break;
6121 
6122 			if (vnode == NULL) {
6123 				status = B_BAD_VALUE;
6124 			} else if (flock.l_type == F_UNLCK) {
6125 				if (HAS_FS_CALL(vnode, release_lock)) {
6126 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6127 						&flock);
6128 				} else
6129 					status = release_advisory_lock(vnode, &flock);
6130 			} else {
6131 				// the open mode must match the lock type
6132 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6133 						&& flock.l_type == F_WRLCK)
6134 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6135 						&& flock.l_type == F_RDLCK))
6136 					status = B_FILE_ERROR;
6137 				else {
6138 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6139 						status = FS_CALL(vnode, acquire_lock,
6140 							descriptor->cookie, &flock, op == F_SETLKW);
6141 					} else {
6142 						status = acquire_advisory_lock(vnode, -1,
6143 							&flock, op == F_SETLKW);
6144 					}
6145 				}
6146 			}
6147 			break;
6148 
6149 		// ToDo: add support for more ops?
6150 
6151 		default:
6152 			status = B_BAD_VALUE;
6153 	}
6154 
6155 	put_fd(descriptor);
6156 	return status;
6157 }
6158 
6159 
6160 static status_t
6161 common_sync(int fd, bool kernel)
6162 {
6163 	struct file_descriptor* descriptor;
6164 	struct vnode* vnode;
6165 	status_t status;
6166 
6167 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6168 
6169 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6170 	if (descriptor == NULL)
6171 		return B_FILE_ERROR;
6172 
6173 	if (HAS_FS_CALL(vnode, fsync))
6174 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6175 	else
6176 		status = B_UNSUPPORTED;
6177 
6178 	put_fd(descriptor);
6179 	return status;
6180 }
6181 
6182 
6183 static status_t
6184 common_lock_node(int fd, bool kernel)
6185 {
6186 	struct file_descriptor* descriptor;
6187 	struct vnode* vnode;
6188 
6189 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6190 	if (descriptor == NULL)
6191 		return B_FILE_ERROR;
6192 
6193 	status_t status = B_OK;
6194 
6195 	// We need to set the locking atomically - someone
6196 	// else might set one at the same time
6197 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6198 			(file_descriptor*)NULL) != NULL)
6199 		status = B_BUSY;
6200 
6201 	put_fd(descriptor);
6202 	return status;
6203 }
6204 
6205 
6206 static status_t
6207 common_unlock_node(int fd, bool kernel)
6208 {
6209 	struct file_descriptor* descriptor;
6210 	struct vnode* vnode;
6211 
6212 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6213 	if (descriptor == NULL)
6214 		return B_FILE_ERROR;
6215 
6216 	status_t status = B_OK;
6217 
6218 	// We need to clear the lock atomically - someone
6219 	// else might set one at the same time
6220 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6221 			(file_descriptor*)NULL, descriptor) != descriptor)
6222 		status = B_BAD_VALUE;
6223 
6224 	put_fd(descriptor);
6225 	return status;
6226 }
6227 
6228 
6229 static status_t
6230 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6231 	bool kernel)
6232 {
6233 	struct vnode* vnode;
6234 	status_t status;
6235 
6236 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6237 	if (status != B_OK)
6238 		return status;
6239 
6240 	if (HAS_FS_CALL(vnode, read_symlink)) {
6241 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6242 	} else
6243 		status = B_BAD_VALUE;
6244 
6245 	put_vnode(vnode);
6246 	return status;
6247 }
6248 
6249 
6250 static status_t
6251 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6252 	bool kernel)
6253 {
6254 	// path validity checks have to be in the calling function!
6255 	char name[B_FILE_NAME_LENGTH];
6256 	struct vnode* vnode;
6257 	status_t status;
6258 
6259 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6260 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6261 
6262 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6263 	if (status != B_OK)
6264 		return status;
6265 
6266 	if (HAS_FS_CALL(vnode, create_symlink))
6267 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6268 	else {
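		// a writable FS simply lacks symlink support; otherwise it's read-only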
6269 		status = HAS_FS_CALL(vnode, write)
6270 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6271 	}
6272 
6273 	put_vnode(vnode);
6274 
6275 	return status;
6276 }
6277 
6278 
6279 static status_t
6280 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6281 	bool traverseLeafLink, bool kernel)
6282 {
6283 	// path validity checks have to be in the calling function!
6284 
6285 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6286 		toPath, kernel));
6287 
6288 	char name[B_FILE_NAME_LENGTH];
6289 	struct vnode* directory;
6290 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6291 		kernel);
6292 	if (status != B_OK)
6293 		return status;
6294 
6295 	struct vnode* vnode;
6296 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6297 		kernel);
6298 	if (status != B_OK)
6299 		goto err;
6300 
6301 	if (directory->mount != vnode->mount) {
6302 		status = B_CROSS_DEVICE_LINK;
6303 		goto err1;
6304 	}
6305 
6306 	if (HAS_FS_CALL(directory, link))
6307 		status = FS_CALL(directory, link, name, vnode);
6308 	else
6309 		status = B_READ_ONLY_DEVICE;
6310 
6311 err1:
6312 	put_vnode(vnode);
6313 err:
6314 	put_vnode(directory);
6315 
6316 	return status;
6317 }
6318 
6319 
6320 static status_t
6321 common_unlink(int fd, char* path, bool kernel)
6322 {
6323 	char filename[B_FILE_NAME_LENGTH];
6324 	struct vnode* vnode;
6325 	status_t status;
6326 
6327 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6328 		kernel));
6329 
6330 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6331 	if (status < 0)
6332 		return status;
6333 
6334 	if (HAS_FS_CALL(vnode, unlink))
6335 		status = FS_CALL(vnode, unlink, filename);
6336 	else
6337 		status = B_READ_ONLY_DEVICE;
6338 
6339 	put_vnode(vnode);
6340 
6341 	return status;
6342 }
6343 
6344 
6345 static status_t
6346 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6347 {
6348 	struct vnode* vnode;
6349 	status_t status;
6350 
6351 	// TODO: honor effectiveUserGroup argument
6352 
6353 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6354 	if (status != B_OK)
6355 		return status;
6356 
6357 	if (HAS_FS_CALL(vnode, access))
6358 		status = FS_CALL(vnode, access, mode);
6359 	else
6360 		status = B_OK;
6361 
6362 	put_vnode(vnode);
6363 
6364 	return status;
6365 }
6366 
6367 
6368 static status_t
6369 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6370 {
6371 	struct vnode* fromVnode;
6372 	struct vnode* toVnode;
6373 	char fromName[B_FILE_NAME_LENGTH];
6374 	char toName[B_FILE_NAME_LENGTH];
6375 	status_t status;
6376 
6377 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6378 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6379 
6380 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6381 	if (status != B_OK)
6382 		return status;
6383 
6384 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6385 	if (status != B_OK)
6386 		goto err1;
6387 
6388 	if (fromVnode->device != toVnode->device) {
6389 		status = B_CROSS_DEVICE_LINK;
6390 		goto err2;
6391 	}
6392 
6393 	if (fromName[0] == '\0' || toName[0] == '\0'
6394 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6395 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6396 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6397 		status = B_BAD_VALUE;
6398 		goto err2;
6399 	}
6400 
6401 	if (HAS_FS_CALL(fromVnode, rename))
6402 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6403 	else
6404 		status = B_READ_ONLY_DEVICE;
6405 
6406 err2:
6407 	put_vnode(toVnode);
6408 err1:
6409 	put_vnode(fromVnode);
6410 
6411 	return status;
6412 }
6413 
6414 
6415 static status_t
6416 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6417 {
6418 	struct vnode* vnode = descriptor->u.vnode;
6419 
6420 	FUNCTION(("common_read_stat: stat %p\n", stat));
6421 
6422 	// TODO: remove this once all file systems properly set them!
6423 	stat->st_crtim.tv_nsec = 0;
6424 	stat->st_ctim.tv_nsec = 0;
6425 	stat->st_mtim.tv_nsec = 0;
6426 	stat->st_atim.tv_nsec = 0;
6427 
6428 	status_t status = FS_CALL(vnode, read_stat, stat);
6429 
6430 	// fill in the st_dev and st_ino fields
6431 	if (status == B_OK) {
6432 		stat->st_dev = vnode->device;
6433 		stat->st_ino = vnode->id;
6434 		stat->st_rdev = -1;
6435 	}
6436 
6437 	return status;
6438 }
6439 
6440 
6441 static status_t
6442 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6443 	int statMask)
6444 {
6445 	struct vnode* vnode = descriptor->u.vnode;
6446 
6447 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6448 		vnode, stat, statMask));
6449 
6450 	if (!HAS_FS_CALL(vnode, write_stat))
6451 		return B_READ_ONLY_DEVICE;
6452 
6453 	return FS_CALL(vnode, write_stat, stat, statMask);
6454 }
6455 
6456 
6457 static status_t
6458 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6459 	struct stat* stat, bool kernel)
6460 {
6461 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6462 		stat));
6463 
6464 	struct vnode* vnode;
6465 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6466 		NULL, kernel);
6467 	if (status != B_OK)
6468 		return status;
6469 
6470 	status = FS_CALL(vnode, read_stat, stat);
6471 
6472 	// fill in the st_dev and st_ino fields
6473 	if (status == B_OK) {
6474 		stat->st_dev = vnode->device;
6475 		stat->st_ino = vnode->id;
6476 		stat->st_rdev = -1;
6477 	}
6478 
6479 	put_vnode(vnode);
6480 	return status;
6481 }
6482 
6483 
6484 static status_t
6485 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6486 	const struct stat* stat, int statMask, bool kernel)
6487 {
6488 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6489 		"kernel %d\n", fd, path, stat, statMask, kernel));
6490 
6491 	struct vnode* vnode;
6492 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6493 		NULL, kernel);
6494 	if (status != B_OK)
6495 		return status;
6496 
6497 	if (HAS_FS_CALL(vnode, write_stat))
6498 		status = FS_CALL(vnode, write_stat, stat, statMask);
6499 	else
6500 		status = B_READ_ONLY_DEVICE;
6501 
6502 	put_vnode(vnode);
6503 
6504 	return status;
6505 }
6506 
6507 
6508 static int
6509 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6510 {
6511 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6512 		kernel));
6513 
6514 	struct vnode* vnode;
6515 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6516 		NULL, kernel);
6517 	if (status != B_OK)
6518 		return status;
6519 
6520 	status = open_attr_dir_vnode(vnode, kernel);
6521 	if (status < 0)
6522 		put_vnode(vnode);
6523 
6524 	return status;
6525 }
6526 
6527 
6528 static status_t
6529 attr_dir_close(struct file_descriptor* descriptor)
6530 {
6531 	struct vnode* vnode = descriptor->u.vnode;
6532 
6533 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6534 
6535 	if (HAS_FS_CALL(vnode, close_attr_dir))
6536 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6537 
6538 	return B_OK;
6539 }
6540 
6541 
6542 static void
6543 attr_dir_free_fd(struct file_descriptor* descriptor)
6544 {
6545 	struct vnode* vnode = descriptor->u.vnode;
6546 
6547 	if (vnode != NULL) {
6548 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6549 		put_vnode(vnode);
6550 	}
6551 }
6552 
6553 
6554 static status_t
6555 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6556 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6557 {
6558 	struct vnode* vnode = descriptor->u.vnode;
6559 
6560 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6561 
6562 	if (HAS_FS_CALL(vnode, read_attr_dir))
6563 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6564 			bufferSize, _count);
6565 
6566 	return B_UNSUPPORTED;
6567 }
6568 
6569 
6570 static status_t
6571 attr_dir_rewind(struct file_descriptor* descriptor)
6572 {
6573 	struct vnode* vnode = descriptor->u.vnode;
6574 
6575 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6576 
6577 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6578 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6579 
6580 	return B_UNSUPPORTED;
6581 }
6582 
6583 
6584 static int
6585 attr_create(int fd, char* path, const char* name, uint32 type,
6586 	int openMode, bool kernel)
6587 {
6588 	if (name == NULL || *name == '\0')
6589 		return B_BAD_VALUE;
6590 
6591 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6592 	struct vnode* vnode;
6593 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6594 		kernel);
6595 	if (status != B_OK)
6596 		return status;
6597 
6598 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6599 		status = B_LINK_LIMIT;
6600 		goto err;
6601 	}
6602 
6603 	if (!HAS_FS_CALL(vnode, create_attr)) {
6604 		status = B_READ_ONLY_DEVICE;
6605 		goto err;
6606 	}
6607 
6608 	void* cookie;
6609 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6610 	if (status != B_OK)
6611 		goto err;
6612 
6613 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6614 	if (fd >= 0)
6615 		return fd;
6616 
6617 	status = fd;
6618 
6619 	FS_CALL(vnode, close_attr, cookie);
6620 	FS_CALL(vnode, free_attr_cookie, cookie);
6621 
6622 	FS_CALL(vnode, remove_attr, name);
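	// opening a descriptor for the new attribute failed, so remove it again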
6623 
6624 err:
6625 	put_vnode(vnode);
6626 
6627 	return status;
6628 }
6629 
6630 
6631 static int
6632 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6633 {
6634 	if (name == NULL || *name == '\0')
6635 		return B_BAD_VALUE;
6636 
6637 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6638 	struct vnode* vnode;
6639 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6640 		kernel);
6641 	if (status != B_OK)
6642 		return status;
6643 
6644 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6645 		status = B_LINK_LIMIT;
6646 		goto err;
6647 	}
6648 
6649 	if (!HAS_FS_CALL(vnode, open_attr)) {
6650 		status = B_UNSUPPORTED;
6651 		goto err;
6652 	}
6653 
6654 	void* cookie;
6655 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6656 	if (status != B_OK)
6657 		goto err;
6658 
6659 	// now we only need a file descriptor for this attribute and we're done
6660 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6661 	if (fd >= 0)
6662 		return fd;
6663 
6664 	status = fd;
6665 
6666 	FS_CALL(vnode, close_attr, cookie);
6667 	FS_CALL(vnode, free_attr_cookie, cookie);
6668 
6669 err:
6670 	put_vnode(vnode);
6671 
6672 	return status;
6673 }
6674 
6675 
6676 static status_t
6677 attr_close(struct file_descriptor* descriptor)
6678 {
6679 	struct vnode* vnode = descriptor->u.vnode;
6680 
6681 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6682 
6683 	if (HAS_FS_CALL(vnode, close_attr))
6684 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6685 
6686 	return B_OK;
6687 }
6688 
6689 
6690 static void
6691 attr_free_fd(struct file_descriptor* descriptor)
6692 {
6693 	struct vnode* vnode = descriptor->u.vnode;
6694 
6695 	if (vnode != NULL) {
6696 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6697 		put_vnode(vnode);
6698 	}
6699 }
6700 
6701 
6702 static status_t
6703 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6704 	size_t* length)
6705 {
6706 	struct vnode* vnode = descriptor->u.vnode;
6707 
6708 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6709 		pos, length, *length));
6710 
6711 	if (!HAS_FS_CALL(vnode, read_attr))
6712 		return B_UNSUPPORTED;
6713 
6714 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6715 }
6716 
6717 
6718 static status_t
6719 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6720 	size_t* length)
6721 {
6722 	struct vnode* vnode = descriptor->u.vnode;
6723 
6724 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6725 		length));
6726 
6727 	if (!HAS_FS_CALL(vnode, write_attr))
6728 		return B_UNSUPPORTED;
6729 
6730 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6731 }
6732 
6733 
6734 static off_t
6735 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6736 {
6737 	off_t offset;
6738 
6739 	switch (seekType) {
6740 		case SEEK_SET:
6741 			offset = 0;
6742 			break;
6743 		case SEEK_CUR:
6744 			offset = descriptor->pos;
6745 			break;
6746 		case SEEK_END:
6747 		{
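			// the attribute's current size determines the end position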
6748 			struct vnode* vnode = descriptor->u.vnode;
6749 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6750 				return B_UNSUPPORTED;
6751 
6752 			struct stat stat;
6753 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6754 				&stat);
6755 			if (status != B_OK)
6756 				return status;
6757 
6758 			offset = stat.st_size;
6759 			break;
6760 		}
6761 		default:
6762 			return B_BAD_VALUE;
6763 	}
6764 
6765 	// assumes off_t is 64 bits wide
6766 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6767 		return B_BUFFER_OVERFLOW;
6768 
6769 	pos += offset;
6770 	if (pos < 0)
6771 		return B_BAD_VALUE;
6772 
6773 	return descriptor->pos = pos;
6774 }
6775 
6776 
6777 static status_t
6778 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6779 {
6780 	struct vnode* vnode = descriptor->u.vnode;
6781 
6782 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6783 
6784 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6785 		return B_UNSUPPORTED;
6786 
6787 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6788 }
6789 
6790 
6791 static status_t
6792 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6793 	int statMask)
6794 {
6795 	struct vnode* vnode = descriptor->u.vnode;
6796 
6797 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6798 
6799 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6800 		return B_READ_ONLY_DEVICE;
6801 
6802 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6803 }
6804 
6805 
6806 static status_t
6807 attr_remove(int fd, const char* name, bool kernel)
6808 {
6809 	struct file_descriptor* descriptor;
6810 	struct vnode* vnode;
6811 	status_t status;
6812 
6813 	if (name == NULL || *name == '\0')
6814 		return B_BAD_VALUE;
6815 
6816 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6817 		kernel));
6818 
6819 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6820 	if (descriptor == NULL)
6821 		return B_FILE_ERROR;
6822 
6823 	if (HAS_FS_CALL(vnode, remove_attr))
6824 		status = FS_CALL(vnode, remove_attr, name);
6825 	else
6826 		status = B_READ_ONLY_DEVICE;
6827 
6828 	put_fd(descriptor);
6829 
6830 	return status;
6831 }
6832 
6833 
6834 static status_t
6835 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6836 	bool kernel)
6837 {
6838 	struct file_descriptor* fromDescriptor;
6839 	struct file_descriptor* toDescriptor;
6840 	struct vnode* fromVnode;
6841 	struct vnode* toVnode;
6842 	status_t status;
6843 
6844 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6845 		|| *toName == '\0')
6846 		return B_BAD_VALUE;
6847 
6848 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6849 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6850 
6851 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6852 	if (fromDescriptor == NULL)
6853 		return B_FILE_ERROR;
6854 
6855 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6856 	if (toDescriptor == NULL) {
6857 		status = B_FILE_ERROR;
6858 		goto err;
6859 	}
6860 
6861 	// are the files on the same volume?
6862 	if (fromVnode->device != toVnode->device) {
6863 		status = B_CROSS_DEVICE_LINK;
6864 		goto err1;
6865 	}
6866 
6867 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6868 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6869 	} else
6870 		status = B_READ_ONLY_DEVICE;
6871 
6872 err1:
6873 	put_fd(toDescriptor);
6874 err:
6875 	put_fd(fromDescriptor);
6876 
6877 	return status;
6878 }
6879 
6880 
6881 static int
6882 index_dir_open(dev_t mountID, bool kernel)
6883 {
6884 	struct fs_mount* mount;
6885 	void* cookie;
6886 
6887 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6888 		kernel));
6889 
6890 	status_t status = get_mount(mountID, &mount);
6891 	if (status != B_OK)
6892 		return status;
6893 
6894 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6895 		status = B_UNSUPPORTED;
6896 		goto error;
6897 	}
6898 
6899 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6900 	if (status != B_OK)
6901 		goto error;
6902 
6903 	// get fd for the index directory
6904 	int fd;
6905 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6906 	if (fd >= 0)
6907 		return fd;
6908 
6909 	// something went wrong
6910 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6911 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6912 
6913 	status = fd;
6914 
6915 error:
6916 	put_mount(mount);
6917 	return status;
6918 }
6919 
6920 
6921 static status_t
6922 index_dir_close(struct file_descriptor* descriptor)
6923 {
6924 	struct fs_mount* mount = descriptor->u.mount;
6925 
6926 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6927 
6928 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6929 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6930 
6931 	return B_OK;
6932 }
6933 
6934 
6935 static void
6936 index_dir_free_fd(struct file_descriptor* descriptor)
6937 {
6938 	struct fs_mount* mount = descriptor->u.mount;
6939 
6940 	if (mount != NULL) {
6941 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6942 		put_mount(mount);
6943 	}
6944 }
6945 
6946 
6947 static status_t
6948 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6949 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6950 {
6951 	struct fs_mount* mount = descriptor->u.mount;
6952 
6953 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6954 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6955 			bufferSize, _count);
6956 	}
6957 
6958 	return B_UNSUPPORTED;
6959 }
6960 
6961 
6962 static status_t
6963 index_dir_rewind(struct file_descriptor* descriptor)
6964 {
6965 	struct fs_mount* mount = descriptor->u.mount;
6966 
6967 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6968 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6969 
6970 	return B_UNSUPPORTED;
6971 }
6972 
6973 
6974 static status_t
6975 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6976 	bool kernel)
6977 {
6978 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6979 		mountID, name, kernel));
6980 
6981 	struct fs_mount* mount;
6982 	status_t status = get_mount(mountID, &mount);
6983 	if (status != B_OK)
6984 		return status;
6985 
6986 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6987 		status = B_READ_ONLY_DEVICE;
6988 		goto out;
6989 	}
6990 
6991 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6992 
6993 out:
6994 	put_mount(mount);
6995 	return status;
6996 }
6997 
6998 
6999 #if 0
7000 static status_t
7001 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7002 {
7003 	struct vnode* vnode = descriptor->u.vnode;
7004 
7005 	// ToDo: currently unused!
7006 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7007 	if (!HAS_FS_CALL(vnode, read_index_stat))
7008 		return B_UNSUPPORTED;
7009 
7010 	return B_UNSUPPORTED;
7011 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7012 }
7013 
7014 
7015 static void
7016 index_free_fd(struct file_descriptor* descriptor)
7017 {
7018 	struct vnode* vnode = descriptor->u.vnode;
7019 
7020 	if (vnode != NULL) {
7021 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7022 		put_vnode(vnode);
7023 	}
7024 }
7025 #endif
7026 
7027 
7028 static status_t
7029 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7030 	bool kernel)
7031 {
7032 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7033 		mountID, name, kernel));
7034 
7035 	struct fs_mount* mount;
7036 	status_t status = get_mount(mountID, &mount);
7037 	if (status != B_OK)
7038 		return status;
7039 
7040 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7041 		status = B_UNSUPPORTED;
7042 		goto out;
7043 	}
7044 
7045 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7046 
7047 out:
7048 	put_mount(mount);
7049 	return status;
7050 }
7051 
7052 
7053 static status_t
7054 index_remove(dev_t mountID, const char* name, bool kernel)
7055 {
7056 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7057 		mountID, name, kernel));
7058 
7059 	struct fs_mount* mount;
7060 	status_t status = get_mount(mountID, &mount);
7061 	if (status != B_OK)
7062 		return status;
7063 
7064 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7065 		status = B_READ_ONLY_DEVICE;
7066 		goto out;
7067 	}
7068 
7069 	status = FS_MOUNT_CALL(mount, remove_index, name);
7070 
7071 out:
7072 	put_mount(mount);
7073 	return status;
7074 }
7075 
7076 
7077 /*!	TODO: the query FS API is still pretty much the same as in R5.
7078 		It would be nice if queries would get some more kernel
7079 		support.
7080 		For example, query parsing should be moved into the kernel.
7081 */
7082 static int
7083 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7084 	int32 token, bool kernel)
7085 {
7086 	struct fs_mount* mount;
7087 	void* cookie;
7088 
7089 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7090 		device, query, kernel));
7091 
7092 	status_t status = get_mount(device, &mount);
7093 	if (status != B_OK)
7094 		return status;
7095 
7096 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7097 		status = B_UNSUPPORTED;
7098 		goto error;
7099 	}
7100 
7101 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7102 		&cookie);
7103 	if (status != B_OK)
7104 		goto error;
7105 
7106 	// get fd for the query
7107 	int fd;
7108 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7109 	if (fd >= 0)
7110 		return fd;
7111 
7112 	status = fd;
7113 
7114 	// something went wrong
7115 	FS_MOUNT_CALL(mount, close_query, cookie);
7116 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7117 
7118 error:
7119 	put_mount(mount);
7120 	return status;
7121 }
7122 
7123 
7124 static status_t
7125 query_close(struct file_descriptor* descriptor)
7126 {
7127 	struct fs_mount* mount = descriptor->u.mount;
7128 
7129 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7130 
7131 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7132 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7133 
7134 	return B_OK;
7135 }
7136 
7137 
7138 static void
7139 query_free_fd(struct file_descriptor* descriptor)
7140 {
7141 	struct fs_mount* mount = descriptor->u.mount;
7142 
7143 	if (mount != NULL) {
7144 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7145 		put_mount(mount);
7146 	}
7147 }
7148 
7149 
7150 static status_t
7151 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7152 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7153 {
7154 	struct fs_mount* mount = descriptor->u.mount;
7155 
7156 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7157 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7158 			bufferSize, _count);
7159 	}
7160 
7161 	return B_UNSUPPORTED;
7162 }
7163 
7164 
7165 static status_t
7166 query_rewind(struct file_descriptor* descriptor)
7167 {
7168 	struct fs_mount* mount = descriptor->u.mount;
7169 
7170 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7171 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7172 
7173 	return B_UNSUPPORTED;
7174 }
7175 
7176 
7177 //	#pragma mark - General File System functions
7178 
7179 
7180 static dev_t
7181 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7182 	const char* args, bool kernel)
7183 {
7184 	struct ::fs_mount* mount;
7185 	status_t status = B_OK;
7186 	fs_volume* volume = NULL;
7187 	int32 layer = 0;
7188 	Vnode* coveredNode = NULL;
7189 
7190 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7191 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7192 
7193 	// The path is always safe; we just have to make sure that fsName is
7194 	// at least superficially valid - we can't make any assumptions about
7195 	// args, though. A NULL fsName is OK if a device was given and the FS
7196 	// is not virtual; we'll get the name from the DDM later.
7197 	if (fsName == NULL) {
7198 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7199 			return B_BAD_VALUE;
7200 	} else if (fsName[0] == '\0')
7201 		return B_BAD_VALUE;
7202 
7203 	RecursiveLocker mountOpLocker(sMountOpLock);
7204 
7205 	// Helper to delete a newly created file device on failure.
7206 	// Not exactly beautiful, but helps to keep the code below cleaner.
7207 	struct FileDeviceDeleter {
7208 		FileDeviceDeleter() : id(-1) {}
7209 		~FileDeviceDeleter()
7210 		{
7211 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7212 		}
7213 
7214 		partition_id id;
7215 	} fileDeviceDeleter;
7216 
7217 	// If the file system is not a "virtual" one, the device argument should
7218 	// point to a real file/device (if given at all).
7219 	// get the partition
7220 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7221 	KPartition* partition = NULL;
7222 	KPath normalizedDevice;
7223 	bool newlyCreatedFileDevice = false;
7224 
7225 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7226 		// normalize the device path
7227 		status = normalizedDevice.SetTo(device, true);
7228 		if (status != B_OK)
7229 			return status;
7230 
7231 		// get a corresponding partition from the DDM
7232 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7233 		if (partition == NULL) {
7234 			// Partition not found: this either means the user supplied
7235 			// an invalid path, or the path refers to an image file. We try
7236 			// to let the DDM create a file device for the path.
7237 			partition_id deviceID = ddm->CreateFileDevice(
7238 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7239 			if (deviceID >= 0) {
7240 				partition = ddm->RegisterPartition(deviceID);
7241 				if (newlyCreatedFileDevice)
7242 					fileDeviceDeleter.id = deviceID;
7243 			}
7244 		}
7245 
7246 		if (!partition) {
7247 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7248 				normalizedDevice.Path()));
7249 			return B_ENTRY_NOT_FOUND;
7250 		}
7251 
7252 		device = normalizedDevice.Path();
7253 			// correct path to file device
7254 	}
7255 	PartitionRegistrar partitionRegistrar(partition, true);
7256 
7257 	// Write lock the partition's device. For the time being, we keep the lock
7258 	// until we're done mounting -- not nice, but it ensures that no one
7259 	// interferes.
7260 	// TODO: Just mark the partition busy while mounting!
7261 	KDiskDevice* diskDevice = NULL;
7262 	if (partition) {
7263 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7264 		if (!diskDevice) {
7265 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7266 			return B_ERROR;
7267 		}
7268 	}
7269 
7270 	DeviceWriteLocker writeLocker(diskDevice, true);
7271 		// this takes over the write lock acquired before
7272 
7273 	if (partition != NULL) {
7274 		// make sure that the partition is not busy
7275 		if (partition->IsBusy()) {
7276 			TRACE(("fs_mount(): Partition is busy.\n"));
7277 			return B_BUSY;
7278 		}
7279 
7280 		// if no FS name had been supplied, we get it from the partition
7281 		if (fsName == NULL) {
7282 			KDiskSystem* diskSystem = partition->DiskSystem();
7283 			if (!diskSystem) {
7284 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7285 					"recognize it.\n"));
7286 				return B_BAD_VALUE;
7287 			}
7288 
7289 			if (!diskSystem->IsFileSystem()) {
7290 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7291 					"partitioning system.\n"));
7292 				return B_BAD_VALUE;
7293 			}
7294 
7295 			// The disk system name will not change, and the KDiskSystem
7296 			// object will not go away while the disk device is locked (and
7297 			// the partition has a reference to it), so this is safe.
7298 			fsName = diskSystem->Name();
7299 		}
7300 	}
7301 
7302 	mount = new(std::nothrow) (struct ::fs_mount);
7303 	if (mount == NULL)
7304 		return B_NO_MEMORY;
7305 
7306 	mount->device_name = strdup(device);
7307 		// "device" can be NULL
7308 
7309 	status = mount->entry_cache.Init();
7310 	if (status != B_OK)
7311 		goto err1;
7312 
7313 	// initialize structure
7314 	mount->id = sNextMountID++;
7315 	mount->partition = NULL;
7316 	mount->root_vnode = NULL;
7317 	mount->covers_vnode = NULL;
7318 	mount->unmounting = false;
7319 	mount->owns_file_device = false;
7320 	mount->volume = NULL;
7321 
7322 	// build up the volume(s)
7323 	while (true) {
7324 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7325 		if (layerFSName == NULL) {
7326 			if (layer == 0) {
7327 				status = B_NO_MEMORY;
7328 				goto err1;
7329 			}
7330 
7331 			break;
7332 		}
7333 		MemoryDeleter layerFSNameDeleter(layerFSName);
7334 
7335 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7336 		if (volume == NULL) {
7337 			status = B_NO_MEMORY;
7338 			goto err1;
7339 		}
7340 
7341 		volume->id = mount->id;
7342 		volume->partition = partition != NULL ? partition->ID() : -1;
7343 		volume->layer = layer++;
7344 		volume->private_volume = NULL;
7345 		volume->ops = NULL;
7346 		volume->sub_volume = NULL;
7347 		volume->super_volume = NULL;
7348 		volume->file_system = NULL;
7349 		volume->file_system_name = NULL;
7350 
7351 		volume->file_system_name = get_file_system_name(layerFSName);
7352 		if (volume->file_system_name == NULL) {
7353 			status = B_NO_MEMORY;
7354 			free(volume);
7355 			goto err1;
7356 		}
7357 
7358 		volume->file_system = get_file_system(layerFSName);
7359 		if (volume->file_system == NULL) {
7360 			status = B_DEVICE_NOT_FOUND;
7361 			free(volume->file_system_name);
7362 			free(volume);
7363 			goto err1;
7364 		}
7365 
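		// chain the new volume into the layer stack; mount->volume always
		// points to the topmost layer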
7366 		if (mount->volume == NULL)
7367 			mount->volume = volume;
7368 		else {
7369 			volume->super_volume = mount->volume;
7370 			mount->volume->sub_volume = volume;
7371 			mount->volume = volume;
7372 		}
7373 	}
7374 
7375 	// insert mount struct into list before we call FS's mount() function
7376 	// so that vnodes can be created for this mount
7377 	mutex_lock(&sMountMutex);
7378 	hash_insert(sMountsTable, mount);
7379 	mutex_unlock(&sMountMutex);
7380 
7381 	ino_t rootID;
7382 
7383 	if (!sRoot) {
7384 		// we haven't mounted anything yet
7385 		if (strcmp(path, "/") != 0) {
7386 			status = B_ERROR;
7387 			goto err2;
7388 		}
7389 
7390 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7391 			args, &rootID);
7392 		if (status != 0)
7393 			goto err2;
7394 	} else {
7395 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7396 		if (status != B_OK)
7397 			goto err2;
7398 
7399 		mount->covers_vnode = coveredNode;
7400 
7401 		// make sure coveredNode is a directory
7402 		if (!S_ISDIR(coveredNode->Type())) {
7403 			status = B_NOT_A_DIRECTORY;
7404 			goto err3;
7405 		}
7406 
7407 		if (coveredNode->IsCovered()) {
7408 			// this is already a covered vnode
7409 			status = B_BUSY;
7410 			goto err3;
7411 		}
7412 
7413 		// mount the volume and all of its layers
7414 		fs_volume* volume = mount->volume;
7415 		while (volume) {
7416 			status = volume->file_system->mount(volume, device, flags, args,
7417 				&rootID);
7418 			if (status != B_OK) {
7419 				if (volume->sub_volume)
7420 					goto err4;
7421 				goto err3;
7422 			}
7423 
7424 			volume = volume->super_volume;
7425 		}
7426 
7427 		volume = mount->volume;
7428 		while (volume) {
7429 			if (volume->ops->all_layers_mounted != NULL)
7430 				volume->ops->all_layers_mounted(volume);
7431 			volume = volume->super_volume;
7432 		}
7433 	}
7434 
7435 	// the root node is supposed to be owned by the file system - it must
7436 	// exist at this point
7437 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7438 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7439 		panic("fs_mount: file system does not own its root node!\n");
7440 		status = B_ERROR;
7441 		goto err4;
7442 	}
7443 
7444 	// set up the links between the root vnode and the vnode it covers
7445 	rw_lock_write_lock(&sVnodeLock);
7446 	if (coveredNode != NULL) {
7447 		if (coveredNode->IsCovered()) {
7448 			// the vnode is covered now
7449 			status = B_BUSY;
7450 			rw_lock_write_unlock(&sVnodeLock);
7451 			goto err4;
7452 		}
7453 
7454 		mount->root_vnode->covers = coveredNode;
7455 		mount->root_vnode->SetCovering(true);
7456 
7457 		coveredNode->covered_by = mount->root_vnode;
7458 		coveredNode->SetCovered(true);
7459 	}
7460 	rw_lock_write_unlock(&sVnodeLock);
7461 
7462 	if (!sRoot) {
7463 		sRoot = mount->root_vnode;
7464 		mutex_lock(&sIOContextRootLock);
7465 		get_current_io_context(true)->root = sRoot;
7466 		mutex_unlock(&sIOContextRootLock);
7467 		inc_vnode_ref_count(sRoot);
7468 	}
7469 
7470 	// supply the partition (if any) with the mount cookie and mark it mounted
7471 	if (partition) {
7472 		partition->SetMountCookie(mount->volume->private_volume);
7473 		partition->SetVolumeID(mount->id);
7474 
7475 		// keep a partition reference as long as the partition is mounted
7476 		partitionRegistrar.Detach();
7477 		mount->partition = partition;
7478 		mount->owns_file_device = newlyCreatedFileDevice;
7479 		fileDeviceDeleter.id = -1;
7480 	}
7481 
7482 	notify_mount(mount->id,
7483 		coveredNode != NULL ? coveredNode->device : -1,
7484 		coveredNode != NULL ? coveredNode->id : -1);
7485 
7486 	return mount->id;
7487 
7488 err4:
7489 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7490 err3:
7491 	if (coveredNode != NULL)
7492 		put_vnode(coveredNode);
7493 err2:
7494 	mutex_lock(&sMountMutex);
7495 	hash_remove(sMountsTable, mount);
7496 	mutex_unlock(&sMountMutex);
7497 err1:
7498 	delete mount;
7499 
7500 	return status;
7501 }
7502 
7503 
7504 static status_t
7505 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7506 {
7507 	struct fs_mount* mount;
7508 	status_t err;
7509 
7510 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7511 		mountID, kernel));
7512 
7513 	struct vnode* pathVnode = NULL;
7514 	if (path != NULL) {
7515 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7516 		if (err != B_OK)
7517 			return B_ENTRY_NOT_FOUND;
7518 	}
7519 
7520 	RecursiveLocker mountOpLocker(sMountOpLock);
7521 
7522 	// This lock is not strictly necessary, but it keeps the ASSERT in
7523 	// find_mount() working in KDEBUG builds.
7524 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7525 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7526 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7527 	if (mount == NULL) {
7528 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7529 			pathVnode);
7530 	}
7531 
7532 	if (path != NULL) {
7533 		put_vnode(pathVnode);
7534 
7535 		if (mount->root_vnode != pathVnode) {
7536 			// not a mount point
7537 			return B_BAD_VALUE;
7538 		}
7539 	}
7540 
7541 	// if the volume is associated with a partition, lock the device of the
7542 	// partition as long as we are unmounting
7543 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7544 	KPartition* partition = mount->partition;
7545 	KDiskDevice* diskDevice = NULL;
7546 	if (partition != NULL) {
7547 		if (partition->Device() == NULL) {
7548 			dprintf("fs_unmount(): There is no device!\n");
7549 			return B_ERROR;
7550 		}
7551 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7552 		if (!diskDevice) {
7553 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7554 			return B_ERROR;
7555 		}
7556 	}
7557 	DeviceWriteLocker writeLocker(diskDevice, true);
7558 
7559 	// make sure that the partition is not busy
7560 	if (partition != NULL) {
7561 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7562 			TRACE(("fs_unmount(): Partition is busy.\n"));
7563 			return B_BUSY;
7564 		}
7565 	}
7566 
7567 	// grab the vnode master mutex to keep someone from creating
7568 	// a vnode while we're figuring out if we can continue
7569 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7570 
7571 	bool disconnectedDescriptors = false;
7572 
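	// With B_FORCE_UNMOUNT we disconnect all open file descriptors and
	// retry until none of the vnodes is in use anymore.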
7573 	while (true) {
7574 		bool busy = false;
7575 
7576 		// cycle through the list of vnodes associated with this mount and
7577 		// make sure none of them is busy or still referenced
7578 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7579 		while (struct vnode* vnode = iterator.Next()) {
7580 			if (vnode->IsBusy()) {
7581 				busy = true;
7582 				break;
7583 			}
7584 
7585 			// check the vnode's ref count -- subtract additional references for
7586 			// covering
7587 			int32 refCount = vnode->ref_count;
7588 			if (vnode->covers != NULL)
7589 				refCount--;
7590 			if (vnode->covered_by != NULL)
7591 				refCount--;
7592 
7593 			if (refCount != 0) {
7594 				// there are still vnodes in use on this mount, so we cannot
7595 				// unmount yet
7596 				busy = true;
7597 				break;
7598 			}
7599 		}
7600 
7601 		if (!busy)
7602 			break;
7603 
7604 		if ((flags & B_FORCE_UNMOUNT) == 0)
7605 			return B_BUSY;
7606 
7607 		if (disconnectedDescriptors) {
7608 			// wait a bit until the last access is finished, and then try again
7609 			vnodesWriteLocker.Unlock();
7610 			snooze(100000);
7611 			// TODO: if there is some kind of bug that prevents the ref counts
7612 			// from getting back to zero, this will fall into an endless loop...
7613 			vnodesWriteLocker.Lock();
7614 			continue;
7615 		}
7616 
7617 		// the file system is still busy - but we're forced to unmount it,
7618 		// so let's disconnect all open file descriptors
7619 
7620 		mount->unmounting = true;
7621 			// prevent new vnodes from being created
7622 
7623 		vnodesWriteLocker.Unlock();
7624 
7625 		disconnect_mount_or_vnode_fds(mount, NULL);
7626 		disconnectedDescriptors = true;
7627 
7628 		vnodesWriteLocker.Lock();
7629 	}
7630 
7631 	// We can safely continue. Mark all of the vnodes busy and this mount
7632 	// structure in unmounting state. Also undo the vnode covers/covered_by
7633 	// links.
7634 	mount->unmounting = true;
7635 
7636 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7637 	while (struct vnode* vnode = iterator.Next()) {
7638 		// Remove all covers/covered_by links from other mounts' nodes to this
7639 		// vnode and adjust the node ref count accordingly. We will release the
7640 		// references to the external vnodes below.
7641 		if (Vnode* coveredNode = vnode->covers) {
7642 			if (Vnode* coveringNode = vnode->covered_by) {
7643 				// We have both covered and covering vnodes, so just remove us
7644 				// from the chain.
7645 				coveredNode->covered_by = coveringNode;
7646 				coveringNode->covers = coveredNode;
7647 				vnode->ref_count -= 2;
7648 
7649 				vnode->covered_by = NULL;
7650 				vnode->covers = NULL;
7651 				vnode->SetCovering(false);
7652 				vnode->SetCovered(false);
7653 			} else {
7654 				// We only have a covered vnode. Remove its link to us.
7655 				coveredNode->covered_by = NULL;
7656 				coveredNode->SetCovered(false);
7657 				vnode->ref_count--;
7658 
7659 				// If the other node is an external vnode, we keep its link
7660 				// around so we can put the reference later on. Otherwise
7661 				// we get rid of it right now.
7662 				if (coveredNode->mount == mount) {
7663 					vnode->covers = NULL;
7664 					coveredNode->ref_count--;
7665 				}
7666 			}
7667 		} else if (Vnode* coveringNode = vnode->covered_by) {
7668 			// We only have a covering vnode. Remove its link to us.
7669 			coveringNode->covers = NULL;
7670 			coveringNode->SetCovering(false);
7671 			vnode->ref_count--;
7672 
7673 			// If the other node is an external vnode, we keep its link
7674 			// around so we can put the reference later on. Otherwise
7675 			// we get rid of it right now.
7676 			if (coveringNode->mount == mount) {
7677 				vnode->covered_by = NULL;
7678 				coveringNode->ref_count--;
7679 			}
7680 		}
7681 
7682 		vnode->SetBusy(true);
7683 		vnode_to_be_freed(vnode);
7684 	}
7685 
7686 	vnodesWriteLocker.Unlock();
7687 
7688 	// Free all vnodes associated with this mount.
7689 	// They will be removed from the mount list by free_vnode(), so
7690 	// we don't have to do that here.
7691 	while (struct vnode* vnode = mount->vnodes.Head()) {
7692 		// Put the references to external covered/covering vnodes we kept above.
7693 		if (Vnode* coveredNode = vnode->covers)
7694 			put_vnode(coveredNode);
7695 		if (Vnode* coveringNode = vnode->covered_by)
7696 			put_vnode(coveringNode);
7697 
7698 		free_vnode(vnode, false);
7699 	}
7700 
7701 	// remove the mount structure from the hash table
7702 	mutex_lock(&sMountMutex);
7703 	hash_remove(sMountsTable, mount);
7704 	mutex_unlock(&sMountMutex);
7705 
7706 	mountOpLocker.Unlock();
7707 
7708 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7709 	notify_unmount(mount->id);
7710 
7711 	// dereference the partition and mark it unmounted
7712 	if (partition) {
7713 		partition->SetVolumeID(-1);
7714 		partition->SetMountCookie(NULL);
7715 
7716 		if (mount->owns_file_device)
7717 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7718 		partition->Unregister();
7719 	}
7720 
7721 	delete mount;
7722 	return B_OK;
7723 }
7724 
7725 
7726 static status_t
7727 fs_sync(dev_t device)
7728 {
7729 	struct fs_mount* mount;
7730 	status_t status = get_mount(device, &mount);
7731 	if (status != B_OK)
7732 		return status;
7733 
7734 	struct vnode marker;
7735 	memset(&marker, 0, sizeof(marker));
7736 	marker.SetBusy(true);
7737 	marker.SetRemoved(true);
7738 
7739 	// First, synchronize all file caches
7740 
7741 	while (true) {
7742 		WriteLocker locker(sVnodeLock);
7743 			// Note: That's the easy way, which is probably OK for sync(),
7744 			// since it's a relatively rare call and doesn't need to allow for
7745 			// a lot of concurrency. Using a read lock would be possible, but
7746 			// also more involved, since we would have to lock the individual
7747 			// nodes and take care of the locking order, which we might not
7748 			// want to do while holding fs_mount::rlock.
7749 
7750 		// synchronize access to vnode list
7751 		recursive_lock_lock(&mount->rlock);
7752 
7753 		struct vnode* vnode;
7754 		if (!marker.IsRemoved()) {
7755 			vnode = mount->vnodes.GetNext(&marker);
7756 			mount->vnodes.Remove(&marker);
7757 			marker.SetRemoved(true);
7758 		} else
7759 			vnode = mount->vnodes.First();
7760 
7761 		while (vnode != NULL && (vnode->cache == NULL
7762 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7763 			// TODO: we could track writes (and writable mapped vnodes)
7764 			//	and have a simple flag that we could test for here
7765 			vnode = mount->vnodes.GetNext(vnode);
7766 		}
7767 
7768 		if (vnode != NULL) {
7769 			// insert marker vnode again
7770 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7771 			marker.SetRemoved(false);
7772 		}
7773 
7774 		recursive_lock_unlock(&mount->rlock);
7775 
7776 		if (vnode == NULL)
7777 			break;
7778 
7779 		vnode = lookup_vnode(mount->id, vnode->id);
7780 		if (vnode == NULL || vnode->IsBusy())
7781 			continue;
7782 
7783 		if (vnode->ref_count == 0) {
7784 			// this vnode has been unused before
7785 			vnode_used(vnode);
7786 		}
7787 		inc_vnode_ref_count(vnode);
7788 
7789 		locker.Unlock();
7790 
7791 		if (vnode->cache != NULL && !vnode->IsRemoved())
7792 			vnode->cache->WriteModified();
7793 
7794 		put_vnode(vnode);
7795 	}
7796 
7797 	// And then, let the file systems do their synchronizing work
7798 
7799 	if (HAS_FS_MOUNT_CALL(mount, sync))
7800 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7801 
7802 	put_mount(mount);
7803 	return status;
7804 }
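
/*	A distilled sketch of the marker technique used by fs_sync() above, shown
	on a std::list stand-in instead of fs_mount::vnodes (illustrative only --
	the types, names, and locking are assumptions, not kernel code). The
	marker element keeps our list position valid while the lock is dropped
	for the expensive per-node work:

	\code
	#include <iterator>
	#include <list>
	#include <mutex>

	struct Node { bool isMarker; int id; };

	void
	for_each_unlocked(std::list<Node>& nodes, std::mutex& lock)
	{
		Node marker = { true, -1 };
		bool markerInserted = false;
		std::list<Node>::iterator markerPosition;

		while (true) {
			std::unique_lock<std::mutex> locker(lock);

			std::list<Node>::iterator it;
			if (markerInserted) {
				// resume after the marker, and take the marker out again
				it = std::next(markerPosition);
				nodes.erase(markerPosition);
				markerInserted = false;
			} else
				it = nodes.begin();

			// skip markers (fs_sync() also skips busy/removed nodes here)
			while (it != nodes.end() && it->isMarker)
				++it;

			if (it == nodes.end())
				break;

			// remember our position before dropping the lock
			markerPosition = nodes.insert(std::next(it), marker);
			markerInserted = true;

			Node current = *it;
			locker.unlock();

			// ... expensive per-node work happens here, without the lock ...
			(void)current;
		}
	}
	\endcode
*/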
7805 
7806 
7807 static status_t
7808 fs_read_info(dev_t device, struct fs_info* info)
7809 {
7810 	struct fs_mount* mount;
7811 	status_t status = get_mount(device, &mount);
7812 	if (status != B_OK)
7813 		return status;
7814 
7815 	memset(info, 0, sizeof(struct fs_info));
7816 
7817 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7818 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7819 
7820 	// fill in info the file system doesn't (have to) know about
7821 	if (status == B_OK) {
7822 		info->dev = mount->id;
7823 		info->root = mount->root_vnode->id;
7824 
7825 		fs_volume* volume = mount->volume;
7826 		while (volume->super_volume != NULL)
7827 			volume = volume->super_volume;
7828 
7829 		strlcpy(info->fsh_name, volume->file_system_name,
7830 			sizeof(info->fsh_name));
7831 		if (mount->device_name != NULL) {
7832 			strlcpy(info->device_name, mount->device_name,
7833 				sizeof(info->device_name));
7834 		}
7835 	}
7836 
7837 	// if the call is not supported by the file system, there are still
7838 	// the parts that we filled out ourselves
7839 
7840 	put_mount(mount);
7841 	return status;
7842 }
7843 
7844 
7845 static status_t
7846 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7847 {
7848 	struct fs_mount* mount;
7849 	status_t status = get_mount(device, &mount);
7850 	if (status != B_OK)
7851 		return status;
7852 
7853 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7854 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7855 	else
7856 		status = B_READ_ONLY_DEVICE;
7857 
7858 	put_mount(mount);
7859 	return status;
7860 }
7861 
7862 
7863 static dev_t
7864 fs_next_device(int32* _cookie)
7865 {
7866 	struct fs_mount* mount = NULL;
7867 	dev_t device = *_cookie;
7868 
7869 	mutex_lock(&sMountMutex);
7870 
7871 	// Since device IDs are assigned sequentially, this algorithm
7872 	// works well enough. It makes sure that the device list
7873 	// returned is sorted, and that no device is skipped when an
7874 	// already visited device got unmounted.
7875 
7876 	while (device < sNextMountID) {
7877 		mount = find_mount(device++);
7878 		if (mount != NULL && mount->volume->private_volume != NULL)
7879 			break;
7880 	}
7881 
7882 	*_cookie = device;
7883 
7884 	if (mount != NULL)
7885 		device = mount->id;
7886 	else
7887 		device = B_BAD_VALUE;
7888 
7889 	mutex_unlock(&sMountMutex);
7890 
7891 	return device;
7892 }
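
/*	Illustrative usage of the cookie protocol above (a sketch, not an
	existing caller): the cookie starts at 0, and B_BAD_VALUE signals that
	all volumes have been visited.

	\code
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("volume %" B_PRIdDEV " is mounted\n", device);
	\endcode
*/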
7893 
7894 
7895 ssize_t
7896 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7897 	void *buffer, size_t readBytes)
7898 {
7899 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7900 	if (attrFD < 0)
7901 		return attrFD;
7902 
7903 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7904 
7905 	_kern_close(attrFD);
7906 
7907 	return bytesRead;
7908 }
7909 
7910 
7911 static status_t
7912 get_cwd(char* buffer, size_t size, bool kernel)
7913 {
7914 	// Get current working directory from io context
7915 	struct io_context* context = get_current_io_context(kernel);
7916 	status_t status;
7917 
7918 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
7919 
7920 	mutex_lock(&context->io_mutex);
7921 
7922 	struct vnode* vnode = context->cwd;
7923 	if (vnode)
7924 		inc_vnode_ref_count(vnode);
7925 
7926 	mutex_unlock(&context->io_mutex);
7927 
7928 	if (vnode) {
7929 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7930 		put_vnode(vnode);
7931 	} else
7932 		status = B_ERROR;
7933 
7934 	return status;
7935 }
7936 
7937 
7938 static status_t
7939 set_cwd(int fd, char* path, bool kernel)
7940 {
7941 	struct io_context* context;
7942 	struct vnode* vnode = NULL;
7943 	struct vnode* oldDirectory;
7944 	status_t status;
7945 
7946 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7947 
7948 	// Get vnode for passed path, and bail if it failed
7949 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7950 	if (status < 0)
7951 		return status;
7952 
7953 	if (!S_ISDIR(vnode->Type())) {
7954 		// nope, can't cwd to here
7955 		status = B_NOT_A_DIRECTORY;
7956 		goto err;
7957 	}
7958 
7959 	// We need to have the permission to enter the directory, too
7960 	if (HAS_FS_CALL(vnode, access)) {
7961 		status = FS_CALL(vnode, access, X_OK);
7962 		if (status != B_OK)
7963 			goto err;
7964 	}
7965 
7966 	// Get current io context and lock
7967 	context = get_current_io_context(kernel);
7968 	mutex_lock(&context->io_mutex);
7969 
7970 	// save the old current working directory first
7971 	oldDirectory = context->cwd;
7972 	context->cwd = vnode;
7973 
7974 	mutex_unlock(&context->io_mutex);
7975 
7976 	if (oldDirectory)
7977 		put_vnode(oldDirectory);
7978 
7979 	return B_NO_ERROR;
7980 
7981 err:
7982 	put_vnode(vnode);
7983 	return status;
7984 }
7985 
7986 
7987 //	#pragma mark - kernel mirrored syscalls
7988 
7989 
7990 dev_t
7991 _kern_mount(const char* path, const char* device, const char* fsName,
7992 	uint32 flags, const char* args, size_t argsLength)
7993 {
7994 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7995 	if (pathBuffer.InitCheck() != B_OK)
7996 		return B_NO_MEMORY;
7997 
7998 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7999 }
8000 
8001 
8002 status_t
8003 _kern_unmount(const char* path, uint32 flags)
8004 {
8005 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8006 	if (pathBuffer.InitCheck() != B_OK)
8007 		return B_NO_MEMORY;
8008 
8009 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8010 }
8011 
8012 
8013 status_t
8014 _kern_read_fs_info(dev_t device, struct fs_info* info)
8015 {
8016 	if (info == NULL)
8017 		return B_BAD_VALUE;
8018 
8019 	return fs_read_info(device, info);
8020 }
8021 
8022 
8023 status_t
8024 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8025 {
8026 	if (info == NULL)
8027 		return B_BAD_VALUE;
8028 
8029 	return fs_write_info(device, info, mask);
8030 }
8031 
8032 
8033 status_t
8034 _kern_sync(void)
8035 {
8036 	// Note: _kern_sync() is also called from _user_sync()
8037 	int32 cookie = 0;
8038 	dev_t device;
8039 	while ((device = next_dev(&cookie)) >= 0) {
8040 		status_t status = fs_sync(device);
8041 		if (status != B_OK && status != B_BAD_VALUE) {
8042 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8043 				strerror(status));
8044 		}
8045 	}
8046 
8047 	return B_OK;
8048 }
8049 
8050 
8051 dev_t
8052 _kern_next_device(int32* _cookie)
8053 {
8054 	return fs_next_device(_cookie);
8055 }
8056 
8057 
8058 status_t
8059 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8060 	size_t infoSize)
8061 {
8062 	if (infoSize != sizeof(fd_info))
8063 		return B_BAD_VALUE;
8064 
8065 	// get the team
8066 	Team* team = Team::Get(teamID);
8067 	if (team == NULL)
8068 		return B_BAD_TEAM_ID;
8069 	BReference<Team> teamReference(team, true);
8070 
8071 	// now that we have a team reference, its I/O context won't go away
8072 	io_context* context = team->io_context;
8073 	MutexLocker contextLocker(context->io_mutex);
8074 
8075 	uint32 slot = *_cookie;
8076 
8077 	struct file_descriptor* descriptor;
8078 	while (slot < context->table_size
8079 		&& (descriptor = context->fds[slot]) == NULL) {
8080 		slot++;
8081 	}
8082 
8083 	if (slot >= context->table_size)
8084 		return B_ENTRY_NOT_FOUND;
8085 
8086 	info->number = slot;
8087 	info->open_mode = descriptor->open_mode;
8088 
8089 	struct vnode* vnode = fd_vnode(descriptor);
8090 	if (vnode != NULL) {
8091 		info->device = vnode->device;
8092 		info->node = vnode->id;
8093 	} else if (descriptor->u.mount != NULL) {
8094 		info->device = descriptor->u.mount->id;
8095 		info->node = -1;
8096 	}
8097 
8098 	*_cookie = slot + 1;
8099 	return B_OK;
8100 }
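
/*	Sketch of how a kernel-side caller could walk a team's open files with
	the cookie protocol above (illustrative only; "teamID" stands for
	whatever team is being inspected):

	\code
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %" B_PRId32 ": device %" B_PRIdDEV ", node %" B_PRIdINO
			"\n", info.number, info.device, info.node);
	}
	\endcode
*/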
8101 
8102 
8103 int
8104 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8105 	int perms)
8106 {
8107 	if ((openMode & O_CREAT) != 0) {
8108 		return file_create_entry_ref(device, inode, name, openMode, perms,
8109 			true);
8110 	}
8111 
8112 	return file_open_entry_ref(device, inode, name, openMode, true);
8113 }
8114 
8115 
8116 /*!	\brief Opens a node specified by a FD + path pair.
8117 
8118 	At least one of \a fd and \a path must be specified.
8119 	If only \a fd is given, the function opens the node identified by this
8120 	FD. If only a path is given, this path is opened. If both are given and
8121 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8122 	of the directory (!) identified by \a fd.
8123 
8124 	\param fd The FD. May be < 0.
8125 	\param path The absolute or relative path. May be \c NULL.
8126 	\param openMode The open mode.
8127 	\return A FD referring to the newly opened node, or an error code,
8128 			if an error occurs.
8129 */
8130 int
8131 _kern_open(int fd, const char* path, int openMode, int perms)
8132 {
8133 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8134 	if (pathBuffer.InitCheck() != B_OK)
8135 		return B_NO_MEMORY;
8136 
8137 	if (openMode & O_CREAT)
8138 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8139 
8140 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8141 }
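
/*	Illustrative kernel-side usage (the path is a made-up example): create a
	file if necessary, append to it, and close it again. As with the POSIX
	open(), the return value is either a valid FD or a negative error code.

	\code
	int fd = _kern_open(-1, "/var/log/example.log",
		O_WRONLY | O_CREAT | O_APPEND, 0644);
	if (fd >= 0) {
		_kern_write(fd, -1, "hello\n", 6);
		_kern_close(fd);
	}
	\endcode
*/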
8142 
8143 
8144 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8145 
8146 	The supplied name may be \c NULL, in which case the directory identified
8147 	by \a device and \a inode will be opened. Otherwise \a device and
8148 	\a inode identify the parent directory of the directory to be opened
8149 	and \a name its entry name.
8150 
8151 	\param device If \a name is specified the ID of the device the parent
8152 		   directory of the directory to be opened resides on, otherwise
8153 		   the device of the directory itself.
8154 	\param inode If \a name is specified the node ID of the parent
8155 		   directory of the directory to be opened, otherwise the node ID of
8156 		   the directory itself.
8157 	\param name The entry name of the directory to be opened. If \c NULL,
8158 		   the \a device + \a inode pair identify the node to be opened.
8159 	\return The FD of the newly opened directory or an error code, if
8160 			something went wrong.
8161 */
8162 int
8163 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8164 {
8165 	return dir_open_entry_ref(device, inode, name, true);
8166 }
8167 
8168 
8169 /*!	\brief Opens a directory specified by a FD + path pair.
8170 
8171 	At least one of \a fd and \a path must be specified.
8172 	If only \a fd is given, the function opens the directory identified by this
8173 	FD. If only a path is given, this path is opened. If both are given and
8174 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8175 	of the directory (!) identified by \a fd.
8176 
8177 	\param fd The FD. May be < 0.
8178 	\param path The absolute or relative path. May be \c NULL.
8179 	\return A FD referring to the newly opened directory, or an error code,
8180 			if an error occurs.
8181 */
8182 int
8183 _kern_open_dir(int fd, const char* path)
8184 {
8185 	if (path == NULL)
8186 		return dir_open(fd, NULL, true);
8187 
8188 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8189 	if (pathBuffer.InitCheck() != B_OK)
8190 		return B_NO_MEMORY;
8191 
8192 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8193 }
8194 
8195 
8196 status_t
8197 _kern_fcntl(int fd, int op, size_t argument)
8198 {
8199 	return common_fcntl(fd, op, argument, true);
8200 }
8201 
8202 
8203 status_t
8204 _kern_fsync(int fd)
8205 {
8206 	return common_sync(fd, true);
8207 }
8208 
8209 
8210 status_t
8211 _kern_lock_node(int fd)
8212 {
8213 	return common_lock_node(fd, true);
8214 }
8215 
8216 
8217 status_t
8218 _kern_unlock_node(int fd)
8219 {
8220 	return common_unlock_node(fd, true);
8221 }
8222 
8223 
8224 status_t
8225 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8226 	int perms)
8227 {
8228 	return dir_create_entry_ref(device, inode, name, perms, true);
8229 }
8230 
8231 
8232 /*!	\brief Creates a directory specified by a FD + path pair.
8233 
8234 	\a path must always be specified (it contains the name of the new directory
8235 	at least). If only a path is given, this path identifies the location at
8236 	which the directory shall be created. If both \a fd and \a path are given
8237 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8238 	of the directory (!) identified by \a fd.
8239 
8240 	\param fd The FD. May be < 0.
8241 	\param path The absolute or relative path. Must not be \c NULL.
8242 	\param perms The access permissions the new directory shall have.
8243 	\return \c B_OK, if the directory has been created successfully, another
8244 			error code otherwise.
8245 */
8246 status_t
8247 _kern_create_dir(int fd, const char* path, int perms)
8248 {
8249 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8250 	if (pathBuffer.InitCheck() != B_OK)
8251 		return B_NO_MEMORY;
8252 
8253 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8254 }
8255 
8256 
8257 status_t
8258 _kern_remove_dir(int fd, const char* path)
8259 {
8260 	if (path) {
8261 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8262 		if (pathBuffer.InitCheck() != B_OK)
8263 			return B_NO_MEMORY;
8264 
8265 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8266 	}
8267 
8268 	return dir_remove(fd, NULL, true);
8269 }
8270 
8271 
8272 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8273 
8274 	At least one of \a fd and \a path must be specified.
8275 	If only \a fd is given, the symlink to be read is the node identified
8276 	by this FD. If only a path is given, this path identifies the
8277 	symlink to be read. If both are given and the path is absolute, \a fd is
8278 	ignored; a relative path is reckoned off of the directory (!) identified
8279 	by \a fd.
8280 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8281 	will still be updated to reflect the required buffer size.
8282 
8283 	\param fd The FD. May be < 0.
8284 	\param path The absolute or relative path. May be \c NULL.
8285 	\param buffer The buffer into which the contents of the symlink shall be
8286 		   written.
8287 	\param _bufferSize A pointer to the size of the supplied buffer.
8288 	\return The length of the link on success or an appropriate error code
8289 	\return The length of the link on success or an appropriate error code.
8290 status_t
8291 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8292 {
8293 	if (path) {
8294 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8295 		if (pathBuffer.InitCheck() != B_OK)
8296 			return B_NO_MEMORY;
8297 
8298 		return common_read_link(fd, pathBuffer.LockBuffer(),
8299 			buffer, _bufferSize, true);
8300 	}
8301 
8302 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8303 }
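
/*	Sketch of the B_BUFFER_OVERFLOW contract documented above: because
	\a _bufferSize is updated even on failure, a caller may retry with a
	buffer of the reported size ("/bin/sh" is just an example path):

	\code
	char small[4];
	size_t size = sizeof(small);
	status_t status = _kern_read_link(-1, "/bin/sh", small, &size);
	if (status == B_BUFFER_OVERFLOW) {
		char* buffer = (char*)malloc(size);
			// size now holds the required buffer size
		if (buffer != NULL) {
			status = _kern_read_link(-1, "/bin/sh", buffer, &size);
			free(buffer);
		}
	}
	\endcode
*/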
8304 
8305 
8306 /*!	\brief Creates a symlink specified by a FD + path pair.
8307 
8308 	\a path must always be specified (it contains the name of the new symlink
8309 	at least). If only a path is given, this path identifies the location at
8310 	which the symlink shall be created. If both \a fd and \a path are given and
8311 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8312 	of the directory (!) identified by \a fd.
8313 
8314 	\param fd The FD. May be < 0.
8315 	\param path The absolute or relative path. Must not be \c NULL.
8316 	\param mode The access permissions the new symlink shall have.
8317 	\return \c B_OK, if the symlink has been created successfully, another
8318 			error code otherwise.
8319 */
8320 status_t
8321 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8322 {
8323 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8324 	if (pathBuffer.InitCheck() != B_OK)
8325 		return B_NO_MEMORY;
8326 
8327 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8328 		toPath, mode, true);
8329 }
8330 
8331 
8332 status_t
8333 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8334 	bool traverseLeafLink)
8335 {
8336 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8337 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8338 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8339 		return B_NO_MEMORY;
8340 
8341 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8342 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8343 }
8344 
8345 
8346 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8347 
8348 	\a path must always be specified (it contains at least the name of the entry
8349 	to be deleted). If only a path is given, this path identifies the entry
8350 	directly. If both \a fd and \a path are given and the path is absolute,
8351 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8352 	identified by \a fd.
8353 
8354 	\param fd The FD. May be < 0.
8355 	\param path The absolute or relative path. Must not be \c NULL.
8356 	\return \c B_OK, if the entry has been removed successfully, another
8357 			error code otherwise.
8358 */
8359 status_t
8360 _kern_unlink(int fd, const char* path)
8361 {
8362 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8363 	if (pathBuffer.InitCheck() != B_OK)
8364 		return B_NO_MEMORY;
8365 
8366 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8367 }
8368 
8369 
8370 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8371 		   by another FD + path pair.
8372 
8373 	\a oldPath and \a newPath must always be specified (they contain at least
8374 	the name of the entry). If only a path is given, this path identifies the
8375 	entry directly. If both a FD and a path are given and the path is absolute,
8376 	the FD is ignored; a relative path is reckoned off of the directory (!)
8377 	identified by the respective FD.
8378 
8379 	\param oldFD The FD of the old location. May be < 0.
8380 	\param oldPath The absolute or relative path of the old location. Must not
8381 		   be \c NULL.
8382 	\param newFD The FD of the new location. May be < 0.
8383 	\param newPath The absolute or relative path of the new location. Must not
8384 		   be \c NULL.
8385 	\return \c B_OK, if the entry has been moved successfully, another
8386 			error code otherwise.
8387 */
8388 status_t
8389 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8390 {
8391 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8392 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8393 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8394 		return B_NO_MEMORY;
8395 
8396 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8397 		newFD, newPathBuffer.LockBuffer(), true);
8398 }
8399 
8400 
8401 status_t
8402 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8403 {
8404 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8405 	if (pathBuffer.InitCheck() != B_OK)
8406 		return B_NO_MEMORY;
8407 
8408 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8409 		true);
8410 }
8411 
8412 
8413 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8414 
8415 	If only \a fd is given, the stat operation associated with the type
8416 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8417 	given, this path identifies the entry for whose node to retrieve the
8418 	stat data. If both \a fd and \a path are given and the path is absolute,
8419 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8420 	identified by \a fd and specifies the entry whose stat data shall be
8421 	retrieved.
8422 
8423 	\param fd The FD. May be < 0.
8424 	\param path The absolute or relative path. Must not be \c NULL.
8425 	\param traverseLeafLink If \a path is given, \c true specifies that the
8426 		   function shall not stick to symlinks, but traverse them.
8427 	\param stat The buffer the stat data shall be written into.
8428 	\param statSize The size of the supplied stat buffer.
8429 	\return \c B_OK, if the the stat data have been read successfully, another
8430 	\return \c B_OK, if the stat data have been read successfully, another
8431 */
8432 status_t
8433 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8434 	struct stat* stat, size_t statSize)
8435 {
8436 	struct stat completeStat;
8437 	struct stat* originalStat = NULL;
8438 	status_t status;
8439 
8440 	if (statSize > sizeof(struct stat))
8441 		return B_BAD_VALUE;
8442 
8443 	// this supports different stat extensions
8444 	if (statSize < sizeof(struct stat)) {
8445 		originalStat = stat;
8446 		stat = &completeStat;
8447 	}
8448 
8449 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8450 
8451 	if (status == B_OK && originalStat != NULL)
8452 		memcpy(originalStat, stat, statSize);
8453 
8454 	return status;
8455 }
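
/*	Sketch of the stat extension mechanism above: a caller compiled against
	an older, smaller struct stat passes its sizeof, and only that many
	bytes are copied back. The legacy struct below is hypothetical and only
	works because old layouts are assumed to be prefixes of the current one:

	\code
	struct legacy_stat {	// assumed prefix of today's struct stat
		dev_t	st_dev;
		ino_t	st_ino;
		mode_t	st_mode;
		nlink_t	st_nlink;
	} legacyStat;

	status_t status = _kern_read_stat(-1, "/boot", true,
		(struct stat*)&legacyStat, sizeof(legacyStat));
		// the kernel fills a complete struct stat internally and copies
		// back only sizeof(legacyStat) bytes
	\endcode
*/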
8456 
8457 
8458 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8459 
8460 	If only \a fd is given, the stat operation associated with the type
8461 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8462 	given, this path identifies the entry for whose node to write the
8463 	stat data. If both \a fd and \a path are given and the path is absolute,
8464 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8465 	identified by \a fd and specifies the entry whose stat data shall be
8466 	written.
8467 
8468 	\param fd The FD. May be < 0.
8469 	\param path The absolute or relative path. Must not be \c NULL.
8470 	\param traverseLeafLink If \a path is given, \c true specifies that the
8471 		   function shall not stick to symlinks, but traverse them.
8472 	\param stat The buffer containing the stat data to be written.
8473 	\param statSize The size of the supplied stat buffer.
8474 	\param statMask A mask specifying which parts of the stat data shall be
8475 		   written.
8476 	\return \c B_OK, if the stat data have been written successfully,
8477 			another error code otherwise.
8478 */
8479 status_t
8480 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8481 	const struct stat* stat, size_t statSize, int statMask)
8482 {
8483 	struct stat completeStat;
8484 
8485 	if (statSize > sizeof(struct stat))
8486 		return B_BAD_VALUE;
8487 
8488 	// this supports different stat extensions
8489 	if (statSize < sizeof(struct stat)) {
8490 		memset((uint8*)&completeStat + statSize, 0,
8491 			sizeof(struct stat) - statSize);
8492 		memcpy(&completeStat, stat, statSize);
8493 		stat = &completeStat;
8494 	}
8495 
8496 	status_t status;
8497 
8498 	if (path) {
8499 		// path given: write the stat of the node referred to by (fd, path)
8500 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8501 		if (pathBuffer.InitCheck() != B_OK)
8502 			return B_NO_MEMORY;
8503 
8504 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8505 			traverseLeafLink, stat, statMask, true);
8506 	} else {
8507 		// no path given: get the FD and use the FD operation
8508 		struct file_descriptor* descriptor
8509 			= get_fd(get_current_io_context(true), fd);
8510 		if (descriptor == NULL)
8511 			return B_FILE_ERROR;
8512 
8513 		if (descriptor->ops->fd_write_stat)
8514 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8515 		else
8516 			status = B_UNSUPPORTED;
8517 
8518 		put_fd(descriptor);
8519 	}
8520 
8521 	return status;
8522 }
8523 
8524 
8525 int
8526 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8527 {
8528 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8529 	if (pathBuffer.InitCheck() != B_OK)
8530 		return B_NO_MEMORY;
8531 
8532 	if (path != NULL)
8533 		pathBuffer.SetTo(path);
8534 
8535 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8536 		traverseLeafLink, true);
8537 }
8538 
8539 
8540 int
8541 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8542 	int openMode)
8543 {
8544 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8545 	if (pathBuffer.InitCheck() != B_OK)
8546 		return B_NO_MEMORY;
8547 
8548 	if ((openMode & O_CREAT) != 0) {
8549 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8550 			true);
8551 	}
8552 
8553 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8554 }
8555 
8556 
8557 status_t
8558 _kern_remove_attr(int fd, const char* name)
8559 {
8560 	return attr_remove(fd, name, true);
8561 }
8562 
8563 
8564 status_t
8565 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8566 	const char* toName)
8567 {
8568 	return attr_rename(fromFile, fromName, toFile, toName, true);
8569 }
8570 
8571 
8572 int
8573 _kern_open_index_dir(dev_t device)
8574 {
8575 	return index_dir_open(device, true);
8576 }
8577 
8578 
8579 status_t
8580 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8581 {
8582 	return index_create(device, name, type, flags, true);
8583 }
8584 
8585 
8586 status_t
8587 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8588 {
8589 	return index_name_read_stat(device, name, stat, true);
8590 }
8591 
8592 
8593 status_t
8594 _kern_remove_index(dev_t device, const char* name)
8595 {
8596 	return index_remove(device, name, true);
8597 }
8598 
8599 
8600 status_t
8601 _kern_getcwd(char* buffer, size_t size)
8602 {
8603 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8604 
8605 	// Call vfs to get current working directory
8606 	return get_cwd(buffer, size, true);
8607 }
8608 
8609 
8610 status_t
8611 _kern_setcwd(int fd, const char* path)
8612 {
8613 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8614 	if (pathBuffer.InitCheck() != B_OK)
8615 		return B_NO_MEMORY;
8616 
8617 	if (path != NULL)
8618 		pathBuffer.SetTo(path);
8619 
8620 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8621 }
8622 
8623 
8624 //	#pragma mark - userland syscalls
8625 
8626 
8627 dev_t
8628 _user_mount(const char* userPath, const char* userDevice,
8629 	const char* userFileSystem, uint32 flags, const char* userArgs,
8630 	size_t argsLength)
8631 {
8632 	char fileSystem[B_FILE_NAME_LENGTH];
8633 	KPath path, device;
8634 	char* args = NULL;
8635 	status_t status;
8636 
8637 	if (!IS_USER_ADDRESS(userPath)
8638 		|| !IS_USER_ADDRESS(userFileSystem)
8639 		|| !IS_USER_ADDRESS(userDevice))
8640 		return B_BAD_ADDRESS;
8641 
8642 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8643 		return B_NO_MEMORY;
8644 
8645 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8646 		return B_BAD_ADDRESS;
8647 
8648 	if (userFileSystem != NULL
8649 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8650 		return B_BAD_ADDRESS;
8651 
8652 	if (userDevice != NULL
8653 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8654 			< B_OK)
8655 		return B_BAD_ADDRESS;
8656 
8657 	if (userArgs != NULL && argsLength > 0) {
8658 		// this is a safety restriction
8659 		if (argsLength >= 65536)
8660 			return B_NAME_TOO_LONG;
8661 
8662 		args = (char*)malloc(argsLength + 1);
8663 		if (args == NULL)
8664 			return B_NO_MEMORY;
8665 
8666 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8667 			free(args);
8668 			return B_BAD_ADDRESS;
8669 		}
8670 	}
8671 	path.UnlockBuffer();
8672 	device.UnlockBuffer();
8673 
8674 	status = fs_mount(path.LockBuffer(),
8675 		userDevice != NULL ? device.Path() : NULL,
8676 		userFileSystem ? fileSystem : NULL, flags, args, false);
8677 
8678 	free(args);
8679 	return status;
8680 }
8681 
8682 
8683 status_t
8684 _user_unmount(const char* userPath, uint32 flags)
8685 {
8686 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8687 	if (pathBuffer.InitCheck() != B_OK)
8688 		return B_NO_MEMORY;
8689 
8690 	char* path = pathBuffer.LockBuffer();
8691 
8692 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8693 		return B_BAD_ADDRESS;
8694 
8695 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8696 }
8697 
8698 
8699 status_t
8700 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8701 {
8702 	struct fs_info info;
8703 	status_t status;
8704 
8705 	if (userInfo == NULL)
8706 		return B_BAD_VALUE;
8707 
8708 	if (!IS_USER_ADDRESS(userInfo))
8709 		return B_BAD_ADDRESS;
8710 
8711 	status = fs_read_info(device, &info);
8712 	if (status != B_OK)
8713 		return status;
8714 
8715 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8716 		return B_BAD_ADDRESS;
8717 
8718 	return B_OK;
8719 }
8720 
8721 
8722 status_t
8723 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8724 {
8725 	struct fs_info info;
8726 
8727 	if (userInfo == NULL)
8728 		return B_BAD_VALUE;
8729 
8730 	if (!IS_USER_ADDRESS(userInfo)
8731 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8732 		return B_BAD_ADDRESS;
8733 
8734 	return fs_write_info(device, &info, mask);
8735 }
8736 
8737 
8738 dev_t
8739 _user_next_device(int32* _userCookie)
8740 {
8741 	int32 cookie;
8742 	dev_t device;
8743 
8744 	if (!IS_USER_ADDRESS(_userCookie)
8745 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8746 		return B_BAD_ADDRESS;
8747 
8748 	device = fs_next_device(&cookie);
8749 
8750 	if (device >= B_OK) {
8751 		// update user cookie
8752 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8753 			return B_BAD_ADDRESS;
8754 	}
8755 
8756 	return device;
8757 }
8758 
8759 
8760 status_t
8761 _user_sync(void)
8762 {
8763 	return _kern_sync();
8764 }
8765 
8766 
8767 status_t
8768 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8769 	size_t infoSize)
8770 {
8771 	struct fd_info info;
8772 	uint32 cookie;
8773 
8774 	// only root can do this (or should root's group be enough?)
8775 	if (geteuid() != 0)
8776 		return B_NOT_ALLOWED;
8777 
8778 	if (infoSize != sizeof(fd_info))
8779 		return B_BAD_VALUE;
8780 
8781 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8782 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8783 		return B_BAD_ADDRESS;
8784 
8785 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8786 	if (status != B_OK)
8787 		return status;
8788 
8789 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8790 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8791 		return B_BAD_ADDRESS;
8792 
8793 	return status;
8794 }
8795 
8796 
8797 status_t
8798 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8799 	char* userPath, size_t pathLength)
8800 {
8801 	if (!IS_USER_ADDRESS(userPath))
8802 		return B_BAD_ADDRESS;
8803 
8804 	KPath path(B_PATH_NAME_LENGTH + 1);
8805 	if (path.InitCheck() != B_OK)
8806 		return B_NO_MEMORY;
8807 
8808 	// copy the leaf name onto the stack
8809 	char stackLeaf[B_FILE_NAME_LENGTH];
8810 	if (leaf) {
8811 		if (!IS_USER_ADDRESS(leaf))
8812 			return B_BAD_ADDRESS;
8813 
8814 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8815 		if (length < 0)
8816 			return length;
8817 		if (length >= B_FILE_NAME_LENGTH)
8818 			return B_NAME_TOO_LONG;
8819 
8820 		leaf = stackLeaf;
8821 	}
8822 
8823 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8824 		false, path.LockBuffer(), path.BufferSize());
8825 	if (status != B_OK)
8826 		return status;
8827 
8828 	path.UnlockBuffer();
8829 
8830 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8831 	if (length < 0)
8832 		return length;
8833 	if (length >= (int)pathLength)
8834 		return B_BUFFER_OVERFLOW;
8835 
8836 	return B_OK;
8837 }
8838 
8839 
8840 status_t
8841 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8842 {
8843 	if (userPath == NULL || buffer == NULL)
8844 		return B_BAD_VALUE;
8845 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8846 		return B_BAD_ADDRESS;
8847 
8848 	// copy path from userland
8849 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8850 	if (pathBuffer.InitCheck() != B_OK)
8851 		return B_NO_MEMORY;
8852 	char* path = pathBuffer.LockBuffer();
8853 
8854 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8855 		return B_BAD_ADDRESS;
8856 
8857 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8858 		false);
8859 	if (error != B_OK)
8860 		return error;
8861 
8862 	// copy back to userland
8863 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8864 	if (len < 0)
8865 		return len;
8866 	if (len >= B_PATH_NAME_LENGTH)
8867 		return B_BUFFER_OVERFLOW;
8868 
8869 	return B_OK;
8870 }
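
/*	Illustrative userland usage via the corresponding syscall stub (the
	paths assume a standard Haiku installation without symlinks in between):

	\code
	char normalized[B_PATH_NAME_LENGTH];
	if (_kern_normalize_path("/boot/home/../home", true, normalized)
			== B_OK) {
		// normalized now contains "/boot/home"
	}
	\endcode
*/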
8871 
8872 
8873 int
8874 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8875 	int openMode, int perms)
8876 {
8877 	char name[B_FILE_NAME_LENGTH];
8878 
8879 	if (userName == NULL || device < 0 || inode < 0)
8880 		return B_BAD_VALUE;
8881 	if (!IS_USER_ADDRESS(userName)
8882 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8883 		return B_BAD_ADDRESS;
8884 
8885 	if ((openMode & O_CREAT) != 0) {
8886 		return file_create_entry_ref(device, inode, name, openMode, perms,
8887 			false);
8888 	}
8889 
8890 	return file_open_entry_ref(device, inode, name, openMode, false);
8891 }
8892 
8893 
8894 int
8895 _user_open(int fd, const char* userPath, int openMode, int perms)
8896 {
8897 	KPath path(B_PATH_NAME_LENGTH + 1);
8898 	if (path.InitCheck() != B_OK)
8899 		return B_NO_MEMORY;
8900 
8901 	char* buffer = path.LockBuffer();
8902 
8903 	if (!IS_USER_ADDRESS(userPath)
8904 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8905 		return B_BAD_ADDRESS;
8906 
8907 	if ((openMode & O_CREAT) != 0)
8908 		return file_create(fd, buffer, openMode, perms, false);
8909 
8910 	return file_open(fd, buffer, openMode, false);
8911 }
8912 
8913 
8914 int
8915 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8916 {
8917 	if (userName != NULL) {
8918 		char name[B_FILE_NAME_LENGTH];
8919 
8920 		if (!IS_USER_ADDRESS(userName)
8921 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8922 			return B_BAD_ADDRESS;
8923 
8924 		return dir_open_entry_ref(device, inode, name, false);
8925 	}
8926 	return dir_open_entry_ref(device, inode, NULL, false);
8927 }
8928 
8929 
8930 int
8931 _user_open_dir(int fd, const char* userPath)
8932 {
8933 	if (userPath == NULL)
8934 		return dir_open(fd, NULL, false);
8935 
8936 	KPath path(B_PATH_NAME_LENGTH + 1);
8937 	if (path.InitCheck() != B_OK)
8938 		return B_NO_MEMORY;
8939 
8940 	char* buffer = path.LockBuffer();
8941 
8942 	if (!IS_USER_ADDRESS(userPath)
8943 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8944 		return B_BAD_ADDRESS;
8945 
8946 	return dir_open(fd, buffer, false);
8947 }
8948 
8949 
8950 /*!	\brief Opens a directory's parent directory and returns the entry name
8951 		   of the former.
8952 
8953 	Apart from also returning the directory's entry name, this method is
8954 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8955 	equivalent if \a userName is \c NULL.
8956 
8957 	If a name buffer is supplied and the name does not fit the buffer, the
8958 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8959 
8960 	\param fd A FD referring to a directory.
8961 	\param userName Buffer the directory's entry name shall be written into.
8962 		   May be \c NULL.
8963 	\param nameLength Size of the name buffer.
8964 	\return The file descriptor of the opened parent directory, if everything
8965 			went fine, an error code otherwise.
8966 */
8967 int
8968 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8969 {
8970 	bool kernel = false;
8971 
8972 	if (userName && !IS_USER_ADDRESS(userName))
8973 		return B_BAD_ADDRESS;
8974 
8975 	// open the parent dir
8976 	int parentFD = dir_open(fd, (char*)"..", kernel);
8977 	if (parentFD < 0)
8978 		return parentFD;
8979 	FDCloser fdCloser(parentFD, kernel);
8980 
8981 	if (userName) {
8982 		// get the vnodes
8983 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8984 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8985 		VNodePutter parentVNodePutter(parentVNode);
8986 		VNodePutter dirVNodePutter(dirVNode);
8987 		if (!parentVNode || !dirVNode)
8988 			return B_FILE_ERROR;
8989 
8990 		// get the vnode name
8991 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8992 		struct dirent* buffer = (struct dirent*)_buffer;
8993 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8994 			sizeof(_buffer), get_current_io_context(false));
8995 		if (status != B_OK)
8996 			return status;
8997 
8998 		// copy the name to the userland buffer
8999 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9000 		if (len < 0)
9001 			return len;
9002 		if (len >= (int)nameLength)
9003 			return B_BUFFER_OVERFLOW;
9004 	}
9005 
9006 	return fdCloser.Detach();
9007 }
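
/*	Illustrative userland usage via the corresponding syscall stub (error
	handling omitted; "dirFD" is assumed to be an open directory FD): open
	the parent directory and learn the child's entry name at the same time.

	\code
	char name[B_FILE_NAME_LENGTH];
	int parent = _kern_open_parent_dir(dirFD, name, sizeof(name));
	if (parent >= 0) {
		// "name" is the entry name of dirFD within its parent
		_kern_close(parent);
	}
	\endcode
*/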
9008 
9009 
9010 status_t
9011 _user_fcntl(int fd, int op, size_t argument)
9012 {
9013 	status_t status = common_fcntl(fd, op, argument, false);
9014 	if (op == F_SETLKW)
9015 		syscall_restart_handle_post(status);
9016 
9017 	return status;
9018 }
9019 
9020 
9021 status_t
9022 _user_fsync(int fd)
9023 {
9024 	return common_sync(fd, false);
9025 }
9026 
9027 
9028 status_t
9029 _user_flock(int fd, int operation)
9030 {
9031 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9032 
9033 	// Check if the operation is valid
9034 	switch (operation & ~LOCK_NB) {
9035 		case LOCK_UN:
9036 		case LOCK_SH:
9037 		case LOCK_EX:
9038 			break;
9039 
9040 		default:
9041 			return B_BAD_VALUE;
9042 	}
9043 
9044 	struct file_descriptor* descriptor;
9045 	struct vnode* vnode;
9046 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9047 	if (descriptor == NULL)
9048 		return B_FILE_ERROR;
9049 
9050 	if (descriptor->type != FDTYPE_FILE) {
9051 		put_fd(descriptor);
9052 		return B_BAD_VALUE;
9053 	}
9054 
9055 	struct flock flock;
9056 	flock.l_start = 0;
9057 	flock.l_len = OFF_MAX;
9058 	flock.l_whence = 0;
9059 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9060 
9061 	status_t status;
9062 	if ((operation & LOCK_UN) != 0)
9063 		status = release_advisory_lock(vnode, &flock);
9064 	else {
9065 		status = acquire_advisory_lock(vnode,
9066 			thread_get_current_thread()->team->session_id, &flock,
9067 			(operation & LOCK_NB) == 0);
9068 	}
9069 
9070 	syscall_restart_handle_post(status);
9071 
9072 	put_fd(descriptor);
9073 	return status;
9074 }
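
/*	Userland view of the above via the POSIX flock() wrapper (illustrative):
	locks taken this way are advisory and always cover the whole file, in
	contrast to the byte-range locks available through fcntl().

	\code
	if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
		// got the exclusive lock without blocking; do the work, then:
		flock(fd, LOCK_UN);
	}
	\endcode
*/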
9075 
9076 
9077 status_t
9078 _user_lock_node(int fd)
9079 {
9080 	return common_lock_node(fd, false);
9081 }
9082 
9083 
9084 status_t
9085 _user_unlock_node(int fd)
9086 {
9087 	return common_unlock_node(fd, false);
9088 }
9089 
9090 
9091 status_t
9092 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9093 	int perms)
9094 {
9095 	char name[B_FILE_NAME_LENGTH];
9096 	status_t status;
9097 
9098 	if (!IS_USER_ADDRESS(userName))
9099 		return B_BAD_ADDRESS;
9100 
9101 	status = user_strlcpy(name, userName, sizeof(name));
9102 	if (status < 0)
9103 		return status;
9104 
9105 	return dir_create_entry_ref(device, inode, name, perms, false);
9106 }
9107 
9108 
9109 status_t
9110 _user_create_dir(int fd, const char* userPath, int perms)
9111 {
9112 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9113 	if (pathBuffer.InitCheck() != B_OK)
9114 		return B_NO_MEMORY;
9115 
9116 	char* path = pathBuffer.LockBuffer();
9117 
9118 	if (!IS_USER_ADDRESS(userPath)
9119 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9120 		return B_BAD_ADDRESS;
9121 
9122 	return dir_create(fd, path, perms, false);
9123 }
9124 
9125 
9126 status_t
9127 _user_remove_dir(int fd, const char* userPath)
9128 {
9129 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9130 	if (pathBuffer.InitCheck() != B_OK)
9131 		return B_NO_MEMORY;
9132 
9133 	char* path = pathBuffer.LockBuffer();
9134 
9135 	if (userPath != NULL) {
9136 		if (!IS_USER_ADDRESS(userPath)
9137 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9138 			return B_BAD_ADDRESS;
9139 	}
9140 
9141 	return dir_remove(fd, userPath ? path : NULL, false);
9142 }
9143 
9144 
9145 status_t
9146 _user_read_link(int fd, const char* userPath, char* userBuffer,
9147 	size_t* userBufferSize)
9148 {
9149 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9150 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9151 		return B_NO_MEMORY;
9152 
9153 	size_t bufferSize;
9154 
9155 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9156 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9157 		return B_BAD_ADDRESS;
9158 
9159 	char* path = pathBuffer.LockBuffer();
9160 	char* buffer = linkBuffer.LockBuffer();
9161 
9162 	if (userPath) {
9163 		if (!IS_USER_ADDRESS(userPath)
9164 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9165 			return B_BAD_ADDRESS;
9166 
9167 		if (bufferSize > B_PATH_NAME_LENGTH)
9168 			bufferSize = B_PATH_NAME_LENGTH;
9169 	}
9170 
9171 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9172 		&bufferSize, false);
9173 
9174 	// we also update the bufferSize in case of errors
9175 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9176 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9177 		return B_BAD_ADDRESS;
9178 
9179 	if (status != B_OK)
9180 		return status;
9181 
9182 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9183 		return B_BAD_ADDRESS;
9184 
9185 	return B_OK;
9186 }
9187 
9188 
9189 status_t
9190 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9191 	int mode)
9192 {
9193 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9194 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9195 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9196 		return B_NO_MEMORY;
9197 
9198 	char* path = pathBuffer.LockBuffer();
9199 	char* toPath = toPathBuffer.LockBuffer();
9200 
9201 	if (!IS_USER_ADDRESS(userPath)
9202 		|| !IS_USER_ADDRESS(userToPath)
9203 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9204 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9205 		return B_BAD_ADDRESS;
9206 
9207 	return common_create_symlink(fd, path, toPath, mode, false);
9208 }
9209 
9210 
9211 status_t
9212 _user_create_link(int pathFD, const char* userPath, int toFD,
9213 	const char* userToPath, bool traverseLeafLink)
9214 {
9215 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9216 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9217 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9218 		return B_NO_MEMORY;
9219 
9220 	char* path = pathBuffer.LockBuffer();
9221 	char* toPath = toPathBuffer.LockBuffer();
9222 
9223 	if (!IS_USER_ADDRESS(userPath)
9224 		|| !IS_USER_ADDRESS(userToPath)
9225 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9226 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9227 		return B_BAD_ADDRESS;
9228 
9229 	status_t status = check_path(toPath);
9230 	if (status != B_OK)
9231 		return status;
9232 
9233 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9234 		false);
9235 }
9236 
9237 
9238 status_t
9239 _user_unlink(int fd, const char* userPath)
9240 {
9241 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9242 	if (pathBuffer.InitCheck() != B_OK)
9243 		return B_NO_MEMORY;
9244 
9245 	char* path = pathBuffer.LockBuffer();
9246 
9247 	if (!IS_USER_ADDRESS(userPath)
9248 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9249 		return B_BAD_ADDRESS;
9250 
9251 	return common_unlink(fd, path, false);
9252 }
9253 
9254 
9255 status_t
9256 _user_rename(int oldFD, const char* userOldPath, int newFD,
9257 	const char* userNewPath)
9258 {
9259 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9260 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9261 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9262 		return B_NO_MEMORY;
9263 
9264 	char* oldPath = oldPathBuffer.LockBuffer();
9265 	char* newPath = newPathBuffer.LockBuffer();
9266 
9267 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9268 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9269 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9270 		return B_BAD_ADDRESS;
9271 
9272 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9273 }
9274 
9275 
9276 status_t
9277 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9278 {
9279 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9280 	if (pathBuffer.InitCheck() != B_OK)
9281 		return B_NO_MEMORY;
9282 
9283 	char* path = pathBuffer.LockBuffer();
9284 
9285 	if (!IS_USER_ADDRESS(userPath)
9286 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9287 		return B_BAD_ADDRESS;
9288 	}
9289 
9290 	// split into directory vnode and filename path
9291 	char filename[B_FILE_NAME_LENGTH];
9292 	struct vnode* dir;
9293 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9294 	if (status != B_OK)
9295 		return status;
9296 
9297 	VNodePutter _(dir);
9298 
9299 	// the underlying FS needs to support creating FIFOs
9300 	if (!HAS_FS_CALL(dir, create_special_node))
9301 		return B_UNSUPPORTED;
9302 
9303 	// create the entry	-- the FIFO sub node is set up automatically
9304 	fs_vnode superVnode;
9305 	ino_t nodeID;
9306 	status = FS_CALL(dir, create_special_node, filename, NULL,
9307 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9308 
9309 	// create_special_node() acquired a reference for us that we don't need.
9310 	if (status == B_OK)
9311 		put_vnode(dir->mount->volume, nodeID);
9312 
9313 	return status;
9314 }
9315 
9316 
9317 status_t
9318 _user_create_pipe(int* userFDs)
9319 {
9320 	// rootfs should support creating FIFOs, but let's be sure
9321 	if (!HAS_FS_CALL(sRoot, create_special_node))
9322 		return B_UNSUPPORTED;
9323 
9324 	// create the node	-- the FIFO sub node is set up automatically
9325 	fs_vnode superVnode;
9326 	ino_t nodeID;
9327 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9328 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9329 	if (status != B_OK)
9330 		return status;
9331 
9332 	// We've got one reference to the node and need another one.
9333 	struct vnode* vnode;
9334 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9335 	if (status != B_OK) {
9336 		// that should not happen
9337 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9338 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9339 		return status;
9340 	}
9341 
9342 	// Everything looks good so far. Open two FDs, one for reading and one
9343 	// for writing.
9344 	int fds[2];
9345 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9346 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9347 
9348 	FDCloser closer0(fds[0], false);
9349 	FDCloser closer1(fds[1], false);
9350 
9351 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9352 
9353 	// copy FDs to userland
9354 	if (status == B_OK) {
9355 		if (!IS_USER_ADDRESS(userFDs)
9356 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9357 			status = B_BAD_ADDRESS;
9358 		}
9359 	}
9360 
9361 	// keep FDs, if everything went fine
9362 	if (status == B_OK) {
9363 		closer0.Detach();
9364 		closer1.Detach();
9365 	}
9366 
9367 	return status;
9368 }
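
/*	Userland view of the above via the POSIX pipe() wrapper (illustrative,
	error handling omitted): fds[0] is the read end, fds[1] the write end,
	matching the O_RDONLY/O_WRONLY order used when opening the vnode.

	\code
	int fds[2];
	if (pipe(fds) == 0) {
		write(fds[1], "ping", 4);
		char buffer[4];
		read(fds[0], buffer, 4);
	}
	\endcode
*/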
9369 
9370 
9371 status_t
9372 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9373 {
9374 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9375 	if (pathBuffer.InitCheck() != B_OK)
9376 		return B_NO_MEMORY;
9377 
9378 	char* path = pathBuffer.LockBuffer();
9379 
9380 	if (!IS_USER_ADDRESS(userPath)
9381 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9382 		return B_BAD_ADDRESS;
9383 
9384 	return common_access(fd, path, mode, effectiveUserGroup, false);
9385 }
9386 
9387 
9388 status_t
9389 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9390 	struct stat* userStat, size_t statSize)
9391 {
9392 	struct stat stat;
9393 	status_t status;
9394 
9395 	if (statSize > sizeof(struct stat))
9396 		return B_BAD_VALUE;
9397 
9398 	if (!IS_USER_ADDRESS(userStat))
9399 		return B_BAD_ADDRESS;
9400 
9401 	if (userPath) {
9402 		// path given: get the stat of the node referred to by (fd, path)
9403 		if (!IS_USER_ADDRESS(userPath))
9404 			return B_BAD_ADDRESS;
9405 
9406 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9407 		if (pathBuffer.InitCheck() != B_OK)
9408 			return B_NO_MEMORY;
9409 
9410 		char* path = pathBuffer.LockBuffer();
9411 
9412 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9413 		if (length < B_OK)
9414 			return length;
9415 		if (length >= B_PATH_NAME_LENGTH)
9416 			return B_NAME_TOO_LONG;
9417 
9418 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9419 	} else {
9420 		// no path given: get the FD and use the FD operation
9421 		struct file_descriptor* descriptor
9422 			= get_fd(get_current_io_context(false), fd);
9423 		if (descriptor == NULL)
9424 			return B_FILE_ERROR;
9425 
9426 		if (descriptor->ops->fd_read_stat)
9427 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9428 		else
9429 			status = B_UNSUPPORTED;
9430 
9431 		put_fd(descriptor);
9432 	}
9433 
9434 	if (status != B_OK)
9435 		return status;
9436 
9437 	return user_memcpy(userStat, &stat, statSize);
9438 }
9439 
9440 
9441 status_t
9442 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9443 	const struct stat* userStat, size_t statSize, int statMask)
9444 {
9445 	if (statSize > sizeof(struct stat))
9446 		return B_BAD_VALUE;
9447 
9448 	struct stat stat;
9449 
9450 	if (!IS_USER_ADDRESS(userStat)
9451 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9452 		return B_BAD_ADDRESS;
9453 
9454 	// clear additional stat fields
9455 	if (statSize < sizeof(struct stat))
9456 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9457 
9458 	status_t status;
9459 
9460 	if (userPath) {
9461 		// path given: write the stat of the node referred to by (fd, path)
9462 		if (!IS_USER_ADDRESS(userPath))
9463 			return B_BAD_ADDRESS;
9464 
9465 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9466 		if (pathBuffer.InitCheck() != B_OK)
9467 			return B_NO_MEMORY;
9468 
9469 		char* path = pathBuffer.LockBuffer();
9470 
9471 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9472 		if (length < B_OK)
9473 			return length;
9474 		if (length >= B_PATH_NAME_LENGTH)
9475 			return B_NAME_TOO_LONG;
9476 
9477 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9478 			statMask, false);
9479 	} else {
9480 		// no path given: get the FD and use the FD operation
9481 		struct file_descriptor* descriptor
9482 			= get_fd(get_current_io_context(false), fd);
9483 		if (descriptor == NULL)
9484 			return B_FILE_ERROR;
9485 
9486 		if (descriptor->ops->fd_write_stat) {
9487 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9488 				statMask);
9489 		} else
9490 			status = B_UNSUPPORTED;
9491 
9492 		put_fd(descriptor);
9493 	}
9494 
9495 	return status;
9496 }
9497 
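/*!	\brief Opens the attribute directory of a node.

	With \a userPath == NULL the node referred to by \a fd itself is used;
	otherwise the path is resolved relative to \a fd first.
*/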
9498 
9499 int
9500 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9501 {
9502 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9503 	if (pathBuffer.InitCheck() != B_OK)
9504 		return B_NO_MEMORY;
9505 
9506 	char* path = pathBuffer.LockBuffer();
9507 
9508 	if (userPath != NULL) {
9509 		if (!IS_USER_ADDRESS(userPath)
9510 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9511 			return B_BAD_ADDRESS;
9512 	}
9513 
9514 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9515 }
9516 
9517 
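/*!	\brief Reads an attribute of a node.

	Implemented on top of the regular file descriptor machinery: the
	attribute is opened as an FD of its own, read via _user_read() (which
	also validates \a userBuffer), and closed again.
*/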
9518 ssize_t
_user_read_attr(int fd, const char* userAttribute, off_t pos,
	void* userBuffer, size_t readBytes)
{
	char attribute[B_FILE_NAME_LENGTH];

	// copy the attribute name from userland, as the other syscalls here do
	if (!IS_USER_ADDRESS(userAttribute)
		|| user_strlcpy(attribute, userAttribute, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;
9525 
9526 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9527 	_user_close(attr);
9528 
9529 	return bytes;
9530 }
9531 
9532 
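/*!	\brief Writes (and, if necessary, creates) an attribute of a node.

	BeOS truncation semantics: a write at position 0 replaces the previous
	contents entirely, so writing "bar" over a longer old value yields
	exactly "bar", while a write at \a pos != 0 leaves the existing
	contents in place.
*/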
9533 ssize_t
_user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
	const void* buffer, size_t writeBytes)
{
	char attribute[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userAttribute)
		|| user_strlcpy(attribute, userAttribute, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	// Support the BeOS-typical truncation semantics as well as the position
	// argument: truncate only when writing from offset 0.
	int attr = attr_create(fd, NULL, attribute, type,
		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
	if (attr < 0)
		return attr;
9543 
9544 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9545 	_user_close(attr);
9546 
9547 	return bytes;
9548 }
9549 
9550 
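/*!	\brief Returns type and size of an attribute in \a userAttrInfo.

	The attribute is opened temporarily, its stat is read via the generic
	fd_read_stat() operation, and type and size are repackaged as an
	attr_info for userland.
*/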
9551 status_t
_user_stat_attr(int fd, const char* userAttribute,
	struct attr_info* userAttrInfo)
{
	char attribute[B_FILE_NAME_LENGTH];

	// validate the userland pointers before doing any work
	if (userAttrInfo == NULL || !IS_USER_ADDRESS(userAttrInfo)
		|| !IS_USER_ADDRESS(userAttribute)
		|| user_strlcpy(attribute, userAttribute, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;
9557 
9558 	struct file_descriptor* descriptor
9559 		= get_fd(get_current_io_context(false), attr);
9560 	if (descriptor == NULL) {
9561 		_user_close(attr);
9562 		return B_FILE_ERROR;
9563 	}
9564 
9565 	struct stat stat;
9566 	status_t status;
9567 	if (descriptor->ops->fd_read_stat)
9568 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9569 	else
9570 		status = B_UNSUPPORTED;
9571 
9572 	put_fd(descriptor);
9573 	_user_close(attr);
9574 
9575 	if (status == B_OK) {
9576 		attr_info info;
9577 		info.type = stat.st_type;
9578 		info.size = stat.st_size;
9579 
9580 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9581 			return B_BAD_ADDRESS;
9582 	}
9583 
9584 	return status;
9585 }
9586 
9587 
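/*!	\brief Opens or, with O_CREAT in \a openMode, creates an attribute.

	The node is specified by the usual (\a fd, \a userPath) pair; \a type
	is only relevant when the attribute is created.
*/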
9588 int
9589 _user_open_attr(int fd, const char* userPath, const char* userName,
9590 	uint32 type, int openMode)
9591 {
9592 	char name[B_FILE_NAME_LENGTH];
9593 
9594 	if (!IS_USER_ADDRESS(userName)
9595 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9596 		return B_BAD_ADDRESS;
9597 
9598 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9599 	if (pathBuffer.InitCheck() != B_OK)
9600 		return B_NO_MEMORY;
9601 
9602 	char* path = pathBuffer.LockBuffer();
9603 
9604 	if (userPath != NULL) {
9605 		if (!IS_USER_ADDRESS(userPath)
9606 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9607 			return B_BAD_ADDRESS;
9608 	}
9609 
9610 	if ((openMode & O_CREAT) != 0) {
9611 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9612 			false);
9613 	}
9614 
9615 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9616 }
9617 
9618 
9619 status_t
9620 _user_remove_attr(int fd, const char* userName)
9621 {
9622 	char name[B_FILE_NAME_LENGTH];
9623 
9624 	if (!IS_USER_ADDRESS(userName)
9625 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9626 		return B_BAD_ADDRESS;
9627 
9628 	return attr_remove(fd, name, false);
9629 }
9630 
9631 
9632 status_t
9633 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9634 	const char* userToName)
9635 {
9636 	if (!IS_USER_ADDRESS(userFromName)
9637 		|| !IS_USER_ADDRESS(userToName))
9638 		return B_BAD_ADDRESS;
9639 
9640 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9641 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9642 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9643 		return B_NO_MEMORY;
9644 
9645 	char* fromName = fromNameBuffer.LockBuffer();
9646 	char* toName = toNameBuffer.LockBuffer();
9647 
9648 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9649 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9650 		return B_BAD_ADDRESS;
9651 
9652 	return attr_rename(fromFile, fromName, toFile, toName, false);
9653 }
9654 
9655 
9656 int
9657 _user_open_index_dir(dev_t device)
9658 {
9659 	return index_dir_open(device, false);
9660 }
9661 
9662 
9663 status_t
9664 _user_create_index(dev_t device, const char* userName, uint32 type,
9665 	uint32 flags)
9666 {
9667 	char name[B_FILE_NAME_LENGTH];
9668 
9669 	if (!IS_USER_ADDRESS(userName)
9670 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9671 		return B_BAD_ADDRESS;
9672 
9673 	return index_create(device, name, type, flags, false);
9674 }
9675 
9676 
9677 status_t
9678 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9679 {
9680 	char name[B_FILE_NAME_LENGTH];
9681 	struct stat stat;
9682 	status_t status;
9683 
9684 	if (!IS_USER_ADDRESS(userName)
9685 		|| !IS_USER_ADDRESS(userStat)
9686 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9687 		return B_BAD_ADDRESS;
9688 
9689 	status = index_name_read_stat(device, name, &stat, false);
9690 	if (status == B_OK) {
9691 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9692 			return B_BAD_ADDRESS;
9693 	}
9694 
9695 	return status;
9696 }
9697 
9698 
9699 status_t
9700 _user_remove_index(dev_t device, const char* userName)
9701 {
9702 	char name[B_FILE_NAME_LENGTH];
9703 
9704 	if (!IS_USER_ADDRESS(userName)
9705 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9706 		return B_BAD_ADDRESS;
9707 
9708 	return index_remove(device, name, false);
9709 }
9710 
9711 
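/*!	\brief Backend of getcwd().

	The requested \a size is clamped to kMaxPathLength, so an oversized
	user buffer never causes an equally oversized kernel allocation.
*/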
9712 status_t
9713 _user_getcwd(char* userBuffer, size_t size)
9714 {
9715 	if (size == 0)
9716 		return B_BAD_VALUE;
9717 	if (!IS_USER_ADDRESS(userBuffer))
9718 		return B_BAD_ADDRESS;
9719 
9720 	if (size > kMaxPathLength)
9721 		size = kMaxPathLength;
9722 
9723 	KPath pathBuffer(size);
9724 	if (pathBuffer.InitCheck() != B_OK)
9725 		return B_NO_MEMORY;
9726 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9728 
9729 	char* path = pathBuffer.LockBuffer();
9730 
9731 	status_t status = get_cwd(path, size, false);
9732 	if (status != B_OK)
9733 		return status;
9734 
9735 	// Copy back the result
9736 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9737 		return B_BAD_ADDRESS;
9738 
9739 	return status;
9740 }
9741 
9742 
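/*!	\brief Backend of chdir()/fchdir().

	With \a userPath == NULL the new working directory is determined by
	\a fd alone.
*/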
9743 status_t
9744 _user_setcwd(int fd, const char* userPath)
9745 {
9746 	TRACE(("user_setcwd: path = %p\n", userPath));
9747 
9748 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9749 	if (pathBuffer.InitCheck() != B_OK)
9750 		return B_NO_MEMORY;
9751 
9752 	char* path = pathBuffer.LockBuffer();
9753 
9754 	if (userPath != NULL) {
9755 		if (!IS_USER_ADDRESS(userPath)
9756 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9757 			return B_BAD_ADDRESS;
9758 	}
9759 
9760 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9761 }
9762 
9763 
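/*!	\brief Backend of chroot().

	Only the root user may change the root. The IO context's root vnode is
	swapped under sIOContextRootLock, and the reference to the previous
	root is released afterwards.
*/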
9764 status_t
9765 _user_change_root(const char* userPath)
9766 {
9767 	// only root is allowed to chroot()
9768 	if (geteuid() != 0)
9769 		return B_NOT_ALLOWED;
9770 
9771 	// alloc path buffer
9772 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9773 	if (pathBuffer.InitCheck() != B_OK)
9774 		return B_NO_MEMORY;
9775 
9776 	// copy userland path to kernel
9777 	char* path = pathBuffer.LockBuffer();
9778 	if (userPath != NULL) {
9779 		if (!IS_USER_ADDRESS(userPath)
9780 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9781 			return B_BAD_ADDRESS;
9782 	}
9783 
9784 	// get the vnode
9785 	struct vnode* vnode;
9786 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9787 	if (status != B_OK)
9788 		return status;
9789 
9790 	// set the new root
9791 	struct io_context* context = get_current_io_context(false);
9792 	mutex_lock(&sIOContextRootLock);
9793 	struct vnode* oldRoot = context->root;
9794 	context->root = vnode;
9795 	mutex_unlock(&sIOContextRootLock);
9796 
9797 	put_vnode(oldRoot);
9798 
9799 	return B_OK;
9800 }
9801 
9802 
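/*!	\brief Opens a (possibly live) query on \a device.

	\a userQuery contains the query predicate (an illustrative example
	would be something like name=="*.cpp"; the exact syntax is defined by
	the file system's query support). \a flags, \a port, and \a token are
	forwarded to query_open() unchanged; for live queries the port/token
	pair presumably identifies where update messages are sent.
*/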
9803 int
9804 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9805 	uint32 flags, port_id port, int32 token)
9806 {
9807 	char* query;
9808 
9809 	if (device < 0 || userQuery == NULL || queryLength == 0)
9810 		return B_BAD_VALUE;
9811 
	// safety restriction: cap query strings at 64 KB
9813 	if (queryLength >= 65536)
9814 		return B_NAME_TOO_LONG;
9815 
9816 	query = (char*)malloc(queryLength + 1);
9817 	if (query == NULL)
9818 		return B_NO_MEMORY;
	if (!IS_USER_ADDRESS(userQuery)
		|| user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9820 		free(query);
9821 		return B_BAD_ADDRESS;
9822 	}
9823 
9824 	int fd = query_open(device, query, flags, port, token, false);
9825 
9826 	free(query);
9827 	return fd;
9828 }
9829 
9830 
9831 #include "vfs_request_io.cpp"
9832