xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 541ff51a6ef4c47f8ab105ba6ff895cdbba83aca)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
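

// A minimal usage sketch (illustration only, not compiled) for the dispatch
// macros above: FS_CALL(vnode, op, ...) expands to a call of the file system
// hook vnode->ops->op with the volume and vnode prepended to the arguments.
// `exampleVnode` and `stat` are hypothetical here.
#if 0
static status_t
example_read_stat(struct vnode* exampleVnode, struct stat* stat)
{
	if (!HAS_FS_CALL(exampleVnode, read_stat))
		return B_UNSUPPORTED;

	// calls exampleVnode->ops->read_stat(exampleVnode->mount->volume,
	// exampleVnode, stat); under KDEBUG a NULL hook panics instead
	return FS_CALL(exampleVnode, read_stat, stat);
}
#endif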
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() -- this does not
112 	// depend on PATH_MAX)
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount is mounted and it is ensured that it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount), (read) access
128 	to those fields is always safe, even without additional locking. Moreover,
129 	while mounted the mount holds a reference to the root_vnode->covers vnode,
130 	thus making the access path vnode->mount->root_vnode->covers->mount->...
131 	safe if a reference to vnode is held (note that for the root mount
132 	root_vnode->covers is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
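
// Illustration (sketch, not compiled) of the immutability guarantee described
// in the doc comment above: while a reference to `vnode` (hypothetical here)
// is held, following vnode->mount->root_vnode->covers needs no extra locking.
#if 0
	struct vnode* covered = vnode->mount->root_vnode->covers;
	if (covered != NULL) {
		// for the root mount, covers is NULL, so we never get here
		dprintf("mount covers node %ld:%Ld\n", covered->device, covered->id);
	}
#endif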
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is, locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields that are immutable after initialization of the fs_mount
222 	  structures in sMountsTable will not be modified.
223 
224 	The thread trying to lock the lock must not hold sVnodeLock or
225 	sMountMutex.
226 */
227 static recursive_lock sMountOpLock;
228 
229 /*!	\brief Guards sVnodeTable.
230 
231 	The holder is allowed read/write access to sVnodeTable and to
232 	any unbusy vnode in that table, save for the immutable fields (device, id,
233 	private_node, mount), to which only read-only access is allowed.
234 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
235 	well as the busy, removed, unused flags, and the vnode's type can also be
236 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
237 	locked. Write access to covered_by and covers requires write locking
238 	sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not hold this lock when calling create_sem(), as this might call
242 	vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
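

// Lock ordering sketch, derived from the constraints documented above: take
// sMountOpLock before sVnodeLock (it must not be acquired while sVnodeLock
// or sMountMutex is held), and sVnodeLock before sMountMutex (cf.
// get_mount()) or sIOContextRootLock (cf. replace_vnode_if_disconnected()).
// Illustration only, not compiled:
#if 0
	RecursiveLocker mountOpLocker(sMountOpLock);
	ReadLocker vnodeLocker(sVnodeLock);
	MutexLocker mountLocker(sMountMutex);
#endif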
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
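

// Usage sketch for the two RAII helpers above: VNodePutter puts its vnode
// reference and FDCloser closes its descriptor when going out of scope,
// unless ownership is transferred via Detach(). The surrounding variables
// (`mountID`, `vnodeID`, `fd`, `someError`) are hypothetical.
#if 0
	struct vnode* vnode;
	if (get_vnode(mountID, vnodeID, &vnode, true, false) == B_OK) {
		VNodePutter vnodePutter(vnode);
		// ... work with vnode; put_vnode() happens automatically ...
	}

	FDCloser fdCloser(fd, kernel);
	if (someError != B_OK)
		return someError;
			// the descriptor is closed automatically here
	return fdCloser.Detach();
		// success: the descriptor stays open and is handed to the caller
#endif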
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
549 		status_t status, generic_size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, sizeof(generic_io_vec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %llu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, (uint64)fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%llx, %llu)", (uint64)fVecs[i].base, (uint64)fVecs[i].length);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %llu",
581 			fFlags, fStatus, (uint64)fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
590 	generic_io_vec*		fVecs;
591 	uint32			fCount;
592 	uint32			fFlags;
593 	generic_size_t			fBytesRequested;
594 	status_t		fStatus;
595 	generic_size_t			fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
603 		status_t status, generic_size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
622 		status_t status, generic_size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
668 
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
671 	Note, you must hold the sMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to the file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
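

// For example, get_file_system("bfs") constructs and loads the module
// "file_systems/bfs/v1", while a full name like "file_systems/bfs/v1" is
// passed to get_module() verbatim.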
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length) != 0) {
750 		// the name already seems to be the module's file name
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
770 
771 
772 /*!	Accepts a list of file system names separated by a colon, one for each
773 	layer, and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
793 			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
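

// For example, with fsNames = "bfs:write_overlay" (a hypothetical layer
// setup), layer 0 yields "bfs", layer 1 yields "write_overlay", and any
// higher layer yields NULL.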
803 
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
876 	created. In either case -- but not, if an error occurs -- the function write
877 	created. In either case -- but not if an error occurs -- the function write
878 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
879 	error the lock is not held on return.
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
907 	// look up the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep sVnodeLock write-locked for the caller
938 	return B_OK;
939 }
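

// Caller pattern sketch (cf. get_vnode()): on success sVnodeLock is left
// write-locked and the new vnode is busy, so the caller initializes the node
// and unlocks. Illustration only, not compiled; `mountID` and `vnodeID` are
// hypothetical.
#if 0
	struct vnode* vnode;
	bool nodeCreated;
	if (create_new_vnode_and_lock(mountID, vnodeID, vnode, nodeCreated)
			== B_OK) {
		// ... set up the (still busy) vnode ...
		rw_lock_write_unlock(&sVnodeLock);
	}
#endif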
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted, in which case the changes
954 	// will be discarded
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
965 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
966 	// cache reference to be released, which will also release a (no longer
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
1071 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1072 	node is busy in the first place and 2. vnode_used() must be called for the
1073 	node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
1114 	\param reenter \c true, if this function is called (indirectly) from within
1115 		   a file system.
1116 	\return \c B_OK, if everything went fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
1292 		// Check whether the node is still unused -- since we only append to
1293 		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
1297 		// recently used unused vnode, so we'd rather not free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
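

// For example, with 1000 unused vnodes, B_LOW_RESOURCE_NOTE frees up to 10
// of them, B_LOW_RESOURCE_WARNING up to 100, and B_LOW_RESOURCE_CRITICAL all
// 1000 (the loop also skips nodes that were touched again in the meantime).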
1322 
1323 
1324 /*!	Gets the vnode the given vnode is covering.
1325 
1326 	The caller must have \c sVnodeLock read-locked at least.
1327 
1328 	The function returns a reference to the retrieved vnode (if any), which
1329 	the caller is responsible for releasing (cf. put_vnode()).
1330 
1331 	\param vnode The vnode whose covered node shall be returned.
1332 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1333 		vnode.
1334 */
1335 static inline Vnode*
1336 get_covered_vnode_locked(Vnode* vnode)
1337 {
1338 	if (Vnode* coveredNode = vnode->covers) {
1339 		while (coveredNode->covers != NULL)
1340 			coveredNode = coveredNode->covers;
1341 
1342 		inc_vnode_ref_count(coveredNode);
1343 		return coveredNode;
1344 	}
1345 
1346 	return NULL;
1347 }
1348 
1349 
1350 /*!	Gets the vnode the given vnode is covering.
1351 
1352 	The caller must not hold \c sVnodeLock. Note that this implies a race
1353 	condition, since the situation can change at any time.
1354 
1355 	The function returns a reference to the retrieved vnode (if any), which
1356 	the caller is responsible for releasing (cf. put_vnode()).
1357 
1358 	\param vnode The vnode whose covered node shall be returned.
1359 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1360 		vnode.
1361 */
1362 static inline Vnode*
1363 get_covered_vnode(Vnode* vnode)
1364 {
1365 	if (!vnode->IsCovering())
1366 		return NULL;
1367 
1368 	ReadLocker vnodeReadLocker(sVnodeLock);
1369 	return get_covered_vnode_locked(vnode);
1370 }
1371 
1372 
1373 /*!	Gets the vnode the given vnode is covered by.
1374 
1375 	The caller must have \c sVnodeLock read-locked at least.
1376 
1377 	The function returns a reference to the retrieved vnode (if any), which
1378 	the caller is responsible for releasing (cf. put_vnode()).
1379 
1380 	\param vnode The vnode whose covering node shall be returned.
1381 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1382 		any vnode.
1383 */
1384 static Vnode*
1385 get_covering_vnode_locked(Vnode* vnode)
1386 {
1387 	if (Vnode* coveringNode = vnode->covered_by) {
1388 		while (coveringNode->covered_by != NULL)
1389 			coveringNode = coveringNode->covered_by;
1390 
1391 		inc_vnode_ref_count(coveringNode);
1392 		return coveringNode;
1393 	}
1394 
1395 	return NULL;
1396 }
1397 
1398 
1399 /*!	Gets the vnode the given vnode is covered by.
1400 
1401 	The caller must not hold \c sVnodeLock. Note that this implies a race
1402 	condition, since the situation can change at any time.
1403 
1404 	The function returns a reference to the retrieved vnode (if any), which
1405 	the caller is responsible for releasing (cf. put_vnode()).
1406 
1407 	\param vnode The vnode whose covering node shall be returned.
1408 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1409 		any vnode.
1410 */
1411 static inline Vnode*
1412 get_covering_vnode(Vnode* vnode)
1413 {
1414 	if (!vnode->IsCovered())
1415 		return NULL;
1416 
1417 	ReadLocker vnodeReadLocker(sVnodeLock);
1418 	return get_covering_vnode_locked(vnode);
1419 }
1420 
1421 
1422 static void
1423 free_unused_vnodes()
1424 {
1425 	free_unused_vnodes(
1426 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1427 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1428 }
1429 
1430 
1431 static void
1432 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1433 {
1434 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1435 
1436 	free_unused_vnodes(level);
1437 }
1438 
1439 
1440 static inline void
1441 put_advisory_locking(struct advisory_locking* locking)
1442 {
1443 	release_sem(locking->lock);
1444 }
1445 
1446 
1447 /*!	Returns the advisory_locking object of the \a vnode in case it
1448 	has one, and locks it.
1449 	You have to call put_advisory_locking() when you're done with
1450 	it.
1451 	Note, you must not have the vnode mutex locked when calling
1452 	this function.
1453 */
1454 static struct advisory_locking*
1455 get_advisory_locking(struct vnode* vnode)
1456 {
1457 	rw_lock_read_lock(&sVnodeLock);
1458 	vnode->Lock();
1459 
1460 	struct advisory_locking* locking = vnode->advisory_locking;
1461 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1462 
1463 	vnode->Unlock();
1464 	rw_lock_read_unlock(&sVnodeLock);
1465 
1466 	if (lock >= 0)
1467 		lock = acquire_sem(lock);
1468 	if (lock < 0) {
1469 		// This means the locking has been deleted in the meantime
1470 		// or had never existed in the first place - otherwise, we
1471 		// would get the lock at some point.
1472 		return NULL;
1473 	}
1474 
1475 	return locking;
1476 }
1477 
1478 
1479 /*!	Creates a locked advisory_locking object, and attaches it to the
1480 	given \a vnode.
1481 	Returns B_OK in case of success -- also if the vnode got such an
1482 	object from someone else in the meantime; you'll still get it
1483 	locked then.
1484 */
1485 static status_t
1486 create_advisory_locking(struct vnode* vnode)
1487 {
1488 	if (vnode == NULL)
1489 		return B_FILE_ERROR;
1490 
1491 	ObjectDeleter<advisory_locking> lockingDeleter;
1492 	struct advisory_locking* locking = NULL;
1493 
1494 	while (get_advisory_locking(vnode) == NULL) {
1495 		// no locking object set on the vnode yet, create one
1496 		if (locking == NULL) {
1497 			locking = new(std::nothrow) advisory_locking;
1498 			if (locking == NULL)
1499 				return B_NO_MEMORY;
1500 			lockingDeleter.SetTo(locking);
1501 
1502 			locking->wait_sem = create_sem(0, "advisory lock");
1503 			if (locking->wait_sem < 0)
1504 				return locking->wait_sem;
1505 
1506 			locking->lock = create_sem(0, "advisory locking");
1507 			if (locking->lock < 0)
1508 				return locking->lock;
1509 		}
1510 
1511 		// set our newly created locking object
1512 		ReadLocker _(sVnodeLock);
1513 		AutoLocker<Vnode> nodeLocker(vnode);
1514 		if (vnode->advisory_locking == NULL) {
1515 			vnode->advisory_locking = locking;
1516 			lockingDeleter.Detach();
1517 			return B_OK;
1518 		}
1519 	}
1520 
1521 	// The vnode already had a locking object. That's just as well.
1522 
1523 	return B_OK;
1524 }
1525 
1526 
1527 /*!	Retrieves the first lock that has been set by the current team.
1528 */
1529 static status_t
1530 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1531 {
1532 	struct advisory_locking* locking = get_advisory_locking(vnode);
1533 	if (locking == NULL)
1534 		return B_BAD_VALUE;
1535 
1536 	// TODO: this should probably get the flock by its file descriptor!
1537 	team_id team = team_get_current_team_id();
1538 	status_t status = B_BAD_VALUE;
1539 
1540 	LockList::Iterator iterator = locking->locks.GetIterator();
1541 	while (iterator.HasNext()) {
1542 		struct advisory_lock* lock = iterator.Next();
1543 
1544 		if (lock->team == team) {
1545 			flock->l_start = lock->start;
1546 			flock->l_len = lock->end - lock->start + 1;
1547 			status = B_OK;
1548 			break;
1549 		}
1550 	}
1551 
1552 	put_advisory_locking(locking);
1553 	return status;
1554 }
1555 
1556 
1557 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1558 	with the advisory_lock \a lock.
1559 */
1560 static bool
1561 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1562 {
1563 	if (flock == NULL)
1564 		return true;
1565 
1566 	return lock->start <= flock->l_start - 1 + flock->l_len
1567 		&& lock->end >= flock->l_start;
1568 }
1569 
1570 
1571 /*!	Removes the specified lock, or all locks of the calling team
1572 	if \a flock is NULL.
1573 */
1574 static status_t
1575 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1576 {
1577 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1578 
1579 	struct advisory_locking* locking = get_advisory_locking(vnode);
1580 	if (locking == NULL)
1581 		return B_OK;
1582 
1583 	// TODO: use the thread ID instead??
1584 	team_id team = team_get_current_team_id();
1585 	pid_t session = thread_get_current_thread()->team->session_id;
1586 
1587 	// find matching lock entries
1588 
1589 	LockList::Iterator iterator = locking->locks.GetIterator();
1590 	while (iterator.HasNext()) {
1591 		struct advisory_lock* lock = iterator.Next();
1592 		bool removeLock = false;
1593 
1594 		if (lock->session == session)
1595 			removeLock = true;
1596 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1597 			bool endsBeyond = false;
1598 			bool startsBefore = false;
1599 			if (flock != NULL) {
1600 				startsBefore = lock->start < flock->l_start;
1601 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1602 			}
1603 
1604 			if (!startsBefore && !endsBeyond) {
1605 				// lock is completely contained in flock
1606 				removeLock = true;
1607 			} else if (startsBefore && !endsBeyond) {
1608 				// cut the end of the lock
1609 				lock->end = flock->l_start - 1;
1610 			} else if (!startsBefore && endsBeyond) {
1611 				// cut the start of the lock
1612 				lock->start = flock->l_start + flock->l_len;
1613 			} else {
1614 				// divide the lock into two locks
1615 				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// use malloc() rather than new, so that the NULL check
					// below is meaningful and the free() in the removal path
					// matches (cf. acquire_advisory_lock())
1616 				if (secondLock == NULL) {
1617 					// TODO: we should probably revert the locks we already
1618 					// changed... (ie. allocate upfront)
1619 					put_advisory_locking(locking);
1620 					return B_NO_MEMORY;
1621 				}
1622 
1623 				secondLock->team = lock->team;
1624 				secondLock->session = lock->session;
1625 				// values must already be normalized when getting here
1626 				secondLock->start = flock->l_start + flock->l_len;
1627 				secondLock->end = lock->end;
1628 					// copy the original end before truncating the first lock
1629 				secondLock->shared = lock->shared;
1630 				lock->end = flock->l_start - 1;
1631 
1632 				locking->locks.Add(secondLock);
1633 			}
1634 		}
1635 
1636 		if (removeLock) {
1637 			// this lock is no longer used
1638 			iterator.Remove();
1639 			free(lock);
1640 		}
1641 	}
1642 
1643 	bool removeLocking = locking->locks.IsEmpty();
1644 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1645 
1646 	put_advisory_locking(locking);
1647 
1648 	if (removeLocking) {
1649 		// We can remove the whole advisory locking structure; it's no
1650 		// longer used
1651 		locking = get_advisory_locking(vnode);
1652 		if (locking != NULL) {
1653 			ReadLocker locker(sVnodeLock);
1654 			AutoLocker<Vnode> nodeLocker(vnode);
1655 
1656 			// the locking could have been changed in the meantime
1657 			if (locking->locks.IsEmpty()) {
1658 				vnode->advisory_locking = NULL;
1659 				nodeLocker.Unlock();
1660 				locker.Unlock();
1661 
1662 				// we've detached the locking from the vnode, so we can
1663 				// safely delete it
1664 				delete locking;
1665 			} else {
1666 				// the locking is in use again
1667 				nodeLocker.Unlock();
1668 				locker.Unlock();
1669 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1670 			}
1671 		}
1672 	}
1673 
1674 	return B_OK;
1675 }
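

// Worked example for the splitting case in release_advisory_lock() above:
// releasing the region l_start = 90, l_len = 20 (i.e. [90, 109]) from an
// advisory lock spanning [80, 119] leaves two locks, [80, 89] and
// [110, 119].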
1676 
1677 
1678 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1679 	will wait for the lock to become available, if there are any collisions
1680 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1681 
1682 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1683 	BSD flock() semantics are used, that is, all children can unlock the file
1684 	in question (we even allow parents to remove the lock, but that
1685 	seems to be in line with what the BSDs are doing).
1686 */
1687 static status_t
1688 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1689 	bool wait)
1690 {
1691 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1692 		vnode, flock, wait ? "yes" : "no"));
1693 
1694 	bool shared = flock->l_type == F_RDLCK;
1695 	status_t status = B_OK;
1696 
1697 	// TODO: do deadlock detection!
1698 
1699 	struct advisory_locking* locking;
1700 
1701 	while (true) {
1702 		// if this vnode has an advisory_locking structure attached,
1703 		// lock that one and search for any colliding file lock
1704 		status = create_advisory_locking(vnode);
1705 		if (status != B_OK)
1706 			return status;
1707 
1708 		locking = vnode->advisory_locking;
1709 		team_id team = team_get_current_team_id();
1710 		sem_id waitForLock = -1;
1711 
1712 		// test for collisions
1713 		LockList::Iterator iterator = locking->locks.GetIterator();
1714 		while (iterator.HasNext()) {
1715 			struct advisory_lock* lock = iterator.Next();
1716 
1717 			// TODO: locks from the same team might be joinable!
1718 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1719 				// locks do overlap
1720 				if (!shared || !lock->shared) {
1721 					// we need to wait
1722 					waitForLock = locking->wait_sem;
1723 					break;
1724 				}
1725 			}
1726 		}
1727 
1728 		if (waitForLock < 0)
1729 			break;
1730 
1731 		// We need to wait. Do that, or fail now if we've been asked not to.
1732 
1733 		if (!wait) {
1734 			put_advisory_locking(locking);
1735 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1736 		}
1737 
1738 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1739 			B_CAN_INTERRUPT, 0);
1740 		if (status != B_OK && status != B_BAD_SEM_ID)
1741 			return status;
1742 
1743 		// We have been notified, but we need to re-lock the locking object. So
1744 		// go another round...
1745 	}
1746 
1747 	// install new lock
1748 
1749 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1750 		sizeof(struct advisory_lock));
1751 	if (lock == NULL) {
1752 		put_advisory_locking(locking);
1753 		return B_NO_MEMORY;
1754 	}
1755 
1756 	lock->team = team_get_current_team_id();
1757 	lock->session = session;
1758 	// values must already be normalized when getting here
1759 	lock->start = flock->l_start;
1760 	lock->end = flock->l_start - 1 + flock->l_len;
1761 	lock->shared = shared;
1762 
1763 	locking->locks.Add(lock);
1764 	put_advisory_locking(locking);
1765 
1766 	return status;
1767 }
1768 
1769 
1770 /*!	Normalizes the \a flock structure to make it easier to compare the
1771 	structure with others. The l_start and l_len fields are set to absolute
1772 	values according to the l_whence field.
1773 */
1774 static status_t
1775 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1776 {
1777 	switch (flock->l_whence) {
1778 		case SEEK_SET:
1779 			break;
1780 		case SEEK_CUR:
1781 			flock->l_start += descriptor->pos;
1782 			break;
1783 		case SEEK_END:
1784 		{
1785 			struct vnode* vnode = descriptor->u.vnode;
1786 			struct stat stat;
1787 			status_t status;
1788 
1789 			if (!HAS_FS_CALL(vnode, read_stat))
1790 				return B_UNSUPPORTED;
1791 
1792 			status = FS_CALL(vnode, read_stat, &stat);
1793 			if (status != B_OK)
1794 				return status;
1795 
1796 			flock->l_start += stat.st_size;
1797 			break;
1798 		}
1799 		default:
1800 			return B_BAD_VALUE;
1801 	}
1802 
1803 	if (flock->l_start < 0)
1804 		flock->l_start = 0;
1805 	if (flock->l_len == 0)
1806 		flock->l_len = OFF_MAX;
1807 
1808 	// don't let the offset and length overflow
1809 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1810 		flock->l_len = OFF_MAX - flock->l_start;
1811 
1812 	if (flock->l_len < 0) {
1813 		// a negative length reverses the region
1814 		flock->l_start += flock->l_len;
1815 		flock->l_len = -flock->l_len;
1816 	}
1817 
1818 	return B_OK;
1819 }
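

// Worked examples: with descriptor->pos == 100, a request with
// l_whence = SEEK_CUR, l_start = -20, l_len = 10 is normalized to
// l_start = 80, l_len = 10. A negative length reverses the region, so
// l_start = 100, l_len = -10 becomes l_start = 90, l_len = 10, and
// l_len == 0 is turned into "up to OFF_MAX", i.e. to the end of the file.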
1820 
1821 
1822 static void
1823 replace_vnode_if_disconnected(struct fs_mount* mount,
1824 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1825 	struct vnode* fallBack, bool lockRootLock)
1826 {
1827 	struct vnode* givenVnode = vnode;
1828 	bool vnodeReplaced = false;
1829 
1830 	ReadLocker vnodeReadLocker(sVnodeLock);
1831 
1832 	if (lockRootLock)
1833 		mutex_lock(&sIOContextRootLock);
1834 
1835 	while (vnode != NULL && vnode->mount == mount
1836 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1837 		if (vnode->covers != NULL) {
1838 			// redirect the vnode to the covered vnode
1839 			vnode = vnode->covers;
1840 		} else
1841 			vnode = fallBack;
1842 
1843 		vnodeReplaced = true;
1844 	}
1845 
1846 	// If we've replaced the node, grab a reference for the new one.
1847 	if (vnodeReplaced && vnode != NULL)
1848 		inc_vnode_ref_count(vnode);
1849 
1850 	if (lockRootLock)
1851 		mutex_unlock(&sIOContextRootLock);
1852 
1853 	vnodeReadLocker.Unlock();
1854 
1855 	if (vnodeReplaced)
1856 		put_vnode(givenVnode);
1857 }
1858 
1859 
1860 /*!	Disconnects all file descriptors that are associated with the
1861 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1862 	\a mount object.
1863 
1864 	Note, after you've called this function, there might still be ongoing
1865 	accesses - they won't be interrupted if they were already in progress.
1866 	However, any subsequent access will fail.
1867 
1868 	This is not a cheap function and should be used with care and rarely.
1869 	TODO: there is currently no means to stop a blocking read/write!
1870 */
1871 static void
1872 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1873 	struct vnode* vnodeToDisconnect)
1874 {
1875 	// iterate over all teams and peek into their file descriptors
1876 	TeamListIterator teamIterator;
1877 	while (Team* team = teamIterator.Next()) {
1878 		BReference<Team> teamReference(team, true);
1879 
1880 		// lock the I/O context
1881 		io_context* context = team->io_context;
1882 		MutexLocker contextLocker(context->io_mutex);
1883 
1884 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1885 			sRoot, true);
1886 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1887 			sRoot, false);
1888 
1889 		for (uint32 i = 0; i < context->table_size; i++) {
1890 			if (struct file_descriptor* descriptor = context->fds[i]) {
1891 				inc_fd_ref_count(descriptor);
1892 
1893 				// if this descriptor points at this mount, we
1894 				// need to disconnect it to be able to unmount
1895 				struct vnode* vnode = fd_vnode(descriptor);
1896 				if (vnodeToDisconnect != NULL) {
1897 					if (vnode == vnodeToDisconnect)
1898 						disconnect_fd(descriptor);
1899 				} else if ((vnode != NULL && vnode->mount == mount)
1900 					|| (vnode == NULL && descriptor->u.mount == mount))
1901 					disconnect_fd(descriptor);
1902 
1903 				put_fd(descriptor);
1904 			}
1905 		}
1906 	}
1907 }
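

// For context (hedged summary, not part of the interface): the forced
// unmount path (B_FORCE_UNMOUNT) and vfs_disconnect_vnode() build on
// this -- every team's root and cwd are redirected to a safe vnode, and
// all matching open FDs are disconnected.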
1908 
1909 
1910 /*!	\brief Gets the root node of the current IO context.
1911 	If \a kernel is \c true, the kernel IO context will be used.
1912 	The caller obtains a reference to the returned node.
1913 */
1914 struct vnode*
1915 get_root_vnode(bool kernel)
1916 {
1917 	if (!kernel) {
1918 		// Get current working directory from io context
1919 		struct io_context* context = get_current_io_context(kernel);
1920 
1921 		mutex_lock(&sIOContextRootLock);
1922 
1923 		struct vnode* root = context->root;
1924 		if (root != NULL)
1925 			inc_vnode_ref_count(root);
1926 
1927 		mutex_unlock(&sIOContextRootLock);
1928 
1929 		if (root != NULL)
1930 			return root;
1931 
1932 		// That should never happen.
1933 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1934 			"root\n", team_get_current_team_id());
1935 	}
1936 
1937 	inc_vnode_ref_count(sRoot);
1938 	return sRoot;
1939 }
1940 
1941 
1942 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1943 
1944 	Given an arbitrary vnode (identified by mount and node ID), the function
1945 	checks, whether the vnode is covered by another vnode. If it is, the
1946 	function returns the mount and node ID of the covering vnode. Otherwise
1947 	it simply returns the supplied mount and node ID.
1948 
1949 	In case of error (e.g. the supplied node could not be found) the variables
1950 	for storing the resolved mount and node ID remain untouched and an error
1951 	code is returned.
1952 
1953 	\param mountID The mount ID of the vnode in question.
1954 	\param nodeID The node ID of the vnode in question.
1955 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1956 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1957 	\return
1958 	- \c B_OK, if everything went fine,
1959 	- another error code, if something went wrong.
1960 */
1961 status_t
1962 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1963 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1964 {
1965 	// get the node
1966 	struct vnode* node;
1967 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1968 	if (error != B_OK)
1969 		return error;
1970 
1971 	// resolve the node
1972 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1973 		put_vnode(node);
1974 		node = coveringNode;
1975 	}
1976 
1977 	// set the return values
1978 	*resolvedMountID = node->device;
1979 	*resolvedNodeID = node->id;
1980 
1981 	put_vnode(node);
1982 
1983 	return B_OK;
1984 }
1985 
1986 
1987 /*!	\brief Gets the directory path and leaf name for a given path.
1988 
1989 	The supplied \a path is transformed to refer to the directory part of
1990 	the entry identified by the original path, and into the buffer \a filename
1991 	the leaf name of the original entry is written.
1992 	Neither the returned path nor the leaf name can be expected to be
1993 	canonical.
1994 
1995 	\param path The path to be analyzed. Must be able to store at least one
1996 		   additional character.
1997 	\param filename The buffer into which the leaf name will be written.
1998 		   Must be of size B_FILE_NAME_LENGTH at least.
1999 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2000 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2001 		   if the given path name is empty.
2002 */
2003 static status_t
2004 get_dir_path_and_leaf(char* path, char* filename)
2005 {
2006 	if (*path == '\0')
2007 		return B_ENTRY_NOT_FOUND;
2008 
2009 	char* last = strrchr(path, '/');
2010 		// '/' are not allowed in file names!
2011 
2012 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2013 
2014 	if (last == NULL) {
2015 		// this path is a single segment with no '/' in it
2016 		// ex. "foo"
2017 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2018 			return B_NAME_TOO_LONG;
2019 
2020 		strcpy(path, ".");
2021 	} else {
2022 		last++;
2023 		if (last[0] == '\0') {
2024 			// special case: the path ends in one or more '/' - remove them
2025 			while (*--last == '/' && last != path);
2026 			last[1] = '\0';
2027 
2028 			if (last == path && last[0] == '/') {
2029 				// This path points to the root of the file system
2030 				strcpy(filename, ".");
2031 				return B_OK;
2032 			}
2033 			for (; last != path && *(last - 1) != '/'; last--);
2034 				// rewind to the start of the leaf before the '/'
2035 		}
2036 
2037 		// normal leaf: replace the leaf portion of the path with a '.'
2038 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2039 			return B_NAME_TOO_LONG;
2040 
2041 		last[0] = '.';
2042 		last[1] = '\0';
2043 	}
2044 	return B_OK;
2045 }
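

// Examples (illustrative, assuming sufficiently large buffers):
//
//	path "/a/b/c"  -> path "/a/b/.", filename "c"
//	path "/a/b/"   -> path "/a/.",   filename "b"
//	path "foo"     -> path ".",      filename "foo"
//	path "/"       -> path "/",      filename "."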
2046 
2047 
2048 static status_t
2049 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2050 	bool traverse, bool kernel, struct vnode** _vnode)
2051 {
2052 	char clonedName[B_FILE_NAME_LENGTH + 1];
2053 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2054 		return B_NAME_TOO_LONG;
2055 
2056 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2057 	struct vnode* directory;
2058 
2059 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2060 	if (status < 0)
2061 		return status;
2062 
2063 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2064 		_vnode, NULL);
2065 }
2066 
2067 
2068 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2069 	and returns the respective vnode.
2070 	On success a reference to the vnode is acquired for the caller.
2071 */
2072 static status_t
2073 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2074 {
2075 	ino_t id;
2076 
2077 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2078 		return get_vnode(dir->device, id, _vnode, true, false);
2079 
2080 	status_t status = FS_CALL(dir, lookup, name, &id);
2081 	if (status != B_OK)
2082 		return status;
2083 
2084 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2085 	// have a reference and just need to look the node up.
2086 	rw_lock_read_lock(&sVnodeLock);
2087 	*_vnode = lookup_vnode(dir->device, id);
2088 	rw_lock_read_unlock(&sVnodeLock);
2089 
2090 	if (*_vnode == NULL) {
2091 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2092 			"0x%Lx)\n", dir->device, id);
2093 		return B_ENTRY_NOT_FOUND;
2094 	}
2095 
2096 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2097 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2098 //		(*_vnode)->mount->id, (*_vnode)->id);
2099 
2100 	return B_OK;
2101 }
2102 
2103 
2104 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2105 	\a path must not be NULL.
2106 	If it returns successfully, \a path contains the name of the last path
2107 	component. This function clobbers the buffer pointed to by \a path only
2108 	if it does contain more than one component.
2109 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2110 	it is successful or not!
2111 */
2112 static status_t
2113 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2114 	int count, struct io_context* ioContext, struct vnode** _vnode,
2115 	ino_t* _parentID)
2116 {
2117 	status_t status = B_OK;
2118 	ino_t lastParentID = vnode->id;
2119 
2120 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2121 
2122 	if (path == NULL) {
2123 		put_vnode(vnode);
2124 		return B_BAD_VALUE;
2125 	}
2126 
2127 	if (*path == '\0') {
2128 		put_vnode(vnode);
2129 		return B_ENTRY_NOT_FOUND;
2130 	}
2131 
2132 	while (true) {
2133 		struct vnode* nextVnode;
2134 		char* nextPath;
2135 
2136 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2137 			path));
2138 
2139 		// done?
2140 		if (path[0] == '\0')
2141 			break;
2142 
2143 		// walk to find the next path component ("path" will point to a single
2144 		// path component), and filter out multiple slashes
2145 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2146 				nextPath++);
2147 
2148 		if (*nextPath == '/') {
2149 			*nextPath = '\0';
2150 			do
2151 				nextPath++;
2152 			while (*nextPath == '/');
2153 		}
2154 
2155 		// If the '..' is at a covering vnode, move to the covered
2156 		// vnode so we pass the '..' path to the underlying file system.
2157 		// Also prevent breaking the root of the IO context.
2158 		if (strcmp("..", path) == 0) {
2159 			if (vnode == ioContext->root) {
2160 				// Attempted prison break! Keep it contained.
2161 				path = nextPath;
2162 				continue;
2163 			}
2164 
2165 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2166 				nextVnode = coveredVnode;
2167 				put_vnode(vnode);
2168 				vnode = nextVnode;
2169 			}
2170 		}
2171 
2172 		// check if vnode is really a directory
2173 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2174 			status = B_NOT_A_DIRECTORY;
2175 
2176 		// Check if we have the right to search the current directory vnode.
2177 		// If a file system doesn't have the access() function, we assume that
2178 		// searching a directory is always allowed
2179 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2180 			status = FS_CALL(vnode, access, X_OK);
2181 
2182 		// Tell the filesystem to get the vnode of this path component (if we
2183 		// got the permission from the call above)
2184 		if (status == B_OK)
2185 			status = lookup_dir_entry(vnode, path, &nextVnode);
2186 
2187 		if (status != B_OK) {
2188 			put_vnode(vnode);
2189 			return status;
2190 		}
2191 
2192 		// If the new node is a symbolic link, resolve it (if we've been told
2193 		// to do it)
2194 		if (S_ISLNK(nextVnode->Type())
2195 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2196 			size_t bufferSize;
2197 			char* buffer;
2198 
2199 			TRACE(("traverse link\n"));
2200 
2201 			// it's not exactly nice style using goto in this way, but hey,
2202 			// it works :-/
2203 			if (count + 1 > B_MAX_SYMLINKS) {
2204 				status = B_LINK_LIMIT;
2205 				goto resolve_link_error;
2206 			}
2207 
2208 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2209 			if (buffer == NULL) {
2210 				status = B_NO_MEMORY;
2211 				goto resolve_link_error;
2212 			}
2213 
2214 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2215 				bufferSize--;
2216 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2217 				// null-terminate
2218 				if (status >= 0)
2219 					buffer[bufferSize] = '\0';
2220 			} else
2221 				status = B_BAD_VALUE;
2222 
2223 			if (status != B_OK) {
2224 				free(buffer);
2225 
2226 		resolve_link_error:
2227 				put_vnode(vnode);
2228 				put_vnode(nextVnode);
2229 
2230 				return status;
2231 			}
2232 			put_vnode(nextVnode);
2233 
2234 			// Check if we start from the root directory or the current
2235 			// directory ("vnode" still points to that one).
2236 			// Cut off all leading slashes if it's the root directory
2237 			path = buffer;
2238 			bool absoluteSymlink = false;
2239 			if (path[0] == '/') {
2240 				// we don't need the old directory anymore
2241 				put_vnode(vnode);
2242 
2243 				while (*++path == '/')
2244 					;
2245 
2246 				mutex_lock(&sIOContextRootLock);
2247 				vnode = ioContext->root;
2248 				inc_vnode_ref_count(vnode);
2249 				mutex_unlock(&sIOContextRootLock);
2250 
2251 				absoluteSymlink = true;
2252 			}
2253 
2254 			inc_vnode_ref_count(vnode);
2255 				// balance the next recursion - we will decrement the
2256 				// ref_count of the vnode, no matter if we succeeded or not
2257 
2258 			if (absoluteSymlink && *path == '\0') {
2259 				// symlink was just "/"
2260 				nextVnode = vnode;
2261 			} else {
2262 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2263 					ioContext, &nextVnode, &lastParentID);
2264 			}
2265 
2266 			free(buffer);
2267 
2268 			if (status != B_OK) {
2269 				put_vnode(vnode);
2270 				return status;
2271 			}
2272 		} else
2273 			lastParentID = vnode->id;
2274 
2275 		// decrease the ref count on the old dir we just looked up into
2276 		put_vnode(vnode);
2277 
2278 		path = nextPath;
2279 		vnode = nextVnode;
2280 
2281 		// see if we hit a covered node
2282 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2283 			put_vnode(vnode);
2284 			vnode = coveringNode;
2285 		}
2286 	}
2287 
2288 	*_vnode = vnode;
2289 	if (_parentID)
2290 		*_parentID = lastParentID;
2291 
2292 	return B_OK;
2293 }
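

// Caller sketch (illustrative): since vnode_path_to_vnode() always
// consumes a reference to the starting vnode, callers that want to
// keep theirs acquire an extra reference first:
//
//	inc_vnode_ref_count(dir);
//	struct vnode* resolved;
//	status_t status = vnode_path_to_vnode(dir, path, true, 0, ioContext,
//		&resolved, NULL);
//	if (status == B_OK)
//		put_vnode(resolved);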
2294 
2295 
2296 static status_t
2297 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2298 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2299 {
2300 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2301 		get_current_io_context(kernel), _vnode, _parentID);
2302 }
2303 
2304 
2305 static status_t
2306 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2307 	ino_t* _parentID, bool kernel)
2308 {
2309 	struct vnode* start = NULL;
2310 
2311 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2312 
2313 	if (!path)
2314 		return B_BAD_VALUE;
2315 
2316 	if (*path == '\0')
2317 		return B_ENTRY_NOT_FOUND;
2318 
2319 	// figure out if we need to start at root or at cwd
2320 	if (*path == '/') {
2321 		if (sRoot == NULL) {
2322 			// we're a bit early, aren't we?
2323 			return B_ERROR;
2324 		}
2325 
2326 		while (*++path == '/')
2327 			;
2328 		start = get_root_vnode(kernel);
2329 
2330 		if (*path == '\0') {
2331 			*_vnode = start;
2332 			return B_OK;
2333 		}
2334 
2335 	} else {
2336 		struct io_context* context = get_current_io_context(kernel);
2337 
2338 		mutex_lock(&context->io_mutex);
2339 		start = context->cwd;
2340 		if (start != NULL)
2341 			inc_vnode_ref_count(start);
2342 		mutex_unlock(&context->io_mutex);
2343 
2344 		if (start == NULL)
2345 			return B_ERROR;
2346 	}
2347 
2348 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2349 		_parentID);
2350 }
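

// Caller sketch (illustrative; note that the buffer is clobbered by
// the walk):
//
//	char pathBuffer[B_PATH_NAME_LENGTH];
//	strlcpy(pathBuffer, "/tmp/foo", sizeof(pathBuffer));
//	struct vnode* vnode;
//	status_t status = path_to_vnode(pathBuffer, true, &vnode, NULL,
//		kernel);
//	if (status == B_OK)
//		put_vnode(vnode);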
2351 
2352 
2353 /*! Returns the vnode of the next-to-last segment of the path, and returns
2354 	the last path component in \a filename.
2355 	The path buffer must be able to store at least one additional character.
2356 */
2357 static status_t
2358 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2359 	bool kernel)
2360 {
2361 	status_t status = get_dir_path_and_leaf(path, filename);
2362 	if (status != B_OK)
2363 		return status;
2364 
2365 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2366 }
2367 
2368 
2369 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2370 		   to by a FD + path pair.
2371 
2372 	\a path must be given in either case. \a fd might be omitted, in which
2373 	case \a path is either an absolute path or one relative to the current
2374 	directory. If both are supplied and \a path is relative, it is reckoned off
2375 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2376 	ignored.
2377 
2378 	The caller has the responsibility to call put_vnode() on the returned
2379 	directory vnode.
2380 
2381 	\param fd The FD. May be < 0.
2382 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2383 	       is modified by this function. It must have at least room for a
2384 	       string one character longer than the path it contains.
2385 	\param _vnode A pointer to a variable the directory vnode shall be written
2386 		   into.
2387 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2388 		   the leaf name of the specified entry will be written.
2389 	\param kernel \c true, if invoked from inside the kernel, \c false if
2390 		   invoked from userland.
2391 	\return \c B_OK, if everything went fine, another error code otherwise.
2392 */
2393 static status_t
2394 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2395 	char* filename, bool kernel)
2396 {
2397 	if (!path)
2398 		return B_BAD_VALUE;
2399 	if (*path == '\0')
2400 		return B_ENTRY_NOT_FOUND;
2401 	if (fd < 0)
2402 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2403 
2404 	status_t status = get_dir_path_and_leaf(path, filename);
2405 	if (status != B_OK)
2406 		return status;
2407 
2408 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2409 }
2410 
2411 
2412 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2413 		   to by a vnode + path pair.
2414 
2415 	\a path must be given in either case. \a vnode might be omitted, in which
2416 	case \a path is either an absolute path or one relative to the current
2417 	directory. If both are supplied and \a path is relative, it is reckoned off
2418 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2419 	ignored.
2420 
2421 	The caller has the responsibility to call put_vnode() on the returned
2422 	directory vnode.
2423 
2424 	\param vnode The vnode. May be \c NULL.
2425 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2426 	       is modified by this function. It must have at least room for a
2427 	       string one character longer than the path it contains.
2428 	\param _vnode A pointer to a variable the directory vnode shall be written
2429 		   into.
2430 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2431 		   the leaf name of the specified entry will be written.
2432 	\param kernel \c true, if invoked from inside the kernel, \c false if
2433 		   invoked from userland.
2434 	\return \c B_OK, if everything went fine, another error code otherwise.
2435 */
2436 static status_t
2437 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2438 	struct vnode** _vnode, char* filename, bool kernel)
2439 {
2440 	if (!path)
2441 		return B_BAD_VALUE;
2442 	if (*path == '\0')
2443 		return B_ENTRY_NOT_FOUND;
2444 	if (vnode == NULL || path[0] == '/')
2445 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2446 
2447 	status_t status = get_dir_path_and_leaf(path, filename);
2448 	if (status != B_OK)
2449 		return status;
2450 
2451 	inc_vnode_ref_count(vnode);
2452 		// vnode_path_to_vnode() always decrements the ref count
2453 
2454 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2455 }
2456 
2457 
2458 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2459 */
2460 static status_t
2461 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2462 	size_t bufferSize, struct io_context* ioContext)
2463 {
2464 	if (bufferSize < sizeof(struct dirent))
2465 		return B_BAD_VALUE;
2466 
2467 	// See if the vnode is covering another vnode and move to the covered
2468 	// vnode, so we get the name from the underlying file system
2469 	VNodePutter vnodePutter;
2470 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2471 		vnode = coveredVnode;
2472 		vnodePutter.SetTo(vnode);
2473 	}
2474 
2475 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2476 		// The FS supports getting the name of a vnode.
2477 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2478 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2479 			return B_OK;
2480 	}
2481 
2482 	// The FS doesn't support getting the name of a vnode. So we search the
2483 	// parent directory for the vnode, if the caller let us.
2484 
2485 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2486 		return B_UNSUPPORTED;
2487 
2488 	void* cookie;
2489 
2490 	status_t status = FS_CALL(parent, open_dir, &cookie);
2491 	if (status >= B_OK) {
2492 		while (true) {
2493 			uint32 num = 1;
2494 			// We use the FS hook directly instead of dir_read(), since we don't
2495 			// want the entries to be fixed up. We have already resolved vnode to
2496 			// the covered node.
2497 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2498 				&num);
2499 			if (status != B_OK)
2500 				break;
2501 			if (num == 0) {
2502 				status = B_ENTRY_NOT_FOUND;
2503 				break;
2504 			}
2505 
2506 			if (vnode->id == buffer->d_ino) {
2507 				// found correct entry!
2508 				break;
2509 			}
2510 		}
2511 
		// the cookie was opened on the parent, so it has to be closed there, too
2512 		FS_CALL(parent, close_dir, cookie);
2513 		FS_CALL(parent, free_dir_cookie, cookie);
2514 	}
2515 	return status;
2516 }
2517 
2518 
2519 static status_t
2520 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2521 	size_t nameSize, bool kernel)
2522 {
2523 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2524 	struct dirent* dirent = (struct dirent*)buffer;
2525 
2526 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2527 		get_current_io_context(kernel));
2528 	if (status != B_OK)
2529 		return status;
2530 
2531 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2532 		return B_BUFFER_OVERFLOW;
2533 
2534 	return B_OK;
2535 }
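

// Usage sketch (illustrative): retrieving a node's name, given its
// parent directory:
//
//	char name[B_FILE_NAME_LENGTH];
//	status_t status = get_vnode_name(vnode, parentVnode, name,
//		sizeof(name), kernel);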
2536 
2537 
2538 /*!	Gets the full path to a given directory vnode.
2539 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2540 	file system doesn't support this call, it will fall back to iterating
2541 	through the parent directory to get the name of the child.
2542 
2543 	To protect against circular loops, it supports a maximum tree depth
2544 	of 256 levels.
2545 
2546 	Note that the path may no longer be valid by the time this function
2547 	returns! It doesn't use any locking to guarantee a consistent path, as
2548 	paths aren't safe anyway: the path to a file can change at any time.
2549 
2550 	It might be a good idea, though, to check if the returned path exists
2551 	in the calling function (it's not done here because of efficiency)
2552 */
2553 static status_t
2554 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2555 	bool kernel)
2556 {
2557 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2558 
2559 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2560 		return B_BAD_VALUE;
2561 
2562 	if (!S_ISDIR(vnode->Type()))
2563 		return B_NOT_A_DIRECTORY;
2564 
2565 	char* path = buffer;
2566 	int32 insert = bufferSize;
2567 	int32 maxLevel = 256;
2568 	int32 length;
2569 	status_t status;
2570 	struct io_context* ioContext = get_current_io_context(kernel);
2571 
2572 	// we don't use get_vnode() here because this call is more
2573 	// efficient and does all we need from get_vnode()
2574 	inc_vnode_ref_count(vnode);
2575 
2576 	if (vnode != ioContext->root) {
2577 		// we haven't hit the IO context root yet
2578 		// resolve a vnode to its covered vnode
2579 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2580 			put_vnode(vnode);
2581 			vnode = coveredVnode;
2582 		}
2583 	}
2584 
2585 	path[--insert] = '\0';
2586 		// the path is filled right to left
2587 
2588 	while (true) {
2589 		// the name buffer is also used for fs_read_dir()
2590 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2591 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2592 		struct vnode* parentVnode;
2593 
2594 		// lookup the parent vnode
2595 		if (vnode == ioContext->root) {
2596 			// we hit the IO context root
2597 			parentVnode = vnode;
2598 			inc_vnode_ref_count(vnode);
2599 		} else {
2600 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2601 			if (status != B_OK)
2602 				goto out;
2603 		}
2604 
2605 		// get the node's name
2606 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2607 			sizeof(nameBuffer), ioContext);
2608 
2609 		if (vnode != ioContext->root) {
2610 			// we haven't hit the IO context root yet
2611 			// resolve a vnode to its covered vnode
2612 			if (Vnode* coveredVnode = get_covered_vnode(parentVnode)) {
2613 				put_vnode(parentVnode);
2614 				parentVnode = coveredVnode;
2615 			}
2616 		}
2617 
2618 		bool hitRoot = (parentVnode == vnode);
2619 
2620 		// release the current vnode, we only need its parent from now on
2621 		put_vnode(vnode);
2622 		vnode = parentVnode;
2623 
2624 		if (status != B_OK)
2625 			goto out;
2626 
2627 		if (hitRoot) {
2628 			// we have reached "/", which means we have constructed the full
2629 			// path
2630 			break;
2631 		}
2632 
2633 		// TODO: add an explicit check for loops in about 10 levels to do
2634 		// real loop detection
2635 
2636 		// don't go deeper than 'maxLevel' to prevent circular loops
2637 		if (maxLevel-- < 0) {
2638 			status = B_LINK_LIMIT;
2639 			goto out;
2640 		}
2641 
2642 		// add the name in front of the current path
2643 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2644 		length = strlen(name);
2645 		insert -= length;
2646 		if (insert <= 0) {
2647 			status = B_RESULT_NOT_REPRESENTABLE;
2648 			goto out;
2649 		}
2650 		memcpy(path + insert, name, length);
2651 		path[--insert] = '/';
2652 	}
2653 
2654 	// the root dir will result in an empty path: fix it
2655 	if (path[insert] == '\0')
2656 		path[--insert] = '/';
2657 
2658 	TRACE(("  path is: %s\n", path + insert));
2659 
2660 	// move the path to the start of the buffer
2661 	length = bufferSize - insert;
2662 	memmove(buffer, path + insert, length);
2663 
2664 out:
2665 	put_vnode(vnode);
2666 	return status;
2667 }
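

// Example (illustrative): for a directory /foo/bar/baz the buffer is
// filled right to left -- "/baz", "/bar/baz", "/foo/bar/baz" -- and the
// result is finally memmove()d to the start of the buffer.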
2668 
2669 
2670 /*!	Checks the length of every path component, and adds a '.'
2671 	if the path ends in a slash.
2672 	The given path buffer must be able to store at least one
2673 	additional character.
2674 */
2675 static status_t
2676 check_path(char* to)
2677 {
2678 	int32 length = 0;
2679 
2680 	// check length of every path component
2681 
2682 	while (*to) {
2683 		char* begin;
2684 		if (*to == '/')
2685 			to++, length++;
2686 
2687 		begin = to;
2688 		while (*to != '/' && *to)
2689 			to++, length++;
2690 
2691 		if (to - begin > B_FILE_NAME_LENGTH)
2692 			return B_NAME_TOO_LONG;
2693 	}
2694 
2695 	if (length == 0)
2696 		return B_ENTRY_NOT_FOUND;
2697 
2698 	// complete path if there is a slash at the end
2699 
2700 	if (*(to - 1) == '/') {
2701 		if (length > B_PATH_NAME_LENGTH - 2)
2702 			return B_NAME_TOO_LONG;
2703 
2704 		to[0] = '.';
2705 		to[1] = '\0';
2706 	}
2707 
2708 	return B_OK;
2709 }
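

// Examples (illustrative):
//
//	"/foo/bar"   -> B_OK, buffer unchanged
//	"/foo/bar/"  -> B_OK, buffer becomes "/foo/bar/."
//	""           -> B_ENTRY_NOT_FOUND
//
// A single component longer than B_FILE_NAME_LENGTH yields
// B_NAME_TOO_LONG.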
2710 
2711 
2712 static struct file_descriptor*
2713 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2714 {
2715 	struct file_descriptor* descriptor
2716 		= get_fd(get_current_io_context(kernel), fd);
2717 	if (descriptor == NULL)
2718 		return NULL;
2719 
2720 	struct vnode* vnode = fd_vnode(descriptor);
2721 	if (vnode == NULL) {
2722 		put_fd(descriptor);
2723 		return NULL;
2724 	}
2725 
2726 	// ToDo: when we can close a file descriptor at any point, investigate
2727 	//	if this is still valid to do (accessing the vnode without ref_count
2728 	//	or locking)
2729 	*_vnode = vnode;
2730 	return descriptor;
2731 }
2732 
2733 
2734 static struct vnode*
2735 get_vnode_from_fd(int fd, bool kernel)
2736 {
2737 	struct file_descriptor* descriptor;
2738 	struct vnode* vnode;
2739 
2740 	descriptor = get_fd(get_current_io_context(kernel), fd);
2741 	if (descriptor == NULL)
2742 		return NULL;
2743 
2744 	vnode = fd_vnode(descriptor);
2745 	if (vnode != NULL)
2746 		inc_vnode_ref_count(vnode);
2747 
2748 	put_fd(descriptor);
2749 	return vnode;
2750 }
2751 
2752 
2753 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2754 	only the path will be considered. In this case, the \a path must not be
2755 	NULL.
2756 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2757 	and should be NULL for files.
2758 */
2759 static status_t
2760 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2761 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2762 {
2763 	if (fd < 0 && !path)
2764 		return B_BAD_VALUE;
2765 
2766 	if (path != NULL && *path == '\0')
2767 		return B_ENTRY_NOT_FOUND;
2768 
2769 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2770 		// no FD or absolute path
2771 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2772 	}
2773 
2774 	// FD only, or FD + relative path
2775 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2776 	if (!vnode)
2777 		return B_FILE_ERROR;
2778 
2779 	if (path != NULL) {
2780 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2781 			_vnode, _parentID);
2782 	}
2783 
2784 	// there is no relative path to take into account
2785 
2786 	*_vnode = vnode;
2787 	if (_parentID)
2788 		*_parentID = -1;
2789 
2790 	return B_OK;
2791 }
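

// Dispatch summary (illustrative):
//
//	path absolute ("/x/y")   -> path_to_vnode(), \a fd is ignored
//	fd < 0, path relative    -> path_to_vnode(), resolved against cwd
//	fd < 0, path NULL        -> B_BAD_VALUE
//	fd >= 0, path relative   -> walk starting at the FD's vnode
//	fd >= 0, path NULL       -> the FD's vnode itself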
2792 
2793 
2794 static int
2795 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2796 	void* cookie, int openMode, bool kernel)
2797 {
2798 	struct file_descriptor* descriptor;
2799 	int fd;
2800 
2801 	// If the vnode is locked, we don't allow creating a new file/directory
2802 	// file_descriptor for it
2803 	if (vnode && vnode->mandatory_locked_by != NULL
2804 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2805 		return B_BUSY;
2806 
2807 	descriptor = alloc_fd();
2808 	if (!descriptor)
2809 		return B_NO_MEMORY;
2810 
2811 	if (vnode)
2812 		descriptor->u.vnode = vnode;
2813 	else
2814 		descriptor->u.mount = mount;
2815 	descriptor->cookie = cookie;
2816 
2817 	switch (type) {
2818 		// vnode types
2819 		case FDTYPE_FILE:
2820 			descriptor->ops = &sFileOps;
2821 			break;
2822 		case FDTYPE_DIR:
2823 			descriptor->ops = &sDirectoryOps;
2824 			break;
2825 		case FDTYPE_ATTR:
2826 			descriptor->ops = &sAttributeOps;
2827 			break;
2828 		case FDTYPE_ATTR_DIR:
2829 			descriptor->ops = &sAttributeDirectoryOps;
2830 			break;
2831 
2832 		// mount types
2833 		case FDTYPE_INDEX_DIR:
2834 			descriptor->ops = &sIndexDirectoryOps;
2835 			break;
2836 		case FDTYPE_QUERY:
2837 			descriptor->ops = &sQueryOps;
2838 			break;
2839 
2840 		default:
2841 			panic("get_new_fd() called with unknown type %d\n", type);
2842 			break;
2843 	}
2844 	descriptor->type = type;
2845 	descriptor->open_mode = openMode;
2846 
2847 	io_context* context = get_current_io_context(kernel);
2848 	fd = new_fd(context, descriptor);
2849 	if (fd < 0) {
2850 		free(descriptor);
2851 		return B_NO_MORE_FDS;
2852 	}
2853 
2854 	mutex_lock(&context->io_mutex);
2855 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2856 	mutex_unlock(&context->io_mutex);
2857 
2858 	return fd;
2859 }
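

// Usage sketch (illustrative of the open() code paths): hand a vnode
// plus FS cookie over to the FD layer, cleaning up on failure:
//
//	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
//		kernel);
//	if (fd < 0) {
//		FS_CALL(vnode, close, cookie);
//		FS_CALL(vnode, free_cookie, cookie);
//	}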
2860 
2861 
2862 /*!	Normalizes \a path in place. It's otherwise semantically equivalent to
2863 	vfs_normalize_path(). See there for more documentation.
2864 */
2865 static status_t
2866 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2867 {
2868 	VNodePutter dirPutter;
2869 	struct vnode* dir = NULL;
2870 	status_t error;
2871 
2872 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2873 		// get dir vnode + leaf name
2874 		struct vnode* nextDir;
2875 		char leaf[B_FILE_NAME_LENGTH];
2876 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2877 		if (error != B_OK)
2878 			return error;
2879 
2880 		dir = nextDir;
2881 		strcpy(path, leaf);
2882 		dirPutter.SetTo(dir);
2883 
2884 		// get file vnode, if we shall resolve links
2885 		bool fileExists = false;
2886 		struct vnode* fileVnode;
2887 		VNodePutter fileVnodePutter;
2888 		if (traverseLink) {
2889 			inc_vnode_ref_count(dir);
2890 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2891 					NULL) == B_OK) {
2892 				fileVnodePutter.SetTo(fileVnode);
2893 				fileExists = true;
2894 			}
2895 		}
2896 
2897 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2898 			// we're done -- construct the path
2899 			bool hasLeaf = true;
2900 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2901 				// special cases "." and ".." -- get the dir, forget the leaf
2902 				inc_vnode_ref_count(dir);
2903 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2904 					&nextDir, NULL);
2905 				if (error != B_OK)
2906 					return error;
2907 				dir = nextDir;
2908 				dirPutter.SetTo(dir);
2909 				hasLeaf = false;
2910 			}
2911 
2912 			// get the directory path
2913 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2914 			if (error != B_OK)
2915 				return error;
2916 
2917 			// append the leaf name
2918 			if (hasLeaf) {
2919 				// insert a directory separator if this is not the file system
2920 				// root
2921 				if ((strcmp(path, "/") != 0
2922 					&& strlcat(path, "/", pathSize) >= pathSize)
2923 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2924 					return B_NAME_TOO_LONG;
2925 				}
2926 			}
2927 
2928 			return B_OK;
2929 		}
2930 
2931 		// read link
2932 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2933 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2934 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2935 			if (error != B_OK)
2936 				return error;
2937 			path[bufferSize] = '\0';
2938 		} else
2939 			return B_BAD_VALUE;
2940 	}
2941 
2942 	return B_LINK_LIMIT;
2943 }
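

// Example (illustrative, assuming the entry exists and no symlinks are
// involved): a buffer containing "/tmp/../boot/./home" is rewritten in
// place to "/boot/home" -- "." and ".." are resolved against the actual
// directory vnodes, not merely lexically.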
2944 
2945 
2946 #ifdef ADD_DEBUGGER_COMMANDS
2947 
2948 
2949 static void
2950 _dump_advisory_locking(advisory_locking* locking)
2951 {
2952 	if (locking == NULL)
2953 		return;
2954 
2955 	kprintf("   lock:        %ld", locking->lock);
2956 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2957 
2958 	int32 index = 0;
2959 	LockList::Iterator iterator = locking->locks.GetIterator();
2960 	while (iterator.HasNext()) {
2961 		struct advisory_lock* lock = iterator.Next();
2962 
2963 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2964 		kprintf("        start:  %Ld\n", lock->start);
2965 		kprintf("        end:    %Ld\n", lock->end);
2966 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2967 	}
2968 }
2969 
2970 
2971 static void
2972 _dump_mount(struct fs_mount* mount)
2973 {
2974 	kprintf("MOUNT: %p\n", mount);
2975 	kprintf(" id:            %ld\n", mount->id);
2976 	kprintf(" device_name:   %s\n", mount->device_name);
2977 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2978 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2979 	kprintf(" partition:     %p\n", mount->partition);
2980 	kprintf(" lock:          %p\n", &mount->rlock);
2981 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2982 		mount->owns_file_device ? " owns_file_device" : "");
2983 
2984 	fs_volume* volume = mount->volume;
2985 	while (volume != NULL) {
2986 		kprintf(" volume %p:\n", volume);
2987 		kprintf("  layer:            %ld\n", volume->layer);
2988 		kprintf("  private_volume:   %p\n", volume->private_volume);
2989 		kprintf("  ops:              %p\n", volume->ops);
2990 		kprintf("  file_system:      %p\n", volume->file_system);
2991 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2992 		volume = volume->super_volume;
2993 	}
2994 
2995 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2996 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2997 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
2998 	set_debug_variable("_partition", (addr_t)mount->partition);
2999 }
3000 
3001 
3002 static bool
3003 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3004 	const char* name)
3005 {
3006 	bool insertSlash = buffer[bufferSize] != '\0';
3007 	size_t nameLength = strlen(name);
3008 
3009 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3010 		return false;
3011 
3012 	if (insertSlash)
3013 		buffer[--bufferSize] = '/';
3014 
3015 	bufferSize -= nameLength;
3016 	memcpy(buffer + bufferSize, name, nameLength);
3017 
3018 	return true;
3019 }
3020 
3021 
3022 static bool
3023 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3024 	ino_t nodeID)
3025 {
3026 	if (bufferSize == 0)
3027 		return false;
3028 
3029 	bool insertSlash = buffer[bufferSize] != '\0';
3030 	if (insertSlash)
3031 		buffer[--bufferSize] = '/';
3032 
3033 	size_t size = snprintf(buffer, bufferSize,
3034 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3035 	if (size > bufferSize) {
3036 		if (insertSlash)
3037 			bufferSize++;
3038 		return false;
3039 	}
3040 
3041 	if (size < bufferSize)
3042 		memmove(buffer + bufferSize - size, buffer, size);
3043 
3044 	bufferSize -= size;
3045 	return true;
3046 }
3047 
3048 
3049 static char*
3050 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3051 	bool& _truncated)
3052 {
3053 	// null-terminate the path
3054 	buffer[--bufferSize] = '\0';
3055 
3056 	while (true) {
3057 		while (vnode->covers != NULL)
3058 			vnode = vnode->covers;
3059 
3060 		if (vnode == sRoot) {
3061 			_truncated = bufferSize == 0;
3062 			if (!_truncated)
3063 				buffer[--bufferSize] = '/';
3064 			return buffer + bufferSize;
3065 		}
3066 
3067 		// resolve the name
3068 		ino_t dirID;
3069 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3070 			vnode->id, dirID);
3071 		if (name == NULL) {
3072 			// Failed to resolve the name -- prepend "<dev,node>/".
3073 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3074 				vnode->mount->id, vnode->id);
3075 			return buffer + bufferSize;
3076 		}
3077 
3078 		// prepend the name
3079 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3080 			_truncated = true;
3081 			return buffer + bufferSize;
3082 		}
3083 
3084 		// resolve the directory node
3085 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3086 		if (nextVnode == NULL) {
3087 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3088 				vnode->mount->id, dirID);
3089 			return buffer + bufferSize;
3090 		}
3091 
3092 		vnode = nextVnode;
3093 	}
3094 }
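

// Example output (illustrative): if the entry cache can resolve all
// names, the result looks like "/boot/home/Desktop"; components with
// unknown names show up as "<dev,node>" placeholders, e.g.
// "<2,154>/Desktop".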
3095 
3096 
3097 static void
3098 _dump_vnode(struct vnode* vnode, bool printPath)
3099 {
3100 	kprintf("VNODE: %p\n", vnode);
3101 	kprintf(" device:        %ld\n", vnode->device);
3102 	kprintf(" id:            %Ld\n", vnode->id);
3103 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3104 	kprintf(" private_node:  %p\n", vnode->private_node);
3105 	kprintf(" mount:         %p\n", vnode->mount);
3106 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3107 	kprintf(" covers:        %p\n", vnode->covers);
3108 	kprintf(" cache:         %p\n", vnode->cache);
3109 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3110 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3111 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3112 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3113 
3114 	_dump_advisory_locking(vnode->advisory_locking);
3115 
3116 	if (printPath) {
3117 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3118 		if (buffer != NULL) {
3119 			bool truncated;
3120 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3121 				B_PATH_NAME_LENGTH, truncated);
3122 			if (path != NULL) {
3123 				kprintf(" path:          ");
3124 				if (truncated)
3125 					kputs("<truncated>/");
3126 				kputs(path);
3127 				kputs("\n");
3128 			} else
3129 				kprintf("Failed to resolve vnode path.\n");
3130 
3131 			debug_free(buffer);
3132 		} else
3133 			kprintf("Failed to allocate memory for constructing the path.\n");
3134 	}
3135 
3136 	set_debug_variable("_node", (addr_t)vnode->private_node);
3137 	set_debug_variable("_mount", (addr_t)vnode->mount);
3138 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3139 	set_debug_variable("_covers", (addr_t)vnode->covers);
3140 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3141 }
3142 
3143 
3144 static int
3145 dump_mount(int argc, char** argv)
3146 {
3147 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3148 		kprintf("usage: %s [id|address]\n", argv[0]);
3149 		return 0;
3150 	}
3151 
3152 	uint32 id = parse_expression(argv[1]);
3153 	struct fs_mount* mount = NULL;
3154 
3155 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3156 	if (mount == NULL) {
3157 		if (IS_USER_ADDRESS(id)) {
3158 			kprintf("fs_mount not found\n");
3159 			return 0;
3160 		}
3161 		mount = (fs_mount*)id;
3162 	}
3163 
3164 	_dump_mount(mount);
3165 	return 0;
3166 }
3167 
3168 
3169 static int
3170 dump_mounts(int argc, char** argv)
3171 {
3172 	if (argc != 1) {
3173 		kprintf("usage: %s\n", argv[0]);
3174 		return 0;
3175 	}
3176 
3177 	kprintf("address     id root       covers     cookie     fs_name\n");
3178 
3179 	struct hash_iterator iterator;
3180 	struct fs_mount* mount;
3181 
3182 	hash_open(sMountsTable, &iterator);
3183 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3184 			!= NULL) {
3185 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3186 			mount->root_vnode->covers, mount->volume->private_volume,
3187 			mount->volume->file_system_name);
3188 
3189 		fs_volume* volume = mount->volume;
3190 		while (volume->super_volume != NULL) {
3191 			volume = volume->super_volume;
3192 			kprintf("                                     %p %s\n",
3193 				volume->private_volume, volume->file_system_name);
3194 		}
3195 	}
3196 
3197 	hash_close(sMountsTable, &iterator, false);
3198 	return 0;
3199 }
3200 
3201 
3202 static int
3203 dump_vnode(int argc, char** argv)
3204 {
3205 	bool printPath = false;
3206 	int argi = 1;
3207 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3208 		printPath = true;
3209 		argi++;
3210 	}
3211 
3212 	if (argi >= argc || argi + 2 < argc) {
3213 		print_debugger_command_usage(argv[0]);
3214 		return 0;
3215 	}
3216 
3217 	struct vnode* vnode = NULL;
3218 
3219 	if (argi + 1 == argc) {
3220 		vnode = (struct vnode*)parse_expression(argv[argi]);
3221 		if (IS_USER_ADDRESS(vnode)) {
3222 			kprintf("invalid vnode address\n");
3223 			return 0;
3224 		}
3225 		_dump_vnode(vnode, printPath);
3226 		return 0;
3227 	}
3228 
3229 	struct hash_iterator iterator;
3230 	dev_t device = parse_expression(argv[argi]);
3231 	ino_t id = parse_expression(argv[argi + 1]);
3232 
3233 	hash_open(sVnodeTable, &iterator);
3234 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3235 		if (vnode->id != id || vnode->device != device)
3236 			continue;
3237 
3238 		_dump_vnode(vnode, printPath);
3239 	}
3240 
3241 	hash_close(sVnodeTable, &iterator, false);
3242 	return 0;
3243 }
3244 
3245 
3246 static int
3247 dump_vnodes(int argc, char** argv)
3248 {
3249 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3250 		kprintf("usage: %s [device]\n", argv[0]);
3251 		return 0;
3252 	}
3253 
3254 	// restrict dumped nodes to a certain device if requested
3255 	dev_t device = parse_expression(argv[1]);
3256 
3257 	struct hash_iterator iterator;
3258 	struct vnode* vnode;
3259 
3260 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3261 		"flags\n");
3262 
3263 	hash_open(sVnodeTable, &iterator);
3264 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3265 		if (vnode->device != device)
3266 			continue;
3267 
3268 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3269 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3270 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3271 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3272 	}
3273 
3274 	hash_close(sVnodeTable, &iterator, false);
3275 	return 0;
3276 }
3277 
3278 
3279 static int
3280 dump_vnode_caches(int argc, char** argv)
3281 {
3282 	struct hash_iterator iterator;
3283 	struct vnode* vnode;
3284 
3285 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3286 		kprintf("usage: %s [device]\n", argv[0]);
3287 		return 0;
3288 	}
3289 
3290 	// restrict dumped nodes to a certain device if requested
3291 	dev_t device = -1;
3292 	if (argc > 1)
3293 		device = parse_expression(argv[1]);
3294 
3295 	kprintf("address    dev     inode cache          size   pages\n");
3296 
3297 	hash_open(sVnodeTable, &iterator);
3298 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3299 		if (vnode->cache == NULL)
3300 			continue;
3301 		if (device != -1 && vnode->device != device)
3302 			continue;
3303 
3304 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3305 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3306 				/ B_PAGE_SIZE, vnode->cache->page_count);
3307 	}
3308 
3309 	hash_close(sVnodeTable, &iterator, false);
3310 	return 0;
3311 }
3312 
3313 
3314 int
3315 dump_io_context(int argc, char** argv)
3316 {
3317 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3318 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3319 		return 0;
3320 	}
3321 
3322 	struct io_context* context = NULL;
3323 
3324 	if (argc > 1) {
3325 		uint32 num = parse_expression(argv[1]);
3326 		if (IS_KERNEL_ADDRESS(num))
3327 			context = (struct io_context*)num;
3328 		else {
3329 			Team* team = team_get_team_struct_locked(num);
3330 			if (team == NULL) {
3331 				kprintf("could not find team with ID %ld\n", num);
3332 				return 0;
3333 			}
3334 			context = (struct io_context*)team->io_context;
3335 		}
3336 	} else
3337 		context = get_current_io_context(true);
3338 
3339 	kprintf("I/O CONTEXT: %p\n", context);
3340 	kprintf(" root vnode:\t%p\n", context->root);
3341 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3342 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3343 	kprintf(" max fds:\t%lu\n", context->table_size);
3344 
3345 	if (context->num_used_fds)
3346 		kprintf("   no.  type         ops  ref  open  mode         pos"
3347 			"      cookie\n");
3348 
3349 	for (uint32 i = 0; i < context->table_size; i++) {
3350 		struct file_descriptor* fd = context->fds[i];
3351 		if (fd == NULL)
3352 			continue;
3353 
3354 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3355 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3356 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3357 			fd->pos, fd->cookie,
3358 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3359 				? "mount" : "vnode",
3360 			fd->u.vnode);
3361 	}
3362 
3363 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3364 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3365 
3366 	set_debug_variable("_cwd", (addr_t)context->cwd);
3367 
3368 	return 0;
3369 }
3370 
3371 
3372 int
3373 dump_vnode_usage(int argc, char** argv)
3374 {
3375 	if (argc != 1) {
3376 		kprintf("usage: %s\n", argv[0]);
3377 		return 0;
3378 	}
3379 
3380 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3381 		kMaxUnusedVnodes);
3382 
3383 	struct hash_iterator iterator;
3384 	hash_open(sVnodeTable, &iterator);
3385 
3386 	uint32 count = 0;
3387 	struct vnode* vnode;
3388 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3389 		count++;
3390 	}
3391 
3392 	hash_close(sVnodeTable, &iterator, false);
3393 
3394 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3395 	return 0;
3396 }
3397 
3398 #endif	// ADD_DEBUGGER_COMMANDS
3399 
3400 /*!	Clears the physical pages described by an iovec array.
3401 	Returns in \a _bytes the number of bytes successfully cleared.
3402 */
3403 static status_t
3404 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3405 {
3406 	size_t bytes = *_bytes;
3407 	size_t index = 0;
3408 
3409 	while (bytes > 0) {
3410 		size_t length = min_c(vecs[index].iov_len, bytes);
3411 
3412 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3413 			length);
3414 		if (status != B_OK) {
3415 			*_bytes -= bytes;
3416 			return status;
3417 		}
3418 
3419 		bytes -= length;
		index++;
3420 	}
3421 
3422 	return B_OK;
3423 }
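

// Usage sketch (illustrative, mirroring the sparse-read calls below):
//
//	size_t bytes = 2 * B_PAGE_SIZE;
//	status_t status = zero_pages(vecs, vecCount, &bytes);
//		// on success, all bytes were cleared; on failure, "bytes" has
//		// been reduced to the number of bytes actually cleared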
3424 
3425 
3426 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3427 	and calls the file system hooks to read/write the request to disk.
3428 */
3429 static status_t
3430 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3431 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3432 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3433 	bool doWrite)
3434 {
3435 	if (fileVecCount == 0) {
3436 		// There are no file vecs at this offset, so we're obviously trying
3437 		// to access the file outside of its bounds
3438 		return B_BAD_VALUE;
3439 	}
3440 
3441 	size_t numBytes = *_numBytes;
3442 	uint32 fileVecIndex;
3443 	size_t vecOffset = *_vecOffset;
3444 	uint32 vecIndex = *_vecIndex;
3445 	status_t status;
3446 	size_t size;
3447 
3448 	if (!doWrite && vecOffset == 0) {
3449 		// now directly read the data from the device
3450 		// the first file_io_vec can be read directly
3451 
3452 		if (fileVecs[0].length < numBytes)
3453 			size = fileVecs[0].length;
3454 		else
3455 			size = numBytes;
3456 
3457 		if (fileVecs[0].offset >= 0) {
3458 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3459 				&vecs[vecIndex], vecCount - vecIndex, &size);
3460 		} else {
3461 			// sparse read
3462 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3463 		}
3464 		if (status != B_OK)
3465 			return status;
3466 
3467 		// TODO: this is a work-around for buggy device drivers!
3468 		//	When our own drivers honour the length, we can:
3469 		//	a) also use this direct I/O for writes (otherwise, it would
3470 		//	   overwrite precious data)
3471 		//	b) panic if the term below is true (at least for writes)
3472 		if (size > fileVecs[0].length) {
3473 			//dprintf("warning: device driver %p doesn't respect total length "
3474 			//	"in read_pages() call!\n", ref->device);
3475 			size = fileVecs[0].length;
3476 		}
3477 
3478 		ASSERT(size <= fileVecs[0].length);
3479 
3480 		// If the file portion was contiguous, we're already done now
3481 		if (size == numBytes)
3482 			return B_OK;
3483 
3484 		// if we reached the end of the file, we can return as well
3485 		if (size != fileVecs[0].length) {
3486 			*_numBytes = size;
3487 			return B_OK;
3488 		}
3489 
3490 		fileVecIndex = 1;
3491 
3492 		// first, find out where we have to continue in our iovecs
3493 		for (; vecIndex < vecCount; vecIndex++) {
3494 			if (size < vecs[vecIndex].iov_len)
3495 				break;
3496 
3497 			size -= vecs[vecIndex].iov_len;
3498 		}
3499 
3500 		vecOffset = size;
3501 	} else {
3502 		fileVecIndex = 0;
3503 		size = 0;
3504 	}
3505 
3506 	// Too bad, let's process the rest of the file_io_vecs
3507 
3508 	size_t totalSize = size;
3509 	size_t bytesLeft = numBytes - size;
3510 
3511 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3512 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3513 		off_t fileOffset = fileVec.offset;
3514 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3515 
3516 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3517 
3518 		// process the complete fileVec
3519 		while (fileLeft > 0) {
3520 			iovec tempVecs[MAX_TEMP_IO_VECS];
3521 			uint32 tempCount = 0;
3522 
3523 			// size tracks how much of what is left of the current fileVec
3524 			// (fileLeft) has been assigned to tempVecs
3525 			size = 0;
3526 
3527 			// assign what is left of the current fileVec to the tempVecs
3528 			for (size = 0; size < fileLeft && vecIndex < vecCount
3529 					&& tempCount < MAX_TEMP_IO_VECS;) {
3530 				// try to satisfy one iovec per iteration (or as much as
3531 				// possible)
3532 
3533 				// bytes left of the current iovec
3534 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3535 				if (vecLeft == 0) {
3536 					vecOffset = 0;
3537 					vecIndex++;
3538 					continue;
3539 				}
3540 
3541 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3542 					vecIndex, vecOffset, size));
3543 
3544 				// actually available bytes
3545 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3546 
3547 				tempVecs[tempCount].iov_base
3548 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3549 				tempVecs[tempCount].iov_len = tempVecSize;
3550 				tempCount++;
3551 
3552 				size += tempVecSize;
3553 				vecOffset += tempVecSize;
3554 			}
3555 
3556 			size_t bytes = size;
3557 
3558 			if (fileOffset == -1) {
3559 				if (doWrite) {
3560 					panic("sparse write attempt: vnode %p", vnode);
3561 					status = B_IO_ERROR;
3562 				} else {
3563 					// sparse read
3564 					status = zero_pages(tempVecs, tempCount, &bytes);
3565 				}
3566 			} else if (doWrite) {
3567 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3568 					tempVecs, tempCount, &bytes);
3569 			} else {
3570 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3571 					tempVecs, tempCount, &bytes);
3572 			}
3573 			if (status != B_OK)
3574 				return status;
3575 
3576 			totalSize += bytes;
3577 			bytesLeft -= size;
3578 			if (fileOffset >= 0)
3579 				fileOffset += size;
3580 			fileLeft -= size;
3581 			//dprintf("-> file left = %Lu\n", fileLeft);
3582 
3583 			if (size != bytes || vecIndex >= vecCount) {
3584 				// there are no more bytes or iovecs, let's bail out
3585 				*_numBytes = totalSize;
3586 				return B_OK;
3587 			}
3588 		}
3589 	}
3590 
3591 	*_vecIndex = vecIndex;
3592 	*_vecOffset = vecOffset;
3593 	*_numBytes = totalSize;
3594 	return B_OK;
3595 }
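

// Example (illustrative): a 3-page read backed by the file vecs
// { (offset 100, 1 page), (offset 500, 2 pages) } is issued as one
// read of 1 page at device offset 100 and one of 2 pages at offset
// 500, each scattered into the matching portions of the caller's
// iovecs. A file vec offset of -1 denotes a sparse region: it reads
// back as zeros, and writing to it panics.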
3596 
3597 
3598 //	#pragma mark - public API for file systems
3599 
3600 
3601 extern "C" status_t
3602 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3603 	fs_vnode_ops* ops)
3604 {
3605 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3606 		volume, volume->id, vnodeID, privateNode));
3607 
3608 	if (privateNode == NULL)
3609 		return B_BAD_VALUE;
3610 
3611 	// create the node
3612 	bool nodeCreated;
3613 	struct vnode* vnode;
3614 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3615 		nodeCreated);
3616 	if (status != B_OK)
3617 		return status;
3618 
3619 	WriteLocker nodeLocker(sVnodeLock, true);
3620 		// create_new_vnode_and_lock() has locked for us
3621 
3622 	// file system integrity check:
3623 	// test if the vnode already exists and bail out if this is the case!
3624 	if (!nodeCreated) {
3625 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3626 			volume->id, vnodeID, privateNode, vnode->private_node);
3627 		return B_ERROR;
3628 	}
3629 
3630 	vnode->private_node = privateNode;
3631 	vnode->ops = ops;
3632 	vnode->SetUnpublished(true);
3633 
3634 	TRACE(("returns: %s\n", strerror(status)));
3635 
3636 	return status;
3637 }
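

// Typical FS usage sketch (illustrative; "inode" and "gVnodeOps" are
// placeholders for a file system's own structures): a new node is
// first announced with new_vnode() and, once fully initialized, made
// available with publish_vnode() below:
//
//	status_t status = new_vnode(volume, inode->ID(), inode, &gVnodeOps);
//	if (status == B_OK) {
//		status = publish_vnode(volume, inode->ID(), inode, &gVnodeOps,
//			S_IFREG, 0);
//	}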
3638 
3639 
3640 extern "C" status_t
3641 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3642 	fs_vnode_ops* ops, int type, uint32 flags)
3643 {
3644 	FUNCTION(("publish_vnode()\n"));
3645 
3646 	WriteLocker locker(sVnodeLock);
3647 
3648 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3649 
3650 	bool nodeCreated = false;
3651 	if (vnode == NULL) {
3652 		if (privateNode == NULL)
3653 			return B_BAD_VALUE;
3654 
3655 		// create the node
3656 		locker.Unlock();
3657 			// create_new_vnode_and_lock() will re-lock for us on success
3658 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3659 			nodeCreated);
3660 		if (status != B_OK)
3661 			return status;
3662 
3663 		locker.SetTo(sVnodeLock, true);
3664 	}
3665 
3666 	if (nodeCreated) {
3667 		vnode->private_node = privateNode;
3668 		vnode->ops = ops;
3669 		vnode->SetUnpublished(true);
3670 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3671 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3672 		// already known, but not published
3673 	} else
3674 		return B_BAD_VALUE;
3675 
3676 	bool publishSpecialSubNode = false;
3677 
3678 	vnode->SetType(type);
3679 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3680 	publishSpecialSubNode = is_special_node_type(type)
3681 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3682 
3683 	status_t status = B_OK;
3684 
3685 	// create sub vnodes, if necessary
3686 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3687 		locker.Unlock();
3688 
3689 		fs_volume* subVolume = volume;
3690 		if (volume->sub_volume != NULL) {
3691 			while (status == B_OK && subVolume->sub_volume != NULL) {
3692 				subVolume = subVolume->sub_volume;
3693 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3694 					vnode);
3695 			}
3696 		}
3697 
3698 		if (status == B_OK && publishSpecialSubNode)
3699 			status = create_special_sub_node(vnode, flags);
3700 
3701 		if (status != B_OK) {
3702 			// error -- clean up the created sub vnodes
3703 			while (subVolume->super_volume != volume) {
3704 				subVolume = subVolume->super_volume;
3705 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3706 			}
3707 		}
3708 
3709 		if (status == B_OK) {
3710 			ReadLocker vnodesReadLocker(sVnodeLock);
3711 			AutoLocker<Vnode> nodeLocker(vnode);
3712 			vnode->SetBusy(false);
3713 			vnode->SetUnpublished(false);
3714 		} else {
3715 			locker.Lock();
3716 			hash_remove(sVnodeTable, vnode);
3717 			remove_vnode_from_mount_list(vnode, vnode->mount);
3718 			free(vnode);
3719 		}
3720 	} else {
3721 		// we still hold the write lock -- mark the node unbusy and published
3722 		vnode->SetBusy(false);
3723 		vnode->SetUnpublished(false);
3724 	}
3725 
3726 	TRACE(("returns: %s\n", strerror(status)));
3727 
3728 	return status;
3729 }
3730 
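/*	A minimal usage sketch: a file system typically pairs new_vnode() with a
	subsequent publish_vnode() -- the node stays busy and invisible to anyone
	else until it is published. "MyNode" and "gMyVnodeOps" are hypothetical
	placeholders for the FS's private node structure and its fs_vnode_ops
	table.

		status_t
		my_fs_create_node(fs_volume* volume, MyNode* node)
		{
			// register the node with the VFS; it remains busy and unpublished
			status_t status = new_vnode(volume, node->id, node, &gMyVnodeOps);
			if (status != B_OK)
				return status;

			// make the node visible to the rest of the system
			return publish_vnode(volume, node->id, node, &gMyVnodeOps,
				S_IFREG, 0);
		}
*/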
3731 
3732 extern "C" status_t
3733 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3734 {
3735 	struct vnode* vnode;
3736 
3737 	if (volume == NULL)
3738 		return B_BAD_VALUE;
3739 
3740 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3741 	if (status != B_OK)
3742 		return status;
3743 
3744 	// If this is a layered FS, we need to get the node cookie for the requested
3745 	// layer.
3746 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3747 		fs_vnode resolvedNode;
3748 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3749 			&resolvedNode);
3750 		if (status != B_OK) {
3751 			panic("get_vnode(): Failed to get super node for vnode %p, "
3752 				"volume: %p", vnode, volume);
3753 			put_vnode(vnode);
3754 			return status;
3755 		}
3756 
3757 		if (_privateNode != NULL)
3758 			*_privateNode = resolvedNode.private_node;
3759 	} else if (_privateNode != NULL)
3760 		*_privateNode = vnode->private_node;
3761 
3762 	return B_OK;
3763 }
3764 
3765 
3766 extern "C" status_t
3767 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3768 {
3769 	struct vnode* vnode;
3770 
3771 	rw_lock_read_lock(&sVnodeLock);
3772 	vnode = lookup_vnode(volume->id, vnodeID);
3773 	rw_lock_read_unlock(&sVnodeLock);
3774 
3775 	if (vnode == NULL)
3776 		return B_BAD_VALUE;
3777 
3778 	inc_vnode_ref_count(vnode);
3779 	return B_OK;
3780 }
3781 
3782 
3783 extern "C" status_t
3784 put_vnode(fs_volume* volume, ino_t vnodeID)
3785 {
3786 	struct vnode* vnode;
3787 
3788 	rw_lock_read_lock(&sVnodeLock);
3789 	vnode = lookup_vnode(volume->id, vnodeID);
3790 	rw_lock_read_unlock(&sVnodeLock);
3791 
3792 	if (vnode == NULL)
3793 		return B_BAD_VALUE;
3794 
3795 	dec_vnode_ref_count(vnode, false, true);
3796 	return B_OK;
3797 }
3798 
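/*	A minimal sketch of the acquire/release pairing above: get_vnode() hands
	out a referenced node, put_vnode() releases that reference again (function
	and variable names are hypothetical).

		status_t
		my_fs_inspect_node(fs_volume* volume, ino_t id)
		{
			void* privateNode;
			status_t status = get_vnode(volume, id, &privateNode);
			if (status != B_OK)
				return status;

			// ... work with privateNode ...

			put_vnode(volume, id);
			return B_OK;
		}
*/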
3799 
3800 extern "C" status_t
3801 remove_vnode(fs_volume* volume, ino_t vnodeID)
3802 {
3803 	ReadLocker locker(sVnodeLock);
3804 
3805 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3806 	if (vnode == NULL)
3807 		return B_ENTRY_NOT_FOUND;
3808 
3809 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3810 		// this vnode is in use
3811 		return B_BUSY;
3812 	}
3813 
3814 	vnode->Lock();
3815 
3816 	vnode->SetRemoved(true);
3817 	bool removeUnpublished = false;
3818 
3819 	if (vnode->IsUnpublished()) {
3820 		// prepare the vnode for deletion
3821 		removeUnpublished = true;
3822 		vnode->SetBusy(true);
3823 	}
3824 
3825 	vnode->Unlock();
3826 	locker.Unlock();
3827 
3828 	if (removeUnpublished) {
3829 		// If the vnode hasn't been published yet, we delete it here
3830 		atomic_add(&vnode->ref_count, -1);
3831 		free_vnode(vnode, true);
3832 	}
3833 
3834 	return B_OK;
3835 }
3836 
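/*	A sketch of the intended use: an FS's unlink hook calls remove_vnode()
	after it has removed the directory entry, so that the node is deleted as
	soon as the last reference to it is released (hook and helper names are
	hypothetical):

		status_t
		my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
		{
			ino_t nodeID;
			status_t status = my_fs_remove_entry(dir, name, &nodeID);
			if (status != B_OK)
				return status;

			return remove_vnode(volume, nodeID);
		}
*/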
3837 
3838 extern "C" status_t
3839 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3840 {
3841 	struct vnode* vnode;
3842 
3843 	rw_lock_read_lock(&sVnodeLock);
3844 
3845 	vnode = lookup_vnode(volume->id, vnodeID);
3846 	if (vnode) {
3847 		AutoLocker<Vnode> nodeLocker(vnode);
3848 		vnode->SetRemoved(false);
3849 	}
3850 
3851 	rw_lock_read_unlock(&sVnodeLock);
3852 	return B_OK;
3853 }
3854 
3855 
3856 extern "C" status_t
3857 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3858 {
3859 	ReadLocker _(sVnodeLock);
3860 
3861 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3862 		if (_removed != NULL)
3863 			*_removed = vnode->IsRemoved();
3864 		return B_OK;
3865 	}
3866 
3867 	return B_BAD_VALUE;
3868 }
3869 
3870 
3871 extern "C" fs_volume*
3872 volume_for_vnode(fs_vnode* _vnode)
3873 {
3874 	if (_vnode == NULL)
3875 		return NULL;
3876 
3877 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3878 	return vnode->mount->volume;
3879 }
3880 
3881 
3882 #if 0
3883 extern "C" status_t
3884 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3885 	size_t* _numBytes)
3886 {
3887 	struct file_descriptor* descriptor;
3888 	struct vnode* vnode;
3889 
3890 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3891 	if (descriptor == NULL)
3892 		return B_FILE_ERROR;
3893 
3894 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3895 		count, 0, _numBytes);
3896 
3897 	put_fd(descriptor);
3898 	return status;
3899 }
3900 
3901 
3902 extern "C" status_t
3903 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3904 	size_t* _numBytes)
3905 {
3906 	struct file_descriptor* descriptor;
3907 	struct vnode* vnode;
3908 
3909 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3910 	if (descriptor == NULL)
3911 		return B_FILE_ERROR;
3912 
3913 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3914 		count, 0, _numBytes);
3915 
3916 	put_fd(descriptor);
3917 	return status;
3918 }
3919 #endif
3920 
3921 
3922 extern "C" status_t
3923 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3924 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3925 	size_t* _bytes)
3926 {
3927 	struct file_descriptor* descriptor;
3928 	struct vnode* vnode;
3929 
3930 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3931 	if (descriptor == NULL)
3932 		return B_FILE_ERROR;
3933 
3934 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3935 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3936 		false);
3937 
3938 	put_fd(descriptor);
3939 	return status;
3940 }
3941 
3942 
3943 extern "C" status_t
3944 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3945 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3946 	size_t* _bytes)
3947 {
3948 	struct file_descriptor* descriptor;
3949 	struct vnode* vnode;
3950 
3951 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3952 	if (descriptor == NULL)
3953 		return B_FILE_ERROR;
3954 
3955 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3956 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3957 		true);
3958 
3959 	put_fd(descriptor);
3960 	return status;
3961 }
3962 
3963 
3964 extern "C" status_t
3965 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3966 {
3967 	// look up the mount -- the caller is required to make sure that the mount
3968 	// won't go away
3969 	MutexLocker locker(sMountMutex);
3970 	struct fs_mount* mount = find_mount(mountID);
3971 	if (mount == NULL)
3972 		return B_BAD_VALUE;
3973 	locker.Unlock();
3974 
3975 	return mount->entry_cache.Add(dirID, name, nodeID);
3976 }
3977 
3978 
3979 extern "C" status_t
3980 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3981 {
3982 	// look up the mount -- the caller is required to make sure that the mount
3983 	// won't go away
3984 	MutexLocker locker(sMountMutex);
3985 	struct fs_mount* mount = find_mount(mountID);
3986 	if (mount == NULL)
3987 		return B_BAD_VALUE;
3988 	locker.Unlock();
3989 
3990 	return mount->entry_cache.Remove(dirID, name);
3991 }
3992 
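/*	A short sketch: a file system can keep the VFS entry cache up to date from
	its directory-modifying hooks, so that repeated lookups of the same name
	don't have to call back into the FS (IDs and name are hypothetical):

		entry_cache_add(volume->id, dirID, "data", nodeID);
		// ... later, when the entry is unlinked:
		entry_cache_remove(volume->id, dirID, "data");
*/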
3993 
3994 //	#pragma mark - private VFS API
3995 //	Functions the VFS exports for other parts of the kernel
3996 
3997 
3998 /*! Acquires another reference to the vnode that has to be released
3999 	by calling vfs_put_vnode().
4000 */
4001 void
4002 vfs_acquire_vnode(struct vnode* vnode)
4003 {
4004 	inc_vnode_ref_count(vnode);
4005 }
4006 
4007 
4008 /*! This is currently called from file_cache_create() only.
4009 	It's probably a temporary solution as long as devfs requires that
4010 	fs_read_pages()/fs_write_pages() are called with the standard
4011 	open cookie and not with a device cookie.
4012 	If that's done differently, remove this call; it has no other
4013 	purpose.
4014 */
4015 extern "C" status_t
4016 vfs_get_cookie_from_fd(int fd, void** _cookie)
4017 {
4018 	struct file_descriptor* descriptor;
4019 
4020 	descriptor = get_fd(get_current_io_context(true), fd);
4021 	if (descriptor == NULL)
4022 		return B_FILE_ERROR;
4023 
4024 	*_cookie = descriptor->cookie;
4025 	return B_OK;
4026 }
4027 
4028 
4029 extern "C" status_t
4030 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4031 {
4032 	*vnode = get_vnode_from_fd(fd, kernel);
4033 
4034 	if (*vnode == NULL)
4035 		return B_FILE_ERROR;
4036 
4037 	return B_NO_ERROR;
4038 }
4039 
4040 
4041 extern "C" status_t
4042 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4043 {
4044 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4045 		path, kernel));
4046 
4047 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4048 	if (pathBuffer.InitCheck() != B_OK)
4049 		return B_NO_MEMORY;
4050 
4051 	char* buffer = pathBuffer.LockBuffer();
4052 	strlcpy(buffer, path, pathBuffer.BufferSize());
4053 
4054 	struct vnode* vnode;
4055 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4056 	if (status != B_OK)
4057 		return status;
4058 
4059 	*_vnode = vnode;
4060 	return B_OK;
4061 }
4062 
4063 
4064 extern "C" status_t
4065 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4066 {
4067 	struct vnode* vnode;
4068 
4069 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4070 	if (status != B_OK)
4071 		return status;
4072 
4073 	*_vnode = vnode;
4074 	return B_OK;
4075 }
4076 
4077 
4078 extern "C" status_t
4079 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4080 	const char* name, struct vnode** _vnode)
4081 {
4082 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4083 }
4084 
4085 
4086 extern "C" void
4087 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4088 {
4089 	*_mountID = vnode->device;
4090 	*_vnodeID = vnode->id;
4091 }
4092 
4093 
4094 /*!
4095 	Helper function abstracting the process of "converting" a given
4096 	vnode-pointer to a fs_vnode-pointer.
4097 	Currently only used in bindfs.
4098 */
4099 extern "C" fs_vnode*
4100 vfs_fsnode_for_vnode(struct vnode* vnode)
4101 {
4102 	return vnode;
4103 }
4104 
4105 
4106 /*!
4107 	Calls fs_open() on the given vnode and returns a new
4108 	file descriptor for it
4109 */
4110 int
4111 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4112 {
4113 	return open_vnode(vnode, openMode, kernel);
4114 }
4115 
4116 
4117 /*!	Looks up a vnode with the given mount and vnode ID.
4118 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4119 	to the node.
4120 	It's currently only used by file_cache_create().
4121 */
4122 extern "C" status_t
4123 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4124 {
4125 	rw_lock_read_lock(&sVnodeLock);
4126 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4127 	rw_lock_read_unlock(&sVnodeLock);
4128 
4129 	if (vnode == NULL)
4130 		return B_ERROR;
4131 
4132 	*_vnode = vnode;
4133 	return B_OK;
4134 }
4135 
4136 
4137 extern "C" status_t
4138 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4139 	bool traverseLeafLink, bool kernel, void** _node)
4140 {
4141 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4142 		volume, path, kernel));
4143 
4144 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4145 	if (pathBuffer.InitCheck() != B_OK)
4146 		return B_NO_MEMORY;
4147 
4148 	fs_mount* mount;
4149 	status_t status = get_mount(volume->id, &mount);
4150 	if (status != B_OK)
4151 		return status;
4152 
4153 	char* buffer = pathBuffer.LockBuffer();
4154 	strlcpy(buffer, path, pathBuffer.BufferSize());
4155 
4156 	struct vnode* vnode = mount->root_vnode;
4157 
4158 	if (buffer[0] == '/')
4159 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4160 	else {
4161 		inc_vnode_ref_count(vnode);
4162 			// vnode_path_to_vnode() releases a reference to the starting vnode
4163 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4164 			kernel, &vnode, NULL);
4165 	}
4166 
4167 	put_mount(mount);
4168 
4169 	if (status != B_OK)
4170 		return status;
4171 
4172 	if (vnode->device != volume->id) {
4173 		// wrong mount ID - must not gain access on foreign file system nodes
4174 		put_vnode(vnode);
4175 		return B_BAD_VALUE;
4176 	}
4177 
4178 	// Use get_vnode() to resolve the cookie for the right layer.
4179 	status = get_vnode(volume, vnode->id, _node);
4180 	put_vnode(vnode);
4181 
4182 	return status;
4183 }
4184 
4185 
4186 status_t
4187 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4188 	struct stat* stat, bool kernel)
4189 {
4190 	status_t status;
4191 
4192 	if (path) {
4193 		// path given: get the stat of the node referred to by (fd, path)
4194 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4195 		if (pathBuffer.InitCheck() != B_OK)
4196 			return B_NO_MEMORY;
4197 
4198 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4199 			traverseLeafLink, stat, kernel);
4200 	} else {
4201 		// no path given: get the FD and use the FD operation
4202 		struct file_descriptor* descriptor
4203 			= get_fd(get_current_io_context(kernel), fd);
4204 		if (descriptor == NULL)
4205 			return B_FILE_ERROR;
4206 
4207 		if (descriptor->ops->fd_read_stat)
4208 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4209 		else
4210 			status = B_UNSUPPORTED;
4211 
4212 		put_fd(descriptor);
4213 	}
4214 
4215 	return status;
4216 }
4217 
4218 
4219 /*!	Finds the full path to the file that contains the module \a moduleName,
4220 	puts it into \a pathBuffer, and returns B_OK for success.
4221 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4222 	\c B_ENTRY_NOT_FOUND if no file could be found.
4223 	\a pathBuffer is clobbered in any case and must not be relied on if this
4224 	function returns unsuccessfully.
4225 	\a basePath and \a pathBuffer must not point to the same space.
4226 */
4227 status_t
4228 vfs_get_module_path(const char* basePath, const char* moduleName,
4229 	char* pathBuffer, size_t bufferSize)
4230 {
4231 	struct vnode* dir;
4232 	struct vnode* file;
4233 	status_t status;
4234 	size_t length;
4235 	char* path;
4236 
4237 	if (bufferSize == 0
4238 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4239 		return B_BUFFER_OVERFLOW;
4240 
4241 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4242 	if (status != B_OK)
4243 		return status;
4244 
4245 	// the path buffer had been clobbered by the above call
4246 	length = strlcpy(pathBuffer, basePath, bufferSize);
4247 	if (pathBuffer[length - 1] != '/')
4248 		pathBuffer[length++] = '/';
4249 
4250 	path = pathBuffer + length;
4251 	bufferSize -= length;
4252 
4253 	while (moduleName) {
4254 		char* nextPath = strchr(moduleName, '/');
4255 		if (nextPath == NULL)
4256 			length = strlen(moduleName);
4257 		else {
4258 			length = nextPath - moduleName;
4259 			nextPath++;
4260 		}
4261 
4262 		if (length + 1 >= bufferSize) {
4263 			status = B_BUFFER_OVERFLOW;
4264 			goto err;
4265 		}
4266 
4267 		memcpy(path, moduleName, length);
4268 		path[length] = '\0';
4269 		moduleName = nextPath;
4270 
4271 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4272 		if (status != B_OK) {
4273 			// vnode_path_to_vnode() has already released the reference to dir
4274 			return status;
4275 		}
4276 
4277 		if (S_ISDIR(file->Type())) {
4278 			// go to the next directory
4279 			path[length] = '/';
4280 			path[length + 1] = '\0';
4281 			path += length + 1;
4282 			bufferSize -= length + 1;
4283 
4284 			dir = file;
4285 		} else if (S_ISREG(file->Type())) {
4286 			// it's a file so it should be what we've searched for
4287 			put_vnode(file);
4288 
4289 			return B_OK;
4290 		} else {
4291 			TRACE(("vfs_get_module_path(): something is strange here: "
4292 				"0x%08lx...\n", file->Type()));
4293 			status = B_ERROR;
4294 			dir = file;
4295 			goto err;
4296 		}
4297 	}
4298 
4299 	// if we got here, the moduleName just pointed to a directory, not to
4300 	// a real module - what should we do in this case?
4301 	status = B_ENTRY_NOT_FOUND;
4302 
4303 err:
4304 	put_vnode(dir);
4305 	return status;
4306 }
4307 
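/*	To illustrate the loop above: for a hypothetical
	basePath "/boot/system/add-ons/kernel" and
	moduleName "bus_managers/usb/v1", pathBuffer grows by one component per
	iteration -- ".../bus_managers", then ".../bus_managers/usb" -- and the
	function returns B_OK as soon as the assembled path resolves to a regular
	file, which is then expected to contain the module.
*/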
4308 
4309 /*!	\brief Normalizes a given path.
4310 
4311 	The path must refer to an existing or non-existing entry in an existing
4312 	directory, that is chopping off the leaf component the remaining path must
4313 	refer to an existing directory.
4314 
4315 	The returned path will be canonical in that it will be absolute, will
4316 	not contain any "." or ".." components or duplicate occurrences of
4317 	'/'s, and none of the directory components will be symbolic links.
4318 
4319 	Any two paths referring to the same entry will result in the same
4320 	normalized path (well, that is pretty much the definition of `normalized',
4321 	isn't it :-).
4322 
4323 	\param path The path to be normalized.
4324 	\param buffer The buffer into which the normalized path will be written.
4325 		   May be the same one as \a path.
4326 	\param bufferSize The size of \a buffer.
4327 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4328 	\param kernel \c true, if the IO context of the kernel shall be used,
4329 		   otherwise that of the team this thread belongs to. Only relevant,
4330 		   if the path is relative (to get the CWD).
4331 	\return \c B_OK if everything went fine, another error code otherwise.
4332 */
4333 status_t
4334 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4335 	bool traverseLink, bool kernel)
4336 {
4337 	if (!path || !buffer || bufferSize < 1)
4338 		return B_BAD_VALUE;
4339 
4340 	if (path != buffer) {
4341 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4342 			return B_BUFFER_OVERFLOW;
4343 	}
4344 
4345 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4346 }
4347 
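/*	A usage sketch, assuming "/boot/home" and "/boot/home/Desktop" exist and
	contain no symlinks -- note that \a path and \a buffer may be the same:

		char path[B_PATH_NAME_LENGTH];
		strlcpy(path, "/boot/./home//Desktop/../mail", sizeof(path));
		status_t status = vfs_normalize_path(path, path, sizeof(path),
			true, true);
			// on success the buffer now holds "/boot/home/mail"
*/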
4348 
4349 /*!	\brief Creates a special node in the file system.
4350 
4351 	The caller gets a reference to the newly created node (which is passed
4352 	back through \a _createdVnode) and is responsible for releasing it.
4353 
4354 	\param path The path where to create the entry for the node. Can be \c NULL,
4355 		in which case the node is created without an entry in the root FS -- it
4356 		will automatically be deleted when the last reference has been released.
4357 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4358 		the target file system will just create the node with its standard
4359 		operations. Depending on the type of the node a subnode might be created
4360 		automatically, though.
4361 	\param mode The type and permissions for the node to be created.
4362 	\param flags Flags to be passed to the creating FS.
4363 	\param kernel \c true, if called in the kernel context (relevant only if
4364 		\a path is not \c NULL and not absolute).
4365 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4366 		file system creating the node, with the private data pointer and
4367 		operations for the super node. Can be \c NULL.
4368 	\param _createdVnode Pointer to pre-allocated storage in which to store
4369 		the pointer to the newly created node.
4370 	\return \c B_OK, if everything went fine, another error code otherwise.
4371 */
4372 status_t
4373 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4374 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4375 	struct vnode** _createdVnode)
4376 {
4377 	struct vnode* dirNode;
4378 	char _leaf[B_FILE_NAME_LENGTH];
4379 	char* leaf = NULL;
4380 
4381 	if (path) {
4382 		// We've got a path. Get the dir vnode and the leaf name.
4383 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4384 		if (tmpPathBuffer.InitCheck() != B_OK)
4385 			return B_NO_MEMORY;
4386 
4387 		char* tmpPath = tmpPathBuffer.LockBuffer();
4388 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4389 			return B_NAME_TOO_LONG;
4390 
4391 		// get the dir vnode and the leaf name
4392 		leaf = _leaf;
4393 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4394 		if (error != B_OK)
4395 			return error;
4396 	} else {
4397 		// No path. Create the node in the root FS.
4398 		dirNode = sRoot;
4399 		inc_vnode_ref_count(dirNode);
4400 	}
4401 
4402 	VNodePutter _(dirNode);
4403 
4404 	// check support for creating special nodes
4405 	if (!HAS_FS_CALL(dirNode, create_special_node))
4406 		return B_UNSUPPORTED;
4407 
4408 	// create the node
4409 	fs_vnode superVnode;
4410 	ino_t nodeID;
4411 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4412 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4413 	if (status != B_OK)
4414 		return status;
4415 
4416 	// lookup the node
4417 	// look up the node
4418 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4419 	rw_lock_read_unlock(&sVnodeLock);
4420 
4421 	if (*_createdVnode == NULL) {
4422 		panic("vfs_create_special_node(): lookup of node failed");
4423 		return B_ERROR;
4424 	}
4425 
4426 	return B_OK;
4427 }
4428 
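/*	A sketch of creating a FIFO with an entry in the root FS (the path is
	hypothetical, error handling omitted):

		struct vnode* vnode;
		status_t status = vfs_create_special_node("/var/my_fifo", NULL,
			S_IFIFO | 0666, 0, true, NULL, &vnode);
		if (status == B_OK) {
			// release the reference passed back to us
			put_vnode(vnode);
		}
*/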
4429 
4430 extern "C" void
4431 vfs_put_vnode(struct vnode* vnode)
4432 {
4433 	put_vnode(vnode);
4434 }
4435 
4436 
4437 extern "C" status_t
4438 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4439 {
4440 	// Get current working directory from io context
4441 	struct io_context* context = get_current_io_context(false);
4442 	status_t status = B_OK;
4443 
4444 	mutex_lock(&context->io_mutex);
4445 
4446 	if (context->cwd != NULL) {
4447 		*_mountID = context->cwd->device;
4448 		*_vnodeID = context->cwd->id;
4449 	} else
4450 		status = B_ERROR;
4451 
4452 	mutex_unlock(&context->io_mutex);
4453 	return status;
4454 }
4455 
4456 
4457 status_t
4458 vfs_unmount(dev_t mountID, uint32 flags)
4459 {
4460 	return fs_unmount(NULL, mountID, flags, true);
4461 }
4462 
4463 
4464 extern "C" status_t
4465 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4466 {
4467 	struct vnode* vnode;
4468 
4469 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4470 	if (status != B_OK)
4471 		return status;
4472 
4473 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4474 	put_vnode(vnode);
4475 	return B_OK;
4476 }
4477 
4478 
4479 extern "C" void
4480 vfs_free_unused_vnodes(int32 level)
4481 {
4482 	vnode_low_resource_handler(NULL,
4483 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4484 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4485 		level);
4486 }
4487 
4488 
4489 extern "C" bool
4490 vfs_can_page(struct vnode* vnode, void* cookie)
4491 {
4492 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
4493 
4494 	if (HAS_FS_CALL(vnode, can_page))
4495 		return FS_CALL(vnode, can_page, cookie);
4496 	return false;
4497 }
4498 
4499 
4500 extern "C" status_t
4501 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4502 	const generic_io_vec* vecs, size_t count, uint32 flags,
4503 	generic_size_t* _numBytes)
4504 {
4505 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4506 		pos));
4507 
4508 #if VFS_PAGES_IO_TRACING
4509 	generic_size_t bytesRequested = *_numBytes;
4510 #endif
4511 
4512 	IORequest request;
4513 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4514 	if (status == B_OK) {
4515 		status = vfs_vnode_io(vnode, cookie, &request);
4516 		if (status == B_OK)
4517 			status = request.Wait();
4518 		*_numBytes = request.TransferredBytes();
4519 	}
4520 
4521 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4522 		status, *_numBytes));
4523 
4524 	return status;
4525 }
4526 
4527 
4528 extern "C" status_t
4529 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4530 	const generic_io_vec* vecs, size_t count, uint32 flags,
4531 	generic_size_t* _numBytes)
4532 {
4533 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4534 		pos));
4535 
4536 #if VFS_PAGES_IO_TRACING
4537 	generic_size_t bytesRequested = *_numBytes;
4538 #endif
4539 
4540 	IORequest request;
4541 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4542 	if (status == B_OK) {
4543 		status = vfs_vnode_io(vnode, cookie, &request);
4544 		if (status == B_OK)
4545 			status = request.Wait();
4546 		*_numBytes = request.TransferredBytes();
4547 	}
4548 
4549 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4550 		status, *_numBytes));
4551 
4552 	return status;
4553 }
4554 
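/*	Both functions take a scatter/gather list. A sketch of reading a single
	page from a vnode ("buffer" is a hypothetical page-sized target):

		generic_io_vec vec;
		vec.base = (generic_addr_t)buffer;
		vec.length = B_PAGE_SIZE;

		generic_size_t length = B_PAGE_SIZE;
		status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0,
			&length);
			// on return length holds the number of bytes actually read
*/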
4555 
4556 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4557 	will be created, provided \a allocate is \c true.
4558 	On success, the function also grabs a reference to the cache it
4559 	returns.
4560 */
4561 extern "C" status_t
4562 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4563 {
4564 	if (vnode->cache != NULL) {
4565 		vnode->cache->AcquireRef();
4566 		*_cache = vnode->cache;
4567 		return B_OK;
4568 	}
4569 
4570 	rw_lock_read_lock(&sVnodeLock);
4571 	vnode->Lock();
4572 
4573 	status_t status = B_OK;
4574 
4575 	// The cache could have been created in the meantime
4576 	if (vnode->cache == NULL) {
4577 		if (allocate) {
4578 			// TODO: actually the vnode needs to be busy already here, or
4579 			//	else this won't work...
4580 			bool wasBusy = vnode->IsBusy();
4581 			vnode->SetBusy(true);
4582 
4583 			vnode->Unlock();
4584 			rw_lock_read_unlock(&sVnodeLock);
4585 
4586 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4587 
4588 			rw_lock_read_lock(&sVnodeLock);
4589 			vnode->Lock();
4590 			vnode->SetBusy(wasBusy);
4591 		} else
4592 			status = B_BAD_VALUE;
4593 	}
4594 
4595 	vnode->Unlock();
4596 	rw_lock_read_unlock(&sVnodeLock);
4597 
4598 	if (status == B_OK) {
4599 		vnode->cache->AcquireRef();
4600 		*_cache = vnode->cache;
4601 	}
4602 
4603 	return status;
4604 }
4605 
4606 
4607 status_t
4608 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4609 	file_io_vec* vecs, size_t* _count)
4610 {
4611 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4612 		vnode, vecs, offset, size));
4613 
4614 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4615 }
4616 
4617 
4618 status_t
4619 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4620 {
4621 	status_t status = FS_CALL(vnode, read_stat, stat);
4622 
4623 	// fill in the st_dev and st_ino fields
4624 	if (status == B_OK) {
4625 		stat->st_dev = vnode->device;
4626 		stat->st_ino = vnode->id;
4627 		stat->st_rdev = -1;
4628 	}
4629 
4630 	return status;
4631 }
4632 
4633 
4634 status_t
4635 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4636 {
4637 	struct vnode* vnode;
4638 	status_t status = get_vnode(device, inode, &vnode, true, false);
4639 	if (status != B_OK)
4640 		return status;
4641 
4642 	status = FS_CALL(vnode, read_stat, stat);
4643 
4644 	// fill in the st_dev and st_ino fields
4645 	if (status == B_OK) {
4646 		stat->st_dev = vnode->device;
4647 		stat->st_ino = vnode->id;
4648 		stat->st_rdev = -1;
4649 	}
4650 
4651 	put_vnode(vnode);
4652 	return status;
4653 }
4654 
4655 
4656 status_t
4657 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4658 {
4659 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4660 }
4661 
4662 
4663 status_t
4664 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4665 	char* path, size_t pathLength)
4666 {
4667 	struct vnode* vnode;
4668 	status_t status;
4669 
4670 	// filter invalid leaf names
4671 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4672 		return B_BAD_VALUE;
4673 
4674 	// get the vnode matching the dir's node_ref
4675 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4676 		// special cases "." and "..": we can directly get the vnode of the
4677 		// referenced directory
4678 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4679 		leaf = NULL;
4680 	} else
4681 		status = get_vnode(device, inode, &vnode, true, false);
4682 	if (status != B_OK)
4683 		return status;
4684 
4685 	// get the directory path
4686 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4687 	put_vnode(vnode);
4688 		// we don't need the vnode anymore
4689 	if (status != B_OK)
4690 		return status;
4691 
4692 	// append the leaf name
4693 	if (leaf) {
4694 		// insert a directory separator if this is not the file system root
4695 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4696 				>= pathLength)
4697 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4698 			return B_NAME_TOO_LONG;
4699 		}
4700 	}
4701 
4702 	return B_OK;
4703 }
4704 
4705 
4706 /*!	If the given descriptor locked its vnode, that lock will be released. */
4707 void
4708 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4709 {
4710 	struct vnode* vnode = fd_vnode(descriptor);
4711 
4712 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4713 		vnode->mandatory_locked_by = NULL;
4714 }
4715 
4716 
4717 /*!	Closes all file descriptors of the specified I/O context that
4718 	have the O_CLOEXEC flag set.
4719 */
4720 void
4721 vfs_exec_io_context(io_context* context)
4722 {
4723 	uint32 i;
4724 
4725 	for (i = 0; i < context->table_size; i++) {
4726 		mutex_lock(&context->io_mutex);
4727 
4728 		struct file_descriptor* descriptor = context->fds[i];
4729 		bool remove = false;
4730 
4731 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4732 			context->fds[i] = NULL;
4733 			context->num_used_fds--;
4734 
4735 			remove = true;
4736 		}
4737 
4738 		mutex_unlock(&context->io_mutex);
4739 
4740 		if (remove) {
4741 			close_fd(descriptor);
4742 			put_fd(descriptor);
4743 		}
4744 	}
4745 }
4746 
4747 
4748 /*! Sets up a new io_context structure, and inherits the properties
4749 	of the parent io_context if one is given.
4750 */
4751 io_context*
4752 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4753 {
4754 	io_context* context = (io_context*)malloc(sizeof(io_context));
4755 	if (context == NULL)
4756 		return NULL;
4757 
4758 	TIOC(NewIOContext(context, parentContext));
4759 
4760 	memset(context, 0, sizeof(io_context));
4761 	context->ref_count = 1;
4762 
4763 	MutexLocker parentLocker;
4764 
4765 	size_t tableSize;
4766 	if (parentContext) {
4767 		parentLocker.SetTo(parentContext->io_mutex, false);
4768 		tableSize = parentContext->table_size;
4769 	} else
4770 		tableSize = DEFAULT_FD_TABLE_SIZE;
4771 
4772 	// allocate space for FDs and their close-on-exec flag
4773 	context->fds = (file_descriptor**)malloc(
4774 		sizeof(struct file_descriptor*) * tableSize
4775 		+ sizeof(struct select_sync*) * tableSize
4776 		+ (tableSize + 7) / 8);
4777 	if (context->fds == NULL) {
4778 		free(context);
4779 		return NULL;
4780 	}
4781 
4782 	context->select_infos = (select_info**)(context->fds + tableSize);
4783 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4784 
4785 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4786 		+ sizeof(struct select_sync*) * tableSize
4787 		+ (tableSize + 7) / 8);
4788 
4789 	mutex_init(&context->io_mutex, "I/O context");
4790 
4791 	// Copy all parent file descriptors
4792 
4793 	if (parentContext) {
4794 		size_t i;
4795 
4796 		mutex_lock(&sIOContextRootLock);
4797 		context->root = parentContext->root;
4798 		if (context->root)
4799 			inc_vnode_ref_count(context->root);
4800 		mutex_unlock(&sIOContextRootLock);
4801 
4802 		context->cwd = parentContext->cwd;
4803 		if (context->cwd)
4804 			inc_vnode_ref_count(context->cwd);
4805 
4806 		for (i = 0; i < tableSize; i++) {
4807 			struct file_descriptor* descriptor = parentContext->fds[i];
4808 
4809 			if (descriptor != NULL) {
4810 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4811 				if (closeOnExec && purgeCloseOnExec)
4812 					continue;
4813 
4814 				TFD(InheritFD(context, i, descriptor, parentContext));
4815 
4816 				context->fds[i] = descriptor;
4817 				context->num_used_fds++;
4818 				atomic_add(&descriptor->ref_count, 1);
4819 				atomic_add(&descriptor->open_count, 1);
4820 
4821 				if (closeOnExec)
4822 					fd_set_close_on_exec(context, i, true);
4823 			}
4824 		}
4825 
4826 		parentLocker.Unlock();
4827 	} else {
4828 		context->root = sRoot;
4829 		context->cwd = sRoot;
4830 
4831 		if (context->root)
4832 			inc_vnode_ref_count(context->root);
4833 
4834 		if (context->cwd)
4835 			inc_vnode_ref_count(context->cwd);
4836 	}
4837 
4838 	context->table_size = tableSize;
4839 
4840 	list_init(&context->node_monitors);
4841 	context->max_monitors = DEFAULT_NODE_MONITORS;
4842 
4843 	return context;
4844 }
4845 
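/*	For reference, the single allocation in vfs_new_io_context() packs three
	tables back to back; for a table size n the layout is:

		file_descriptor*	fds[n];
		select_info*		select_infos[n];
		uint8				close_on_exec_bits[(n + 7) / 8];

	i.e. one bit per descriptor records the close-on-exec flag, hence the
	"(tableSize + 7) / 8" term above.
*/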
4846 
4847 static status_t
4848 vfs_free_io_context(io_context* context)
4849 {
4850 	uint32 i;
4851 
4852 	TIOC(FreeIOContext(context));
4853 
4854 	if (context->root)
4855 		put_vnode(context->root);
4856 
4857 	if (context->cwd)
4858 		put_vnode(context->cwd);
4859 
4860 	mutex_lock(&context->io_mutex);
4861 
4862 	for (i = 0; i < context->table_size; i++) {
4863 		if (struct file_descriptor* descriptor = context->fds[i]) {
4864 			close_fd(descriptor);
4865 			put_fd(descriptor);
4866 		}
4867 	}
4868 
4869 	mutex_destroy(&context->io_mutex);
4870 
4871 	remove_node_monitors(context);
4872 	free(context->fds);
4873 	free(context);
4874 
4875 	return B_OK;
4876 }
4877 
4878 
4879 void
4880 vfs_get_io_context(io_context* context)
4881 {
4882 	atomic_add(&context->ref_count, 1);
4883 }
4884 
4885 
4886 void
4887 vfs_put_io_context(io_context* context)
4888 {
4889 	if (atomic_add(&context->ref_count, -1) == 1)
4890 		vfs_free_io_context(context);
4891 }
4892 
4893 
4894 static status_t
4895 vfs_resize_fd_table(struct io_context* context, const int newSize)
4896 {
4897 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4898 		return B_BAD_VALUE;
4899 
4900 	TIOC(ResizeIOContext(context, newSize));
4901 
4902 	MutexLocker _(context->io_mutex);
4903 
4904 	int oldSize = context->table_size;
4905 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4906 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4907 
4908 	// If the tables shrink, make sure none of the fds being dropped are in use.
4909 	if (newSize < oldSize) {
4910 		for (int i = oldSize; i-- > newSize;) {
4911 			if (context->fds[i])
4912 				return B_BUSY;
4913 		}
4914 	}
4915 
4916 	// store pointers to the old tables
4917 	file_descriptor** oldFDs = context->fds;
4918 	select_info** oldSelectInfos = context->select_infos;
4919 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4920 
4921 	// allocate new tables
4922 	file_descriptor** newFDs = (file_descriptor**)malloc(
4923 		sizeof(struct file_descriptor*) * newSize
4924 		+ sizeof(struct select_sync*) * newSize
4925 		+ newCloseOnExitBitmapSize);
4926 	if (newFDs == NULL)
4927 		return B_NO_MEMORY;
4928 
4929 	context->fds = newFDs;
4930 	context->select_infos = (select_info**)(context->fds + newSize);
4931 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4932 	context->table_size = newSize;
4933 
4934 	// copy entries from old tables
4935 	int toCopy = min_c(oldSize, newSize);
4936 
4937 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4938 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4939 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4940 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4941 
4942 	// clear additional entries, if the tables grow
4943 	if (newSize > oldSize) {
4944 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4945 		memset(context->select_infos + oldSize, 0,
4946 			sizeof(void*) * (newSize - oldSize));
4947 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4948 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4949 	}
4950 
4951 	free(oldFDs);
4952 
4953 	return B_OK;
4954 }
4955 
4956 
4957 static status_t
4958 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4959 {
4960 	int	status = B_OK;
4961 
4962 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4963 		return B_BAD_VALUE;
4964 
4965 	mutex_lock(&context->io_mutex);
4966 
4967 	if ((size_t)newSize < context->num_monitors) {
4968 		status = B_BUSY;
4969 		goto out;
4970 	}
4971 	context->max_monitors = newSize;
4972 
4973 out:
4974 	mutex_unlock(&context->io_mutex);
4975 	return status;
4976 }
4977 
4978 
4979 status_t
4980 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
4981 	ino_t* _mountPointNodeID)
4982 {
4983 	ReadLocker nodeLocker(sVnodeLock);
4984 	MutexLocker mountLocker(sMountMutex);
4985 
4986 	struct fs_mount* mount = find_mount(mountID);
4987 	if (mount == NULL)
4988 		return B_BAD_VALUE;
4989 
4990 	Vnode* mountPoint = mount->covers_vnode;
4991 
4992 	*_mountPointMountID = mountPoint->device;
4993 	*_mountPointNodeID = mountPoint->id;
4994 
4995 	return B_OK;
4996 }
4997 
4998 
4999 status_t
5000 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5001 	ino_t coveredNodeID)
5002 {
5003 	// get the vnodes
5004 	Vnode* vnode;
5005 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5006 	if (error != B_OK)
5007 		return B_BAD_VALUE;
5008 	VNodePutter vnodePutter(vnode);
5009 
5010 	Vnode* coveredVnode;
5011 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5012 		false);
5013 	if (error != B_OK)
5014 		return B_BAD_VALUE;
5015 	VNodePutter coveredVnodePutter(coveredVnode);
5016 
5017 	// establish the covered/covering links
5018 	WriteLocker locker(sVnodeLock);
5019 
5020 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5021 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5022 		return B_BUSY;
5023 	}
5024 
5025 	vnode->covers = coveredVnode;
5026 	vnode->SetCovering(true);
5027 
5028 	coveredVnode->covered_by = vnode;
5029 	coveredVnode->SetCovered(true);
5030 
5031 	// the vnodes now reference each other
5032 	inc_vnode_ref_count(vnode);
5033 	inc_vnode_ref_count(coveredVnode);
5034 
5035 	return B_OK;
5036 }
5037 
5038 
5039 int
5040 vfs_getrlimit(int resource, struct rlimit* rlp)
5041 {
5042 	if (!rlp)
5043 		return B_BAD_ADDRESS;
5044 
5045 	switch (resource) {
5046 		case RLIMIT_NOFILE:
5047 		{
5048 			struct io_context* context = get_current_io_context(false);
5049 			MutexLocker _(context->io_mutex);
5050 
5051 			rlp->rlim_cur = context->table_size;
5052 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5053 			return 0;
5054 		}
5055 
5056 		case RLIMIT_NOVMON:
5057 		{
5058 			struct io_context* context = get_current_io_context(false);
5059 			MutexLocker _(context->io_mutex);
5060 
5061 			rlp->rlim_cur = context->max_monitors;
5062 			rlp->rlim_max = MAX_NODE_MONITORS;
5063 			return 0;
5064 		}
5065 
5066 		default:
5067 			return B_BAD_VALUE;
5068 	}
5069 }
5070 
5071 
5072 int
5073 vfs_setrlimit(int resource, const struct rlimit* rlp)
5074 {
5075 	if (!rlp)
5076 		return B_BAD_ADDRESS;
5077 
5078 	switch (resource) {
5079 		case RLIMIT_NOFILE:
5080 			/* TODO: check getuid() */
5081 			if (rlp->rlim_max != RLIM_SAVED_MAX
5082 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5083 				return B_NOT_ALLOWED;
5084 
5085 			return vfs_resize_fd_table(get_current_io_context(false),
5086 				rlp->rlim_cur);
5087 
5088 		case RLIMIT_NOVMON:
5089 			/* TODO: check getuid() */
5090 			if (rlp->rlim_max != RLIM_SAVED_MAX
5091 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5092 				return B_NOT_ALLOWED;
5093 
5094 			return vfs_resize_monitor_table(get_current_io_context(false),
5095 				rlp->rlim_cur);
5096 
5097 		default:
5098 			return B_BAD_VALUE;
5099 	}
5100 }
5101 
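/*	These two functions back the corresponding POSIX calls for the resources
	handled here; a userland sketch of growing the FD table:

		struct rlimit rl;
		getrlimit(RLIMIT_NOFILE, &rl);	// rlim_cur == current table size
		rl.rlim_cur = 1024;
		setrlimit(RLIMIT_NOFILE, &rl);	// ends up in vfs_resize_fd_table()

	Shrinking below a slot that is still open fails with B_BUSY, as
	vfs_resize_fd_table() checks the to-be-dropped range first.
*/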
5102 
5103 status_t
5104 vfs_init(kernel_args* args)
5105 {
5106 	vnode::StaticInit();
5107 
5108 	struct vnode dummyVnode;
5109 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5110 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5111 	if (sVnodeTable == NULL)
5112 		panic("vfs_init: error creating vnode hash table\n");
5113 
5114 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5115 
5116 	struct fs_mount dummyMount;
5117 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5118 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5119 	if (sMountsTable == NULL)
5120 		panic("vfs_init: error creating mounts hash table\n");
5121 
5122 	node_monitor_init();
5123 
5124 	sRoot = NULL;
5125 
5126 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5127 
5128 	if (block_cache_init() != B_OK)
5129 		return B_ERROR;
5130 
5131 #ifdef ADD_DEBUGGER_COMMANDS
5132 	// add some debugger commands
5133 	add_debugger_command_etc("vnode", &dump_vnode,
5134 		"Print info about the specified vnode",
5135 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5136 		"Prints information about the vnode specified by address <vnode> or\n"
5137 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5138 		"constructed and printed. It might not be possible to construct a\n"
5139 		"complete path, though.\n",
5140 		0);
5141 	add_debugger_command("vnodes", &dump_vnodes,
5142 		"list all vnodes (from the specified device)");
5143 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5144 		"list all vnode caches");
5145 	add_debugger_command("mount", &dump_mount,
5146 		"info about the specified fs_mount");
5147 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5148 	add_debugger_command("io_context", &dump_io_context,
5149 		"info about the I/O context");
5150 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5151 		"info about vnode usage");
5152 #endif
5153 
5154 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5155 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5156 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5157 		0);
5158 
5159 	file_map_init();
5160 
5161 	return file_cache_init();
5162 }
5163 
5164 
5165 //	#pragma mark - fd_ops implementations
5166 
5167 
5168 /*!
5169 	Calls fs_open() on the given vnode and returns a new
5170 	file descriptor for it
5171 */
5172 static int
5173 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5174 {
5175 	void* cookie;
5176 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5177 	if (status != B_OK)
5178 		return status;
5179 
5180 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5181 	if (fd < 0) {
5182 		FS_CALL(vnode, close, cookie);
5183 		FS_CALL(vnode, free_cookie, cookie);
5184 	}
5185 	return fd;
5186 }
5187 
5188 
5189 /*!
5190 	Creates the entry \a name in \a directory (or, unless O_EXCL is set,
5191 	opens the existing one) and returns a new file descriptor for it.
5192 */
5193 static int
5194 create_vnode(struct vnode* directory, const char* name, int openMode,
5195 	int perms, bool kernel)
5196 {
5197 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5198 	status_t status = B_ERROR;
5199 	struct vnode* vnode;
5200 	void* cookie;
5201 	ino_t newID;
5202 
5203 	// This is somewhat tricky: If the entry already exists, the FS responsible
5204 	// for the directory might not necessarily also be the one responsible for
5205 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5206 	// we can actually never call the create() hook without O_EXCL. Instead we
5207 	// try to look the entry up first. If it already exists, we just open the
5208 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5209 	// introduces a race condition, since someone else might have created the
5210 	// entry in the meantime. We hope the respective FS returns the correct
5211 	// error code and retry (up to 3 times) again.
5212 	// error code, in which case we retry (up to 3 times).
5213 	for (int i = 0; i < 3 && status != B_OK; i++) {
5214 		// look the node up
5215 		status = lookup_dir_entry(directory, name, &vnode);
5216 		if (status == B_OK) {
5217 			VNodePutter putter(vnode);
5218 
5219 			if ((openMode & O_EXCL) != 0)
5220 				return B_FILE_EXISTS;
5221 
5222 			// If the node is a symlink, we have to follow it, unless
5223 			// O_NOTRAVERSE is set.
5224 			if (S_ISLNK(vnode->Type()) && traverse) {
5225 				putter.Put();
5226 				char clonedName[B_FILE_NAME_LENGTH + 1];
5227 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5228 						>= B_FILE_NAME_LENGTH) {
5229 					return B_NAME_TOO_LONG;
5230 				}
5231 
5232 				inc_vnode_ref_count(directory);
5233 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5234 					kernel, &vnode, NULL);
5235 				if (status != B_OK)
5236 					return status;
5237 
5238 				putter.SetTo(vnode);
5239 			}
5240 
5241 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5242 				put_vnode(vnode);
5243 				return B_LINK_LIMIT;
5244 			}
5245 
5246 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5247 			// on success keep the vnode reference for the FD
5248 			if (fd >= 0)
5249 				putter.Detach();
5250 
5251 			return fd;
5252 		}
5253 
5254 		// it doesn't exist yet -- try to create it
5255 
5256 		if (!HAS_FS_CALL(directory, create))
5257 			return B_READ_ONLY_DEVICE;
5258 
5259 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5260 			&cookie, &newID);
5261 		if (status != B_OK
5262 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5263 			return status;
5264 		}
5265 	}
5266 
5267 	if (status != B_OK)
5268 		return status;
5269 
5270 	// the node has been created successfully
5271 
5272 	rw_lock_read_lock(&sVnodeLock);
5273 	vnode = lookup_vnode(directory->device, newID);
5274 	rw_lock_read_unlock(&sVnodeLock);
5275 
5276 	if (vnode == NULL) {
5277 		panic("vfs: fs_create() returned success but there is no vnode, "
5278 			"mount ID %ld!\n", directory->device);
5279 		return B_BAD_VALUE;
5280 	}
5281 
5282 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5283 	if (fd >= 0)
5284 		return fd;
5285 
5286 	status = fd;
5287 
5288 	// something went wrong, clean up
5289 
5290 	FS_CALL(vnode, close, cookie);
5291 	FS_CALL(vnode, free_cookie, cookie);
5292 	put_vnode(vnode);
5293 
5294 	FS_CALL(directory, unlink, name);
5295 
5296 	return status;
5297 }
5298 
5299 
5300 /*! Calls fs open_dir() on the given vnode and returns a new
5301 	file descriptor for it
5302 */
5303 static int
5304 open_dir_vnode(struct vnode* vnode, bool kernel)
5305 {
5306 	void* cookie;
5307 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5308 	if (status != B_OK)
5309 		return status;
5310 
5311 	// directory is opened, create a fd
5312 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5313 	if (status >= 0)
5314 		return status;
5315 
5316 	FS_CALL(vnode, close_dir, cookie);
5317 	FS_CALL(vnode, free_dir_cookie, cookie);
5318 
5319 	return status;
5320 }
5321 
5322 
5323 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5324 	file descriptor for it.
5325 	Used by attr_dir_open() and attr_dir_open_fd().
5326 */
5327 static int
5328 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5329 {
5330 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5331 		return B_UNSUPPORTED;
5332 
5333 	void* cookie;
5334 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5335 	if (status != B_OK)
5336 		return status;
5337 
5338 	// directory is opened, create a fd
5339 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5340 		kernel);
5341 	if (status >= 0)
5342 		return status;
5343 
5344 	FS_CALL(vnode, close_attr_dir, cookie);
5345 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5346 
5347 	return status;
5348 }
5349 
5350 
5351 static int
5352 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5353 	int openMode, int perms, bool kernel)
5354 {
5355 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5356 		"kernel %d\n", name, openMode, perms, kernel));
5357 
5358 	// get directory to put the new file in
5359 	struct vnode* directory;
5360 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5361 	if (status != B_OK)
5362 		return status;
5363 
5364 	status = create_vnode(directory, name, openMode, perms, kernel);
5365 	put_vnode(directory);
5366 
5367 	return status;
5368 }
5369 
5370 
5371 static int
5372 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5373 {
5374 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5375 		openMode, perms, kernel));
5376 
5377 	// get directory to put the new file in
5378 	char name[B_FILE_NAME_LENGTH];
5379 	struct vnode* directory;
5380 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5381 		kernel);
5382 	if (status < 0)
5383 		return status;
5384 
5385 	status = create_vnode(directory, name, openMode, perms, kernel);
5386 
5387 	put_vnode(directory);
5388 	return status;
5389 }
5390 
5391 
5392 static int
5393 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5394 	int openMode, bool kernel)
5395 {
5396 	if (name == NULL || *name == '\0')
5397 		return B_BAD_VALUE;
5398 
5399 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5400 		mountID, directoryID, name, openMode));
5401 
5402 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5403 
5404 	// get the vnode matching the entry_ref
5405 	struct vnode* vnode;
5406 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5407 		kernel, &vnode);
5408 	if (status != B_OK)
5409 		return status;
5410 
5411 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5412 		put_vnode(vnode);
5413 		return B_LINK_LIMIT;
5414 	}
5415 
5416 	int newFD = open_vnode(vnode, openMode, kernel);
5417 	if (newFD >= 0) {
5418 		// The vnode reference has been transferred to the FD
5419 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5420 			directoryID, vnode->id, name);
5421 	} else
5422 		put_vnode(vnode);
5423 
5424 	return newFD;
5425 }
5426 
5427 
5428 static int
5429 file_open(int fd, char* path, int openMode, bool kernel)
5430 {
5431 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5432 
5433 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5434 		fd, path, openMode, kernel));
5435 
5436 	// get the vnode matching the vnode + path combination
5437 	struct vnode* vnode;
5438 	ino_t parentID;
5439 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5440 		&parentID, kernel);
5441 	if (status != B_OK)
5442 		return status;
5443 
5444 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5445 		put_vnode(vnode);
5446 		return B_LINK_LIMIT;
5447 	}
5448 
5449 	// open the vnode
5450 	int newFD = open_vnode(vnode, openMode, kernel);
5451 	if (newFD >= 0) {
5452 		// The vnode reference has been transferred to the FD
5453 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5454 			vnode->device, parentID, vnode->id, NULL);
5455 	} else
5456 		put_vnode(vnode);
5457 
5458 	return newFD;
5459 }
5460 
5461 
5462 static status_t
5463 file_close(struct file_descriptor* descriptor)
5464 {
5465 	struct vnode* vnode = descriptor->u.vnode;
5466 	status_t status = B_OK;
5467 
5468 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5469 
5470 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5471 		vnode->id);
5472 	if (HAS_FS_CALL(vnode, close)) {
5473 		status = FS_CALL(vnode, close, descriptor->cookie);
5474 	}
5475 
5476 	if (status == B_OK) {
5477 		// remove all outstanding locks for this team
5478 		release_advisory_lock(vnode, NULL);
5479 	}
5480 	return status;
5481 }
5482 
5483 
5484 static void
5485 file_free_fd(struct file_descriptor* descriptor)
5486 {
5487 	struct vnode* vnode = descriptor->u.vnode;
5488 
5489 	if (vnode != NULL) {
5490 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5491 		put_vnode(vnode);
5492 	}
5493 }
5494 
5495 
5496 static status_t
5497 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5498 	size_t* length)
5499 {
5500 	struct vnode* vnode = descriptor->u.vnode;
5501 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5502 		*length));
5503 
5504 	if (S_ISDIR(vnode->Type()))
5505 		return B_IS_A_DIRECTORY;
5506 
5507 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5508 }
5509 
5510 
5511 static status_t
5512 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5513 	size_t* length)
5514 {
5515 	struct vnode* vnode = descriptor->u.vnode;
5516 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5517 
5518 	if (S_ISDIR(vnode->Type()))
5519 		return B_IS_A_DIRECTORY;
5520 	if (!HAS_FS_CALL(vnode, write))
5521 		return B_READ_ONLY_DEVICE;
5522 
5523 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5524 }
5525 
5526 
5527 static off_t
5528 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5529 {
5530 	struct vnode* vnode = descriptor->u.vnode;
5531 	off_t offset;
5532 
5533 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5534 
5535 	// some kinds of files are not seekable
5536 	switch (vnode->Type() & S_IFMT) {
5537 		case S_IFIFO:
5538 		case S_IFSOCK:
5539 			return ESPIPE;
5540 
5541 		// The Open Group Base Specs give no special seek semantics for file
5542 		// types besides pipes, FIFOs, and sockets, so we allow seeking them.
5543 		case S_IFREG:
5544 		case S_IFBLK:
5545 		case S_IFDIR:
5546 		case S_IFLNK:
5547 		case S_IFCHR:
5548 			break;
5549 	}
5550 
5551 	switch (seekType) {
5552 		case SEEK_SET:
5553 			offset = 0;
5554 			break;
5555 		case SEEK_CUR:
5556 			offset = descriptor->pos;
5557 			break;
5558 		case SEEK_END:
5559 		{
5560 			// stat() the node
5561 			if (!HAS_FS_CALL(vnode, read_stat))
5562 				return B_UNSUPPORTED;
5563 
5564 			struct stat stat;
5565 			status_t status = FS_CALL(vnode, read_stat, &stat);
5566 			if (status != B_OK)
5567 				return status;
5568 
5569 			offset = stat.st_size;
5570 			break;
5571 		}
5572 		default:
5573 			return B_BAD_VALUE;
5574 	}
5575 
5576 	// assumes off_t is 64 bits wide
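	// e.g. with SEEK_END on a file of size LONGLONG_MAX - 10, any pos > 10
	// would make "pos + offset" wrap around; the check catches this before
	// the addition below is performed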
5577 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5578 		return B_BUFFER_OVERFLOW;
5579 
5580 	pos += offset;
5581 	if (pos < 0)
5582 		return B_BAD_VALUE;
5583 
5584 	return descriptor->pos = pos;
5585 }
5586 
5587 
5588 static status_t
5589 file_select(struct file_descriptor* descriptor, uint8 event,
5590 	struct selectsync* sync)
5591 {
5592 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5593 
5594 	struct vnode* vnode = descriptor->u.vnode;
5595 
5596 	// If the FS has no select() hook, notify select() now.
5597 	if (!HAS_FS_CALL(vnode, select))
5598 		return notify_select_event(sync, event);
5599 
5600 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5601 }
5602 
5603 
5604 static status_t
5605 file_deselect(struct file_descriptor* descriptor, uint8 event,
5606 	struct selectsync* sync)
5607 {
5608 	struct vnode* vnode = descriptor->u.vnode;
5609 
5610 	if (!HAS_FS_CALL(vnode, deselect))
5611 		return B_OK;
5612 
5613 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5614 }
5615 
5616 
5617 static status_t
5618 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5619 	bool kernel)
5620 {
5621 	struct vnode* vnode;
5622 	status_t status;
5623 
5624 	if (name == NULL || *name == '\0')
5625 		return B_BAD_VALUE;
5626 
5627 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5628 		"perms = %d)\n", mountID, parentID, name, perms));
5629 
5630 	status = get_vnode(mountID, parentID, &vnode, true, false);
5631 	if (status != B_OK)
5632 		return status;
5633 
5634 	if (HAS_FS_CALL(vnode, create_dir))
5635 		status = FS_CALL(vnode, create_dir, name, perms);
5636 	else
5637 		status = B_READ_ONLY_DEVICE;
5638 
5639 	put_vnode(vnode);
5640 	return status;
5641 }
5642 
5643 
5644 static status_t
5645 dir_create(int fd, char* path, int perms, bool kernel)
5646 {
5647 	char filename[B_FILE_NAME_LENGTH];
5648 	struct vnode* vnode;
5649 	status_t status;
5650 
5651 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5652 		kernel));
5653 
5654 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5655 	if (status < 0)
5656 		return status;
5657 
5658 	if (HAS_FS_CALL(vnode, create_dir)) {
5659 		status = FS_CALL(vnode, create_dir, filename, perms);
5660 	} else
5661 		status = B_READ_ONLY_DEVICE;
5662 
5663 	put_vnode(vnode);
5664 	return status;
5665 }
5666 
5667 
5668 static int
5669 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5670 {
5671 	FUNCTION(("dir_open_entry_ref()\n"));
5672 
5673 	if (name && name[0] == '\0')
5674 		return B_BAD_VALUE;
5675 
5676 	// get the vnode matching the entry_ref/node_ref
5677 	struct vnode* vnode;
5678 	status_t status;
5679 	if (name) {
5680 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5681 			&vnode);
5682 	} else
5683 		status = get_vnode(mountID, parentID, &vnode, true, false);
5684 	if (status != B_OK)
5685 		return status;
5686 
5687 	int newFD = open_dir_vnode(vnode, kernel);
5688 	if (newFD >= 0) {
5689 		// The vnode reference has been transferred to the FD
5690 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5691 			vnode->id, name);
5692 	} else
5693 		put_vnode(vnode);
5694 
5695 	return newFD;
5696 }
5697 
5698 
5699 static int
5700 dir_open(int fd, char* path, bool kernel)
5701 {
5702 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5703 		kernel));
5704 
5705 	// get the vnode matching the vnode + path combination
5706 	struct vnode* vnode = NULL;
5707 	ino_t parentID;
5708 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5709 		kernel);
5710 	if (status != B_OK)
5711 		return status;
5712 
5713 	// open the dir
5714 	int newFD = open_dir_vnode(vnode, kernel);
5715 	if (newFD >= 0) {
5716 		// The vnode reference has been transferred to the FD
5717 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5718 			parentID, vnode->id, NULL);
5719 	} else
5720 		put_vnode(vnode);
5721 
5722 	return newFD;
5723 }
5724 
5725 
5726 static status_t
5727 dir_close(struct file_descriptor* descriptor)
5728 {
5729 	struct vnode* vnode = descriptor->u.vnode;
5730 
5731 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5732 
5733 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5734 		vnode->id);
5735 	if (HAS_FS_CALL(vnode, close_dir))
5736 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5737 
5738 	return B_OK;
5739 }
5740 
5741 
5742 static void
5743 dir_free_fd(struct file_descriptor* descriptor)
5744 {
5745 	struct vnode* vnode = descriptor->u.vnode;
5746 
5747 	if (vnode != NULL) {
5748 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5749 		put_vnode(vnode);
5750 	}
5751 }
5752 
5753 
5754 static status_t
5755 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5756 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5757 {
5758 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5759 		bufferSize, _count);
5760 }
5761 
5762 
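/*!	Post-processes a dirent as returned by the file system: sets d_pdev and
	d_pino to the parent's values, and rewrites d_dev/d_ino where the raw
	values would expose the wrong node across a mount boundary -- for the
	".." entry of a covering directory (without bypassing the I/O context's
	root) and for entries whose vnode is covered by another mount.
*/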
5763 static status_t
5764 fix_dirent(struct vnode* parent, struct dirent* entry,
5765 	struct io_context* ioContext)
5766 {
5767 	// set d_pdev and d_pino
5768 	entry->d_pdev = parent->device;
5769 	entry->d_pino = parent->id;
5770 
5771 	// If this is the ".." entry and the directory is covering another vnode,
5772 	// we need to replace d_dev and d_ino with the actual values.
5773 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5774 		// Make sure the IO context root is not bypassed.
5775 		if (parent == ioContext->root) {
5776 			entry->d_dev = parent->device;
5777 			entry->d_ino = parent->id;
5778 		} else {
5779 			inc_vnode_ref_count(parent);
5780 				// vnode_path_to_vnode() puts the node
5781 
5782 			// ".." is guaranteed not to be clobbered by this call
5783 			struct vnode* vnode;
5784 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5785 				ioContext, &vnode, NULL);
5786 
5787 			if (status == B_OK) {
5788 				entry->d_dev = vnode->device;
5789 				entry->d_ino = vnode->id;
5790 				put_vnode(vnode);
5791 			}
5792 		}
5793 	} else {
5794 		// resolve covered vnodes
5795 		ReadLocker _(&sVnodeLock);
5796 
5797 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5798 		if (vnode != NULL && vnode->covered_by != NULL) {
5799 			do {
5800 				vnode = vnode->covered_by;
5801 			} while (vnode->covered_by != NULL);
5802 
5803 			entry->d_dev = vnode->device;
5804 			entry->d_ino = vnode->id;
5805 		}
5806 	}
5807 
5808 	return B_OK;
5809 }
5810 
5811 
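/*!	Reads the next entries from directory \a vnode into \a buffer and runs
	each of them through fix_dirent(), advancing from one dirent to the next
	via d_reclen.
*/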
5812 static status_t
5813 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5814 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5815 {
5816 	if (!HAS_FS_CALL(vnode, read_dir))
5817 		return B_UNSUPPORTED;
5818 
5819 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5820 		_count);
5821 	if (error != B_OK)
5822 		return error;
5823 
5824 	// we need to adjust the read dirents
5825 	uint32 count = *_count;
5826 	for (uint32 i = 0; i < count; i++) {
5827 		error = fix_dirent(vnode, buffer, ioContext);
5828 		if (error != B_OK)
5829 			return error;
5830 
5831 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5832 	}
5833 
5834 	return error;
5835 }
5836 
5837 
5838 static status_t
5839 dir_rewind(struct file_descriptor* descriptor)
5840 {
5841 	struct vnode* vnode = descriptor->u.vnode;
5842 
5843 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5844 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5845 	}
5846 
5847 	return B_UNSUPPORTED;
5848 }
5849 
5850 
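/*!	Removes the directory specified by \a fd + \a path. The path is
	normalized first ("name/" and "name/." become "name"); attempts to remove
	"." or ".." are refused with B_NOT_ALLOWED.
*/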
5851 static status_t
5852 dir_remove(int fd, char* path, bool kernel)
5853 {
5854 	char name[B_FILE_NAME_LENGTH];
5855 	struct vnode* directory;
5856 	status_t status;
5857 
5858 	if (path != NULL) {
5859 		// we need to make sure our path name doesn't end with "/", ".",
5860 		// or ".."
5861 		char* lastSlash;
5862 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5863 			char* leaf = lastSlash + 1;
5864 			if (!strcmp(leaf, ".."))
5865 				return B_NOT_ALLOWED;
5866 
5867 			// omit multiple slashes
5868 			while (lastSlash > path && lastSlash[-1] == '/')
5869 				lastSlash--;
5870 
5871 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
5872 				break;
5875 			// "name/" -> "name", or "name/." -> "name"
5876 			lastSlash[0] = '\0';
5877 		}
5878 
5879 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5880 			return B_NOT_ALLOWED;
5881 	}
5882 
5883 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5884 	if (status != B_OK)
5885 		return status;
5886 
5887 	if (HAS_FS_CALL(directory, remove_dir))
5888 		status = FS_CALL(directory, remove_dir, name);
5889 	else
5890 		status = B_READ_ONLY_DEVICE;
5891 
5892 	put_vnode(directory);
5893 	return status;
5894 }
5895 
5896 
5897 static status_t
5898 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5899 	size_t length)
5900 {
5901 	struct vnode* vnode = descriptor->u.vnode;
5902 
5903 	if (HAS_FS_CALL(vnode, ioctl))
5904 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5905 
5906 	return B_DEV_INVALID_IOCTL;
5907 }
5908 
5909 
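/*!	Implements fcntl(). Supported ops are F_SETFD/F_GETFD (the FD_CLOEXEC
	flag), F_SETFL/F_GETFL (only O_APPEND and O_NONBLOCK may be changed),
	F_DUPFD, and advisory locking via F_GETLK/F_SETLK/F_SETLKW, for which the
	flock structure is copied in from userland up front. For example, a
	userland fcntl(fd, F_SETFL, O_NONBLOCK) arrives here with op == F_SETFL
	and argument == O_NONBLOCK.
*/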
5910 static status_t
5911 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5912 {
5913 	struct flock flock;
5914 
5915 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5916 		fd, op, argument, kernel ? "kernel" : "user"));
5917 
5918 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5919 		fd);
5920 	if (descriptor == NULL)
5921 		return B_FILE_ERROR;
5922 
5923 	struct vnode* vnode = fd_vnode(descriptor);
5924 
5925 	status_t status = B_OK;
5926 
5927 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5928 		if (descriptor->type != FDTYPE_FILE)
5929 			status = B_BAD_VALUE;
5930 		else if (user_memcpy(&flock, (struct flock*)argument,
5931 				sizeof(struct flock)) != B_OK)
5932 			status = B_BAD_ADDRESS;
5933 
5934 		if (status != B_OK) {
5935 			put_fd(descriptor);
5936 			return status;
5937 		}
5938 	}
5939 
5940 	switch (op) {
5941 		case F_SETFD:
5942 		{
5943 			struct io_context* context = get_current_io_context(kernel);
5944 			// Set the file descriptor flags;
5945 			// FD_CLOEXEC is the only flag available at this time
5947 			mutex_lock(&context->io_mutex);
5948 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5949 			mutex_unlock(&context->io_mutex);
5950 
5951 			status = B_OK;
5952 			break;
5953 		}
5954 
5955 		case F_GETFD:
5956 		{
5957 			struct io_context* context = get_current_io_context(kernel);
5958 
5959 			// Get file descriptor flags
5960 			mutex_lock(&context->io_mutex);
5961 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5962 			mutex_unlock(&context->io_mutex);
5963 			break;
5964 		}
5965 
5966 		case F_SETFL:
5967 			// Set file descriptor open mode
5968 
5969 			// we only accept changes to O_APPEND and O_NONBLOCK
5970 			argument &= O_APPEND | O_NONBLOCK;
5971 			if (descriptor->ops->fd_set_flags != NULL) {
5972 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5973 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5974 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5975 					(int)argument);
5976 			} else
5977 				status = B_UNSUPPORTED;
5978 
5979 			if (status == B_OK) {
5980 				// update this descriptor's open_mode field
5981 				descriptor->open_mode = (descriptor->open_mode
5982 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5983 			}
5984 
5985 			break;
5986 
5987 		case F_GETFL:
5988 			// Get file descriptor open mode
5989 			status = descriptor->open_mode;
5990 			break;
5991 
5992 		case F_DUPFD:
5993 		{
5994 			struct io_context* context = get_current_io_context(kernel);
5995 
5996 			status = new_fd_etc(context, descriptor, (int)argument);
5997 			if (status >= 0) {
5998 				mutex_lock(&context->io_mutex);
5999 				fd_set_close_on_exec(context, status, false);
6000 				mutex_unlock(&context->io_mutex);
6001 
6002 				atomic_add(&descriptor->ref_count, 1);
6003 			}
6004 			break;
6005 		}
6006 
6007 		case F_GETLK:
6008 			if (vnode != NULL) {
6009 				status = get_advisory_lock(vnode, &flock);
6010 				if (status == B_OK) {
6011 					// copy back flock structure
6012 					status = user_memcpy((struct flock*)argument, &flock,
6013 						sizeof(struct flock));
6014 				}
6015 			} else
6016 				status = B_BAD_VALUE;
6017 			break;
6018 
6019 		case F_SETLK:
6020 		case F_SETLKW:
6021 			status = normalize_flock(descriptor, &flock);
6022 			if (status != B_OK)
6023 				break;
6024 
6025 			if (vnode == NULL) {
6026 				status = B_BAD_VALUE;
6027 			} else if (flock.l_type == F_UNLCK) {
6028 				status = release_advisory_lock(vnode, &flock);
6029 			} else {
6030 				// the open mode must match the lock type
6031 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6032 						&& flock.l_type == F_WRLCK)
6033 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6034 						&& flock.l_type == F_RDLCK))
6035 					status = B_FILE_ERROR;
6036 				else {
6037 					status = acquire_advisory_lock(vnode, -1,
6038 						&flock, op == F_SETLKW);
6039 				}
6040 			}
6041 			break;
6042 
6043 		// ToDo: add support for more ops?
6044 
6045 		default:
6046 			status = B_BAD_VALUE;
6047 	}
6048 
6049 	put_fd(descriptor);
6050 	return status;
6051 }
6052 
6053 
6054 static status_t
6055 common_sync(int fd, bool kernel)
6056 {
6057 	struct file_descriptor* descriptor;
6058 	struct vnode* vnode;
6059 	status_t status;
6060 
6061 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6062 
6063 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6064 	if (descriptor == NULL)
6065 		return B_FILE_ERROR;
6066 
6067 	if (HAS_FS_CALL(vnode, fsync))
6068 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6069 	else
6070 		status = B_UNSUPPORTED;
6071 
6072 	put_fd(descriptor);
6073 	return status;
6074 }
6075 
6076 
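/*!	BeOS-style mandatory node locking: atomically records \a descriptor as
	lock holder in the vnode's mandatory_locked_by field. If another
	descriptor already holds the lock, B_BUSY is returned.
*/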
6077 static status_t
6078 common_lock_node(int fd, bool kernel)
6079 {
6080 	struct file_descriptor* descriptor;
6081 	struct vnode* vnode;
6082 
6083 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6084 	if (descriptor == NULL)
6085 		return B_FILE_ERROR;
6086 
6087 	status_t status = B_OK;
6088 
6089 	// We need to set the lock holder atomically -- someone else
6090 	// might try to lock the node at the same time
6091 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6092 			(file_descriptor*)NULL) != NULL)
6093 		status = B_BUSY;
6094 
6095 	put_fd(descriptor);
6096 	return status;
6097 }
6098 
6099 
6100 static status_t
6101 common_unlock_node(int fd, bool kernel)
6102 {
6103 	struct file_descriptor* descriptor;
6104 	struct vnode* vnode;
6105 
6106 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6107 	if (descriptor == NULL)
6108 		return B_FILE_ERROR;
6109 
6110 	status_t status = B_OK;
6111 
6112 	// We need to clear the lock atomically -- only the descriptor
6113 	// that set it may remove it
6114 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6115 			(file_descriptor*)NULL, descriptor) != descriptor)
6116 		status = B_BAD_VALUE;
6117 
6118 	put_fd(descriptor);
6119 	return status;
6120 }
6121 
6122 
6123 static status_t
6124 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6125 	bool kernel)
6126 {
6127 	struct vnode* vnode;
6128 	status_t status;
6129 
6130 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6131 	if (status != B_OK)
6132 		return status;
6133 
6134 	if (HAS_FS_CALL(vnode, read_symlink)) {
6135 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6136 	} else
6137 		status = B_BAD_VALUE;
6138 
6139 	put_vnode(vnode);
6140 	return status;
6141 }
6142 
6143 
6144 static status_t
6145 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6146 	bool kernel)
6147 {
6148 	// path validity checks have to be in the calling function!
6149 	char name[B_FILE_NAME_LENGTH];
6150 	struct vnode* vnode;
6151 	status_t status;
6152 
6153 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6154 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6155 
6156 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6157 	if (status != B_OK)
6158 		return status;
6159 
6160 	if (HAS_FS_CALL(vnode, create_symlink))
6161 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6162 	else {
6163 		status = HAS_FS_CALL(vnode, write)
6164 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6165 	}
6166 
6167 	put_vnode(vnode);
6168 
6169 	return status;
6170 }
6171 
6172 
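/*!	Creates a hard link at \a path (relative to \a pathFD) to the node
	specified by \a toFD + \a toPath. Both entries must live on the same
	mount; otherwise B_CROSS_DEVICE_LINK is returned.
*/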
6173 static status_t
6174 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6175 	bool traverseLeafLink, bool kernel)
6176 {
6177 	// path validity checks have to be in the calling function!
6178 
6179 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6180 		toPath, kernel));
6181 
6182 	char name[B_FILE_NAME_LENGTH];
6183 	struct vnode* directory;
6184 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6185 		kernel);
6186 	if (status != B_OK)
6187 		return status;
6188 
6189 	struct vnode* vnode;
6190 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6191 		kernel);
6192 	if (status != B_OK)
6193 		goto err;
6194 
6195 	if (directory->mount != vnode->mount) {
6196 		status = B_CROSS_DEVICE_LINK;
6197 		goto err1;
6198 	}
6199 
6200 	if (HAS_FS_CALL(directory, link))
6201 		status = FS_CALL(directory, link, name, vnode);
6202 	else
6203 		status = B_READ_ONLY_DEVICE;
6204 
6205 err1:
6206 	put_vnode(vnode);
6207 err:
6208 	put_vnode(directory);
6209 
6210 	return status;
6211 }
6212 
6213 
6214 static status_t
6215 common_unlink(int fd, char* path, bool kernel)
6216 {
6217 	char filename[B_FILE_NAME_LENGTH];
6218 	struct vnode* vnode;
6219 	status_t status;
6220 
6221 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6222 		kernel));
6223 
6224 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6225 	if (status < 0)
6226 		return status;
6227 
6228 	if (HAS_FS_CALL(vnode, unlink))
6229 		status = FS_CALL(vnode, unlink, filename);
6230 	else
6231 		status = B_READ_ONLY_DEVICE;
6232 
6233 	put_vnode(vnode);
6234 
6235 	return status;
6236 }
6237 
6238 
6239 static status_t
6240 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6241 {
6242 	struct vnode* vnode;
6243 	status_t status;
6244 
6245 	// TODO: honor effectiveUserGroup argument
6246 
6247 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6248 	if (status != B_OK)
6249 		return status;
6250 
6251 	if (HAS_FS_CALL(vnode, access))
6252 		status = FS_CALL(vnode, access, mode);
6253 	else
6254 		status = B_OK;
6255 
6256 	put_vnode(vnode);
6257 
6258 	return status;
6259 }
6260 
6261 
6262 static status_t
6263 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6264 {
6265 	struct vnode* fromVnode;
6266 	struct vnode* toVnode;
6267 	char fromName[B_FILE_NAME_LENGTH];
6268 	char toName[B_FILE_NAME_LENGTH];
6269 	status_t status;
6270 
6271 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6272 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6273 
6274 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6275 	if (status != B_OK)
6276 		return status;
6277 
6278 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6279 	if (status != B_OK)
6280 		goto err1;
6281 
6282 	if (fromVnode->device != toVnode->device) {
6283 		status = B_CROSS_DEVICE_LINK;
6284 		goto err2;
6285 	}
6286 
6287 	if (fromName[0] == '\0' || toName[0] == '\0'
6288 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6289 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6290 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6291 		status = B_BAD_VALUE;
6292 		goto err2;
6293 	}
6294 
6295 	if (HAS_FS_CALL(fromVnode, rename))
6296 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6297 	else
6298 		status = B_READ_ONLY_DEVICE;
6299 
6300 err2:
6301 	put_vnode(toVnode);
6302 err1:
6303 	put_vnode(fromVnode);
6304 
6305 	return status;
6306 }
6307 
6308 
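/*!	Reads stat information for \a descriptor. st_dev, st_ino, and st_rdev
	are filled in here, since the file systems don't know about them.
*/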
6309 static status_t
6310 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6311 {
6312 	struct vnode* vnode = descriptor->u.vnode;
6313 
6314 	FUNCTION(("common_read_stat: stat %p\n", stat));
6315 
6316 	// TODO: remove this once all file systems properly set them!
6317 	stat->st_crtim.tv_nsec = 0;
6318 	stat->st_ctim.tv_nsec = 0;
6319 	stat->st_mtim.tv_nsec = 0;
6320 	stat->st_atim.tv_nsec = 0;
6321 
6322 	status_t status = FS_CALL(vnode, read_stat, stat);
6323 
6324 	// fill in the st_dev and st_ino fields
6325 	if (status == B_OK) {
6326 		stat->st_dev = vnode->device;
6327 		stat->st_ino = vnode->id;
6328 		stat->st_rdev = -1;
6329 	}
6330 
6331 	return status;
6332 }
6333 
6334 
6335 static status_t
6336 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6337 	int statMask)
6338 {
6339 	struct vnode* vnode = descriptor->u.vnode;
6340 
6341 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6342 		vnode, stat, statMask));
6343 
6344 	if (!HAS_FS_CALL(vnode, write_stat))
6345 		return B_READ_ONLY_DEVICE;
6346 
6347 	return FS_CALL(vnode, write_stat, stat, statMask);
6348 }
6349 
6350 
6351 static status_t
6352 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6353 	struct stat* stat, bool kernel)
6354 {
6355 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6356 		stat));
6357 
6358 	struct vnode* vnode;
6359 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6360 		NULL, kernel);
6361 	if (status != B_OK)
6362 		return status;
6363 
6364 	status = FS_CALL(vnode, read_stat, stat);
6365 
6366 	// fill in the st_dev and st_ino fields
6367 	if (status == B_OK) {
6368 		stat->st_dev = vnode->device;
6369 		stat->st_ino = vnode->id;
6370 		stat->st_rdev = -1;
6371 	}
6372 
6373 	put_vnode(vnode);
6374 	return status;
6375 }
6376 
6377 
6378 static status_t
6379 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6380 	const struct stat* stat, int statMask, bool kernel)
6381 {
6382 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6383 		"kernel %d\n", fd, path, stat, statMask, kernel));
6384 
6385 	struct vnode* vnode;
6386 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6387 		NULL, kernel);
6388 	if (status != B_OK)
6389 		return status;
6390 
6391 	if (HAS_FS_CALL(vnode, write_stat))
6392 		status = FS_CALL(vnode, write_stat, stat, statMask);
6393 	else
6394 		status = B_READ_ONLY_DEVICE;
6395 
6396 	put_vnode(vnode);
6397 
6398 	return status;
6399 }
6400 
6401 
6402 static int
6403 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6404 {
6405 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6406 		kernel));
6407 
6408 	struct vnode* vnode;
6409 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6410 		NULL, kernel);
6411 	if (status != B_OK)
6412 		return status;
6413 
6414 	status = open_attr_dir_vnode(vnode, kernel);
6415 	if (status < 0)
6416 		put_vnode(vnode);
6417 
6418 	return status;
6419 }
6420 
6421 
6422 static status_t
6423 attr_dir_close(struct file_descriptor* descriptor)
6424 {
6425 	struct vnode* vnode = descriptor->u.vnode;
6426 
6427 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6428 
6429 	if (HAS_FS_CALL(vnode, close_attr_dir))
6430 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6431 
6432 	return B_OK;
6433 }
6434 
6435 
6436 static void
6437 attr_dir_free_fd(struct file_descriptor* descriptor)
6438 {
6439 	struct vnode* vnode = descriptor->u.vnode;
6440 
6441 	if (vnode != NULL) {
6442 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6443 		put_vnode(vnode);
6444 	}
6445 }
6446 
6447 
6448 static status_t
6449 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6450 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6451 {
6452 	struct vnode* vnode = descriptor->u.vnode;
6453 
6454 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6455 
6456 	if (HAS_FS_CALL(vnode, read_attr_dir))
6457 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6458 			bufferSize, _count);
6459 
6460 	return B_UNSUPPORTED;
6461 }
6462 
6463 
6464 static status_t
6465 attr_dir_rewind(struct file_descriptor* descriptor)
6466 {
6467 	struct vnode* vnode = descriptor->u.vnode;
6468 
6469 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6470 
6471 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6472 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6473 
6474 	return B_UNSUPPORTED;
6475 }
6476 
6477 
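/*!	Creates (and opens) the attribute \a name on the node specified by
	\a fd + \a path. If no file descriptor can be allocated afterwards, the
	freshly created attribute is closed and removed again.
*/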
6478 static int
6479 attr_create(int fd, char* path, const char* name, uint32 type,
6480 	int openMode, bool kernel)
6481 {
6482 	if (name == NULL || *name == '\0')
6483 		return B_BAD_VALUE;
6484 
6485 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6486 	struct vnode* vnode;
6487 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6488 		kernel);
6489 	if (status != B_OK)
6490 		return status;
6491 
6492 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6493 		status = B_LINK_LIMIT;
6494 		goto err;
6495 	}
6496 
6497 	if (!HAS_FS_CALL(vnode, create_attr)) {
6498 		status = B_READ_ONLY_DEVICE;
6499 		goto err;
6500 	}
6501 
6502 	void* cookie;
6503 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6504 	if (status != B_OK)
6505 		goto err;
6506 
6507 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6508 	if (fd >= 0)
6509 		return fd;
6510 
6511 	status = fd;
6512 
6513 	FS_CALL(vnode, close_attr, cookie);
6514 	FS_CALL(vnode, free_attr_cookie, cookie);
6515 
6516 	FS_CALL(vnode, remove_attr, name);
6517 
6518 err:
6519 	put_vnode(vnode);
6520 
6521 	return status;
6522 }
6523 
6524 
6525 static int
6526 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6527 {
6528 	if (name == NULL || *name == '\0')
6529 		return B_BAD_VALUE;
6530 
6531 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6532 	struct vnode* vnode;
6533 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6534 		kernel);
6535 	if (status != B_OK)
6536 		return status;
6537 
6538 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6539 		status = B_LINK_LIMIT;
6540 		goto err;
6541 	}
6542 
6543 	if (!HAS_FS_CALL(vnode, open_attr)) {
6544 		status = B_UNSUPPORTED;
6545 		goto err;
6546 	}
6547 
6548 	void* cookie;
6549 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6550 	if (status != B_OK)
6551 		goto err;
6552 
6553 	// now we only need a file descriptor for this attribute and we're done
6554 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6555 	if (fd >= 0)
6556 		return fd;
6557 
6558 	status = fd;
6559 
6560 	FS_CALL(vnode, close_attr, cookie);
6561 	FS_CALL(vnode, free_attr_cookie, cookie);
6562 
6563 err:
6564 	put_vnode(vnode);
6565 
6566 	return status;
6567 }
6568 
6569 
6570 static status_t
6571 attr_close(struct file_descriptor* descriptor)
6572 {
6573 	struct vnode* vnode = descriptor->u.vnode;
6574 
6575 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6576 
6577 	if (HAS_FS_CALL(vnode, close_attr))
6578 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6579 
6580 	return B_OK;
6581 }
6582 
6583 
6584 static void
6585 attr_free_fd(struct file_descriptor* descriptor)
6586 {
6587 	struct vnode* vnode = descriptor->u.vnode;
6588 
6589 	if (vnode != NULL) {
6590 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6591 		put_vnode(vnode);
6592 	}
6593 }
6594 
6595 
6596 static status_t
6597 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6598 	size_t* length)
6599 {
6600 	struct vnode* vnode = descriptor->u.vnode;
6601 
6602 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6603 		*length));
6604 
6605 	if (!HAS_FS_CALL(vnode, read_attr))
6606 		return B_UNSUPPORTED;
6607 
6608 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6609 }
6610 
6611 
6612 static status_t
6613 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6614 	size_t* length)
6615 {
6616 	struct vnode* vnode = descriptor->u.vnode;
6617 
6618 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6619 	if (!HAS_FS_CALL(vnode, write_attr))
6620 		return B_UNSUPPORTED;
6621 
6622 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6623 }
6624 
6625 
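/*!	Like file_seek(), but for attribute descriptors: SEEK_END obtains the
	attribute's size via its stat information.
*/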
6626 static off_t
6627 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6628 {
6629 	off_t offset;
6630 
6631 	switch (seekType) {
6632 		case SEEK_SET:
6633 			offset = 0;
6634 			break;
6635 		case SEEK_CUR:
6636 			offset = descriptor->pos;
6637 			break;
6638 		case SEEK_END:
6639 		{
6640 			struct vnode* vnode = descriptor->u.vnode;
6641 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6642 				return B_UNSUPPORTED;
6643 
6644 			struct stat stat;
6645 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6646 				&stat);
6647 			if (status != B_OK)
6648 				return status;
6649 
6650 			offset = stat.st_size;
6651 			break;
6652 		}
6653 		default:
6654 			return B_BAD_VALUE;
6655 	}
6656 
6657 	// assumes off_t is 64 bits wide
6658 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6659 		return B_BUFFER_OVERFLOW;
6660 
6661 	pos += offset;
6662 	if (pos < 0)
6663 		return B_BAD_VALUE;
6664 
6665 	return descriptor->pos = pos;
6666 }
6667 
6668 
6669 static status_t
6670 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6671 {
6672 	struct vnode* vnode = descriptor->u.vnode;
6673 
6674 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6675 
6676 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6677 		return B_UNSUPPORTED;
6678 
6679 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6680 }
6681 
6682 
6683 static status_t
6684 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6685 	int statMask)
6686 {
6687 	struct vnode* vnode = descriptor->u.vnode;
6688 
6689 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6690 
6691 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6692 		return B_READ_ONLY_DEVICE;
6693 
6694 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6695 }
6696 
6697 
6698 static status_t
6699 attr_remove(int fd, const char* name, bool kernel)
6700 {
6701 	struct file_descriptor* descriptor;
6702 	struct vnode* vnode;
6703 	status_t status;
6704 
6705 	if (name == NULL || *name == '\0')
6706 		return B_BAD_VALUE;
6707 
6708 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6709 		kernel));
6710 
6711 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6712 	if (descriptor == NULL)
6713 		return B_FILE_ERROR;
6714 
6715 	if (HAS_FS_CALL(vnode, remove_attr))
6716 		status = FS_CALL(vnode, remove_attr, name);
6717 	else
6718 		status = B_READ_ONLY_DEVICE;
6719 
6720 	put_fd(descriptor);
6721 
6722 	return status;
6723 }
6724 
6725 
6726 static status_t
6727 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6728 	bool kernel)
6729 {
6730 	struct file_descriptor* fromDescriptor;
6731 	struct file_descriptor* toDescriptor;
6732 	struct vnode* fromVnode;
6733 	struct vnode* toVnode;
6734 	status_t status;
6735 
6736 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6737 		|| *toName == '\0')
6738 		return B_BAD_VALUE;
6739 
6740 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6741 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6742 
6743 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6744 	if (fromDescriptor == NULL)
6745 		return B_FILE_ERROR;
6746 
6747 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6748 	if (toDescriptor == NULL) {
6749 		status = B_FILE_ERROR;
6750 		goto err;
6751 	}
6752 
6753 	// are the files on the same volume?
6754 	if (fromVnode->device != toVnode->device) {
6755 		status = B_CROSS_DEVICE_LINK;
6756 		goto err1;
6757 	}
6758 
6759 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6760 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6761 	} else
6762 		status = B_READ_ONLY_DEVICE;
6763 
6764 err1:
6765 	put_fd(toDescriptor);
6766 err:
6767 	put_fd(fromDescriptor);
6768 
6769 	return status;
6770 }
6771 
6772 
6773 static int
6774 index_dir_open(dev_t mountID, bool kernel)
6775 {
6776 	struct fs_mount* mount;
6777 	void* cookie;
6778 
6779 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6780 
6781 	status_t status = get_mount(mountID, &mount);
6782 	if (status != B_OK)
6783 		return status;
6784 
6785 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6786 		status = B_UNSUPPORTED;
6787 		goto error;
6788 	}
6789 
6790 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6791 	if (status != B_OK)
6792 		goto error;
6793 
6794 	// get fd for the index directory
6795 	int fd;
6796 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6797 	if (fd >= 0)
6798 		return fd;
6799 
6800 	// something went wrong
6801 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6802 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6803 
6804 	status = fd;
6805 
6806 error:
6807 	put_mount(mount);
6808 	return status;
6809 }
6810 
6811 
6812 static status_t
6813 index_dir_close(struct file_descriptor* descriptor)
6814 {
6815 	struct fs_mount* mount = descriptor->u.mount;
6816 
6817 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6818 
6819 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6820 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6821 
6822 	return B_OK;
6823 }
6824 
6825 
6826 static void
6827 index_dir_free_fd(struct file_descriptor* descriptor)
6828 {
6829 	struct fs_mount* mount = descriptor->u.mount;
6830 
6831 	if (mount != NULL) {
6832 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6833 		put_mount(mount);
6834 	}
6835 }
6836 
6837 
6838 static status_t
6839 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6840 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6841 {
6842 	struct fs_mount* mount = descriptor->u.mount;
6843 
6844 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6845 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6846 			bufferSize, _count);
6847 	}
6848 
6849 	return B_UNSUPPORTED;
6850 }
6851 
6852 
6853 static status_t
6854 index_dir_rewind(struct file_descriptor* descriptor)
6855 {
6856 	struct fs_mount* mount = descriptor->u.mount;
6857 
6858 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6859 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6860 
6861 	return B_UNSUPPORTED;
6862 }
6863 
6864 
6865 static status_t
6866 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6867 	bool kernel)
6868 {
6869 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6870 		name, kernel));
6871 
6872 	struct fs_mount* mount;
6873 	status_t status = get_mount(mountID, &mount);
6874 	if (status != B_OK)
6875 		return status;
6876 
6877 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6878 		status = B_READ_ONLY_DEVICE;
6879 		goto out;
6880 	}
6881 
6882 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6883 
6884 out:
6885 	put_mount(mount);
6886 	return status;
6887 }
6888 
6889 
6890 #if 0
6891 static status_t
6892 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6893 {
6894 	struct vnode* vnode = descriptor->u.vnode;
6895 
6896 	// ToDo: currently unused!
6897 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6898 	if (!HAS_FS_CALL(vnode, read_index_stat))
6899 		return B_UNSUPPORTED;
6900 
6901 	return B_UNSUPPORTED;
6902 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6903 }
6904 
6905 
6906 static void
6907 index_free_fd(struct file_descriptor* descriptor)
6908 {
6909 	struct vnode* vnode = descriptor->u.vnode;
6910 
6911 	if (vnode != NULL) {
6912 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6913 		put_vnode(vnode);
6914 	}
6915 }
6916 #endif
6917 
6918 
6919 static status_t
6920 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6921 	bool kernel)
6922 {
6923 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6924 		mountID, name, kernel));
6925 
6926 	struct fs_mount* mount;
6927 	status_t status = get_mount(mountID, &mount);
6928 	if (status != B_OK)
6929 		return status;
6930 
6931 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6932 		status = B_UNSUPPORTED;
6933 		goto out;
6934 	}
6935 
6936 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6937 
6938 out:
6939 	put_mount(mount);
6940 	return status;
6941 }
6942 
6943 
6944 static status_t
6945 index_remove(dev_t mountID, const char* name, bool kernel)
6946 {
6947 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6948 		name, kernel));
6949 
6950 	struct fs_mount* mount;
6951 	status_t status = get_mount(mountID, &mount);
6952 	if (status != B_OK)
6953 		return status;
6954 
6955 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6956 		status = B_READ_ONLY_DEVICE;
6957 		goto out;
6958 	}
6959 
6960 	status = FS_MOUNT_CALL(mount, remove_index, name);
6961 
6962 out:
6963 	put_mount(mount);
6964 	return status;
6965 }
6966 
6967 
6968 /*!	TODO: the query FS API is still pretty much the same as in R5.
6969 		It would be nice if queries got some more kernel support;
6970 		for example, query parsing should be moved into the kernel.
6971 */
6973 static int
6974 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6975 	int32 token, bool kernel)
6976 {
6977 	struct fs_mount* mount;
6978 	void* cookie;
6979 
6980 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6981 		query, kernel));
6982 
6983 	status_t status = get_mount(device, &mount);
6984 	if (status != B_OK)
6985 		return status;
6986 
6987 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6988 		status = B_UNSUPPORTED;
6989 		goto error;
6990 	}
6991 
6992 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6993 		&cookie);
6994 	if (status != B_OK)
6995 		goto error;
6996 
6997 	// get fd for the query
6998 	int fd;
6999 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7000 	if (fd >= 0)
7001 		return fd;
7002 
7003 	status = fd;
7004 
7005 	// something went wrong
7006 	FS_MOUNT_CALL(mount, close_query, cookie);
7007 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7008 
7009 error:
7010 	put_mount(mount);
7011 	return status;
7012 }
7013 
7014 
7015 static status_t
7016 query_close(struct file_descriptor* descriptor)
7017 {
7018 	struct fs_mount* mount = descriptor->u.mount;
7019 
7020 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7021 
7022 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7023 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7024 
7025 	return B_OK;
7026 }
7027 
7028 
7029 static void
7030 query_free_fd(struct file_descriptor* descriptor)
7031 {
7032 	struct fs_mount* mount = descriptor->u.mount;
7033 
7034 	if (mount != NULL) {
7035 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7036 		put_mount(mount);
7037 	}
7038 }
7039 
7040 
7041 static status_t
7042 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7043 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7044 {
7045 	struct fs_mount* mount = descriptor->u.mount;
7046 
7047 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7048 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7049 			bufferSize, _count);
7050 	}
7051 
7052 	return B_UNSUPPORTED;
7053 }
7054 
7055 
7056 static status_t
7057 query_rewind(struct file_descriptor* descriptor)
7058 {
7059 	struct fs_mount* mount = descriptor->u.mount;
7060 
7061 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7062 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7063 
7064 	return B_UNSUPPORTED;
7065 }
7066 
7067 
7068 //	#pragma mark - General File System functions
7069 
7070 
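/*!	Mounts file system \a fsName at \a path. If \a device does not name a
	registered partition, the DDM is asked to create a file device for it, so
	that disk images can be mounted directly. Layered file systems are
	supported by chaining one fs_volume per layer. On success, the new
	mount's root vnode covers the vnode referred to by \a path -- except for
	the very first mount, which becomes the global root.
*/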
7071 static dev_t
7072 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7073 	const char* args, bool kernel)
7074 {
7075 	struct ::fs_mount* mount;
7076 	status_t status = B_OK;
7077 	fs_volume* volume = NULL;
7078 	int32 layer = 0;
7079 	Vnode* coveredNode = NULL;
7080 
7081 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7082 
7083 	// The path is always safe, we just have to make sure that fsName is
7084 	// at least superficially valid -- we can't make any assumptions about
7085 	// args, though. A NULL fsName is OK if a device was given and the FS is
7086 	// not virtual; we'll get the name from the DDM later.
7087 	if (fsName == NULL) {
7088 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7089 			return B_BAD_VALUE;
7090 	} else if (fsName[0] == '\0')
7091 		return B_BAD_VALUE;
7092 
7093 	RecursiveLocker mountOpLocker(sMountOpLock);
7094 
7095 	// Helper to delete a newly created file device on failure.
7096 	// Not exactly beautiful, but helps to keep the code below cleaner.
7097 	struct FileDeviceDeleter {
7098 		FileDeviceDeleter() : id(-1) {}
7099 		~FileDeviceDeleter()
7100 		{
7101 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7102 		}
7103 
7104 		partition_id id;
7105 	} fileDeviceDeleter;
7106 
7107 	// If the file system is not a "virtual" one, the device argument should
7108 	// point to a real file/device (if given at all).
7109 	// get the partition
7110 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7111 	KPartition* partition = NULL;
7112 	KPath normalizedDevice;
7113 	bool newlyCreatedFileDevice = false;
7114 
7115 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7116 		// normalize the device path
7117 		status = normalizedDevice.SetTo(device, true);
7118 		if (status != B_OK)
7119 			return status;
7120 
7121 		// get a corresponding partition from the DDM
7122 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7123 		if (partition == NULL) {
7124 			// Partition not found: This either means that the user supplied
7125 			// an invalid path, or that the path refers to an image file. We
7126 			// try to let the DDM create a file device for the path.
7127 			partition_id deviceID = ddm->CreateFileDevice(
7128 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7129 			if (deviceID >= 0) {
7130 				partition = ddm->RegisterPartition(deviceID);
7131 				if (newlyCreatedFileDevice)
7132 					fileDeviceDeleter.id = deviceID;
7133 			}
7134 		}
7135 
7136 		if (!partition) {
7137 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7138 				normalizedDevice.Path()));
7139 			return B_ENTRY_NOT_FOUND;
7140 		}
7141 
7142 		device = normalizedDevice.Path();
7143 			// correct path to file device
7144 	}
7145 	PartitionRegistrar partitionRegistrar(partition, true);
7146 
7147 	// Write lock the partition's device. For the time being, we keep the
7148 	// lock until we're done mounting -- not nice, but it ensures that no one
7149 	// is interfering.
7150 	// TODO: Just mark the partition busy while mounting!
7151 	KDiskDevice* diskDevice = NULL;
7152 	if (partition) {
7153 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7154 		if (!diskDevice) {
7155 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7156 			return B_ERROR;
7157 		}
7158 	}
7159 
7160 	DeviceWriteLocker writeLocker(diskDevice, true);
7161 		// this takes over the write lock acquired before
7162 
7163 	if (partition != NULL) {
7164 		// make sure that the partition is not busy
7165 		if (partition->IsBusy()) {
7166 			TRACE(("fs_mount(): Partition is busy.\n"));
7167 			return B_BUSY;
7168 		}
7169 
7170 		// if no FS name had been supplied, we get it from the partition
7171 		if (fsName == NULL) {
7172 			KDiskSystem* diskSystem = partition->DiskSystem();
7173 			if (!diskSystem) {
7174 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7175 					"recognize it.\n"));
7176 				return B_BAD_VALUE;
7177 			}
7178 
7179 			if (!diskSystem->IsFileSystem()) {
7180 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7181 					"partitioning system.\n"));
7182 				return B_BAD_VALUE;
7183 			}
7184 
7185 			// The disk system name will not change, and the KDiskSystem
7186 			// object will not go away while the disk device is locked (and
7187 			// the partition has a reference to it), so this is safe.
7188 			fsName = diskSystem->Name();
7189 		}
7190 	}
7191 
7192 	mount = new(std::nothrow) (struct ::fs_mount);
7193 	if (mount == NULL)
7194 		return B_NO_MEMORY;
7195 
7196 	mount->device_name = strdup(device);
7197 		// "device" can be NULL
7198 
7199 	status = mount->entry_cache.Init();
7200 	if (status != B_OK)
7201 		goto err1;
7202 
7203 	// initialize structure
7204 	mount->id = sNextMountID++;
7205 	mount->partition = NULL;
7206 	mount->root_vnode = NULL;
7207 	mount->covers_vnode = NULL;
7208 	mount->unmounting = false;
7209 	mount->owns_file_device = false;
7210 	mount->volume = NULL;
7211 
7212 	// build up the volume(s)
7213 	while (true) {
7214 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7215 		if (layerFSName == NULL) {
7216 			if (layer == 0) {
7217 				status = B_NO_MEMORY;
7218 				goto err1;
7219 			}
7220 
7221 			break;
7222 		}
7223 
7224 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7225 		if (volume == NULL) {
7226 			status = B_NO_MEMORY;
7227 			free(layerFSName);
7228 			goto err1;
7229 		}
7230 
7231 		volume->id = mount->id;
7232 		volume->partition = partition != NULL ? partition->ID() : -1;
7233 		volume->layer = layer++;
7234 		volume->private_volume = NULL;
7235 		volume->ops = NULL;
7236 		volume->sub_volume = NULL;
7237 		volume->super_volume = NULL;
7238 		volume->file_system = NULL;
7239 		volume->file_system_name = NULL;
7240 
7241 		volume->file_system_name = get_file_system_name(layerFSName);
7242 		if (volume->file_system_name == NULL) {
7243 			status = B_NO_MEMORY;
7244 			free(layerFSName);
7245 			free(volume);
7246 			goto err1;
7247 		}
7248 
7249 		volume->file_system = get_file_system(layerFSName);
7250 		if (volume->file_system == NULL) {
7251 			status = B_DEVICE_NOT_FOUND;
7252 			free(layerFSName);
7253 			free(volume->file_system_name);
7254 			free(volume);
7255 			goto err1;
7256 		}
7257 
7258 		if (mount->volume == NULL)
7259 			mount->volume = volume;
7260 		else {
7261 			volume->super_volume = mount->volume;
7262 			mount->volume->sub_volume = volume;
7263 			mount->volume = volume;
7264 		}
7265 	}
7266 
7267 	// insert mount struct into list before we call FS's mount() function
7268 	// so that vnodes can be created for this mount
7269 	mutex_lock(&sMountMutex);
7270 	hash_insert(sMountsTable, mount);
7271 	mutex_unlock(&sMountMutex);
7272 
7273 	ino_t rootID;
7274 
7275 	if (!sRoot) {
7276 		// we haven't mounted anything yet
7277 		if (strcmp(path, "/") != 0) {
7278 			status = B_ERROR;
7279 			goto err2;
7280 		}
7281 
7282 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7283 			args, &rootID);
7284 		if (status != 0)
7285 			goto err2;
7286 	} else {
7287 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7288 		if (status != B_OK)
7289 			goto err2;
7290 
7291 		mount->covers_vnode = coveredNode;
7292 
7293 		// make sure covered_vnode is a directory
7294 		if (!S_ISDIR(coveredNode->Type())) {
7295 			status = B_NOT_A_DIRECTORY;
7296 			goto err3;
7297 		}
7298 
7299 		if (coveredNode->IsCovered()) {
7300 			// this is already a covered vnode
7301 			status = B_BUSY;
7302 			goto err3;
7303 		}
7304 
7305 		// mount it/them
7306 		fs_volume* volume = mount->volume;
7307 		while (volume) {
7308 			status = volume->file_system->mount(volume, device, flags, args,
7309 				&rootID);
7310 			if (status != B_OK) {
7311 				if (volume->sub_volume)
7312 					goto err4;
7313 				goto err3;
7314 			}
7315 
7316 			volume = volume->super_volume;
7317 		}
7318 
7319 		volume = mount->volume;
7320 		while (volume) {
7321 			if (volume->ops->all_layers_mounted != NULL)
7322 				volume->ops->all_layers_mounted(volume);
7323 			volume = volume->super_volume;
7324 		}
7325 	}
7326 
7327 	// the root node is supposed to be owned by the file system - it must
7328 	// exist at this point
7329 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7330 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7331 		panic("fs_mount: file system does not own its root node!\n");
7332 		status = B_ERROR;
7333 		goto err4;
7334 	}
7335 
7336 	// set up the links between the root vnode and the vnode it covers
7337 	rw_lock_write_lock(&sVnodeLock);
7338 	if (coveredNode != NULL) {
7339 		if (coveredNode->IsCovered()) {
7340 			// the vnode is covered now
7341 			status = B_BUSY;
7342 			rw_lock_write_unlock(&sVnodeLock);
7343 			goto err4;
7344 		}
7345 
7346 		mount->root_vnode->covers = coveredNode;
7347 		mount->root_vnode->SetCovering(true);
7348 
7349 		coveredNode->covered_by = mount->root_vnode;
7350 		coveredNode->SetCovered(true);
7351 	}
7352 	rw_lock_write_unlock(&sVnodeLock);
7353 
7354 	if (!sRoot) {
7355 		sRoot = mount->root_vnode;
7356 		mutex_lock(&sIOContextRootLock);
7357 		get_current_io_context(true)->root = sRoot;
7358 		mutex_unlock(&sIOContextRootLock);
7359 		inc_vnode_ref_count(sRoot);
7360 	}
7361 
7362 	// supply the partition (if any) with the mount cookie and mark it mounted
7363 	if (partition) {
7364 		partition->SetMountCookie(mount->volume->private_volume);
7365 		partition->SetVolumeID(mount->id);
7366 
7367 		// keep a partition reference as long as the partition is mounted
7368 		partitionRegistrar.Detach();
7369 		mount->partition = partition;
7370 		mount->owns_file_device = newlyCreatedFileDevice;
7371 		fileDeviceDeleter.id = -1;
7372 	}
7373 
7374 	notify_mount(mount->id,
7375 		coveredNode != NULL ? coveredNode->device : -1,
7376 		coveredNode ? coveredNode->id : -1);
7377 
7378 	return mount->id;
7379 
7380 err4:
7381 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7382 err3:
7383 	if (coveredNode != NULL)
7384 		put_vnode(coveredNode);
7385 err2:
7386 	mutex_lock(&sMountMutex);
7387 	hash_remove(sMountsTable, mount);
7388 	mutex_unlock(&sMountMutex);
7389 err1:
7390 	delete mount;
7391 
7392 	return status;
7393 }
7394 
7395 
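/*!	Unmounts the volume specified by \a path or \a mountID. The mount can
	only be removed once none of its vnodes is busy or referenced (references
	due to covers/covered_by links are not counted). With B_FORCE_UNMOUNT,
	all open file descriptors on the mount are disconnected, and we keep
	retrying until the remaining references are gone.
*/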
7396 static status_t
7397 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7398 {
7399 	struct fs_mount* mount;
7400 	status_t err;
7401 
7402 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7403 		kernel));
7404 
7405 	struct vnode* pathVnode = NULL;
7406 	if (path != NULL) {
7407 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7408 		if (err != B_OK)
7409 			return B_ENTRY_NOT_FOUND;
7410 	}
7411 
7412 	RecursiveLocker mountOpLocker(sMountOpLock);
7413 
7414 	// This lock is not strictly necessary, but is taken in the KDEBUG case
7415 	// to keep the ASSERT in find_mount() working.
7416 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7417 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7418 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7419 	if (mount == NULL) {
7420 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7421 			pathVnode);
7422 	}
7423 
7424 	if (path != NULL) {
7425 		put_vnode(pathVnode);
7426 
7427 		if (mount->root_vnode != pathVnode) {
7428 			// not a mount point
7429 			return B_BAD_VALUE;
7430 		}
7431 	}
7432 
7433 	// if the volume is associated with a partition, lock the device of the
7434 	// partition as long as we are unmounting
7435 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7436 	KPartition* partition = mount->partition;
7437 	KDiskDevice* diskDevice = NULL;
7438 	if (partition != NULL) {
7439 		if (partition->Device() == NULL) {
7440 			dprintf("fs_unmount(): There is no device!\n");
7441 			return B_ERROR;
7442 		}
7443 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7444 		if (!diskDevice) {
7445 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7446 			return B_ERROR;
7447 		}
7448 	}
7449 	DeviceWriteLocker writeLocker(diskDevice, true);
7450 
7451 	// make sure that the partition is not busy
7452 	if (partition != NULL) {
7453 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7454 			TRACE(("fs_unmount(): Partition is busy.\n"));
7455 			return B_BUSY;
7456 		}
7457 	}
7458 
7459 	// grab the vnode lock for writing to keep someone from creating
7460 	// a vnode while we're figuring out if we can continue
7461 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7462 
7463 	bool disconnectedDescriptors = false;
7464 
7465 	while (true) {
7466 		bool busy = false;
7467 
7468 		// cycle through the list of vnodes associated with this mount and
7469 		// make sure none of them is busy or still referenced
7470 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7471 		while (struct vnode* vnode = iterator.Next()) {
7472 			if (vnode->IsBusy()) {
7473 				busy = true;
7474 				break;
7475 			}
7476 
7477 			// check the vnode's ref count -- subtract additional references for
7478 			// covering
7479 			int32 refCount = vnode->ref_count;
7480 			if (vnode->covers != NULL)
7481 				refCount--;
7482 			if (vnode->covered_by != NULL)
7483 				refCount--;
7484 
7485 			if (refCount != 0) {
7486 				// there are still vnodes in use on this mount, so we cannot
7487 				// unmount yet
7488 				busy = true;
7489 				break;
7490 			}
7491 		}
7492 
7493 		if (!busy)
7494 			break;
7495 
7496 		if ((flags & B_FORCE_UNMOUNT) == 0)
7497 			return B_BUSY;
7498 
7499 		if (disconnectedDescriptors) {
7500 			// wait a bit until the last access is finished, and then try again
7501 			vnodesWriteLocker.Unlock();
7502 			snooze(100000);
7503 			// TODO: if there is some kind of bug that prevents the ref counts
7504 			// from getting back to zero, this will fall into an endless loop...
7505 			vnodesWriteLocker.Lock();
7506 			continue;
7507 		}
7508 
7509 		// the file system is still busy - but we're forced to unmount it,
7510 		// so let's disconnect all open file descriptors
7511 
7512 		mount->unmounting = true;
7513 			// prevent new vnodes from being created
7514 
7515 		vnodesWriteLocker.Unlock();
7516 
7517 		disconnect_mount_or_vnode_fds(mount, NULL);
7518 		disconnectedDescriptors = true;
7519 
7520 		vnodesWriteLocker.Lock();
7521 	}
7522 
7523 	// We can safely continue. Mark all of the vnodes busy and this mount
7524 	// structure in unmounting state. Also undo the vnode covers/covered_by
7525 	// links.
7526 	mount->unmounting = true;
7527 
7528 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7529 	while (struct vnode* vnode = iterator.Next()) {
7530 		// Remove all covers/covered_by links from other mounts' nodes to this
7531 		// vnode and adjust the node ref count accordingly. We will release the
7532 		// references to the external vnodes below.
7533 		if (Vnode* coveredNode = vnode->covers) {
7534 			if (Vnode* coveringNode = vnode->covered_by) {
7535 				// We have both covered and covering vnodes, so just remove us
7536 				// from the chain.
7537 				coveredNode->covered_by = coveringNode;
7538 				coveringNode->covers = coveredNode;
7539 				vnode->ref_count -= 2;
7540 
7541 				vnode->covered_by = NULL;
7542 				vnode->covers = NULL;
7543 				vnode->SetCovering(false);
7544 				vnode->SetCovered(false);
7545 			} else {
7546 				// We only have a covered vnode. Remove its link to us.
7547 				coveredNode->covered_by = NULL;
7548 				coveredNode->SetCovered(false);
7549 				vnode->ref_count--;
7550 
7551 				// If the other node is an external vnode, we keep its link
7552 				// around so we can put the reference later on. Otherwise
7553 				// we get rid of it right now.
7554 				if (coveredNode->mount == mount) {
7555 					vnode->covers = NULL;
7556 					coveredNode->ref_count--;
7557 				}
7558 			}
7559 		} else if (Vnode* coveringNode = vnode->covered_by) {
7560 			// We only have a covering vnode. Remove its link to us.
7561 			coveringNode->covers = NULL;
7562 			coveringNode->SetCovering(false);
7563 			vnode->ref_count--;
7564 
7565 			// If the other node is an external vnode, we keep its link
7566 			// around so we can put the reference later on. Otherwise
7567 			// we get rid of it right now.
7568 			if (coveringNode->mount == mount) {
7569 				vnode->covered_by = NULL;
7570 				coveringNode->ref_count--;
7571 			}
7572 		}
7573 
7574 		vnode->SetBusy(true);
7575 		vnode_to_be_freed(vnode);
7576 	}
7577 
7578 	vnodesWriteLocker.Unlock();
7579 
7580 	// Free all vnodes associated with this mount.
7581 	// They will be removed from the mount list by free_vnode(), so
7582 	// we don't have to do that ourselves.
7583 	while (struct vnode* vnode = mount->vnodes.Head()) {
7584 		// Put the references to external covered/covering vnodes we kept above.
7585 		if (Vnode* coveredNode = vnode->covers)
7586 			put_vnode(coveredNode);
7587 		if (Vnode* coveringNode = vnode->covered_by)
7588 			put_vnode(coveringNode);
7589 
7590 		free_vnode(vnode, false);
7591 	}
7592 
7593 	// remove the mount structure from the hash table
7594 	mutex_lock(&sMountMutex);
7595 	hash_remove(sMountsTable, mount);
7596 	mutex_unlock(&sMountMutex);
7597 
7598 	mountOpLocker.Unlock();
7599 
7600 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7601 	notify_unmount(mount->id);
7602 
7603 	// dereference the partition and mark it unmounted
7604 	if (partition) {
7605 		partition->SetVolumeID(-1);
7606 		partition->SetMountCookie(NULL);
7607 
7608 		if (mount->owns_file_device)
7609 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7610 		partition->Unregister();
7611 	}
7612 
7613 	delete mount;
7614 	return B_OK;
7615 }
7616 
7617 
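/*!	Synchronizes the volume \a device: first writes back all modified file
	caches, then lets the file system do its own synchronization via its
	sync() hook. A marker vnode is inserted into the mount's vnode list so
	that the iteration can be resumed safely after the locks have been
	dropped around each WriteModified() call.
*/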
7618 static status_t
7619 fs_sync(dev_t device)
7620 {
7621 	struct fs_mount* mount;
7622 	status_t status = get_mount(device, &mount);
7623 	if (status != B_OK)
7624 		return status;
7625 
7626 	struct vnode marker;
7627 	memset(&marker, 0, sizeof(marker));
7628 	marker.SetBusy(true);
7629 	marker.SetRemoved(true);
7630 
7631 	// First, synchronize all file caches
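	// A stack-allocated "marker" vnode keeps our position in
	// fs_mount::vnodes across iterations: both locks are dropped while a
	// cache is being written back, so the marker is re-inserted after the
	// current node each round, and concurrent insertions or removals
	// cannot invalidate the traversal.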
7632 
7633 	while (true) {
7634 		WriteLocker locker(sVnodeLock);
7635 			// Note: That's the easy way, which is probably OK for sync(),
7636 			// since it's a relatively rare call and doesn't need to allow for
7637 			// a lot of concurrency. Using a read lock would be possible, but
7638 			// also more involved, since we would have to lock the individual
7639 			// nodes and take care of the locking order, which we might not
7640 			// want to do while holding fs_mount::rlock.
7641 
7642 		// synchronize access to vnode list
7643 		recursive_lock_lock(&mount->rlock);
7644 
7645 		struct vnode* vnode;
7646 		if (!marker.IsRemoved()) {
7647 			vnode = mount->vnodes.GetNext(&marker);
7648 			mount->vnodes.Remove(&marker);
7649 			marker.SetRemoved(true);
7650 		} else
7651 			vnode = mount->vnodes.First();
7652 
7653 		while (vnode != NULL && (vnode->cache == NULL
7654 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7655 			// TODO: we could track writes (and writable mapped vnodes)
7656 			//	and have a simple flag that we could test for here
7657 			vnode = mount->vnodes.GetNext(vnode);
7658 		}
7659 
7660 		if (vnode != NULL) {
7661 			// insert marker vnode again
7662 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7663 			marker.SetRemoved(false);
7664 		}
7665 
7666 		recursive_lock_unlock(&mount->rlock);
7667 
7668 		if (vnode == NULL)
7669 			break;
7670 
7671 		vnode = lookup_vnode(mount->id, vnode->id);
7672 		if (vnode == NULL || vnode->IsBusy())
7673 			continue;
7674 
7675 		if (vnode->ref_count == 0) {
7676 			// this vnode has been unused before
7677 			vnode_used(vnode);
7678 		}
7679 		inc_vnode_ref_count(vnode);
7680 
7681 		locker.Unlock();
7682 
7683 		if (vnode->cache != NULL && !vnode->IsRemoved())
7684 			vnode->cache->WriteModified();
7685 
7686 		put_vnode(vnode);
7687 	}
7688 
7689 	// And then, let the file systems do their synchronizing work
7690 
7691 	if (HAS_FS_MOUNT_CALL(mount, sync))
7692 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7693 
7694 	put_mount(mount);
7695 	return status;
7696 }
7697 
7698 
7699 static status_t
7700 fs_read_info(dev_t device, struct fs_info* info)
7701 {
7702 	struct fs_mount* mount;
7703 	status_t status = get_mount(device, &mount);
7704 	if (status != B_OK)
7705 		return status;
7706 
7707 	memset(info, 0, sizeof(struct fs_info));
7708 
7709 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7710 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7711 
7712 	// fill in info the file system doesn't (have to) know about
7713 	if (status == B_OK) {
7714 		info->dev = mount->id;
7715 		info->root = mount->root_vnode->id;
7716 
7717 		fs_volume* volume = mount->volume;
7718 		while (volume->super_volume != NULL)
7719 			volume = volume->super_volume;
7720 
7721 		strlcpy(info->fsh_name, volume->file_system_name,
7722 			sizeof(info->fsh_name));
7723 		if (mount->device_name != NULL) {
7724 			strlcpy(info->device_name, mount->device_name,
7725 				sizeof(info->device_name));
7726 		}
7727 	}
7728 
7729 	// Even if the call is not supported by the file system, we still
7730 	// return the parts that we filled in ourselves above.
7731 
7732 	put_mount(mount);
7733 	return status;
7734 }
7735 
7736 
7737 static status_t
7738 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7739 {
7740 	struct fs_mount* mount;
7741 	status_t status = get_mount(device, &mount);
7742 	if (status != B_OK)
7743 		return status;
7744 
7745 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7746 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7747 	else
7748 		status = B_READ_ONLY_DEVICE;
7749 
7750 	put_mount(mount);
7751 	return status;
7752 }
7753 
7754 
7755 static dev_t
7756 fs_next_device(int32* _cookie)
7757 {
7758 	struct fs_mount* mount = NULL;
7759 	dev_t device = *_cookie;
7760 
7761 	mutex_lock(&sMountMutex);
7762 
7763 	// Since device IDs are assigned sequentially, this algorithm
7764 	// works well enough. It makes sure that the device list is
7765 	// returned sorted, and that no device is skipped even if an
7766 	// already visited device has been unmounted in the meantime.
7767 
7768 	while (device < sNextMountID) {
7769 		mount = find_mount(device++);
7770 		if (mount != NULL && mount->volume->private_volume != NULL)
7771 			break;
7772 	}
7773 
7774 	*_cookie = device;
7775 
7776 	if (mount != NULL)
7777 		device = mount->id;
7778 	else
7779 		device = B_BAD_VALUE;
7780 
7781 	mutex_unlock(&sMountMutex);
7782 
7783 	return device;
7784 }
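
// Illustrative usage sketch (not part of the original file): callers
// iterate volumes with the cookie protocol, the same way _kern_sync()
// below drives next_dev():
//
//	int32 cookie = 0;
//	dev_t device;
//	while ((device = fs_next_device(&cookie)) >= 0) {
//		// ... e.g. fs_read_info(device, &info) ...
//	}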
7785 
7786 
7787 ssize_t
7788 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7789 	void *buffer, size_t readBytes)
7790 {
7791 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7792 	if (attrFD < 0)
7793 		return attrFD;
7794 
7795 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7796 
7797 	_kern_close(attrFD);
7798 
7799 	return bytesRead;
7800 }
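
// Illustrative usage sketch (attribute name and type are examples only,
// not mandated by this function): reading a whole small attribute via
// the open-read-close wrapper above:
//
//	char buffer[256];
//	ssize_t bytes = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
//		buffer, sizeof(buffer));
//	if (bytes < 0)
//		/* bytes holds the error code */;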
7801 
7802 
7803 static status_t
7804 get_cwd(char* buffer, size_t size, bool kernel)
7805 {
7806 	// Get current working directory from io context
7807 	struct io_context* context = get_current_io_context(kernel);
7808 	status_t status;
7809 
7810 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7811 
7812 	mutex_lock(&context->io_mutex);
7813 
7814 	struct vnode* vnode = context->cwd;
7815 	if (vnode)
7816 		inc_vnode_ref_count(vnode);
7817 
7818 	mutex_unlock(&context->io_mutex);
7819 
7820 	if (vnode) {
7821 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7822 		put_vnode(vnode);
7823 	} else
7824 		status = B_ERROR;
7825 
7826 	return status;
7827 }
7828 
7829 
7830 static status_t
7831 set_cwd(int fd, char* path, bool kernel)
7832 {
7833 	struct io_context* context;
7834 	struct vnode* vnode = NULL;
7835 	struct vnode* oldDirectory;
7836 	status_t status;
7837 
7838 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7839 
7840 	// Get vnode for passed path, and bail if it failed
7841 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7842 	if (status < 0)
7843 		return status;
7844 
7845 	if (!S_ISDIR(vnode->Type())) {
7846 		// nope, can't cwd to here
7847 		status = B_NOT_A_DIRECTORY;
7848 		goto err;
7849 	}
7850 
7851 	// Get current io context and lock
7852 	context = get_current_io_context(kernel);
7853 	mutex_lock(&context->io_mutex);
7854 
7855 	// save the old current working directory first
7856 	oldDirectory = context->cwd;
7857 	context->cwd = vnode;
7858 
7859 	mutex_unlock(&context->io_mutex);
7860 
7861 	if (oldDirectory)
7862 		put_vnode(oldDirectory);
7863 
7864 	return B_NO_ERROR;
7865 
7866 err:
7867 	put_vnode(vnode);
7868 	return status;
7869 }
7870 
7871 
7872 //	#pragma mark - kernel mirrored syscalls
7873 
7874 
7875 dev_t
7876 _kern_mount(const char* path, const char* device, const char* fsName,
7877 	uint32 flags, const char* args, size_t argsLength)
7878 {
7879 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7880 	if (pathBuffer.InitCheck() != B_OK)
7881 		return B_NO_MEMORY;
7882 
7883 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7884 }
7885 
7886 
7887 status_t
7888 _kern_unmount(const char* path, uint32 flags)
7889 {
7890 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7891 	if (pathBuffer.InitCheck() != B_OK)
7892 		return B_NO_MEMORY;
7893 
7894 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7895 }
7896 
7897 
7898 status_t
7899 _kern_read_fs_info(dev_t device, struct fs_info* info)
7900 {
7901 	if (info == NULL)
7902 		return B_BAD_VALUE;
7903 
7904 	return fs_read_info(device, info);
7905 }
7906 
7907 
7908 status_t
7909 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7910 {
7911 	if (info == NULL)
7912 		return B_BAD_VALUE;
7913 
7914 	return fs_write_info(device, info, mask);
7915 }
7916 
7917 
7918 status_t
7919 _kern_sync(void)
7920 {
7921 	// Note: _kern_sync() is also called from _user_sync()
7922 	int32 cookie = 0;
7923 	dev_t device;
7924 	while ((device = next_dev(&cookie)) >= 0) {
7925 		status_t status = fs_sync(device);
7926 		if (status != B_OK && status != B_BAD_VALUE) {
7927 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7928 				strerror(status));
7929 		}
7930 	}
7931 
7932 	return B_OK;
7933 }
7934 
7935 
7936 dev_t
7937 _kern_next_device(int32* _cookie)
7938 {
7939 	return fs_next_device(_cookie);
7940 }
7941 
7942 
7943 status_t
7944 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7945 	size_t infoSize)
7946 {
7947 	if (infoSize != sizeof(fd_info))
7948 		return B_BAD_VALUE;
7949 
7950 	// get the team
7951 	Team* team = Team::Get(teamID);
7952 	if (team == NULL)
7953 		return B_BAD_TEAM_ID;
7954 	BReference<Team> teamReference(team, true);
7955 
7956 	// now that we have a team reference, its I/O context won't go away
7957 	io_context* context = team->io_context;
7958 	MutexLocker contextLocker(context->io_mutex);
7959 
7960 	uint32 slot = *_cookie;
7961 
7962 	struct file_descriptor* descriptor;
7963 	while (slot < context->table_size
7964 		&& (descriptor = context->fds[slot]) == NULL) {
7965 		slot++;
7966 	}
7967 
7968 	if (slot >= context->table_size)
7969 		return B_ENTRY_NOT_FOUND;
7970 
7971 	info->number = slot;
7972 	info->open_mode = descriptor->open_mode;
7973 
7974 	struct vnode* vnode = fd_vnode(descriptor);
7975 	if (vnode != NULL) {
7976 		info->device = vnode->device;
7977 		info->node = vnode->id;
7978 	} else if (descriptor->u.mount != NULL) {
7979 		info->device = descriptor->u.mount->id;
7980 		info->node = -1;
7981 	}
7982 
7983 	*_cookie = slot + 1;
7984 	return B_OK;
7985 }
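
// Illustrative sketch: enumerating a team's open descriptors with the
// cookie protocol implemented above:
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
//			== B_OK) {
//		// info.number, info.device and info.node describe one open FD
//	}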
7986 
7987 
7988 int
7989 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7990 	int perms)
7991 {
7992 	if ((openMode & O_CREAT) != 0) {
7993 		return file_create_entry_ref(device, inode, name, openMode, perms,
7994 			true);
7995 	}
7996 
7997 	return file_open_entry_ref(device, inode, name, openMode, true);
7998 }
7999 
8000 
8001 /*!	\brief Opens a node specified by a FD + path pair.
8002 
8003 	At least one of \a fd and \a path must be specified.
8004 	If only \a fd is given, the function opens the node identified by this
8005 	FD. If only a path is given, this path is opened. If both are given and
8006 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8007 	of the directory (!) identified by \a fd.
8008 
8009 	\param fd The FD. May be < 0.
8010 	\param path The absolute or relative path. May be \c NULL.
8011 	\param openMode The open mode.
8012 	\return A FD referring to the newly opened node, or an error code,
8013 			if an error occurs.
8014 */
8015 int
8016 _kern_open(int fd, const char* path, int openMode, int perms)
8017 {
8018 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8019 	if (pathBuffer.InitCheck() != B_OK)
8020 		return B_NO_MEMORY;
8021 
8022 	if (openMode & O_CREAT)
8023 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8024 
8025 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8026 }
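
// Illustrative sketch of the FD + path convention documented above: a
// relative path is resolved against the directory FD, while an absolute
// path makes the FD irrelevant (paths are examples only):
//
//	int dir = _kern_open_dir(-1, "/boot/home");
//	int fd1 = _kern_open(dir, "todo.txt", O_RDWR, 0);
//		// opens /boot/home/todo.txt
//	int fd2 = _kern_open(-1, "/boot/home/todo.txt", O_RDWR, 0);
//		// same file, no base directory needed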
8027 
8028 
8029 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8030 
8031 	The supplied name may be \c NULL, in which case the directory
8032 	identified by \a device and \a inode will be opened. Otherwise
8033 	\a device and \a inode identify the parent directory of the directory
8034 	to be opened and \a name its entry name.
8035 
8036 	\param device If \a name is specified the ID of the device the parent
8037 		   directory of the directory to be opened resides on, otherwise
8038 		   the device of the directory itself.
8039 	\param inode If \a name is specified the node ID of the parent
8040 		   directory of the directory to be opened, otherwise node ID of the
8041 		   directory itself.
8042 	\param name The entry name of the directory to be opened. If \c NULL,
8043 		   the \a device + \a inode pair identify the node to be opened.
8044 	\return The FD of the newly opened directory or an error code, if
8045 			something went wrong.
8046 */
8047 int
8048 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8049 {
8050 	return dir_open_entry_ref(device, inode, name, true);
8051 }
8052 
8053 
8054 /*!	\brief Opens a directory specified by a FD + path pair.
8055 
8056 	At least one of \a fd and \a path must be specified.
8057 	If only \a fd is given, the function opens the directory identified by this
8058 	FD. If only a path is given, this path is opened. If both are given and
8059 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8060 	of the directory (!) identified by \a fd.
8061 
8062 	\param fd The FD. May be < 0.
8063 	\param path The absolute or relative path. May be \c NULL.
8064 	\return A FD referring to the newly opened directory, or an error code,
8065 			if an error occurs.
8066 */
8067 int
8068 _kern_open_dir(int fd, const char* path)
8069 {
8070 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8071 	if (pathBuffer.InitCheck() != B_OK)
8072 		return B_NO_MEMORY;
8073 
8074 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8075 }
8076 
8077 
8078 status_t
8079 _kern_fcntl(int fd, int op, uint32 argument)
8080 {
8081 	return common_fcntl(fd, op, argument, true);
8082 }
8083 
8084 
8085 status_t
8086 _kern_fsync(int fd)
8087 {
8088 	return common_sync(fd, true);
8089 }
8090 
8091 
8092 status_t
8093 _kern_lock_node(int fd)
8094 {
8095 	return common_lock_node(fd, true);
8096 }
8097 
8098 
8099 status_t
8100 _kern_unlock_node(int fd)
8101 {
8102 	return common_unlock_node(fd, true);
8103 }
8104 
8105 
8106 status_t
8107 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8108 	int perms)
8109 {
8110 	return dir_create_entry_ref(device, inode, name, perms, true);
8111 }
8112 
8113 
8114 /*!	\brief Creates a directory specified by a FD + path pair.
8115 
8116 	\a path must always be specified (it contains at least the name of the
8117 	new directory). If only a path is given, this path identifies the location at
8118 	which the directory shall be created. If both \a fd and \a path are given
8119 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8120 	of the directory (!) identified by \a fd.
8121 
8122 	\param fd The FD. May be < 0.
8123 	\param path The absolute or relative path. Must not be \c NULL.
8124 	\param perms The access permissions the new directory shall have.
8125 	\return \c B_OK, if the directory has been created successfully, another
8126 			error code otherwise.
8127 */
8128 status_t
8129 _kern_create_dir(int fd, const char* path, int perms)
8130 {
8131 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8132 	if (pathBuffer.InitCheck() != B_OK)
8133 		return B_NO_MEMORY;
8134 
8135 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8136 }
8137 
8138 
8139 status_t
8140 _kern_remove_dir(int fd, const char* path)
8141 {
8142 	if (path) {
8143 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8144 		if (pathBuffer.InitCheck() != B_OK)
8145 			return B_NO_MEMORY;
8146 
8147 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8148 	}
8149 
8150 	return dir_remove(fd, NULL, true);
8151 }
8152 
8153 
8154 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8155 
8156 	At least one of \a fd and \a path must be specified.
8157 	If only \a fd is given, the symlink to be read is the node identified
8158 	by this FD. If only a path is given, this path identifies the
8159 	symlink to be read. If both are given and the path is absolute, \a fd is
8160 	ignored; a relative path is reckoned off of the directory (!) identified
8161 	by \a fd.
8162 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8163 	will still be updated to reflect the required buffer size.
8164 
8165 	\param fd The FD. May be < 0.
8166 	\param path The absolute or relative path. May be \c NULL.
8167 	\param buffer The buffer into which the contents of the symlink shall be
8168 		   written.
8169 	\param _bufferSize A pointer to the size of the supplied buffer.
8170 	\return The length of the link on success or an appropriate error code
8171 */
8172 status_t
8173 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8174 {
8175 	if (path) {
8176 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8177 		if (pathBuffer.InitCheck() != B_OK)
8178 			return B_NO_MEMORY;
8179 
8180 		return common_read_link(fd, pathBuffer.LockBuffer(),
8181 			buffer, _bufferSize, true);
8182 	}
8183 
8184 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8185 }
8186 
8187 
8188 /*!	\brief Creates a symlink specified by a FD + path pair.
8189 
8190 	\a path must always be specified (it contains at least the name of the
8191 	new symlink). If only a path is given, this path identifies the location at
8192 	which the symlink shall be created. If both \a fd and \a path are given and
8193 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8194 	of the directory (!) identified by \a fd.
8195 
8196 	\param fd The FD. May be < 0.
8197 	\param path The absolute or relative path. Must not be \c NULL.
8198 	\param toPath The path the symlink shall point to.
8198 	\param mode The access permissions the new symlink shall have.
8199 	\return \c B_OK, if the symlink has been created successfully, another
8200 			error code otherwise.
8201 */
8202 status_t
8203 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8204 {
8205 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8206 	if (pathBuffer.InitCheck() != B_OK)
8207 		return B_NO_MEMORY;
8208 
8209 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8210 		toPath, mode, true);
8211 }
8212 
8213 
8214 status_t
8215 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8216 	bool traverseLeafLink)
8217 {
8218 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8219 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8220 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8221 		return B_NO_MEMORY;
8222 
8223 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8224 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8225 }
8226 
8227 
8228 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8229 
8230 	\a path must always be specified (it contains at least the name of the entry
8231 	to be deleted). If only a path is given, this path identifies the entry
8232 	directly. If both \a fd and \a path are given and the path is absolute,
8233 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8234 	identified by \a fd.
8235 
8236 	\param fd The FD. May be < 0.
8237 	\param path The absolute or relative path. Must not be \c NULL.
8238 	\return \c B_OK, if the entry has been removed successfully, another
8239 			error code otherwise.
8240 */
8241 status_t
8242 _kern_unlink(int fd, const char* path)
8243 {
8244 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8245 	if (pathBuffer.InitCheck() != B_OK)
8246 		return B_NO_MEMORY;
8247 
8248 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8249 }
8250 
8251 
8252 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8253 		   by another FD + path pair.
8254 
8255 	\a oldPath and \a newPath must always be specified (they contain at least
8256 	the name of the entry). If only a path is given, this path identifies the
8257 	entry directly. If both a FD and a path are given and the path is absolute,
8258 	the FD is ignored; a relative path is reckoned off of the directory (!)
8259 	identified by the respective FD.
8260 
8261 	\param oldFD The FD of the old location. May be < 0.
8262 	\param oldPath The absolute or relative path of the old location. Must not
8263 		   be \c NULL.
8264 	\param newFD The FD of the new location. May be < 0.
8265 	\param newPath The absolute or relative path of the new location. Must not
8266 		   be \c NULL.
8267 	\return \c B_OK, if the entry has been moved successfully, another
8268 			error code otherwise.
8269 */
8270 status_t
8271 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8272 {
8273 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8274 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8275 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8276 		return B_NO_MEMORY;
8277 
8278 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8279 		newFD, newPathBuffer.LockBuffer(), true);
8280 }
8281 
8282 
8283 status_t
8284 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8285 {
8286 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8287 	if (pathBuffer.InitCheck() != B_OK)
8288 		return B_NO_MEMORY;
8289 
8290 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8291 		true);
8292 }
8293 
8294 
8295 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8296 
8297 	If only \a fd is given, the stat operation associated with the type
8298 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8299 	given, this path identifies the entry for whose node to retrieve the
8300 	stat data. If both \a fd and \a path are given and the path is absolute,
8301 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8302 	identified by \a fd and specifies the entry whose stat data shall be
8303 	retrieved.
8304 
8305 	\param fd The FD. May be < 0.
8306 	\param path The absolute or relative path. May be \c NULL.
8307 	\param traverseLeafLink If \a path is given, \c true specifies that the
8308 		   function shall not stick to symlinks, but traverse them.
8309 	\param stat The buffer the stat data shall be written into.
8310 	\param statSize The size of the supplied stat buffer.
8311 	\return \c B_OK, if the stat data have been read successfully, another
8312 			error code otherwise.
8313 */
8314 status_t
8315 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8316 	struct stat* stat, size_t statSize)
8317 {
8318 	struct stat completeStat;
8319 	struct stat* originalStat = NULL;
8320 	status_t status;
8321 
8322 	if (statSize > sizeof(struct stat))
8323 		return B_BAD_VALUE;
8324 
8325 	// this supports different stat extensions
8326 	if (statSize < sizeof(struct stat)) {
8327 		originalStat = stat;
8328 		stat = &completeStat;
8329 	}
8330 
8331 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8332 
8333 	if (status == B_OK && originalStat != NULL)
8334 		memcpy(originalStat, stat, statSize);
8335 
8336 	return status;
8337 }
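
// Illustrative sketch of the stat-extension handling above: a caller
// compiled against an older, smaller stat layout passes its own size;
// the kernel stats into a complete struct and copies back only the
// requested prefix ("old_stat" is hypothetical):
//
//	struct old_stat oldStat;
//	_kern_read_stat(fd, NULL, false, (struct stat*)&oldStat,
//		sizeof(oldStat));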
8338 
8339 
8340 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8341 
8342 	If only \a fd is given, the stat operation associated with the type
8343 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8344 	given, this path identifies the entry for whose node to write the
8345 	stat data. If both \a fd and \a path are given and the path is absolute,
8346 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8347 	identified by \a fd and specifies the entry whose stat data shall be
8348 	written.
8349 
8350 	\param fd The FD. May be < 0.
8351 	\param path The absolute or relative path. May be \c NULL.
8352 	\param traverseLeafLink If \a path is given, \c true specifies that the
8353 		   function shall not stick to symlinks, but traverse them.
8354 	\param stat The buffer containing the stat data to be written.
8355 	\param statSize The size of the supplied stat buffer.
8356 	\param statMask A mask specifying which parts of the stat data shall be
8357 		   written.
8358 	\return \c B_OK, if the stat data have been written successfully,
8359 			another error code otherwise.
8360 */
8361 status_t
8362 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8363 	const struct stat* stat, size_t statSize, int statMask)
8364 {
8365 	struct stat completeStat;
8366 
8367 	if (statSize > sizeof(struct stat))
8368 		return B_BAD_VALUE;
8369 
8370 	// this supports different stat extensions
8371 	if (statSize < sizeof(struct stat)) {
8372 		memset((uint8*)&completeStat + statSize, 0,
8373 			sizeof(struct stat) - statSize);
8374 		memcpy(&completeStat, stat, statSize);
8375 		stat = &completeStat;
8376 	}
8377 
8378 	status_t status;
8379 
8380 	if (path) {
8381 		// path given: write the stat of the node referred to by (fd, path)
8382 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8383 		if (pathBuffer.InitCheck() != B_OK)
8384 			return B_NO_MEMORY;
8385 
8386 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8387 			traverseLeafLink, stat, statMask, true);
8388 	} else {
8389 		// no path given: get the FD and use the FD operation
8390 		struct file_descriptor* descriptor
8391 			= get_fd(get_current_io_context(true), fd);
8392 		if (descriptor == NULL)
8393 			return B_FILE_ERROR;
8394 
8395 		if (descriptor->ops->fd_write_stat)
8396 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8397 		else
8398 			status = B_UNSUPPORTED;
8399 
8400 		put_fd(descriptor);
8401 	}
8402 
8403 	return status;
8404 }
8405 
8406 
8407 int
8408 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8409 {
8410 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8411 	if (pathBuffer.InitCheck() != B_OK)
8412 		return B_NO_MEMORY;
8413 
8414 	if (path != NULL)
8415 		pathBuffer.SetTo(path);
8416 
8417 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8418 		traverseLeafLink, true);
8419 }
8420 
8421 
8422 int
8423 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8424 	int openMode)
8425 {
8426 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8427 	if (pathBuffer.InitCheck() != B_OK)
8428 		return B_NO_MEMORY;
8429 
8430 	if ((openMode & O_CREAT) != 0) {
8431 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8432 			true);
8433 	}
8434 
8435 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8436 }
8437 
8438 
8439 status_t
8440 _kern_remove_attr(int fd, const char* name)
8441 {
8442 	return attr_remove(fd, name, true);
8443 }
8444 
8445 
8446 status_t
8447 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8448 	const char* toName)
8449 {
8450 	return attr_rename(fromFile, fromName, toFile, toName, true);
8451 }
8452 
8453 
8454 int
8455 _kern_open_index_dir(dev_t device)
8456 {
8457 	return index_dir_open(device, true);
8458 }
8459 
8460 
8461 status_t
8462 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8463 {
8464 	return index_create(device, name, type, flags, true);
8465 }
8466 
8467 
8468 status_t
8469 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8470 {
8471 	return index_name_read_stat(device, name, stat, true);
8472 }
8473 
8474 
8475 status_t
8476 _kern_remove_index(dev_t device, const char* name)
8477 {
8478 	return index_remove(device, name, true);
8479 }
8480 
8481 
8482 status_t
8483 _kern_getcwd(char* buffer, size_t size)
8484 {
8485 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8486 
8487 	// Call vfs to get current working directory
8488 	return get_cwd(buffer, size, true);
8489 }
8490 
8491 
8492 status_t
8493 _kern_setcwd(int fd, const char* path)
8494 {
8495 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8496 	if (pathBuffer.InitCheck() != B_OK)
8497 		return B_NO_MEMORY;
8498 
8499 	if (path != NULL)
8500 		pathBuffer.SetTo(path);
8501 
8502 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8503 }
8504 
8505 
8506 //	#pragma mark - userland syscalls
8507 
8508 
8509 dev_t
8510 _user_mount(const char* userPath, const char* userDevice,
8511 	const char* userFileSystem, uint32 flags, const char* userArgs,
8512 	size_t argsLength)
8513 {
8514 	char fileSystem[B_FILE_NAME_LENGTH];
8515 	KPath path, device;
8516 	char* args = NULL;
8517 	status_t status;
8518 
8519 	if (!IS_USER_ADDRESS(userPath)
8520 		|| !IS_USER_ADDRESS(userFileSystem)
8521 		|| !IS_USER_ADDRESS(userDevice))
8522 		return B_BAD_ADDRESS;
8523 
8524 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8525 		return B_NO_MEMORY;
8526 
8527 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8528 		return B_BAD_ADDRESS;
8529 
8530 	if (userFileSystem != NULL
8531 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8532 		return B_BAD_ADDRESS;
8533 
8534 	if (userDevice != NULL
8535 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8536 			< B_OK)
8537 		return B_BAD_ADDRESS;
8538 
8539 	if (userArgs != NULL && argsLength > 0) {
8540 		// this is a safety restriction
8541 		if (argsLength >= 65536)
8542 			return B_NAME_TOO_LONG;
8543 
8544 		args = (char*)malloc(argsLength + 1);
8545 		if (args == NULL)
8546 			return B_NO_MEMORY;
8547 
8548 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8549 			free(args);
8550 			return B_BAD_ADDRESS;
8551 		}
8552 	}
8553 	path.UnlockBuffer();
8554 	device.UnlockBuffer();
8555 
8556 	status = fs_mount(path.LockBuffer(),
8557 		userDevice != NULL ? device.Path() : NULL,
8558 		userFileSystem ? fileSystem : NULL, flags, args, false);
8559 
8560 	free(args);
8561 	return status;
8562 }
8563 
8564 
8565 status_t
8566 _user_unmount(const char* userPath, uint32 flags)
8567 {
8568 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8569 	if (pathBuffer.InitCheck() != B_OK)
8570 		return B_NO_MEMORY;
8571 
8572 	char* path = pathBuffer.LockBuffer();
8573 
8574 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8575 		return B_BAD_ADDRESS;
8576 
8577 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8578 }
8579 
8580 
8581 status_t
8582 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8583 {
8584 	struct fs_info info;
8585 	status_t status;
8586 
8587 	if (userInfo == NULL)
8588 		return B_BAD_VALUE;
8589 
8590 	if (!IS_USER_ADDRESS(userInfo))
8591 		return B_BAD_ADDRESS;
8592 
8593 	status = fs_read_info(device, &info);
8594 	if (status != B_OK)
8595 		return status;
8596 
8597 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8598 		return B_BAD_ADDRESS;
8599 
8600 	return B_OK;
8601 }
8602 
8603 
8604 status_t
8605 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8606 {
8607 	struct fs_info info;
8608 
8609 	if (userInfo == NULL)
8610 		return B_BAD_VALUE;
8611 
8612 	if (!IS_USER_ADDRESS(userInfo)
8613 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8614 		return B_BAD_ADDRESS;
8615 
8616 	return fs_write_info(device, &info, mask);
8617 }
8618 
8619 
8620 dev_t
8621 _user_next_device(int32* _userCookie)
8622 {
8623 	int32 cookie;
8624 	dev_t device;
8625 
8626 	if (!IS_USER_ADDRESS(_userCookie)
8627 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8628 		return B_BAD_ADDRESS;
8629 
8630 	device = fs_next_device(&cookie);
8631 
8632 	if (device >= B_OK) {
8633 		// update user cookie
8634 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8635 			return B_BAD_ADDRESS;
8636 	}
8637 
8638 	return device;
8639 }
8640 
8641 
8642 status_t
8643 _user_sync(void)
8644 {
8645 	return _kern_sync();
8646 }
8647 
8648 
8649 status_t
8650 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8651 	size_t infoSize)
8652 {
8653 	struct fd_info info;
8654 	uint32 cookie;
8655 
8656 	// only root can do this (or should root's group be enough?)
8657 	if (geteuid() != 0)
8658 		return B_NOT_ALLOWED;
8659 
8660 	if (infoSize != sizeof(fd_info))
8661 		return B_BAD_VALUE;
8662 
8663 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8664 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8665 		return B_BAD_ADDRESS;
8666 
8667 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8668 	if (status != B_OK)
8669 		return status;
8670 
8671 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8672 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8673 		return B_BAD_ADDRESS;
8674 
8675 	return status;
8676 }
8677 
8678 
8679 status_t
8680 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8681 	char* userPath, size_t pathLength)
8682 {
8683 	if (!IS_USER_ADDRESS(userPath))
8684 		return B_BAD_ADDRESS;
8685 
8686 	KPath path(B_PATH_NAME_LENGTH + 1);
8687 	if (path.InitCheck() != B_OK)
8688 		return B_NO_MEMORY;
8689 
8690 	// copy the leaf name onto the stack
8691 	char stackLeaf[B_FILE_NAME_LENGTH];
8692 	if (leaf) {
8693 		if (!IS_USER_ADDRESS(leaf))
8694 			return B_BAD_ADDRESS;
8695 
8696 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8697 		if (length < 0)
8698 			return length;
8699 		if (length >= B_FILE_NAME_LENGTH)
8700 			return B_NAME_TOO_LONG;
8701 
8702 		leaf = stackLeaf;
8703 	}
8704 
8705 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8706 		path.LockBuffer(), path.BufferSize());
8707 	if (status != B_OK)
8708 		return status;
8709 
8710 	path.UnlockBuffer();
8711 
8712 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8713 	if (length < 0)
8714 		return length;
8715 	if (length >= (int)pathLength)
8716 		return B_BUFFER_OVERFLOW;
8717 
8718 	return B_OK;
8719 }
8720 
8721 
8722 status_t
8723 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8724 {
8725 	if (userPath == NULL || buffer == NULL)
8726 		return B_BAD_VALUE;
8727 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8728 		return B_BAD_ADDRESS;
8729 
8730 	// copy path from userland
8731 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8732 	if (pathBuffer.InitCheck() != B_OK)
8733 		return B_NO_MEMORY;
8734 	char* path = pathBuffer.LockBuffer();
8735 
8736 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8737 		return B_BAD_ADDRESS;
8738 
8739 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8740 		false);
8741 	if (error != B_OK)
8742 		return error;
8743 
8744 	// copy back to userland
8745 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8746 	if (len < 0)
8747 		return len;
8748 	if (len >= B_PATH_NAME_LENGTH)
8749 		return B_BUFFER_OVERFLOW;
8750 
8751 	return B_OK;
8752 }
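
// Illustrative sketch (userland view; syscall stub name assumed, exact
// symlink semantics depend on normalize_path()): "." and ".."
// components are resolved away:
//
//	char normalized[B_PATH_NAME_LENGTH];
//	_kern_normalize_path("/boot/home/../home/./Desktop", true,
//		normalized);
//	// normalized now reads "/boot/home/Desktop"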
8753 
8754 
8755 int
8756 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8757 	int openMode, int perms)
8758 {
8759 	char name[B_FILE_NAME_LENGTH];
8760 
8761 	if (userName == NULL || device < 0 || inode < 0)
8762 		return B_BAD_VALUE;
8763 	if (!IS_USER_ADDRESS(userName)
8764 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8765 		return B_BAD_ADDRESS;
8766 
8767 	if ((openMode & O_CREAT) != 0) {
8768 		return file_create_entry_ref(device, inode, name, openMode, perms,
8769 			false);
8770 	}
8771 
8772 	return file_open_entry_ref(device, inode, name, openMode, false);
8773 }
8774 
8775 
8776 int
8777 _user_open(int fd, const char* userPath, int openMode, int perms)
8778 {
8779 	KPath path(B_PATH_NAME_LENGTH + 1);
8780 	if (path.InitCheck() != B_OK)
8781 		return B_NO_MEMORY;
8782 
8783 	char* buffer = path.LockBuffer();
8784 
8785 	if (!IS_USER_ADDRESS(userPath)
8786 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8787 		return B_BAD_ADDRESS;
8788 
8789 	if ((openMode & O_CREAT) != 0)
8790 		return file_create(fd, buffer, openMode, perms, false);
8791 
8792 	return file_open(fd, buffer, openMode, false);
8793 }
8794 
8795 
8796 int
8797 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8798 {
8799 	if (userName != NULL) {
8800 		char name[B_FILE_NAME_LENGTH];
8801 
8802 		if (!IS_USER_ADDRESS(userName)
8803 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8804 			return B_BAD_ADDRESS;
8805 
8806 		return dir_open_entry_ref(device, inode, name, false);
8807 	}
8808 	return dir_open_entry_ref(device, inode, NULL, false);
8809 }
8810 
8811 
8812 int
8813 _user_open_dir(int fd, const char* userPath)
8814 {
8815 	if (userPath == NULL)
8816 		return dir_open(fd, NULL, false);
8817 
8818 	KPath path(B_PATH_NAME_LENGTH + 1);
8819 	if (path.InitCheck() != B_OK)
8820 		return B_NO_MEMORY;
8821 
8822 	char* buffer = path.LockBuffer();
8823 
8824 	if (!IS_USER_ADDRESS(userPath)
8825 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8826 		return B_BAD_ADDRESS;
8827 
8828 	return dir_open(fd, buffer, false);
8829 }
8830 
8831 
8832 /*!	\brief Opens a directory's parent directory and returns the entry name
8833 		   of the former.
8834 
8835 	Aside from the fact that it returns the directory's entry name, this
8836 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
8837 	really is equivalent if \a userName is \c NULL.
8838 
8839 	If a name buffer is supplied and the name does not fit the buffer, the
8840 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8841 
8842 	\param fd A FD referring to a directory.
8843 	\param userName Buffer the directory's entry name shall be written into.
8844 		   May be \c NULL.
8845 	\param nameLength Size of the name buffer.
8846 	\return The file descriptor of the opened parent directory, if everything
8847 			went fine, an error code otherwise.
8848 */
8849 int
8850 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8851 {
8852 	bool kernel = false;
8853 
8854 	if (userName && !IS_USER_ADDRESS(userName))
8855 		return B_BAD_ADDRESS;
8856 
8857 	// open the parent dir
8858 	int parentFD = dir_open(fd, (char*)"..", kernel);
8859 	if (parentFD < 0)
8860 		return parentFD;
8861 	FDCloser fdCloser(parentFD, kernel);
8862 
8863 	if (userName) {
8864 		// get the vnodes
8865 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8866 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8867 		VNodePutter parentVNodePutter(parentVNode);
8868 		VNodePutter dirVNodePutter(dirVNode);
8869 		if (!parentVNode || !dirVNode)
8870 			return B_FILE_ERROR;
8871 
8872 		// get the vnode name
8873 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8874 		struct dirent* buffer = (struct dirent*)_buffer;
8875 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8876 			sizeof(_buffer), get_current_io_context(false));
8877 		if (status != B_OK)
8878 			return status;
8879 
8880 		// copy the name to the userland buffer
8881 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8882 		if (len < 0)
8883 			return len;
8884 		if (len >= (int)nameLength)
8885 			return B_BUFFER_OVERFLOW;
8886 	}
8887 
8888 	return fdCloser.Detach();
8889 }
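
// Illustrative sketch (userland view; syscall stub name assumed):
// walking one level up while learning the current directory's name:
//
//	char name[B_FILE_NAME_LENGTH];
//	int parent = _kern_open_parent_dir(dirFD, name, sizeof(name));
//	if (parent >= 0) {
//		// 'name' is dirFD's entry name inside 'parent'
//		_kern_close(parent);
//	}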
8890 
8891 
8892 status_t
8893 _user_fcntl(int fd, int op, uint32 argument)
8894 {
8895 	status_t status = common_fcntl(fd, op, argument, false);
8896 	if (op == F_SETLKW)
8897 		syscall_restart_handle_post(status);
8898 
8899 	return status;
8900 }
8901 
8902 
8903 status_t
8904 _user_fsync(int fd)
8905 {
8906 	return common_sync(fd, false);
8907 }
8908 
8909 
8910 status_t
8911 _user_flock(int fd, int operation)
8912 {
8913 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8914 
8915 	// Check if the operation is valid
8916 	switch (operation & ~LOCK_NB) {
8917 		case LOCK_UN:
8918 		case LOCK_SH:
8919 		case LOCK_EX:
8920 			break;
8921 
8922 		default:
8923 			return B_BAD_VALUE;
8924 	}
8925 
8926 	struct file_descriptor* descriptor;
8927 	struct vnode* vnode;
8928 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8929 	if (descriptor == NULL)
8930 		return B_FILE_ERROR;
8931 
8932 	if (descriptor->type != FDTYPE_FILE) {
8933 		put_fd(descriptor);
8934 		return B_BAD_VALUE;
8935 	}
8936 
8937 	struct flock flock;
8938 	flock.l_start = 0;
8939 	flock.l_len = OFF_MAX;
8940 	flock.l_whence = 0;
8941 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8942 
8943 	status_t status;
8944 	if ((operation & LOCK_UN) != 0)
8945 		status = release_advisory_lock(vnode, &flock);
8946 	else {
8947 		status = acquire_advisory_lock(vnode,
8948 			thread_get_current_thread()->team->session_id, &flock,
8949 			(operation & LOCK_NB) == 0);
8950 	}
8951 
8952 	syscall_restart_handle_post(status);
8953 
8954 	put_fd(descriptor);
8955 	return status;
8956 }
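
// Illustrative sketch (userland view, assuming the POSIX flock()
// wrapper ends up in the syscall above): a non-blocking whole-file
// exclusive lock:
//
//	if (flock(fd, LOCK_EX | LOCK_NB) < 0)
//		/* lock is held by somebody else */;
//	// ... critical section ...
//	flock(fd, LOCK_UN);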
8957 
8958 
8959 status_t
8960 _user_lock_node(int fd)
8961 {
8962 	return common_lock_node(fd, false);
8963 }
8964 
8965 
8966 status_t
8967 _user_unlock_node(int fd)
8968 {
8969 	return common_unlock_node(fd, false);
8970 }
8971 
8972 
8973 status_t
8974 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8975 	int perms)
8976 {
8977 	char name[B_FILE_NAME_LENGTH];
8978 	status_t status;
8979 
8980 	if (!IS_USER_ADDRESS(userName))
8981 		return B_BAD_ADDRESS;
8982 
8983 	status = user_strlcpy(name, userName, sizeof(name));
8984 	if (status < 0)
8985 		return status;
8986 
8987 	return dir_create_entry_ref(device, inode, name, perms, false);
8988 }
8989 
8990 
8991 status_t
8992 _user_create_dir(int fd, const char* userPath, int perms)
8993 {
8994 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8995 	if (pathBuffer.InitCheck() != B_OK)
8996 		return B_NO_MEMORY;
8997 
8998 	char* path = pathBuffer.LockBuffer();
8999 
9000 	if (!IS_USER_ADDRESS(userPath)
9001 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9002 		return B_BAD_ADDRESS;
9003 
9004 	return dir_create(fd, path, perms, false);
9005 }
9006 
9007 
9008 status_t
9009 _user_remove_dir(int fd, const char* userPath)
9010 {
9011 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9012 	if (pathBuffer.InitCheck() != B_OK)
9013 		return B_NO_MEMORY;
9014 
9015 	char* path = pathBuffer.LockBuffer();
9016 
9017 	if (userPath != NULL) {
9018 		if (!IS_USER_ADDRESS(userPath)
9019 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9020 			return B_BAD_ADDRESS;
9021 	}
9022 
9023 	return dir_remove(fd, userPath ? path : NULL, false);
9024 }
9025 
9026 
9027 status_t
9028 _user_read_link(int fd, const char* userPath, char* userBuffer,
9029 	size_t* userBufferSize)
9030 {
9031 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9032 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9033 		return B_NO_MEMORY;
9034 
9035 	size_t bufferSize;
9036 
9037 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9038 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9039 		return B_BAD_ADDRESS;
9040 
9041 	char* path = pathBuffer.LockBuffer();
9042 	char* buffer = linkBuffer.LockBuffer();
9043 
9044 	if (userPath) {
9045 		if (!IS_USER_ADDRESS(userPath)
9046 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9047 			return B_BAD_ADDRESS;
9048 
9049 		if (bufferSize > B_PATH_NAME_LENGTH)
9050 			bufferSize = B_PATH_NAME_LENGTH;
9051 	}
9052 
9053 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9054 		&bufferSize, false);
9055 
9056 	// we also update the bufferSize in case of errors
9057 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9058 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9059 		return B_BAD_ADDRESS;
9060 
9061 	if (status != B_OK)
9062 		return status;
9063 
9064 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9065 		return B_BAD_ADDRESS;
9066 
9067 	return B_OK;
9068 }
9069 
9070 
9071 status_t
9072 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9073 	int mode)
9074 {
9075 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9076 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9077 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9078 		return B_NO_MEMORY;
9079 
9080 	char* path = pathBuffer.LockBuffer();
9081 	char* toPath = toPathBuffer.LockBuffer();
9082 
9083 	if (!IS_USER_ADDRESS(userPath)
9084 		|| !IS_USER_ADDRESS(userToPath)
9085 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9086 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9087 		return B_BAD_ADDRESS;
9088 
9089 	return common_create_symlink(fd, path, toPath, mode, false);
9090 }
9091 
9092 
9093 status_t
9094 _user_create_link(int pathFD, const char* userPath, int toFD,
9095 	const char* userToPath, bool traverseLeafLink)
9096 {
9097 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9098 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9099 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9100 		return B_NO_MEMORY;
9101 
9102 	char* path = pathBuffer.LockBuffer();
9103 	char* toPath = toPathBuffer.LockBuffer();
9104 
9105 	if (!IS_USER_ADDRESS(userPath)
9106 		|| !IS_USER_ADDRESS(userToPath)
9107 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9108 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9109 		return B_BAD_ADDRESS;
9110 
9111 	status_t status = check_path(toPath);
9112 	if (status != B_OK)
9113 		return status;
9114 
9115 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9116 		false);
9117 }
9118 
9119 
9120 status_t
9121 _user_unlink(int fd, const char* userPath)
9122 {
9123 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9124 	if (pathBuffer.InitCheck() != B_OK)
9125 		return B_NO_MEMORY;
9126 
9127 	char* path = pathBuffer.LockBuffer();
9128 
9129 	if (!IS_USER_ADDRESS(userPath)
9130 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9131 		return B_BAD_ADDRESS;
9132 
9133 	return common_unlink(fd, path, false);
9134 }
9135 
9136 
9137 status_t
9138 _user_rename(int oldFD, const char* userOldPath, int newFD,
9139 	const char* userNewPath)
9140 {
9141 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9142 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9143 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9144 		return B_NO_MEMORY;
9145 
9146 	char* oldPath = oldPathBuffer.LockBuffer();
9147 	char* newPath = newPathBuffer.LockBuffer();
9148 
9149 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9150 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9151 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9152 		return B_BAD_ADDRESS;
9153 
9154 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9155 }
9156 
9157 
9158 status_t
9159 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9160 {
9161 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9162 	if (pathBuffer.InitCheck() != B_OK)
9163 		return B_NO_MEMORY;
9164 
9165 	char* path = pathBuffer.LockBuffer();
9166 
9167 	if (!IS_USER_ADDRESS(userPath)
9168 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9169 		return B_BAD_ADDRESS;
9170 	}
9171 
9172 	// split into directory vnode and filename path
9173 	char filename[B_FILE_NAME_LENGTH];
9174 	struct vnode* dir;
9175 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9176 	if (status != B_OK)
9177 		return status;
9178 
9179 	VNodePutter _(dir);
9180 
9181 	// the underlying FS needs to support creating FIFOs
9182 	if (!HAS_FS_CALL(dir, create_special_node))
9183 		return B_UNSUPPORTED;
9184 
9185 	// create the entry	-- the FIFO sub node is set up automatically
9186 	fs_vnode superVnode;
9187 	ino_t nodeID;
9188 	status = FS_CALL(dir, create_special_node, filename, NULL,
9189 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9190 
9191 	// create_special_node() acquired a reference for us that we don't need.
9192 	if (status == B_OK)
9193 		put_vnode(dir->mount->volume, nodeID);
9194 
9195 	return status;
9196 }
9197 
9198 
9199 status_t
9200 _user_create_pipe(int* userFDs)
9201 {
9202 	// rootfs should support creating FIFOs, but let's be sure
9203 	if (!HAS_FS_CALL(sRoot, create_special_node))
9204 		return B_UNSUPPORTED;
9205 
9206 	// create the node	-- the FIFO sub node is set up automatically
9207 	fs_vnode superVnode;
9208 	ino_t nodeID;
9209 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9210 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9211 	if (status != B_OK)
9212 		return status;
9213 
9214 	// We've got one reference to the node and need another one.
9215 	struct vnode* vnode;
9216 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9217 	if (status != B_OK) {
9218 		// that should not happen
9219 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9220 			sRoot->mount->id, nodeID);
9221 		return status;
9222 	}
9223 
9224 	// Everything looks good so far. Open two FDs, one for reading and one
9225 	// for writing.
9226 	int fds[2];
9227 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9228 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9229 
9230 	FDCloser closer0(fds[0], false);
9231 	FDCloser closer1(fds[1], false);
9232 
9233 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9234 
9235 	// copy FDs to userland
9236 	if (status == B_OK) {
9237 		if (!IS_USER_ADDRESS(userFDs)
9238 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9239 			status = B_BAD_ADDRESS;
9240 		}
9241 	}
9242 
9243 	// keep FDs, if everything went fine
9244 	if (status == B_OK) {
9245 		closer0.Detach();
9246 		closer1.Detach();
9247 	}
9248 
9249 	return status;
9250 }
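
// Illustrative sketch (userland view, assuming the POSIX pipe() wrapper
// reaches the syscall above): fds[0] is the read end, fds[1] the write
// end, matching the O_RDONLY/O_WRONLY order set up via open_vnode():
//
//	int fds[2];
//	if (pipe(fds) == 0) {
//		write(fds[1], "x", 1);
//		char c;
//		read(fds[0], &c, 1);
//	}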
9251 
9252 
9253 status_t
9254 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9255 {
9256 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9257 	if (pathBuffer.InitCheck() != B_OK)
9258 		return B_NO_MEMORY;
9259 
9260 	char* path = pathBuffer.LockBuffer();
9261 
9262 	if (!IS_USER_ADDRESS(userPath)
9263 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9264 		return B_BAD_ADDRESS;
9265 
9266 	return common_access(fd, path, mode, effectiveUserGroup, false);
9267 }
9268 
9269 
9270 status_t
9271 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9272 	struct stat* userStat, size_t statSize)
9273 {
9274 	struct stat stat;
9275 	status_t status;
9276 
9277 	if (statSize > sizeof(struct stat))
9278 		return B_BAD_VALUE;
9279 
9280 	if (!IS_USER_ADDRESS(userStat))
9281 		return B_BAD_ADDRESS;
9282 
9283 	if (userPath) {
9284 		// path given: get the stat of the node referred to by (fd, path)
9285 		if (!IS_USER_ADDRESS(userPath))
9286 			return B_BAD_ADDRESS;
9287 
9288 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9289 		if (pathBuffer.InitCheck() != B_OK)
9290 			return B_NO_MEMORY;
9291 
9292 		char* path = pathBuffer.LockBuffer();
9293 
9294 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9295 		if (length < B_OK)
9296 			return length;
9297 		if (length >= B_PATH_NAME_LENGTH)
9298 			return B_NAME_TOO_LONG;
9299 
9300 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9301 	} else {
9302 		// no path given: get the FD and use the FD operation
9303 		struct file_descriptor* descriptor
9304 			= get_fd(get_current_io_context(false), fd);
9305 		if (descriptor == NULL)
9306 			return B_FILE_ERROR;
9307 
9308 		if (descriptor->ops->fd_read_stat)
9309 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9310 		else
9311 			status = B_UNSUPPORTED;
9312 
9313 		put_fd(descriptor);
9314 	}
9315 
9316 	if (status != B_OK)
9317 		return status;
9318 
9319 	return user_memcpy(userStat, &stat, statSize);
9320 }
9321 
9322 
9323 status_t
9324 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9325 	const struct stat* userStat, size_t statSize, int statMask)
9326 {
9327 	if (statSize > sizeof(struct stat))
9328 		return B_BAD_VALUE;
9329 
9330 	struct stat stat;
9331 
9332 	if (!IS_USER_ADDRESS(userStat)
9333 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9334 		return B_BAD_ADDRESS;
9335 
9336 	// clear additional stat fields
9337 	if (statSize < sizeof(struct stat))
9338 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9339 
9340 	status_t status;
9341 
9342 	if (userPath) {
9343 		// path given: write the stat of the node referred to by (fd, path)
9344 		if (!IS_USER_ADDRESS(userPath))
9345 			return B_BAD_ADDRESS;
9346 
9347 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9348 		if (pathBuffer.InitCheck() != B_OK)
9349 			return B_NO_MEMORY;
9350 
9351 		char* path = pathBuffer.LockBuffer();
9352 
9353 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9354 		if (length < B_OK)
9355 			return length;
9356 		if (length >= B_PATH_NAME_LENGTH)
9357 			return B_NAME_TOO_LONG;
9358 
9359 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9360 			statMask, false);
9361 	} else {
9362 		// no path given: get the FD and use the FD operation
9363 		struct file_descriptor* descriptor
9364 			= get_fd(get_current_io_context(false), fd);
9365 		if (descriptor == NULL)
9366 			return B_FILE_ERROR;
9367 
9368 		if (descriptor->ops->fd_write_stat) {
9369 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9370 				statMask);
9371 		} else
9372 			status = B_UNSUPPORTED;
9373 
9374 		put_fd(descriptor);
9375 	}
9376 
9377 	return status;
9378 }
9379 
9380 
9381 int
9382 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9383 {
9384 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9385 	if (pathBuffer.InitCheck() != B_OK)
9386 		return B_NO_MEMORY;
9387 
9388 	char* path = pathBuffer.LockBuffer();
9389 
9390 	if (userPath != NULL) {
9391 		if (!IS_USER_ADDRESS(userPath)
9392 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9393 			return B_BAD_ADDRESS;
9394 	}
9395 
9396 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9397 }
9398 
9399 
9400 ssize_t
9401 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9402 	size_t readBytes)
9403 {
9404 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9405 	if (attr < 0)
9406 		return attr;
9407 
9408 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9409 	_user_close(attr);
9410 
9411 	return bytes;
9412 }
9413 
9414 
9415 ssize_t
9416 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9417 	const void* buffer, size_t writeBytes)
9418 {
9419 	// Try to support the BeOS-typical truncation as well as the position
9420 	// argument
9421 	int attr = attr_create(fd, NULL, attribute, type,
9422 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9423 	if (attr < 0)
9424 		return attr;
9425 
9426 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9427 	_user_close(attr);
9428 
9429 	return bytes;
9430 }
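
// Illustrative sketch of the truncation rule above (userland wrapper
// assumed; attribute name and type are examples only): writing at
// position 0 replaces the attribute, writing at a non-zero position
// keeps existing contents:
//
//	fs_write_attr(fd, "my:note", B_RAW_TYPE, 0, data, size);
//		// O_TRUNC: old contents discarded
//	fs_write_attr(fd, "my:note", B_RAW_TYPE, 16, more, moreSize);
//		// no truncation, writes at offset 16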
9431 
9432 
9433 status_t
9434 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9435 {
9436 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9437 	if (attr < 0)
9438 		return attr;
9439 
9440 	struct file_descriptor* descriptor
9441 		= get_fd(get_current_io_context(false), attr);
9442 	if (descriptor == NULL) {
9443 		_user_close(attr);
9444 		return B_FILE_ERROR;
9445 	}
9446 
9447 	struct stat stat;
9448 	status_t status;
9449 	if (descriptor->ops->fd_read_stat)
9450 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9451 	else
9452 		status = B_UNSUPPORTED;
9453 
9454 	put_fd(descriptor);
9455 	_user_close(attr);
9456 
9457 	if (status == B_OK) {
9458 		attr_info info;
9459 		info.type = stat.st_type;
9460 		info.size = stat.st_size;
9461 
9462 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9463 			return B_BAD_ADDRESS;
9464 	}
9465 
9466 	return status;
9467 }
9468 
9469 
9470 int
9471 _user_open_attr(int fd, const char* userPath, const char* userName,
9472 	uint32 type, int openMode)
9473 {
9474 	char name[B_FILE_NAME_LENGTH];
9475 
9476 	if (!IS_USER_ADDRESS(userName)
9477 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9478 		return B_BAD_ADDRESS;
9479 
9480 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9481 	if (pathBuffer.InitCheck() != B_OK)
9482 		return B_NO_MEMORY;
9483 
9484 	char* path = pathBuffer.LockBuffer();
9485 
9486 	if (userPath != NULL) {
9487 		if (!IS_USER_ADDRESS(userPath)
9488 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9489 			return B_BAD_ADDRESS;
9490 	}
9491 
9492 	if ((openMode & O_CREAT) != 0) {
9493 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9494 			false);
9495 	}
9496 
9497 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9498 }
9499 
9500 
9501 status_t
9502 _user_remove_attr(int fd, const char* userName)
9503 {
9504 	char name[B_FILE_NAME_LENGTH];
9505 
9506 	if (!IS_USER_ADDRESS(userName)
9507 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9508 		return B_BAD_ADDRESS;
9509 
9510 	return attr_remove(fd, name, false);
9511 }
9512 
9513 
9514 status_t
9515 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9516 	const char* userToName)
9517 {
9518 	if (!IS_USER_ADDRESS(userFromName)
9519 		|| !IS_USER_ADDRESS(userToName))
9520 		return B_BAD_ADDRESS;
9521 
9522 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9523 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9524 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9525 		return B_NO_MEMORY;
9526 
9527 	char* fromName = fromNameBuffer.LockBuffer();
9528 	char* toName = toNameBuffer.LockBuffer();
9529 
9530 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9531 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9532 		return B_BAD_ADDRESS;
9533 
9534 	return attr_rename(fromFile, fromName, toFile, toName, false);
9535 }
9536 
9537 
9538 int
9539 _user_open_index_dir(dev_t device)
9540 {
9541 	return index_dir_open(device, false);
9542 }
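
/*!	A sketch of listing a volume's indices through the public API, assuming
	fs_open_index_dir() and friends from fs_index.h end up in the syscall
	above:

	\code
	#include <dirent.h>
	#include <fs_index.h>
	#include <fs_info.h>
	#include <stdio.h>

	static void
	list_indices(const char* volumePath)
	{
		// dev_for_path() (fs_info.h) resolves a path to its volume ID
		dev_t device = dev_for_path(volumePath);
		if (device < 0)
			return;

		DIR* dir = fs_open_index_dir(device);
		if (dir == NULL)
			return;

		while (struct dirent* entry = fs_read_index_dir(dir))
			printf("index: %s\n", entry->d_name);

		fs_close_index_dir(dir);
	}
	\endcode
*/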
9543 
9544 
9545 status_t
9546 _user_create_index(dev_t device, const char* userName, uint32 type,
9547 	uint32 flags)
9548 {
9549 	char name[B_FILE_NAME_LENGTH];
9550 
9551 	if (!IS_USER_ADDRESS(userName)
9552 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9553 		return B_BAD_ADDRESS;
9554 
9555 	return index_create(device, name, type, flags, false);
9556 }
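
/*!	A minimal creation example, assuming fs_create_index() from fs_index.h is
	the userland entry to the syscall above. Once an attribute is indexed,
	queries on the volume can match files by it:

	\code
	#include <fs_index.h>
	#include <TypeConstants.h>

	static int
	create_string_index(dev_t device, const char* name)
	{
		// the flags argument is reserved; 0 is what callers pass in practice
		return fs_create_index(device, name, B_STRING_TYPE, 0);
	}
	\endcode
*/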
9557 
9558 
9559 status_t
9560 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9561 {
9562 	char name[B_FILE_NAME_LENGTH];
9563 	struct stat stat;
9564 	status_t status;
9565 
9566 	if (!IS_USER_ADDRESS(userName)
9567 		|| !IS_USER_ADDRESS(userStat)
9568 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9569 		return B_BAD_ADDRESS;
9570 
9571 	status = index_name_read_stat(device, name, &stat, false);
9572 	if (status == B_OK) {
9573 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9574 			return B_BAD_ADDRESS;
9575 	}
9576 
9577 	return status;
9578 }
9579 
9580 
9581 status_t
9582 _user_remove_index(dev_t device, const char* userName)
9583 {
9584 	char name[B_FILE_NAME_LENGTH];
9585 
9586 	if (!IS_USER_ADDRESS(userName)
9587 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9588 		return B_BAD_ADDRESS;
9589 
9590 	return index_remove(device, name, false);
9591 }
9592 
9593 
9594 status_t
9595 _user_getcwd(char* userBuffer, size_t size)
9596 {
9597 	if (size == 0)
9598 		return B_BAD_VALUE;
9599 	if (!IS_USER_ADDRESS(userBuffer))
9600 		return B_BAD_ADDRESS;
9601 
9602 	if (size > kMaxPathLength)
9603 		size = kMaxPathLength;
9604 
9605 	KPath pathBuffer(size);
9606 	if (pathBuffer.InitCheck() != B_OK)
9607 		return B_NO_MEMORY;
9608 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9610 
9611 	char* path = pathBuffer.LockBuffer();
9612 
9613 	status_t status = get_cwd(path, size, false);
9614 	if (status != B_OK)
9615 		return status;
9616 
9617 	// Copy back the result
9618 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9619 		return B_BAD_ADDRESS;
9620 
9621 	return status;
9622 }
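
/*!	Since the kernel caps the buffer at kMaxPathLength, a caller whose buffer
	is too small simply gets an error and can retry with a larger one. A plain
	POSIX sketch of the usual retry loop (no Haiku-specific calls involved):

	\code
	#include <errno.h>
	#include <stdlib.h>
	#include <unistd.h>

	static char*
	current_directory(void)
	{
		for (size_t size = 256; ; size *= 2) {
			char* buffer = (char*)malloc(size);
			if (buffer == NULL)
				return NULL;
			if (getcwd(buffer, size) != NULL)
				return buffer;

			free(buffer);
			// ERANGE means the buffer was too small -- grow and retry
			if (errno != ERANGE)
				return NULL;
		}
	}
	\endcode
*/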
9623 
9624 
9625 status_t
9626 _user_setcwd(int fd, const char* userPath)
9627 {
9628 	TRACE(("user_setcwd: path = %p\n", userPath));
9629 
9630 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9631 	if (pathBuffer.InitCheck() != B_OK)
9632 		return B_NO_MEMORY;
9633 
9634 	char* path = pathBuffer.LockBuffer();
9635 
9636 	if (userPath != NULL) {
9637 		if (!IS_USER_ADDRESS(userPath)
9638 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9639 			return B_BAD_ADDRESS;
9640 	}
9641 
9642 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9643 }
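
/*!	The fd/path split above lets a single syscall back both chdir() and
	fchdir(). Presumably the libroot wrappers look roughly like this (the
	error-code-to-errno mapping is omitted, and the names are only for
	illustration):

	\code
	#include <syscalls.h>

	int
	my_chdir(const char* path)
	{
		// resolve the path and make it the new working directory
		return _kern_setcwd(-1, path);
	}

	int
	my_fchdir(int fd)
	{
		// NULL path: use the directory the descriptor refers to
		return _kern_setcwd(fd, NULL);
	}
	\endcode
*/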
9644 
9645 
status_t
_user_change_root(const char* userPath)
{
	// only root is allowed to chroot()
	if (geteuid() != 0)
		return B_NOT_ALLOWED;

	// allocate path buffer
	KPath pathBuffer(B_PATH_NAME_LENGTH);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// copy userland path to kernel -- unlike in _user_setcwd(), a NULL path
	// cannot mean anything useful here, so it is rejected like any other
	// invalid address
	char* path = pathBuffer.LockBuffer();
	if (!IS_USER_ADDRESS(userPath)
		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;
9665 
9666 	// get the vnode
9667 	struct vnode* vnode;
9668 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9669 	if (status != B_OK)
9670 		return status;
9671 
9672 	// set the new root
9673 	struct io_context* context = get_current_io_context(false);
9674 	mutex_lock(&sIOContextRootLock);
9675 	struct vnode* oldRoot = context->root;
9676 	context->root = vnode;
9677 	mutex_unlock(&sIOContextRootLock);
9678 
9679 	put_vnode(oldRoot);
9680 
9681 	return B_OK;
9682 }
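
/*!	Only the IO context's root is swapped above; the current working directory
	is left untouched. A process that does not also chdir() afterwards can
	therefore still reach files outside the new root through ".", which is why
	the classic usage pattern pairs the two calls:

	\code
	#include <unistd.h>

	static int
	enter_jail(const char* newRoot)
	{
		// requires euid 0, mirroring the geteuid() check in the syscall
		if (chroot(newRoot) != 0)
			return -1;

		// otherwise "." may still point outside the new root
		return chdir("/");
	}
	\endcode
*/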
9683 
9684 
9685 int
9686 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9687 	uint32 flags, port_id port, int32 token)
9688 {
9689 	char* query;
9690 
9691 	if (device < 0 || userQuery == NULL || queryLength == 0)
9692 		return B_BAD_VALUE;
9693 
	// safety restriction: reject absurdly long query strings
9695 	if (queryLength >= 65536)
9696 		return B_NAME_TOO_LONG;
9697 
9698 	query = (char*)malloc(queryLength + 1);
9699 	if (query == NULL)
9700 		return B_NO_MEMORY;
9701 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9702 		free(query);
9703 		return B_BAD_ADDRESS;
9704 	}
9705 
9706 	int fd = query_open(device, query, flags, port, token, false);
9707 
9708 	free(query);
9709 	return fd;
9710 }
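
/*!	A sketch of issuing a query through the public API, assuming
	fs_open_query() from fs_query.h funnels into the syscall above; the
	port/token pair is only needed for live queries (fs_open_live_query()):

	\code
	#include <dirent.h>
	#include <fs_query.h>
	#include <stdio.h>

	static void
	find_large_files(dev_t device)
	{
		// query language example: all entries with size above 1 MiB
		// (requires the "size" attribute to be indexed, as it is on BFS)
		DIR* query = fs_open_query(device, "size>1048576", 0);
		if (query == NULL)
			return;

		while (struct dirent* entry = fs_read_query(query))
			printf("match: %s\n", entry->d_name);

		fs_close_query(query);
	}
	\endcode
*/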
9711 
9712 
9713 #include "vfs_request_io.cpp"
9714