xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 1b6bc2675fe3691538c8764ab016593f3b06ca53)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "FDPath.h"
59 #include "fifo.h"
60 #include "IORequest.h"
61 #include "unused_vnodes.h"
62 #include "vfs_tracing.h"
63 #include "Vnode.h"
64 #include "../cache/vnode_store.h"
65 
66 
67 //#define TRACE_VFS
68 #ifdef TRACE_VFS
69 #	define TRACE(x) dprintf x
70 #	define FUNCTION(x) dprintf x
71 #else
72 #	define TRACE(x) ;
73 #	define FUNCTION(x) ;
74 #endif
75 
76 #define ADD_DEBUGGER_COMMANDS
77 
78 
79 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
80 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
81 
82 #if KDEBUG
83 #	define FS_CALL(vnode, op, params...) \
84 		( HAS_FS_CALL(vnode, op) ? \
85 			vnode->ops->op(vnode->mount->volume, vnode, params) \
86 			: (panic("FS_CALL op " #op " is NULL"), 0))
87 #	define FS_CALL_NO_PARAMS(vnode, op) \
88 		( HAS_FS_CALL(vnode, op) ? \
89 			vnode->ops->op(vnode->mount->volume, vnode) \
90 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
91 #	define FS_MOUNT_CALL(mount, op, params...) \
92 		( HAS_FS_MOUNT_CALL(mount, op) ? \
93 			mount->volume->ops->op(mount->volume, params) \
94 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
95 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
96 		( HAS_FS_MOUNT_CALL(mount, op) ? \
97 			mount->volume->ops->op(mount->volume) \
98 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
99 #else
100 #	define FS_CALL(vnode, op, params...) \
101 			vnode->ops->op(vnode->mount->volume, vnode, params)
102 #	define FS_CALL_NO_PARAMS(vnode, op) \
103 			vnode->ops->op(vnode->mount->volume, vnode)
104 #	define FS_MOUNT_CALL(mount, op, params...) \
105 			mount->volume->ops->op(mount->volume, params)
106 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
107 			mount->volume->ops->op(mount->volume)
108 #endif
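
// Editor's example (illustrative sketch, not part of the original file):
// the macros above dispatch into the file system's op table, passing the
// volume and vnode along. A read through a vnode is dispatched like this,
// assuming vnode, cookie, pos, and buffer are valid; under KDEBUG a
// missing hook panics instead of calling through NULL:
#if 0
	size_t length = bufferSize;
	status_t status = B_UNSUPPORTED;
	if (HAS_FS_CALL(vnode, read))
		status = FS_CALL(vnode, read, cookie, pos, buffer, &length);
#endif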
109 
110 
111 const static size_t kMaxPathLength = 65536;
112 	// The absolute maximum path length (for getcwd(); this does not depend
113 	// on PATH_MAX)
114 
115 
116 struct vnode_hash_key {
117 	dev_t	device;
118 	ino_t	vnode;
119 };
120 
121 typedef DoublyLinkedList<vnode> VnodeList;
122 
123 /*!	\brief Structure to manage a mounted file system
124 
125 	Note: The root_vnode and root_vnode->covers fields (what others?) are
126 	initialized in fs_mount() and not changed afterwards. That is, as soon
127 	as the mount is mounted and it is ensured that it won't be unmounted
128 	(e.g. by holding a reference to a vnode of that mount), (read) access
129 	to those fields is always safe, even without additional locking. Moreover,
130 	while mounted the mount holds a reference to the root_vnode->covers vnode,
131 	thus making the access path vnode->mount->root_vnode->covers->mount->...
132 	safe if a reference to vnode is held (note that for the root mount
133 	root_vnode->covers is NULL, though).
134 */
135 struct fs_mount {
136 	fs_mount()
137 		:
138 		volume(NULL),
139 		device_name(NULL)
140 	{
141 		recursive_lock_init(&rlock, "mount rlock");
142 	}
143 
144 	~fs_mount()
145 	{
146 		recursive_lock_destroy(&rlock);
147 		free(device_name);
148 
149 		while (volume) {
150 			fs_volume* superVolume = volume->super_volume;
151 
152 			if (volume->file_system != NULL)
153 				put_module(volume->file_system->info.name);
154 
155 			free(volume->file_system_name);
156 			free(volume);
157 			volume = superVolume;
158 		}
159 	}
160 
161 	struct fs_mount* next;
162 	dev_t			id;
163 	fs_volume*		volume;
164 	char*			device_name;
165 	recursive_lock	rlock;	// guards the vnodes list
166 		// TODO: Make this a mutex! It is never used recursively.
167 	struct vnode*	root_vnode;
168 	struct vnode*	covers_vnode;	// immutable
169 	KPartition*		partition;
170 	VnodeList		vnodes;
171 	EntryCache		entry_cache;
172 	bool			unmounting;
173 	bool			owns_file_device;
174 };
175 
176 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
177 	list_link		link;
178 	team_id			team;
179 	pid_t			session;
180 	off_t			start;
181 	off_t			end;
182 	bool			shared;
183 };
184 
185 typedef DoublyLinkedList<advisory_lock> LockList;
186 
187 struct advisory_locking {
188 	sem_id			lock;
189 	sem_id			wait_sem;
190 	LockList		locks;
191 
192 	advisory_locking()
193 		:
194 		lock(-1),
195 		wait_sem(-1)
196 	{
197 	}
198 
199 	~advisory_locking()
200 	{
201 		if (lock >= 0)
202 			delete_sem(lock);
203 		if (wait_sem >= 0)
204 			delete_sem(wait_sem);
205 	}
206 };
207 
208 /*!	\brief Guards sMountsTable.
209 
210 	The holder is allowed read/write access to sMountsTable.
211 	Manipulation of the fs_mount structures themselves
212 	(and their destruction) requires different locks though.
213 */
214 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
215 
216 /*!	\brief Guards mount/unmount operations.
217 
218 	fs_mount() and fs_unmount() hold the lock during their whole operation.
219 	That is, holding the lock ensures that no FS is mounted/unmounted. In
220 	particular this means that
221 	- sMountsTable will not be modified,
222 	- the fields immutable after initialization of the fs_mount structures in
223 	  sMountsTable will not be modified.
224 
225 	The thread trying to lock the lock must not hold sVnodeLock or
226 	sMountMutex.
227 */
228 static recursive_lock sMountOpLock;
229 
230 /*!	\brief Guards sVnodeTable.
231 
232 	The holder is allowed read/write access to sVnodeTable and to
233 	any unbusy vnode in that table, save for the immutable fields (device,
234 	id, private_node, mount), to which only read-only access is allowed.
235 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
236 	well as the busy, removed, and unused flags, and the vnode's type can also
237 	be write accessed when holding a read lock to sVnodeLock *and* having the
238 	vnode locked. Write access to covered_by and covers requires write locking
239 	sVnodeLock.
240 
241 	The thread trying to acquire the lock must not hold sMountMutex.
242 	You must not hold this lock when calling create_sem(), as this might call
243 	vfs_free_unused_vnodes() and thus cause a deadlock.
244 */
245 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
246 
247 /*!	\brief Guards io_context::root.
248 
249 	Must be held when setting or getting the io_context::root field.
250 	The only operation allowed while holding this lock besides getting or
251 	setting the field is inc_vnode_ref_count() on io_context::root.
252 */
253 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
254 
255 
256 #define VNODE_HASH_TABLE_SIZE 1024
257 static hash_table* sVnodeTable;
258 static struct vnode* sRoot;
259 
260 #define MOUNTS_HASH_TABLE_SIZE 16
261 static hash_table* sMountsTable;
262 static dev_t sNextMountID = 1;
263 
264 #define MAX_TEMP_IO_VECS 8
265 
266 mode_t __gUmask = 022;
267 
268 /* function declarations */
269 
270 static void free_unused_vnodes();
271 
272 // file descriptor operation prototypes
273 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
274 	void* buffer, size_t* _bytes);
275 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
276 	const void* buffer, size_t* _bytes);
277 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
278 	int seekType);
279 static void file_free_fd(struct file_descriptor* descriptor);
280 static status_t file_close(struct file_descriptor* descriptor);
281 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
282 	struct selectsync* sync);
283 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
284 	struct selectsync* sync);
285 static status_t dir_read(struct io_context* context,
286 	struct file_descriptor* descriptor, struct dirent* buffer,
287 	size_t bufferSize, uint32* _count);
288 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
289 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
290 static status_t dir_rewind(struct file_descriptor* descriptor);
291 static void dir_free_fd(struct file_descriptor* descriptor);
292 static status_t dir_close(struct file_descriptor* descriptor);
293 static status_t attr_dir_read(struct io_context* context,
294 	struct file_descriptor* descriptor, struct dirent* buffer,
295 	size_t bufferSize, uint32* _count);
296 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
297 static void attr_dir_free_fd(struct file_descriptor* descriptor);
298 static status_t attr_dir_close(struct file_descriptor* descriptor);
299 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
300 	void* buffer, size_t* _bytes);
301 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
302 	const void* buffer, size_t* _bytes);
303 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
304 	int seekType);
305 static void attr_free_fd(struct file_descriptor* descriptor);
306 static status_t attr_close(struct file_descriptor* descriptor);
307 static status_t attr_read_stat(struct file_descriptor* descriptor,
308 	struct stat* statData);
309 static status_t attr_write_stat(struct file_descriptor* descriptor,
310 	const struct stat* stat, int statMask);
311 static status_t index_dir_read(struct io_context* context,
312 	struct file_descriptor* descriptor, struct dirent* buffer,
313 	size_t bufferSize, uint32* _count);
314 static status_t index_dir_rewind(struct file_descriptor* descriptor);
315 static void index_dir_free_fd(struct file_descriptor* descriptor);
316 static status_t index_dir_close(struct file_descriptor* descriptor);
317 static status_t query_read(struct io_context* context,
318 	struct file_descriptor* descriptor, struct dirent* buffer,
319 	size_t bufferSize, uint32* _count);
320 static status_t query_rewind(struct file_descriptor* descriptor);
321 static void query_free_fd(struct file_descriptor* descriptor);
322 static status_t query_close(struct file_descriptor* descriptor);
323 
324 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
325 	void* buffer, size_t length);
326 static status_t common_read_stat(struct file_descriptor* descriptor,
327 	struct stat* statData);
328 static status_t common_write_stat(struct file_descriptor* descriptor,
329 	const struct stat* statData, int statMask);
330 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
331 	struct stat* stat, bool kernel);
332 
333 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
334 	bool traverseLeafLink, int count, bool kernel,
335 	struct vnode** _vnode, ino_t* _parentID);
336 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
337 	size_t bufferSize, bool kernel);
338 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
339 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
340 static void inc_vnode_ref_count(struct vnode* vnode);
341 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
342 	bool reenter);
343 static inline void put_vnode(struct vnode* vnode);
344 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
345 	bool kernel);
346 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
347 
348 
349 static struct fd_ops sFileOps = {
350 	file_read,
351 	file_write,
352 	file_seek,
353 	common_ioctl,
354 	NULL,		// set_flags
355 	file_select,
356 	file_deselect,
357 	NULL,		// read_dir()
358 	NULL,		// rewind_dir()
359 	common_read_stat,
360 	common_write_stat,
361 	file_close,
362 	file_free_fd
363 };
364 
365 static struct fd_ops sDirectoryOps = {
366 	NULL,		// read()
367 	NULL,		// write()
368 	NULL,		// seek()
369 	common_ioctl,
370 	NULL,		// set_flags
371 	NULL,		// select()
372 	NULL,		// deselect()
373 	dir_read,
374 	dir_rewind,
375 	common_read_stat,
376 	common_write_stat,
377 	dir_close,
378 	dir_free_fd
379 };
380 
381 static struct fd_ops sAttributeDirectoryOps = {
382 	NULL,		// read()
383 	NULL,		// write()
384 	NULL,		// seek()
385 	common_ioctl,
386 	NULL,		// set_flags
387 	NULL,		// select()
388 	NULL,		// deselect()
389 	attr_dir_read,
390 	attr_dir_rewind,
391 	common_read_stat,
392 	common_write_stat,
393 	attr_dir_close,
394 	attr_dir_free_fd
395 };
396 
397 static struct fd_ops sAttributeOps = {
398 	attr_read,
399 	attr_write,
400 	attr_seek,
401 	common_ioctl,
402 	NULL,		// set_flags
403 	NULL,		// select()
404 	NULL,		// deselect()
405 	NULL,		// read_dir()
406 	NULL,		// rewind_dir()
407 	attr_read_stat,
408 	attr_write_stat,
409 	attr_close,
410 	attr_free_fd
411 };
412 
413 static struct fd_ops sIndexDirectoryOps = {
414 	NULL,		// read()
415 	NULL,		// write()
416 	NULL,		// seek()
417 	NULL,		// ioctl()
418 	NULL,		// set_flags
419 	NULL,		// select()
420 	NULL,		// deselect()
421 	index_dir_read,
422 	index_dir_rewind,
423 	NULL,		// read_stat()
424 	NULL,		// write_stat()
425 	index_dir_close,
426 	index_dir_free_fd
427 };
428 
429 #if 0
430 static struct fd_ops sIndexOps = {
431 	NULL,		// read()
432 	NULL,		// write()
433 	NULL,		// seek()
434 	NULL,		// ioctl()
435 	NULL,		// set_flags
436 	NULL,		// select()
437 	NULL,		// deselect()
438 	NULL,		// dir_read()
439 	NULL,		// dir_rewind()
440 	index_read_stat,	// read_stat()
441 	NULL,		// write_stat()
442 	NULL,		// dir_close()
443 	NULL		// free_fd()
444 };
445 #endif
446 
447 static struct fd_ops sQueryOps = {
448 	NULL,		// read()
449 	NULL,		// write()
450 	NULL,		// seek()
451 	NULL,		// ioctl()
452 	NULL,		// set_flags
453 	NULL,		// select()
454 	NULL,		// deselect()
455 	query_read,
456 	query_rewind,
457 	NULL,		// read_stat()
458 	NULL,		// write_stat()
459 	query_close,
460 	query_free_fd
461 };
462 
463 
464 // VNodePutter
465 class VNodePutter {
466 public:
467 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
468 
469 	~VNodePutter()
470 	{
471 		Put();
472 	}
473 
474 	void SetTo(struct vnode* vnode)
475 	{
476 		Put();
477 		fVNode = vnode;
478 	}
479 
480 	void Put()
481 	{
482 		if (fVNode) {
483 			put_vnode(fVNode);
484 			fVNode = NULL;
485 		}
486 	}
487 
488 	struct vnode* Detach()
489 	{
490 		struct vnode* vnode = fVNode;
491 		fVNode = NULL;
492 		return vnode;
493 	}
494 
495 private:
496 	struct vnode* fVNode;
497 };
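
// Editor's example (illustrative sketch, not part of the original file):
// VNodePutter ties a vnode reference to a scope, so early error returns
// cannot leak the reference. Assuming a valid mountID/vnodeID pair:
#if 0
	struct vnode* vnode;
	status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);
		// calls put_vnode(vnode) when leaving the scope, also on error

	struct stat stat;
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;
	return FS_CALL(vnode, read_stat, &stat);
#endif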
498 
499 
500 class FDCloser {
501 public:
502 	FDCloser() : fFD(-1), fKernel(true) {}
503 
504 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
505 
506 	~FDCloser()
507 	{
508 		Close();
509 	}
510 
511 	void SetTo(int fd, bool kernel)
512 	{
513 		Close();
514 		fFD = fd;
515 		fKernel = kernel;
516 	}
517 
518 	void Close()
519 	{
520 		if (fFD >= 0) {
521 			if (fKernel)
522 				_kern_close(fFD);
523 			else
524 				_user_close(fFD);
525 			fFD = -1;
526 		}
527 	}
528 
529 	int Detach()
530 	{
531 		int fd = fFD;
532 		fFD = -1;
533 		return fd;
534 	}
535 
536 private:
537 	int		fFD;
538 	bool	fKernel;
539 };
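
// Editor's example (illustrative sketch, not part of the original file):
// FDCloser guards a half-initialized descriptor; Detach() keeps it open on
// the success path. some_setup_step() is a hypothetical placeholder:
#if 0
	FDCloser fdCloser(fd, kernel);
	status_t status = some_setup_step();
	if (status != B_OK)
		return status;
			// fdCloser closes fd via _kern_close()/_user_close() here
	return fdCloser.Detach();
		// success -- the caller takes over the descriptor
#endif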
540 
541 
542 #if VFS_PAGES_IO_TRACING
543 
544 namespace VFSPagesIOTracing {
545 
546 class PagesIOTraceEntry : public AbstractTraceEntry {
547 protected:
548 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
549 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
550 		status_t status, generic_size_t bytesTransferred)
551 		:
552 		fVnode(vnode),
553 		fMountID(vnode->mount->id),
554 		fNodeID(vnode->id),
555 		fCookie(cookie),
556 		fPos(pos),
557 		fCount(count),
558 		fFlags(flags),
559 		fBytesRequested(bytesRequested),
560 		fStatus(status),
561 		fBytesTransferred(bytesTransferred)
562 	{
563 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, sizeof(generic_io_vec) * count,
564 			false);
565 	}
566 
567 	void AddDump(TraceOutput& out, const char* mode)
568 	{
569 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
570 			"pos: %lld, size: %llu, vecs: {", mode, fVnode, fMountID, fNodeID,
571 			fCookie, fPos, (uint64)fBytesRequested);
572 
573 		if (fVecs != NULL) {
574 			for (uint32 i = 0; i < fCount; i++) {
575 				if (i > 0)
576 					out.Print(", ");
577 				out.Print("(%llx, %llu)", (uint64)fVecs[i].base, (uint64)fVecs[i].length);
578 			}
579 		}
580 
581 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %llu",
582 			fFlags, fStatus, (uint64)fBytesTransferred);
583 	}
584 
585 protected:
586 	struct vnode*	fVnode;
587 	dev_t			fMountID;
588 	ino_t			fNodeID;
589 	void*			fCookie;
590 	off_t			fPos;
591 	generic_io_vec*		fVecs;
592 	uint32			fCount;
593 	uint32			fFlags;
594 	generic_size_t			fBytesRequested;
595 	status_t		fStatus;
596 	generic_size_t			fBytesTransferred;
597 };
598 
599 
600 class ReadPages : public PagesIOTraceEntry {
601 public:
602 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
603 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
604 		status_t status, generic_size_t bytesTransferred)
605 		:
606 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
607 			bytesRequested, status, bytesTransferred)
608 	{
609 		Initialized();
610 	}
611 
612 	virtual void AddDump(TraceOutput& out)
613 	{
614 		PagesIOTraceEntry::AddDump(out, "read");
615 	}
616 };
617 
618 
619 class WritePages : public PagesIOTraceEntry {
620 public:
621 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
622 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
623 		status_t status, generic_size_t bytesTransferred)
624 		:
625 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
626 			bytesRequested, status, bytesTransferred)
627 	{
628 		Initialized();
629 	}
630 
631 	virtual void AddDump(TraceOutput& out)
632 	{
633 		PagesIOTraceEntry::AddDump(out, "write");
634 	}
635 };
636 
637 }	// namespace VFSPagesIOTracing
638 
639 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
640 #else
641 #	define TPIO(x) ;
642 #endif	// VFS_PAGES_IO_TRACING
643 
644 
645 static int
646 mount_compare(void* _m, const void* _key)
647 {
648 	struct fs_mount* mount = (fs_mount*)_m;
649 	const dev_t* id = (dev_t*)_key;
650 
651 	if (mount->id == *id)
652 		return 0;
653 
654 	return -1;
655 }
656 
657 
658 static uint32
659 mount_hash(void* _m, const void* _key, uint32 range)
660 {
661 	struct fs_mount* mount = (fs_mount*)_m;
662 	const dev_t* id = (dev_t*)_key;
663 
664 	if (mount)
665 		return mount->id % range;
666 
667 	return (uint32)*id % range;
668 }
669 
670 
671 /*! Finds the mounted device (the fs_mount structure) with the given ID.
672 	Note, you must hold the sMountMutex lock when you call this function.
673 */
674 static struct fs_mount*
675 find_mount(dev_t id)
676 {
677 	ASSERT_LOCKED_MUTEX(&sMountMutex);
678 
679 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
680 }
681 
682 
683 static status_t
684 get_mount(dev_t id, struct fs_mount** _mount)
685 {
686 	struct fs_mount* mount;
687 
688 	ReadLocker nodeLocker(sVnodeLock);
689 	MutexLocker mountLocker(sMountMutex);
690 
691 	mount = find_mount(id);
692 	if (mount == NULL)
693 		return B_BAD_VALUE;
694 
695 	struct vnode* rootNode = mount->root_vnode;
696 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
697 		// might have been called during a mount/unmount operation
698 		return B_BUSY;
699 	}
700 
701 	inc_vnode_ref_count(mount->root_vnode);
702 	*_mount = mount;
703 	return B_OK;
704 }
705 
706 
707 static void
708 put_mount(struct fs_mount* mount)
709 {
710 	if (mount)
711 		put_vnode(mount->root_vnode);
712 }
713 
714 
715 /*!	Tries to open the specified file system module.
716 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
717 	Returns a pointer to the file system module interface, or NULL if it
718 	could not open the module.
719 */
720 static file_system_module_info*
721 get_file_system(const char* fsName)
722 {
723 	char name[B_FILE_NAME_LENGTH];
724 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
725 		// construct module name if we didn't get one
726 		// (we currently support only one API)
727 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
728 		fsName = NULL;
729 	}
730 
731 	file_system_module_info* info;
732 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
733 		return NULL;
734 
735 	return info;
736 }
737 
738 
739 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
740 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
741 	The name is allocated for you, and you have to free() it when you're
742 	done with it.
743 	Returns NULL if the required memory is not available.
744 */
745 static char*
746 get_file_system_name(const char* fsName)
747 {
748 	const size_t length = strlen("file_systems/");
749 
750 	if (strncmp(fsName, "file_systems/", length)) {
751 		// the name already seems to be the module's file name
752 		return strdup(fsName);
753 	}
754 
755 	fsName += length;
756 	const char* end = strchr(fsName, '/');
757 	if (end == NULL) {
758 		// this doesn't seem to be a valid name, but well...
759 		return strdup(fsName);
760 	}
761 
762 	// cut off the trailing /v1
763 
764 	char* name = (char*)malloc(end + 1 - fsName);
765 	if (name == NULL)
766 		return NULL;
767 
768 	strlcpy(name, fsName, end + 1 - fsName);
769 	return name;
770 }
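
// Editor's note (illustration): get_file_system_name("file_systems/bfs/v1")
// and get_file_system_name("bfs") both yield a malloc()'d "bfs".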
771 
772 
773 /*!	Accepts a list of file system names separated by a colon, one for each
774 	layer and returns the file system name for the specified layer.
775 	The name is allocated for you, and you have to free() it when you're
776 	done with it.
777 	Returns NULL if the required memory is not available or if there is no
778 	name for the specified layer.
779 */
780 static char*
781 get_file_system_name_for_layer(const char* fsNames, int32 layer)
782 {
783 	while (layer >= 0) {
784 		const char* end = strchr(fsNames, ':');
785 		if (end == NULL) {
786 			if (layer == 0)
787 				return strdup(fsNames);
788 			return NULL;
789 		}
790 
791 		if (layer == 0) {
792 			size_t length = end - fsNames + 1;
793 			char* result = (char*)malloc(length);
794 			if (result != NULL) strlcpy(result, fsNames, length);
795 			return result;
796 		}
797 
798 		fsNames = end + 1;
799 		layer--;
800 	}
801 
802 	return NULL;
803 }
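
// Editor's note (illustration): for a layered specification such as
// "bindfs:bfs", layer 0 yields "bindfs", layer 1 yields "bfs", and any
// higher layer yields NULL.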
804 
805 
806 static int
807 vnode_compare(void* _vnode, const void* _key)
808 {
809 	struct vnode* vnode = (struct vnode*)_vnode;
810 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
811 
812 	if (vnode->device == key->device && vnode->id == key->vnode)
813 		return 0;
814 
815 	return -1;
816 }
817 
818 
819 static uint32
820 vnode_hash(void* _vnode, const void* _key, uint32 range)
821 {
822 	struct vnode* vnode = (struct vnode*)_vnode;
823 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
824 
825 #define VHASH(mountid, vnodeid) \
826 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
827 
828 	if (vnode != NULL)
829 		return VHASH(vnode->device, vnode->id) % range;
830 
831 	return VHASH(key->device, key->vnode) % range;
832 
833 #undef VHASH
834 }
835 
836 
837 static void
838 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
839 {
840 	RecursiveLocker _(mount->rlock);
841 	mount->vnodes.Add(vnode);
842 }
843 
844 
845 static void
846 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
847 {
848 	RecursiveLocker _(mount->rlock);
849 	mount->vnodes.Remove(vnode);
850 }
851 
852 
853 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
854 
855 	The caller must hold the sVnodeLock (read lock at least).
856 
857 	\param mountID the mount ID.
858 	\param vnodeID the node ID.
859 
860 	\return The vnode structure, if it was found in the hash table, \c NULL
861 			otherwise.
862 */
863 static struct vnode*
864 lookup_vnode(dev_t mountID, ino_t vnodeID)
865 {
866 	struct vnode_hash_key key;
867 
868 	key.device = mountID;
869 	key.vnode = vnodeID;
870 
871 	return (vnode*)hash_lookup(sVnodeTable, &key);
872 }
873 
874 
875 /*!	Creates a new vnode with the given mount and node ID.
876 	If the node already exists, it is returned instead and no new node is
877 	created. In either case -- but not if an error occurs -- the function write
878 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
879 	error the lock is not held on return.
880 
881 	\param mountID The mount ID.
882 	\param vnodeID The vnode ID.
883 	\param _vnode Will be set to the new vnode on success.
884 	\param _nodeCreated Will be set to \c true when the returned vnode has
885 		been newly created, \c false when it already existed. Will not be
886 		changed on error.
887 	\return \c B_OK, when the vnode was successfully created and inserted or
888 		a node with the given ID was found, \c B_NO_MEMORY or
889 		\c B_ENTRY_NOT_FOUND on error.
890 */
891 static status_t
892 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
893 	bool& _nodeCreated)
894 {
895 	FUNCTION(("create_new_vnode_and_lock()\n"));
896 
897 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
898 	if (vnode == NULL)
899 		return B_NO_MEMORY;
900 
901 	// initialize basic values
902 	memset(vnode, 0, sizeof(struct vnode));
903 	vnode->device = mountID;
904 	vnode->id = vnodeID;
905 	vnode->ref_count = 1;
906 	vnode->SetBusy(true);
907 
908 	// look up the node -- it might have been added by someone else in the
909 	// meantime
910 	rw_lock_write_lock(&sVnodeLock);
911 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
912 	if (existingVnode != NULL) {
913 		free(vnode);
914 		_vnode = existingVnode;
915 		_nodeCreated = false;
916 		return B_OK;
917 	}
918 
919 	// get the mount structure
920 	mutex_lock(&sMountMutex);
921 	vnode->mount = find_mount(mountID);
922 	if (!vnode->mount || vnode->mount->unmounting) {
923 		mutex_unlock(&sMountMutex);
924 		rw_lock_write_unlock(&sVnodeLock);
925 		free(vnode);
926 		return B_ENTRY_NOT_FOUND;
927 	}
928 
929 	// add the vnode to the mount's node list and the hash table
930 	hash_insert(sVnodeTable, vnode);
931 	add_vnode_to_mount_list(vnode, vnode->mount);
932 
933 	mutex_unlock(&sMountMutex);
934 
935 	_vnode = vnode;
936 	_nodeCreated = true;
937 
938 	// keep the vnode lock locked
939 	return B_OK;
940 }
941 
942 
943 /*!	Frees the vnode and all resources it has acquired, and removes
944 	it from the vnode hash as well as from its mount structure.
945 	Will also make sure that any cache modifications are written back.
946 */
947 static void
948 free_vnode(struct vnode* vnode, bool reenter)
949 {
950 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
951 		vnode);
952 
953 	// write back any changes in this vnode's cache -- but only
954 	// if the vnode won't be deleted, in which case the changes
955 	// will be discarded
956 
957 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
958 		FS_CALL_NO_PARAMS(vnode, fsync);
959 
960 	// Note: If this vnode has a cache attached, there will still be two
961 	// references to that cache at this point. The last one belongs to the vnode
962 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
963 	// cache. Each but the last reference to a cache also includes a reference
964 	// to the vnode. The file cache, however, released its reference (cf.
965 	// file_cache_create()), so that this vnode's ref count has the chance to
966 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
967 	// cache reference to be released, which will also release a (no longer
968 	// existing) vnode reference. To avoid problems, we set the vnode's ref
969 	// count, so that it will neither become negative nor 0.
970 	vnode->ref_count = 2;
971 
972 	if (!vnode->IsUnpublished()) {
973 		if (vnode->IsRemoved())
974 			FS_CALL(vnode, remove_vnode, reenter);
975 		else
976 			FS_CALL(vnode, put_vnode, reenter);
977 	}
978 
979 	// If the vnode has a VMCache attached, make sure that it won't try to get
980 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
981 	// long as the vnode is busy and in the hash, that won't happen, but as
982 	// soon as we've removed it from the hash, it could reload the vnode -- with
983 	// a new cache attached!
984 	if (vnode->cache != NULL)
985 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
986 
987 	// The file system has removed the resources of the vnode now, so we can
988 	// make it available again (by removing the busy vnode from the hash).
989 	rw_lock_write_lock(&sVnodeLock);
990 	hash_remove(sVnodeTable, vnode);
991 	rw_lock_write_unlock(&sVnodeLock);
992 
993 	// if we have a VMCache attached, remove it
994 	if (vnode->cache)
995 		vnode->cache->ReleaseRef();
996 
997 	vnode->cache = NULL;
998 
999 	remove_vnode_from_mount_list(vnode, vnode->mount);
1000 
1001 	free(vnode);
1002 }
1003 
1004 
1005 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1006 	if the counter dropped to 0.
1007 
1008 	The caller must, of course, own a reference to the vnode to call this
1009 	function.
1010 	The caller must not hold the sVnodeLock or the sMountMutex.
1011 
1012 	\param vnode the vnode.
1013 	\param alwaysFree don't move this vnode into the unused list, but really
1014 		   delete it if possible.
1015 	\param reenter \c true, if this function is called (indirectly) from within
1016 		   a file system. This will be passed to file system hooks only.
1017 	\return \c B_OK, if everything went fine, an error code otherwise.
1018 */
1019 static status_t
1020 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1021 {
1022 	ReadLocker locker(sVnodeLock);
1023 	AutoLocker<Vnode> nodeLocker(vnode);
1024 
1025 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1026 
1027 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1028 
1029 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1030 		vnode->ref_count));
1031 
1032 	if (oldRefCount != 1)
1033 		return B_OK;
1034 
1035 	if (vnode->IsBusy())
1036 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1037 
1038 	bool freeNode = false;
1039 	bool freeUnusedNodes = false;
1040 
1041 	// Just insert the vnode into an unused list if we don't need
1042 	// to delete it
1043 	if (vnode->IsRemoved() || alwaysFree) {
1044 		vnode_to_be_freed(vnode);
1045 		vnode->SetBusy(true);
1046 		freeNode = true;
1047 	} else
1048 		freeUnusedNodes = vnode_unused(vnode);
1049 
1050 	nodeLocker.Unlock();
1051 	locker.Unlock();
1052 
1053 	if (freeNode)
1054 		free_vnode(vnode, reenter);
1055 	else if (freeUnusedNodes)
1056 		free_unused_vnodes();
1057 
1058 	return B_OK;
1059 }
1060 
1061 
1062 /*!	\brief Increments the reference counter of the given vnode.
1063 
1064 	The caller must make sure that the node isn't deleted while this function
1065 	is called. This can be done either:
1066 	- by ensuring that a reference to the node exists and remains in existence,
1067 	  or
1068 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1069 	  or by holding sVnodeLock write locked.
1070 
1071 	In the second case the caller is responsible for dealing with the ref count
1072 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1073 	node is busy in the first place, and 2. vnode_used() must be called for
1074 	the node.
1075 
1076 	\param vnode the vnode.
1077 */
1078 static void
1079 inc_vnode_ref_count(struct vnode* vnode)
1080 {
1081 	atomic_add(&vnode->ref_count, 1);
1082 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1083 		vnode->ref_count));
1084 }
1085 
1086 
1087 static bool
1088 is_special_node_type(int type)
1089 {
1090 	// at the moment only FIFOs are supported
1091 	return S_ISFIFO(type);
1092 }
1093 
1094 
1095 static status_t
1096 create_special_sub_node(struct vnode* vnode, uint32 flags)
1097 {
1098 	if (S_ISFIFO(vnode->Type()))
1099 		return create_fifo_vnode(vnode->mount->volume, vnode);
1100 
1101 	return B_BAD_VALUE;
1102 }
1103 
1104 
1105 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1106 
1107 	If the node is not yet in memory, it will be loaded.
1108 
1109 	The caller must not hold the sVnodeLock or the sMountMutex.
1110 
1111 	\param mountID the mount ID.
1112 	\param vnodeID the node ID.
1113 	\param _vnode Pointer to a vnode* variable into which the pointer to
1114 		   the retrieved vnode structure shall be written.
1115 	\param canWait \c true, if it is allowed to wait for a busy vnode.
1116 	\param reenter \c true, if called (indirectly) from within a file system.
1117 	\return \c B_OK, if everything went fine, an error code otherwise.
1118 */
1119 static status_t
1120 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1121 	int reenter)
1122 {
1123 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1124 		_vnode));
1125 
1126 	rw_lock_read_lock(&sVnodeLock);
1127 
1128 	int32 tries = 2000;
1129 		// try for 10 secs
1130 restart:
1131 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1132 	AutoLocker<Vnode> nodeLocker(vnode);
1133 
1134 	if (vnode && vnode->IsBusy()) {
1135 		nodeLocker.Unlock();
1136 		rw_lock_read_unlock(&sVnodeLock);
1137 		if (!canWait || --tries < 0) {
1138 			// vnode doesn't seem to become unbusy
1139 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1140 				vnodeID);
1141 			return B_BUSY;
1142 		}
1143 		snooze(5000); // 5 ms
1144 		rw_lock_read_lock(&sVnodeLock);
1145 		goto restart;
1146 	}
1147 
1148 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1149 
1150 	status_t status;
1151 
1152 	if (vnode) {
1153 		if (vnode->ref_count == 0) {
1154 			// this vnode has been unused before
1155 			vnode_used(vnode);
1156 		}
1157 		inc_vnode_ref_count(vnode);
1158 
1159 		nodeLocker.Unlock();
1160 		rw_lock_read_unlock(&sVnodeLock);
1161 	} else {
1162 		// we need to create a new vnode and read it in
1163 		rw_lock_read_unlock(&sVnodeLock);
1164 			// unlock -- create_new_vnode_and_lock() write-locks on success
1165 		bool nodeCreated;
1166 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1167 			nodeCreated);
1168 		if (status != B_OK)
1169 			return status;
1170 
1171 		if (!nodeCreated) {
1172 			rw_lock_read_lock(&sVnodeLock);
1173 			rw_lock_write_unlock(&sVnodeLock);
1174 			goto restart;
1175 		}
1176 
1177 		rw_lock_write_unlock(&sVnodeLock);
1178 
1179 		int type;
1180 		uint32 flags;
1181 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1182 			&flags, reenter);
1183 		if (status == B_OK && vnode->private_node == NULL)
1184 			status = B_BAD_VALUE;
1185 
1186 		bool gotNode = status == B_OK;
1187 		bool publishSpecialSubNode = false;
1188 		if (gotNode) {
1189 			vnode->SetType(type);
1190 			publishSpecialSubNode = is_special_node_type(type)
1191 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1192 		}
1193 
1194 		if (gotNode && publishSpecialSubNode)
1195 			status = create_special_sub_node(vnode, flags);
1196 
1197 		if (status != B_OK) {
1198 			if (gotNode)
1199 				FS_CALL(vnode, put_vnode, reenter);
1200 
1201 			rw_lock_write_lock(&sVnodeLock);
1202 			hash_remove(sVnodeTable, vnode);
1203 			remove_vnode_from_mount_list(vnode, vnode->mount);
1204 			rw_lock_write_unlock(&sVnodeLock);
1205 
1206 			free(vnode);
1207 			return status;
1208 		}
1209 
1210 		rw_lock_read_lock(&sVnodeLock);
1211 		vnode->Lock();
1212 
1213 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1214 		vnode->SetBusy(false);
1215 
1216 		vnode->Unlock();
1217 		rw_lock_read_unlock(&sVnodeLock);
1218 	}
1219 
1220 	TRACE(("get_vnode: returning %p\n", vnode));
1221 
1222 	*_vnode = vnode;
1223 	return B_OK;
1224 }
1225 
1226 
1227 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1228 	if the counter dropped to 0.
1229 
1230 	The caller must, of course, own a reference to the vnode to call this
1231 	function.
1232 	The caller must not hold the sVnodeLock or the sMountMutex.
1233 
1234 	\param vnode the vnode.
1235 */
1236 static inline void
1237 put_vnode(struct vnode* vnode)
1238 {
1239 	dec_vnode_ref_count(vnode, false, false);
1240 }
1241 
1242 
1243 static void
1244 free_unused_vnodes(int32 level)
1245 {
1246 	unused_vnodes_check_started();
1247 
1248 	if (level == B_NO_LOW_RESOURCE) {
1249 		unused_vnodes_check_done();
1250 		return;
1251 	}
1252 
1253 	flush_hot_vnodes();
1254 
1255 	// determine how many nodes to free
1256 	uint32 count = 1;
1257 	{
1258 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1259 
1260 		switch (level) {
1261 			case B_LOW_RESOURCE_NOTE:
1262 				count = sUnusedVnodes / 100;
1263 				break;
1264 			case B_LOW_RESOURCE_WARNING:
1265 				count = sUnusedVnodes / 10;
1266 				break;
1267 			case B_LOW_RESOURCE_CRITICAL:
1268 				count = sUnusedVnodes;
1269 				break;
1270 		}
1271 
1272 		if (count > sUnusedVnodes)
1273 			count = sUnusedVnodes;
1274 	}
1275 
1276 	// Write back the modified pages of some unused vnodes and free them.
1277 
1278 	for (uint32 i = 0; i < count; i++) {
1279 		ReadLocker vnodesReadLocker(sVnodeLock);
1280 
1281 		// get the first node
1282 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1283 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1284 			&sUnusedVnodeList);
1285 		unusedVnodesLocker.Unlock();
1286 
1287 		if (vnode == NULL)
1288 			break;
1289 
1290 		// lock the node
1291 		AutoLocker<Vnode> nodeLocker(vnode);
1292 
1293 		// Check whether the node is still unused -- since we only append to
1294 		// the tail of the unused queue, the vnode should still be at its head.
1295 		// Alternatively we could check its ref count for 0 and its busy flag,
1296 		// but if the node is no longer at the head of the queue, it means it
1297 		// has been touched in the meantime, i.e. it is no longer the least
1298 		// recently used unused vnode, so we'd rather not free it.
1299 		unusedVnodesLocker.Lock();
1300 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1301 			continue;
1302 		unusedVnodesLocker.Unlock();
1303 
1304 		ASSERT(!vnode->IsBusy());
1305 
1306 		// grab a reference
1307 		inc_vnode_ref_count(vnode);
1308 		vnode_used(vnode);
1309 
1310 		// write back changes and free the node
1311 		nodeLocker.Unlock();
1312 		vnodesReadLocker.Unlock();
1313 
1314 		if (vnode->cache != NULL)
1315 			vnode->cache->WriteModified();
1316 
1317 		dec_vnode_ref_count(vnode, true, false);
1318 			// this should free the vnode when it's still unused
1319 	}
1320 
1321 	unused_vnodes_check_done();
1322 }
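
// Editor's note (illustration): with, say, 1000 unused vnodes, a
// B_LOW_RESOURCE_NOTE frees about 10 of them, B_LOW_RESOURCE_WARNING about
// 100, and B_LOW_RESOURCE_CRITICAL all of them, always starting with the
// least recently used ones.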
1323 
1324 
1325 /*!	Gets the vnode the given vnode is covering.
1326 
1327 	The caller must have \c sVnodeLock read-locked at least.
1328 
1329 	The function returns a reference to the retrieved vnode (if any), which
1330 	the caller is responsible for putting.
1331 
1332 	\param vnode The vnode whose covered node shall be returned.
1333 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1334 		vnode.
1335 */
1336 static inline Vnode*
1337 get_covered_vnode_locked(Vnode* vnode)
1338 {
1339 	if (Vnode* coveredNode = vnode->covers) {
1340 		while (coveredNode->covers != NULL)
1341 			coveredNode = coveredNode->covers;
1342 
1343 		inc_vnode_ref_count(coveredNode);
1344 		return coveredNode;
1345 	}
1346 
1347 	return NULL;
1348 }
1349 
1350 
1351 /*!	Gets the vnode the given vnode is covering.
1352 
1353 	The caller must not hold \c sVnodeLock. Note that this implies a race
1354 	condition, since the situation can change at any time.
1355 
1356 	The function returns a reference to the retrieved vnode (if any), which
1357 	the caller is responsible for putting.
1358 
1359 	\param vnode The vnode whose covered node shall be returned.
1360 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1361 		vnode.
1362 */
1363 static inline Vnode*
1364 get_covered_vnode(Vnode* vnode)
1365 {
1366 	if (!vnode->IsCovering())
1367 		return NULL;
1368 
1369 	ReadLocker vnodeReadLocker(sVnodeLock);
1370 	return get_covered_vnode_locked(vnode);
1371 }
1372 
1373 
1374 /*!	Gets the vnode the given vnode is covered by.
1375 
1376 	The caller must have \c sVnodeLock read-locked at least.
1377 
1378 	The function returns a reference to the retrieved vnode (if any), which
1379 	the caller is responsible for putting.
1380 
1381 	\param vnode The vnode whose covering node shall be returned.
1382 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1383 		any vnode.
1384 */
1385 static Vnode*
1386 get_covering_vnode_locked(Vnode* vnode)
1387 {
1388 	if (Vnode* coveringNode = vnode->covered_by) {
1389 		while (coveringNode->covered_by != NULL)
1390 			coveringNode = coveringNode->covered_by;
1391 
1392 		inc_vnode_ref_count(coveringNode);
1393 		return coveringNode;
1394 	}
1395 
1396 	return NULL;
1397 }
1398 
1399 
1400 /*!	Gets the vnode the given vnode is covered by.
1401 
1402 	The caller must not hold \c sVnodeLock. Note that this implies a race
1403 	condition, since the situation can change at any time.
1404 
1405 	The function returns a reference to the retrieved vnode (if any), which
1406 	the caller is responsible for putting.
1407 
1408 	\param vnode The vnode whose covering node shall be returned.
1409 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1410 		any vnode.
1411 */
1412 static inline Vnode*
1413 get_covering_vnode(Vnode* vnode)
1414 {
1415 	if (!vnode->IsCovered())
1416 		return NULL;
1417 
1418 	ReadLocker vnodeReadLocker(sVnodeLock);
1419 	return get_covering_vnode_locked(vnode);
1420 }
1421 
1422 
1423 static void
1424 free_unused_vnodes()
1425 {
1426 	free_unused_vnodes(
1427 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1428 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1429 }
1430 
1431 
1432 static void
1433 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1434 {
1435 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1436 
1437 	free_unused_vnodes(level);
1438 }
1439 
1440 
1441 static inline void
1442 put_advisory_locking(struct advisory_locking* locking)
1443 {
1444 	release_sem(locking->lock);
1445 }
1446 
1447 
1448 /*!	Returns the advisory_locking object of the \a vnode in case it
1449 	has one, and locks it.
1450 	You have to call put_advisory_locking() when you're done with
1451 	it.
1452 	Note, you must not have the vnode mutex locked when calling
1453 	this function.
1454 */
1455 static struct advisory_locking*
1456 get_advisory_locking(struct vnode* vnode)
1457 {
1458 	rw_lock_read_lock(&sVnodeLock);
1459 	vnode->Lock();
1460 
1461 	struct advisory_locking* locking = vnode->advisory_locking;
1462 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1463 
1464 	vnode->Unlock();
1465 	rw_lock_read_unlock(&sVnodeLock);
1466 
1467 	if (lock >= 0)
1468 		lock = acquire_sem(lock);
1469 	if (lock < 0) {
1470 		// This means the locking has been deleted in the meantime
1471 		// or had never existed in the first place - otherwise, we
1472 		// would get the lock at some point.
1473 		return NULL;
1474 	}
1475 
1476 	return locking;
1477 }
1478 
1479 
1480 /*!	Creates a locked advisory_locking object, and attaches it to the
1481 	given \a vnode.
1482 	Returns B_OK in case of success - also if the vnode got such an
1483 	object from someone else in the meantime, you'll still get this
1484 	one locked then.
1485 */
1486 static status_t
1487 create_advisory_locking(struct vnode* vnode)
1488 {
1489 	if (vnode == NULL)
1490 		return B_FILE_ERROR;
1491 
1492 	ObjectDeleter<advisory_locking> lockingDeleter;
1493 	struct advisory_locking* locking = NULL;
1494 
1495 	while (get_advisory_locking(vnode) == NULL) {
1496 		// no locking object set on the vnode yet, create one
1497 		if (locking == NULL) {
1498 			locking = new(std::nothrow) advisory_locking;
1499 			if (locking == NULL)
1500 				return B_NO_MEMORY;
1501 			lockingDeleter.SetTo(locking);
1502 
1503 			locking->wait_sem = create_sem(0, "advisory lock");
1504 			if (locking->wait_sem < 0)
1505 				return locking->wait_sem;
1506 
1507 			locking->lock = create_sem(0, "advisory locking");
1508 			if (locking->lock < 0)
1509 				return locking->lock;
1510 		}
1511 
1512 		// set our newly created locking object
1513 		ReadLocker _(sVnodeLock);
1514 		AutoLocker<Vnode> nodeLocker(vnode);
1515 		if (vnode->advisory_locking == NULL) {
1516 			vnode->advisory_locking = locking;
1517 			lockingDeleter.Detach();
1518 			return B_OK;
1519 		}
1520 	}
1521 
1522 	// The vnode already had a locking object. That's just as well.
1523 
1524 	return B_OK;
1525 }
1526 
1527 
1528 /*!	Retrieves the first lock that has been set by the current team.
1529 */
1530 static status_t
1531 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1532 {
1533 	struct advisory_locking* locking = get_advisory_locking(vnode);
1534 	if (locking == NULL)
1535 		return B_BAD_VALUE;
1536 
1537 	// TODO: this should probably get the flock by its file descriptor!
1538 	team_id team = team_get_current_team_id();
1539 	status_t status = B_BAD_VALUE;
1540 
1541 	LockList::Iterator iterator = locking->locks.GetIterator();
1542 	while (iterator.HasNext()) {
1543 		struct advisory_lock* lock = iterator.Next();
1544 
1545 		if (lock->team == team) {
1546 			flock->l_start = lock->start;
1547 			flock->l_len = lock->end - lock->start + 1;
1548 			status = B_OK;
1549 			break;
1550 		}
1551 	}
1552 
1553 	put_advisory_locking(locking);
1554 	return status;
1555 }
1556 
1557 
1558 /*! Returns \c true when either \a flock is \c NULL or \a flock intersects
1559 	with the advisory_lock \a lock.
1560 */
1561 static bool
1562 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1563 {
1564 	if (flock == NULL)
1565 		return true;
1566 
1567 	return lock->start <= flock->l_start - 1 + flock->l_len
1568 		&& lock->end >= flock->l_start;
1569 }
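
// Editor's note (illustration): with normalized values, a lock covering
// [10, 19] intersects a flock with l_start == 15 and l_len == 10 (i.e.
// [15, 24]), since 10 <= 24 and 19 >= 15; disjoint regions fail one of
// the two comparisons.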
1570 
1571 
1572 /*!	Removes the specified lock, or all locks of the calling team
1573 	if \a flock is NULL.
1574 */
1575 static status_t
1576 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1577 {
1578 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1579 
1580 	struct advisory_locking* locking = get_advisory_locking(vnode);
1581 	if (locking == NULL)
1582 		return B_OK;
1583 
1584 	// TODO: use the thread ID instead??
1585 	team_id team = team_get_current_team_id();
1586 	pid_t session = thread_get_current_thread()->team->session_id;
1587 
1588 	// find matching lock entries
1589 
1590 	LockList::Iterator iterator = locking->locks.GetIterator();
1591 	while (iterator.HasNext()) {
1592 		struct advisory_lock* lock = iterator.Next();
1593 		bool removeLock = false;
1594 
1595 		if (lock->session == session)
1596 			removeLock = true;
1597 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1598 			bool endsBeyond = false;
1599 			bool startsBefore = false;
1600 			if (flock != NULL) {
1601 				startsBefore = lock->start < flock->l_start;
1602 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1603 			}
1604 
1605 			if (!startsBefore && !endsBeyond) {
1606 				// lock is completely contained in flock
1607 				removeLock = true;
1608 			} else if (startsBefore && !endsBeyond) {
1609 				// cut the end of the lock
1610 				lock->end = flock->l_start - 1;
1611 			} else if (!startsBefore && endsBeyond) {
1612 				// cut the start of the lock
1613 				lock->start = flock->l_start + flock->l_len;
1614 			} else {
1615 				// divide the lock into two locks
1616 				struct advisory_lock* secondLock = new(std::nothrow) advisory_lock;
1617 				if (secondLock == NULL) {
1618 					// TODO: we should probably revert the locks we already
1619 					// changed... (ie. allocate upfront)
1620 					put_advisory_locking(locking);
1621 					return B_NO_MEMORY;
1622 				}
1623 
1624 				secondLock->team = lock->team;
1625 				secondLock->session = lock->session;
1626 				// values must already be normalized when getting here
1627 				secondLock->start = flock->l_start + flock->l_len;
1628 				secondLock->end = lock->end;
1629 					// copy the original end before truncating the first lock
1630 				secondLock->shared = lock->shared;
1631 				lock->end = flock->l_start - 1;
1632 
1633 				locking->locks.Add(secondLock);
1634 			}
1635 		}
1636 
1637 		if (removeLock) {
1638 			// this lock is no longer used
1639 			iterator.Remove();
1640 			free(lock);
1641 		}
1642 	}
1643 
1644 	bool removeLocking = locking->locks.IsEmpty();
1645 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1646 
1647 	put_advisory_locking(locking);
1648 
1649 	if (removeLocking) {
1650 		// We can remove the whole advisory locking structure; it's no
1651 		// longer used
1652 		locking = get_advisory_locking(vnode);
1653 		if (locking != NULL) {
1654 			ReadLocker locker(sVnodeLock);
1655 			AutoLocker<Vnode> nodeLocker(vnode);
1656 
1657 			// the locking could have been changed in the meantime
1658 			if (locking->locks.IsEmpty()) {
1659 				vnode->advisory_locking = NULL;
1660 				nodeLocker.Unlock();
1661 				locker.Unlock();
1662 
1663 				// we've detached the locking from the vnode, so we can
1664 				// safely delete it
1665 				delete locking;
1666 			} else {
1667 				// the locking is in use again
1668 				nodeLocker.Unlock();
1669 				locker.Unlock();
1670 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1671 			}
1672 		}
1673 	}
1674 
1675 	return B_OK;
1676 }
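
// Editor's worked example (illustration): releasing the region [40, 59]
// from a lock held on [0, 99] takes the "divide" branch above and leaves
// two locks for the same team and session, [0, 39] and [60, 99].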
1677 
1678 
1679 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1680 	will wait for the lock to become available if there are any collisions
1681 	(if \a wait is \c false, \c B_WOULD_BLOCK or \c B_PERMISSION_DENIED is
1682 	returned in this case, for BSD resp. POSIX style locks).
1683 
1684 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1685 	BSD flock() semantics are used, that is, all children can unlock the
1686 	file in question (parents may even remove the lock, in line with the BSDs).
1687 */
1688 static status_t
1689 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1690 	bool wait)
1691 {
1692 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1693 		vnode, flock, wait ? "yes" : "no"));
1694 
1695 	bool shared = flock->l_type == F_RDLCK;
1696 	status_t status = B_OK;
1697 
1698 	// TODO: do deadlock detection!
1699 
1700 	struct advisory_locking* locking;
1701 
1702 	while (true) {
1703 		// if this vnode has an advisory_locking structure attached,
1704 		// lock that one and search for any colliding file lock
1705 		status = create_advisory_locking(vnode);
1706 		if (status != B_OK)
1707 			return status;
1708 
1709 		locking = vnode->advisory_locking;
1710 		team_id team = team_get_current_team_id();
1711 		sem_id waitForLock = -1;
1712 
1713 		// test for collisions
1714 		LockList::Iterator iterator = locking->locks.GetIterator();
1715 		while (iterator.HasNext()) {
1716 			struct advisory_lock* lock = iterator.Next();
1717 
1718 			// TODO: locks from the same team might be joinable!
1719 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1720 				// locks do overlap
1721 				if (!shared || !lock->shared) {
1722 					// we need to wait
1723 					waitForLock = locking->wait_sem;
1724 					break;
1725 				}
1726 			}
1727 		}
1728 
1729 		if (waitForLock < 0)
1730 			break;
1731 
1732 		// We need to wait. Do that or fail now, if we've been asked not to.
1733 
1734 		if (!wait) {
1735 			put_advisory_locking(locking);
1736 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1737 		}
1738 
1739 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1740 			B_CAN_INTERRUPT, 0);
1741 		if (status != B_OK && status != B_BAD_SEM_ID)
1742 			return status;
1743 
1744 		// We have been notified, but we need to re-lock the locking object. So
1745 		// go another round...
1746 	}
1747 
1748 	// install new lock
1749 
1750 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1751 		sizeof(struct advisory_lock));
1752 	if (lock == NULL) {
1753 		put_advisory_locking(locking);
1754 		return B_NO_MEMORY;
1755 	}
1756 
1757 	lock->team = team_get_current_team_id();
1758 	lock->session = session;
1759 	// values must already be normalized when getting here
1760 	lock->start = flock->l_start;
1761 	lock->end = flock->l_start - 1 + flock->l_len;
1762 	lock->shared = shared;
1763 
1764 	locking->locks.Add(lock);
1765 	put_advisory_locking(locking);
1766 
1767 	return status;
1768 }
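
// Editor's sketch (illustrative, not part of the original file): an
// F_SETLK/F_SETLKW style caller combines the helpers above roughly like
// this; userFlock, descriptor, vnode, and wait are assumed context:
#if 0
	struct flock flock;
	if (user_memcpy(&flock, userFlock, sizeof(struct flock)) != B_OK)
		return B_BAD_ADDRESS;

	status_t status = normalize_flock(descriptor, &flock);
	if (status != B_OK)
		return status;

	if (flock.l_type == F_UNLCK)
		status = release_advisory_lock(vnode, &flock);
	else
		status = acquire_advisory_lock(vnode, -1, &flock, wait);
			// session == -1 selects POSIX semantics
#endif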
1769 
1770 
1771 /*!	Normalizes the \a flock structure to make it easier to compare the
1772 	structure with others. The l_start and l_len fields are set to absolute
1773 	values according to the l_whence field.
1774 */
1775 static status_t
1776 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1777 {
1778 	switch (flock->l_whence) {
1779 		case SEEK_SET:
1780 			break;
1781 		case SEEK_CUR:
1782 			flock->l_start += descriptor->pos;
1783 			break;
1784 		case SEEK_END:
1785 		{
1786 			struct vnode* vnode = descriptor->u.vnode;
1787 			struct stat stat;
1788 			status_t status;
1789 
1790 			if (!HAS_FS_CALL(vnode, read_stat))
1791 				return B_UNSUPPORTED;
1792 
1793 			status = FS_CALL(vnode, read_stat, &stat);
1794 			if (status != B_OK)
1795 				return status;
1796 
1797 			flock->l_start += stat.st_size;
1798 			break;
1799 		}
1800 		default:
1801 			return B_BAD_VALUE;
1802 	}
1803 
1804 	if (flock->l_start < 0)
1805 		flock->l_start = 0;
1806 	if (flock->l_len == 0)
1807 		flock->l_len = OFF_MAX;
1808 
1809 	// don't let the offset and length overflow
1810 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1811 		flock->l_len = OFF_MAX - flock->l_start;
1812 
1813 	if (flock->l_len < 0) {
1814 		// a negative length reverses the region
1815 		flock->l_start += flock->l_len;
1816 		flock->l_len = -flock->l_len;
1817 	}
1818 
1819 	return B_OK;
1820 }
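
// Editor's worked example (illustration): for a descriptor at pos == 100,
// a request with l_whence == SEEK_CUR, l_start == -10 and l_len == -20
// first becomes l_start == 90, then the negative length reverses the
// region, yielding l_start == 70 and l_len == 20, i.e. bytes [70, 89].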
1821 
1822 
1823 static void
1824 replace_vnode_if_disconnected(struct fs_mount* mount,
1825 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1826 	struct vnode* fallBack, bool lockRootLock)
1827 {
1828 	struct vnode* givenVnode = vnode;
1829 	bool vnodeReplaced = false;
1830 
1831 	ReadLocker vnodeReadLocker(sVnodeLock);
1832 
1833 	if (lockRootLock)
1834 		mutex_lock(&sIOContextRootLock);
1835 
1836 	while (vnode != NULL && vnode->mount == mount
1837 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1838 		if (vnode->covers != NULL) {
1839 			// redirect the vnode to the covered vnode
1840 			vnode = vnode->covers;
1841 		} else
1842 			vnode = fallBack;
1843 
1844 		vnodeReplaced = true;
1845 	}
1846 
1847 	// If we've replaced the node, grab a reference for the new one.
1848 	if (vnodeReplaced && vnode != NULL)
1849 		inc_vnode_ref_count(vnode);
1850 
1851 	if (lockRootLock)
1852 		mutex_unlock(&sIOContextRootLock);
1853 
1854 	vnodeReadLocker.Unlock();
1855 
1856 	if (vnodeReplaced)
1857 		put_vnode(givenVnode);
1858 }
1859 
1860 
1861 /*!	Disconnects all file descriptors that are associated with the
1862 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1863 	\a mount object.
1864 
1865 	Note, after you've called this function, there might still be ongoing
1866 	accesses - they won't be interrupted if they were already in progress.
1867 	However, any subsequent access will fail.
1868 
1869 	This is not a cheap function and should be used with care and rarely.
1870 	TODO: there is currently no means to stop a blocking read/write!
1871 */
1872 static void
1873 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1874 	struct vnode* vnodeToDisconnect)
1875 {
1876 	// iterate over all teams and peek into their file descriptors
1877 	TeamListIterator teamIterator;
1878 	while (Team* team = teamIterator.Next()) {
1879 		BReference<Team> teamReference(team, true);
1880 
1881 		// lock the I/O context
1882 		io_context* context = team->io_context;
1883 		MutexLocker contextLocker(context->io_mutex);
1884 
1885 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1886 			sRoot, true);
1887 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1888 			sRoot, false);
1889 
1890 		for (uint32 i = 0; i < context->table_size; i++) {
1891 			if (struct file_descriptor* descriptor = context->fds[i]) {
1892 				inc_fd_ref_count(descriptor);
1893 
1894 				// if this descriptor points at this mount, we
1895 				// need to disconnect it to be able to unmount
1896 				struct vnode* vnode = fd_vnode(descriptor);
1897 				if (vnodeToDisconnect != NULL) {
1898 					if (vnode == vnodeToDisconnect)
1899 						disconnect_fd(descriptor);
1900 				} else if ((vnode != NULL && vnode->mount == mount)
1901 					|| (vnode == NULL && descriptor->u.mount == mount))
1902 					disconnect_fd(descriptor);
1903 
1904 				put_fd(descriptor);
1905 			}
1906 		}
1907 	}
1908 }
1909 
1910 
1911 /*!	\brief Gets the root node of the current IO context.
1912 	If \a kernel is \c true, the kernel IO context will be used.
1913 	The caller obtains a reference to the returned node.
1914 */
1915 struct vnode*
1916 get_root_vnode(bool kernel)
1917 {
1918 	if (!kernel) {
		// Get the root of the current IO context
1920 		struct io_context* context = get_current_io_context(kernel);
1921 
1922 		mutex_lock(&sIOContextRootLock);
1923 
1924 		struct vnode* root = context->root;
1925 		if (root != NULL)
1926 			inc_vnode_ref_count(root);
1927 
1928 		mutex_unlock(&sIOContextRootLock);
1929 
1930 		if (root != NULL)
1931 			return root;
1932 
1933 		// That should never happen.
1934 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1935 			"root\n", team_get_current_team_id());
1936 	}
1937 
1938 	inc_vnode_ref_count(sRoot);
1939 	return sRoot;
1940 }
1941 
1942 
1943 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1944 
1945 	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the vnode is covered by another vnode. If it is, the
1947 	function returns the mount and node ID of the covering vnode. Otherwise
1948 	it simply returns the supplied mount and node ID.
1949 
1950 	In case of error (e.g. the supplied node could not be found) the variables
1951 	for storing the resolved mount and node ID remain untouched and an error
1952 	code is returned.
1953 
1954 	\param mountID The mount ID of the vnode in question.
1955 	\param nodeID The node ID of the vnode in question.
1956 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1957 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1958 	\return
1959 	- \c B_OK, if everything went fine,
1960 	- another error code, if something went wrong.
1961 */
1962 status_t
1963 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1964 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1965 {
1966 	// get the node
1967 	struct vnode* node;
1968 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1969 	if (error != B_OK)
1970 		return error;
1971 
1972 	// resolve the node
1973 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1974 		put_vnode(node);
1975 		node = coveringNode;
1976 	}
1977 
1978 	// set the return values
1979 	*resolvedMountID = node->device;
1980 	*resolvedNodeID = node->id;
1981 
1982 	put_vnode(node);
1983 
1984 	return B_OK;
1985 }
1986 
1987 
1988 /*!	\brief Gets the directory path and leaf name for a given path.
1989 
1990 	The supplied \a path is transformed to refer to the directory part of
1991 	the entry identified by the original path, and into the buffer \a filename
1992 	the leaf name of the original entry is written.
1993 	Neither the returned path nor the leaf name can be expected to be
1994 	canonical.
1995 
1996 	\param path The path to be analyzed. Must be able to store at least one
1997 		   additional character.
1998 	\param filename The buffer into which the leaf name will be written.
1999 		   Must be of size B_FILE_NAME_LENGTH at least.
2000 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2001 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2002 		   if the given path name is empty.
2003 */
2004 static status_t
2005 get_dir_path_and_leaf(char* path, char* filename)
2006 {
2007 	if (*path == '\0')
2008 		return B_ENTRY_NOT_FOUND;
2009 
2010 	char* last = strrchr(path, '/');
		// '/' is not allowed in file names!
2012 
2013 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2014 
2015 	if (last == NULL) {
		// the path is a single segment with no '/' in it, e.g. "foo"
2018 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2019 			return B_NAME_TOO_LONG;
2020 
2021 		strcpy(path, ".");
2022 	} else {
2023 		last++;
2024 		if (last[0] == '\0') {
2025 			// special case: the path ends in one or more '/' - remove them
2026 			while (*--last == '/' && last != path);
2027 			last[1] = '\0';
2028 
2029 			if (last == path && last[0] == '/') {
2030 				// This path points to the root of the file system
2031 				strcpy(filename, ".");
2032 				return B_OK;
2033 			}
2034 			for (; last != path && *(last - 1) != '/'; last--);
2035 				// rewind to the start of the leaf before the '/'
2036 		}
2037 
2038 		// normal leaf: replace the leaf portion of the path with a '.'
2039 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2040 			return B_NAME_TOO_LONG;
2041 
2042 		last[0] = '.';
2043 		last[1] = '\0';
2044 	}
2045 	return B_OK;
2046 }
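

/*!	Example (illustrative only) of how get_dir_path_and_leaf() transforms
	its arguments in place:

	\code
	char path[B_PATH_NAME_LENGTH];
	char leaf[B_FILE_NAME_LENGTH];

	strcpy(path, "/boot/home/file");
	get_dir_path_and_leaf(path, leaf);
		// path: "/boot/home/.", leaf: "file"

	strcpy(path, "foo");
	get_dir_path_and_leaf(path, leaf);
		// path: ".", leaf: "foo"

	strcpy(path, "/boot/home/");
	get_dir_path_and_leaf(path, leaf);
		// trailing slashes are removed first -- path: "/boot/.",
		// leaf: "home"
	\endcode
*/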
2047 
2048 
2049 static status_t
2050 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2051 	bool traverse, bool kernel, struct vnode** _vnode)
2052 {
2053 	char clonedName[B_FILE_NAME_LENGTH + 1];
2054 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2055 		return B_NAME_TOO_LONG;
2056 
2057 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2058 	struct vnode* directory;
2059 
2060 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2061 	if (status < 0)
2062 		return status;
2063 
2064 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2065 		_vnode, NULL);
2066 }
2067 
2068 
2069 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2070 	and returns the respective vnode.
2071 	On success a reference to the vnode is acquired for the caller.
2072 */
2073 static status_t
2074 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2075 {
2076 	ino_t id;
2077 
2078 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2079 		return get_vnode(dir->device, id, _vnode, true, false);
2080 
2081 	status_t status = FS_CALL(dir, lookup, name, &id);
2082 	if (status != B_OK)
2083 		return status;
2084 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// hold a reference and just need to look the node up.
2087 	rw_lock_read_lock(&sVnodeLock);
2088 	*_vnode = lookup_vnode(dir->device, id);
2089 	rw_lock_read_unlock(&sVnodeLock);
2090 
2091 	if (*_vnode == NULL) {
2092 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2093 			"0x%Lx)\n", dir->device, id);
2094 		return B_ENTRY_NOT_FOUND;
2095 	}
2096 
2097 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2098 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2099 //		(*_vnode)->mount->id, (*_vnode)->id);
2100 
2101 	return B_OK;
2102 }
2103 
2104 
2105 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2106 	\a path must not be NULL.
2107 	If it returns successfully, \a path contains the name of the last path
2108 	component. This function clobbers the buffer pointed to by \a path only
2109 	if it does contain more than one component.
	Note that this reduces the ref_count of the starting \a vnode, no matter
	whether it succeeds or not!
2112 */
2113 static status_t
2114 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2115 	int count, struct io_context* ioContext, struct vnode** _vnode,
2116 	ino_t* _parentID)
2117 {
2118 	status_t status = B_OK;
2119 	ino_t lastParentID = vnode->id;
2120 
2121 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2122 
2123 	if (path == NULL) {
2124 		put_vnode(vnode);
2125 		return B_BAD_VALUE;
2126 	}
2127 
2128 	if (*path == '\0') {
2129 		put_vnode(vnode);
2130 		return B_ENTRY_NOT_FOUND;
2131 	}
2132 
2133 	while (true) {
2134 		struct vnode* nextVnode;
2135 		char* nextPath;
2136 
2137 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2138 			path));
2139 
2140 		// done?
2141 		if (path[0] == '\0')
2142 			break;
2143 
2144 		// walk to find the next path component ("path" will point to a single
2145 		// path component), and filter out multiple slashes
2146 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2147 				nextPath++);
2148 
2149 		if (*nextPath == '/') {
2150 			*nextPath = '\0';
2151 			do
2152 				nextPath++;
2153 			while (*nextPath == '/');
2154 		}
2155 
		// If we're at a covering vnode when encountering "..", move to the
		// covered vnode, so that the ".." is passed to the underlying file
		// system. Also prevent escaping the root of the IO context.
2159 		if (strcmp("..", path) == 0) {
2160 			if (vnode == ioContext->root) {
2161 				// Attempted prison break! Keep it contained.
2162 				path = nextPath;
2163 				continue;
2164 			}
2165 
2166 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2167 				nextVnode = coveredVnode;
2168 				put_vnode(vnode);
2169 				vnode = nextVnode;
2170 			}
2171 		}
2172 
2173 		// check if vnode is really a directory
2174 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2175 			status = B_NOT_A_DIRECTORY;
2176 
2177 		// Check if we have the right to search the current directory vnode.
2178 		// If a file system doesn't have the access() function, we assume that
2179 		// searching a directory is always allowed
2180 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2181 			status = FS_CALL(vnode, access, X_OK);
2182 
2183 		// Tell the filesystem to get the vnode of this path component (if we
2184 		// got the permission from the call above)
2185 		if (status == B_OK)
2186 			status = lookup_dir_entry(vnode, path, &nextVnode);
2187 
2188 		if (status != B_OK) {
2189 			put_vnode(vnode);
2190 			return status;
2191 		}
2192 
2193 		// If the new node is a symbolic link, resolve it (if we've been told
2194 		// to do it)
2195 		if (S_ISLNK(nextVnode->Type())
2196 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2197 			size_t bufferSize;
2198 			char* buffer;
2199 
2200 			TRACE(("traverse link\n"));
2201 
2202 			// it's not exactly nice style using goto in this way, but hey,
2203 			// it works :-/
2204 			if (count + 1 > B_MAX_SYMLINKS) {
2205 				status = B_LINK_LIMIT;
2206 				goto resolve_link_error;
2207 			}
2208 
2209 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2210 			if (buffer == NULL) {
2211 				status = B_NO_MEMORY;
2212 				goto resolve_link_error;
2213 			}
2214 
2215 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2216 				bufferSize--;
2217 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2218 				// null-terminate
2219 				if (status >= 0)
2220 					buffer[bufferSize] = '\0';
2221 			} else
2222 				status = B_BAD_VALUE;
2223 
2224 			if (status != B_OK) {
2225 				free(buffer);
2226 
2227 		resolve_link_error:
2228 				put_vnode(vnode);
2229 				put_vnode(nextVnode);
2230 
2231 				return status;
2232 			}
2233 			put_vnode(nextVnode);
2234 
2235 			// Check if we start from the root directory or the current
2236 			// directory ("vnode" still points to that one).
2237 			// Cut off all leading slashes if it's the root directory
2238 			path = buffer;
2239 			bool absoluteSymlink = false;
2240 			if (path[0] == '/') {
2241 				// we don't need the old directory anymore
2242 				put_vnode(vnode);
2243 
2244 				while (*++path == '/')
2245 					;
2246 
2247 				mutex_lock(&sIOContextRootLock);
2248 				vnode = ioContext->root;
2249 				inc_vnode_ref_count(vnode);
2250 				mutex_unlock(&sIOContextRootLock);
2251 
2252 				absoluteSymlink = true;
2253 			}
2254 
2255 			inc_vnode_ref_count(vnode);
2256 				// balance the next recursion - we will decrement the
2257 				// ref_count of the vnode, no matter if we succeeded or not
2258 
2259 			if (absoluteSymlink && *path == '\0') {
2260 				// symlink was just "/"
2261 				nextVnode = vnode;
2262 			} else {
2263 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2264 					ioContext, &nextVnode, &lastParentID);
2265 			}
2266 
2267 			free(buffer);
2268 
2269 			if (status != B_OK) {
2270 				put_vnode(vnode);
2271 				return status;
2272 			}
2273 		} else
2274 			lastParentID = vnode->id;
2275 
2276 		// decrease the ref count on the old dir we just looked up into
2277 		put_vnode(vnode);
2278 
2279 		path = nextPath;
2280 		vnode = nextVnode;
2281 
2282 		// see if we hit a covered node
2283 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2284 			put_vnode(vnode);
2285 			vnode = coveringNode;
2286 		}
2287 	}
2288 
2289 	*_vnode = vnode;
2290 	if (_parentID)
2291 		*_parentID = lastParentID;
2292 
2293 	return B_OK;
2294 }
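

/*!	Illustration of the component loop above: the path is split in place,
	and runs of slashes are collapsed. For "a//b/c" the iterations see:

	\code
	// iteration 1: path = "a", nextPath = "b/c"
	// iteration 2: path = "b", nextPath = "c"
	// iteration 3: path = "c", nextPath = ""
	\endcode
*/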
2295 
2296 
2297 static status_t
2298 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2299 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2300 {
2301 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2302 		get_current_io_context(kernel), _vnode, _parentID);
2303 }
2304 
2305 
2306 static status_t
2307 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2308 	ino_t* _parentID, bool kernel)
2309 {
2310 	struct vnode* start = NULL;
2311 
2312 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2313 
2314 	if (!path)
2315 		return B_BAD_VALUE;
2316 
2317 	if (*path == '\0')
2318 		return B_ENTRY_NOT_FOUND;
2319 
2320 	// figure out if we need to start at root or at cwd
2321 	if (*path == '/') {
2322 		if (sRoot == NULL) {
2323 			// we're a bit early, aren't we?
2324 			return B_ERROR;
2325 		}
2326 
2327 		while (*++path == '/')
2328 			;
2329 		start = get_root_vnode(kernel);
2330 
2331 		if (*path == '\0') {
2332 			*_vnode = start;
2333 			return B_OK;
2334 		}
2335 
2336 	} else {
2337 		struct io_context* context = get_current_io_context(kernel);
2338 
2339 		mutex_lock(&context->io_mutex);
2340 		start = context->cwd;
2341 		if (start != NULL)
2342 			inc_vnode_ref_count(start);
2343 		mutex_unlock(&context->io_mutex);
2344 
2345 		if (start == NULL)
2346 			return B_ERROR;
2347 	}
2348 
2349 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2350 		_parentID);
2351 }
2352 
2353 
/*! Returns the vnode for the next to last segment of the path, and returns
	the last path component in \a filename.
2356 	The path buffer must be able to store at least one additional character.
2357 */
2358 static status_t
2359 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2360 	bool kernel)
2361 {
2362 	status_t status = get_dir_path_and_leaf(path, filename);
2363 	if (status != B_OK)
2364 		return status;
2365 
2366 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2367 }
2368 
2369 
2370 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2371 		   to by a FD + path pair.
2372 
2373 	\a path must be given in either case. \a fd might be omitted, in which
2374 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2378 
2379 	The caller has the responsibility to call put_vnode() on the returned
2380 	directory vnode.
2381 
2382 	\param fd The FD. May be < 0.
2383 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2384 	       is modified by this function. It must have at least room for a
2385 	       string one character longer than the path it contains.
2386 	\param _vnode A pointer to a variable the directory vnode shall be written
2387 		   into.
2388 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2389 		   the leaf name of the specified entry will be written.
2390 	\param kernel \c true, if invoked from inside the kernel, \c false if
2391 		   invoked from userland.
2392 	\return \c B_OK, if everything went fine, another error code otherwise.
2393 */
2394 static status_t
2395 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2396 	char* filename, bool kernel)
2397 {
2398 	if (!path)
2399 		return B_BAD_VALUE;
2400 	if (*path == '\0')
2401 		return B_ENTRY_NOT_FOUND;
2402 	if (fd < 0)
2403 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2404 
2405 	status_t status = get_dir_path_and_leaf(path, filename);
2406 	if (status != B_OK)
2407 		return status;
2408 
2409 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2410 }
2411 
2412 
2413 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2414 		   to by a vnode + path pair.
2415 
2416 	\a path must be given in either case. \a vnode might be omitted, in which
2417 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2421 
2422 	The caller has the responsibility to call put_vnode() on the returned
2423 	directory vnode.
2424 
2425 	\param vnode The vnode. May be \c NULL.
2426 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2427 	       is modified by this function. It must have at least room for a
2428 	       string one character longer than the path it contains.
2429 	\param _vnode A pointer to a variable the directory vnode shall be written
2430 		   into.
2431 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2432 		   the leaf name of the specified entry will be written.
2433 	\param kernel \c true, if invoked from inside the kernel, \c false if
2434 		   invoked from userland.
2435 	\return \c B_OK, if everything went fine, another error code otherwise.
2436 */
2437 static status_t
2438 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2439 	struct vnode** _vnode, char* filename, bool kernel)
2440 {
2441 	if (!path)
2442 		return B_BAD_VALUE;
2443 	if (*path == '\0')
2444 		return B_ENTRY_NOT_FOUND;
2445 	if (vnode == NULL || path[0] == '/')
2446 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2447 
2448 	status_t status = get_dir_path_and_leaf(path, filename);
2449 	if (status != B_OK)
2450 		return status;
2451 
2452 	inc_vnode_ref_count(vnode);
2453 		// vnode_path_to_vnode() always decrements the ref count
2454 
2455 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2456 }
2457 
2458 
2459 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2460 */
2461 static status_t
2462 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2463 	size_t bufferSize, struct io_context* ioContext)
2464 {
2465 	if (bufferSize < sizeof(struct dirent))
2466 		return B_BAD_VALUE;
2467 
	// See if the vnode is covering another vnode and move to the covered
	// vnode, so that we get the underlying file system
2470 	VNodePutter vnodePutter;
2471 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2472 		vnode = coveredVnode;
2473 		vnodePutter.SetTo(vnode);
2474 	}
2475 
2476 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2477 		// The FS supports getting the name of a vnode.
2478 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2479 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2480 			return B_OK;
2481 	}
2482 
2483 	// The FS doesn't support getting the name of a vnode. So we search the
2484 	// parent directory for the vnode, if the caller let us.
2485 
2486 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2487 		return B_UNSUPPORTED;
2488 
2489 	void* cookie;
2490 
2491 	status_t status = FS_CALL(parent, open_dir, &cookie);
2492 	if (status >= B_OK) {
2493 		while (true) {
2494 			uint32 num = 1;
			// We use the FS hook directly instead of dir_read(), since we
			// don't want the entries to be fixed up (we have already
			// resolved vnode to the covered node).
2498 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2499 				&num);
2500 			if (status != B_OK)
2501 				break;
2502 			if (num == 0) {
2503 				status = B_ENTRY_NOT_FOUND;
2504 				break;
2505 			}
2506 
2507 			if (vnode->id == buffer->d_ino) {
2508 				// found correct entry!
2509 				break;
2510 			}
2511 		}
2512 
		// the cookie was opened on the parent, so it must be closed there,
		// too
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2515 	}
2516 	return status;
2517 }
2518 
2519 
2520 static status_t
2521 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2522 	size_t nameSize, bool kernel)
2523 {
2524 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2525 	struct dirent* dirent = (struct dirent*)buffer;
2526 
2527 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2528 		get_current_io_context(kernel));
2529 	if (status != B_OK)
2530 		return status;
2531 
2532 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2533 		return B_BUFFER_OVERFLOW;
2534 
2535 	return B_OK;
2536 }
2537 
2538 
2539 /*!	Gets the full path to a given directory vnode.
2540 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2541 	file system doesn't support this call, it will fall back to iterating
2542 	through the parent directory to get the name of the child.
2543 
2544 	To protect against circular loops, it supports a maximum tree depth
2545 	of 256 levels.
2546 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path
	stays valid, as paths aren't stable anyway: the path to a file can
	change at any time.

	It might be a good idea, though, for the calling function to check
	whether the returned path still exists (it's not done here for
	efficiency reasons).
2553 */
2554 static status_t
2555 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2556 	bool kernel)
2557 {
2558 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2559 
2560 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2561 		return B_BAD_VALUE;
2562 
2563 	if (!S_ISDIR(vnode->Type()))
2564 		return B_NOT_A_DIRECTORY;
2565 
2566 	char* path = buffer;
2567 	int32 insert = bufferSize;
2568 	int32 maxLevel = 256;
2569 	int32 length;
2570 	status_t status;
2571 	struct io_context* ioContext = get_current_io_context(kernel);
2572 
2573 	// we don't use get_vnode() here because this call is more
2574 	// efficient and does all we need from get_vnode()
2575 	inc_vnode_ref_count(vnode);
2576 
2577 	if (vnode != ioContext->root) {
		// we haven't reached the IO context root yet -- resolve the vnode
		// to its covered vnode, if any
2580 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2581 			put_vnode(vnode);
2582 			vnode = coveredVnode;
2583 		}
2584 	}
2585 
2586 	path[--insert] = '\0';
2587 		// the path is filled right to left
2588 
2589 	while (true) {
2590 		// the name buffer is also used for fs_read_dir()
2591 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2592 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2593 		struct vnode* parentVnode;
2594 		ino_t parentID;
2595 
2596 		// lookup the parent vnode
2597 		if (vnode == ioContext->root) {
2598 			// we hit the IO context root
2599 			parentVnode = vnode;
2600 			inc_vnode_ref_count(vnode);
2601 		} else {
2602 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2603 			if (status != B_OK)
2604 				goto out;
2605 		}
2606 
2607 		// get the node's name
2608 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2609 			sizeof(nameBuffer), ioContext);
2610 
2611 		if (vnode != ioContext->root) {
			// we haven't reached the IO context root yet -- resolve the
			// vnode to its covered vnode, if any
2614 			if (Vnode* coveredVnode = get_covered_vnode(parentVnode)) {
2615 				put_vnode(parentVnode);
2616 				parentVnode = coveredVnode;
2617 				parentID = parentVnode->id;
2618 			}
2619 		}
2620 
2621 		bool hitRoot = (parentVnode == vnode);
2622 
2623 		// release the current vnode, we only need its parent from now on
2624 		put_vnode(vnode);
2625 		vnode = parentVnode;
2626 
2627 		if (status != B_OK)
2628 			goto out;
2629 
2630 		if (hitRoot) {
2631 			// we have reached "/", which means we have constructed the full
2632 			// path
2633 			break;
2634 		}
2635 
2636 		// TODO: add an explicit check for loops in about 10 levels to do
2637 		// real loop detection
2638 
		// don't go deeper than 'maxLevel' to prevent circular loops
2640 		if (maxLevel-- < 0) {
2641 			status = B_LINK_LIMIT;
2642 			goto out;
2643 		}
2644 
2645 		// add the name in front of the current path
2646 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2647 		length = strlen(name);
2648 		insert -= length;
2649 		if (insert <= 0) {
2650 			status = B_RESULT_NOT_REPRESENTABLE;
2651 			goto out;
2652 		}
2653 		memcpy(path + insert, name, length);
2654 		path[--insert] = '/';
2655 	}
2656 
2657 	// the root dir will result in an empty path: fix it
2658 	if (path[insert] == '\0')
2659 		path[--insert] = '/';
2660 
2661 	TRACE(("  path is: %s\n", path + insert));
2662 
2663 	// move the path to the start of the buffer
2664 	length = bufferSize - insert;
2665 	memmove(buffer, path + insert, length);
2666 
2667 out:
2668 	put_vnode(vnode);
2669 	return status;
2670 }
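

/*!	Illustration (made-up contents): dir_vnode_to_path() assembles the
	path right to left while walking up via "..". For a vnode reachable
	as /boot/home the buffer evolves roughly like this ('.' marks unused
	bytes):

	\code
	// [..............\0]   terminate at the very end of the buffer
	// [..........home\0]   prepend the leaf name
	// [........./home\0]   prepend a '/'
	// [.....boot/home\0]   repeat for every parent directory ...
	// [..../boot/home\0]   ... until the IO context root is reached
	\endcode

	The result is then memmove()d to the start of the buffer.
*/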
2671 
2672 
2673 /*!	Checks the length of every path component, and adds a '.'
2674 	if the path ends in a slash.
2675 	The given path buffer must be able to store at least one
2676 	additional character.
2677 */
2678 static status_t
2679 check_path(char* to)
2680 {
2681 	int32 length = 0;
2682 
2683 	// check length of every path component
2684 
2685 	while (*to) {
2686 		char* begin;
2687 		if (*to == '/')
2688 			to++, length++;
2689 
2690 		begin = to;
2691 		while (*to != '/' && *to)
2692 			to++, length++;
2693 
2694 		if (to - begin > B_FILE_NAME_LENGTH)
2695 			return B_NAME_TOO_LONG;
2696 	}
2697 
2698 	if (length == 0)
2699 		return B_ENTRY_NOT_FOUND;
2700 
2701 	// complete path if there is a slash at the end
2702 
2703 	if (*(to - 1) == '/') {
2704 		if (length > B_PATH_NAME_LENGTH - 2)
2705 			return B_NAME_TOO_LONG;
2706 
2707 		to[0] = '.';
2708 		to[1] = '\0';
2709 	}
2710 
2711 	return B_OK;
2712 }
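

/*!	Example (illustrative only) of check_path()'s in-place completion:

	\code
	char path[B_PATH_NAME_LENGTH];

	strcpy(path, "/boot/home/");
	check_path(path);
		// B_OK -- the trailing slash has been completed:
		// path is now "/boot/home/."

	strcpy(path, "/boot/home");
	check_path(path);
		// B_OK -- path is unchanged

	strcpy(path, "");
	check_path(path);
		// B_ENTRY_NOT_FOUND
	\endcode
*/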
2713 
2714 
2715 static struct file_descriptor*
2716 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2717 {
2718 	struct file_descriptor* descriptor
2719 		= get_fd(get_current_io_context(kernel), fd);
2720 	if (descriptor == NULL)
2721 		return NULL;
2722 
2723 	struct vnode* vnode = fd_vnode(descriptor);
2724 	if (vnode == NULL) {
2725 		put_fd(descriptor);
2726 		return NULL;
2727 	}
2728 
2729 	// ToDo: when we can close a file descriptor at any point, investigate
2730 	//	if this is still valid to do (accessing the vnode without ref_count
2731 	//	or locking)
2732 	*_vnode = vnode;
2733 	return descriptor;
2734 }
2735 
2736 
2737 static struct vnode*
2738 get_vnode_from_fd(int fd, bool kernel)
2739 {
2740 	struct file_descriptor* descriptor;
2741 	struct vnode* vnode;
2742 
2743 	descriptor = get_fd(get_current_io_context(kernel), fd);
2744 	if (descriptor == NULL)
2745 		return NULL;
2746 
2747 	vnode = fd_vnode(descriptor);
2748 	if (vnode != NULL)
2749 		inc_vnode_ref_count(vnode);
2750 
2751 	put_fd(descriptor);
2752 	return vnode;
2753 }
2754 
2755 
2756 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2757 	only the path will be considered. In this case, the \a path must not be
2758 	NULL.
2759 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2760 	and should be NULL for files.
2761 */
2762 static status_t
2763 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2764 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2765 {
2766 	if (fd < 0 && !path)
2767 		return B_BAD_VALUE;
2768 
2769 	if (path != NULL && *path == '\0')
2770 		return B_ENTRY_NOT_FOUND;
2771 
2772 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2773 		// no FD or absolute path
2774 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2775 	}
2776 
2777 	// FD only, or FD + relative path
2778 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2779 	if (!vnode)
2780 		return B_FILE_ERROR;
2781 
2782 	if (path != NULL) {
2783 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2784 			_vnode, _parentID);
2785 	}
2786 
2787 	// there is no relative path to take into account
2788 
2789 	*_vnode = vnode;
2790 	if (_parentID)
2791 		*_parentID = -1;
2792 
2793 	return B_OK;
2794 }
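

/*!	Illustration (hypothetical calls; a real caller must pass a writable
	path buffer -- string literals are shown only for brevity) of the
	three ways fd_and_path_to_vnode() dispatches:

	\code
	// absolute path -- fd is ignored
	fd_and_path_to_vnode(fd, "/boot/home/file", true, &vnode, NULL, kernel);

	// FD + relative path -- resolved relative to the FD's directory
	fd_and_path_to_vnode(dirFD, "file", true, &vnode, NULL, kernel);

	// FD only (path == NULL) -- the FD's own vnode is returned
	fd_and_path_to_vnode(fd, NULL, true, &vnode, NULL, kernel);
	\endcode
*/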
2795 
2796 
2797 static int
2798 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2799 	void* cookie, int openMode, bool kernel)
2800 {
2801 	struct file_descriptor* descriptor;
2802 	int fd;
2803 
2804 	// If the vnode is locked, we don't allow creating a new file/directory
2805 	// file_descriptor for it
2806 	if (vnode && vnode->mandatory_locked_by != NULL
2807 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2808 		return B_BUSY;
2809 
2810 	descriptor = alloc_fd();
2811 	if (!descriptor)
2812 		return B_NO_MEMORY;
2813 
2814 	if (vnode)
2815 		descriptor->u.vnode = vnode;
2816 	else
2817 		descriptor->u.mount = mount;
2818 	descriptor->cookie = cookie;
2819 
2820 	switch (type) {
2821 		// vnode types
2822 		case FDTYPE_FILE:
2823 			descriptor->ops = &sFileOps;
2824 			break;
2825 		case FDTYPE_DIR:
2826 			descriptor->ops = &sDirectoryOps;
2827 			break;
2828 		case FDTYPE_ATTR:
2829 			descriptor->ops = &sAttributeOps;
2830 			break;
2831 		case FDTYPE_ATTR_DIR:
2832 			descriptor->ops = &sAttributeDirectoryOps;
2833 			break;
2834 
2835 		// mount types
2836 		case FDTYPE_INDEX_DIR:
2837 			descriptor->ops = &sIndexDirectoryOps;
2838 			break;
2839 		case FDTYPE_QUERY:
2840 			descriptor->ops = &sQueryOps;
2841 			break;
2842 
2843 		default:
2844 			panic("get_new_fd() called with unknown type %d\n", type);
2845 			break;
2846 	}
2847 	descriptor->type = type;
2848 	descriptor->open_mode = openMode;
2849 
2850 	io_context* context = get_current_io_context(kernel);
2851 	fd = new_fd(context, descriptor);
2852 	if (fd < 0) {
2853 		free(descriptor);
2854 		return B_NO_MORE_FDS;
2855 	}
2856 
2857 	mutex_lock(&context->io_mutex);
2858 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2859 	mutex_unlock(&context->io_mutex);
2860 
2861 	return fd;
2862 }
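

/*!	Sketch (error handling omitted) of the typical pattern in which a
	vnode and an FS cookie are wrapped into a new file descriptor once an
	open hook has succeeded:

	\code
	void* cookie;
	status_t status = FS_CALL(vnode, open, openMode, &cookie);
	if (status == B_OK) {
		int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
			kernel);
		// if fd < 0, the caller must close the cookie and release its
		// vnode reference again
	}
	\endcode
*/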
2863 
2864 
/*!	Normalizes \a path in place. It's otherwise semantically equivalent to
2866 	vfs_normalize_path(). See there for more documentation.
2867 */
2868 static status_t
2869 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2870 {
2871 	VNodePutter dirPutter;
2872 	struct vnode* dir = NULL;
2873 	status_t error;
2874 
2875 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2876 		// get dir vnode + leaf name
2877 		struct vnode* nextDir;
2878 		char leaf[B_FILE_NAME_LENGTH];
2879 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2880 		if (error != B_OK)
2881 			return error;
2882 
2883 		dir = nextDir;
2884 		strcpy(path, leaf);
2885 		dirPutter.SetTo(dir);
2886 
2887 		// get file vnode, if we shall resolve links
2888 		bool fileExists = false;
2889 		struct vnode* fileVnode;
2890 		VNodePutter fileVnodePutter;
2891 		if (traverseLink) {
2892 			inc_vnode_ref_count(dir);
2893 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2894 					NULL) == B_OK) {
2895 				fileVnodePutter.SetTo(fileVnode);
2896 				fileExists = true;
2897 			}
2898 		}
2899 
2900 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2901 			// we're done -- construct the path
2902 			bool hasLeaf = true;
2903 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2904 				// special cases "." and ".." -- get the dir, forget the leaf
2905 				inc_vnode_ref_count(dir);
2906 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2907 					&nextDir, NULL);
2908 				if (error != B_OK)
2909 					return error;
2910 				dir = nextDir;
2911 				dirPutter.SetTo(dir);
2912 				hasLeaf = false;
2913 			}
2914 
2915 			// get the directory path
2916 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2917 			if (error != B_OK)
2918 				return error;
2919 
2920 			// append the leaf name
2921 			if (hasLeaf) {
2922 				// insert a directory separator if this is not the file system
2923 				// root
2924 				if ((strcmp(path, "/") != 0
2925 					&& strlcat(path, "/", pathSize) >= pathSize)
2926 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2927 					return B_NAME_TOO_LONG;
2928 				}
2929 			}
2930 
2931 			return B_OK;
2932 		}
2933 
2934 		// read link
2935 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2936 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2937 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2938 			if (error != B_OK)
2939 				return error;
2940 			path[bufferSize] = '\0';
2941 		} else
2942 			return B_BAD_VALUE;
2943 	}
2944 
2945 	return B_LINK_LIMIT;
2946 }
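

/*!	Example (illustrative only; assumes the leaf itself is not a
	symlink):

	\code
	char path[B_PATH_NAME_LENGTH];
	strcpy(path, "/boot//home/./Desktop");
	normalize_path(path, B_PATH_NAME_LENGTH, true, true);
		// path is now "/boot/home/Desktop" -- redundant slashes and "."
		// components are gone, and intermediate symlinks have been
		// resolved
	\endcode
*/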
2947 
2948 
2949 #ifdef ADD_DEBUGGER_COMMANDS
2950 
2951 
2952 static void
2953 _dump_advisory_locking(advisory_locking* locking)
2954 {
2955 	if (locking == NULL)
2956 		return;
2957 
	kprintf("   lock:        %ld\n", locking->lock);
	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2960 
2961 	int32 index = 0;
2962 	LockList::Iterator iterator = locking->locks.GetIterator();
2963 	while (iterator.HasNext()) {
2964 		struct advisory_lock* lock = iterator.Next();
2965 
2966 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2967 		kprintf("        start:  %Ld\n", lock->start);
2968 		kprintf("        end:    %Ld\n", lock->end);
2969 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2970 	}
2971 }
2972 
2973 
2974 static void
2975 _dump_mount(struct fs_mount* mount)
2976 {
2977 	kprintf("MOUNT: %p\n", mount);
2978 	kprintf(" id:            %ld\n", mount->id);
2979 	kprintf(" device_name:   %s\n", mount->device_name);
2980 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2981 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2982 	kprintf(" partition:     %p\n", mount->partition);
2983 	kprintf(" lock:          %p\n", &mount->rlock);
2984 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2985 		mount->owns_file_device ? " owns_file_device" : "");
2986 
2987 	fs_volume* volume = mount->volume;
2988 	while (volume != NULL) {
2989 		kprintf(" volume %p:\n", volume);
2990 		kprintf("  layer:            %ld\n", volume->layer);
2991 		kprintf("  private_volume:   %p\n", volume->private_volume);
2992 		kprintf("  ops:              %p\n", volume->ops);
2993 		kprintf("  file_system:      %p\n", volume->file_system);
2994 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2995 		volume = volume->super_volume;
2996 	}
2997 
2998 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2999 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3000 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3001 	set_debug_variable("_partition", (addr_t)mount->partition);
3002 }
3003 
3004 
3005 static bool
3006 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3007 	const char* name)
3008 {
3009 	bool insertSlash = buffer[bufferSize] != '\0';
3010 	size_t nameLength = strlen(name);
3011 
3012 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3013 		return false;
3014 
3015 	if (insertSlash)
3016 		buffer[--bufferSize] = '/';
3017 
3018 	bufferSize -= nameLength;
3019 	memcpy(buffer + bufferSize, name, nameLength);
3020 
3021 	return true;
3022 }
3023 
3024 
3025 static bool
3026 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3027 	ino_t nodeID)
3028 {
3029 	if (bufferSize == 0)
3030 		return false;
3031 
3032 	bool insertSlash = buffer[bufferSize] != '\0';
3033 	if (insertSlash)
3034 		buffer[--bufferSize] = '/';
3035 
3036 	size_t size = snprintf(buffer, bufferSize,
3037 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3038 	if (size > bufferSize) {
3039 		if (insertSlash)
3040 			bufferSize++;
3041 		return false;
3042 	}
3043 
3044 	if (size < bufferSize)
3045 		memmove(buffer + bufferSize - size, buffer, size);
3046 
3047 	bufferSize -= size;
3048 	return true;
3049 }
3050 
3051 
3052 static char*
3053 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3054 	bool& _truncated)
3055 {
3056 	// null-terminate the path
3057 	buffer[--bufferSize] = '\0';
3058 
3059 	while (true) {
3060 		while (vnode->covers != NULL)
3061 			vnode = vnode->covers;
3062 
3063 		if (vnode == sRoot) {
3064 			_truncated = bufferSize == 0;
3065 			if (!_truncated)
3066 				buffer[--bufferSize] = '/';
3067 			return buffer + bufferSize;
3068 		}
3069 
3070 		// resolve the name
3071 		ino_t dirID;
3072 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3073 			vnode->id, dirID);
3074 		if (name == NULL) {
3075 			// Failed to resolve the name -- prepend "<dev,node>/".
3076 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3077 				vnode->mount->id, vnode->id);
3078 			return buffer + bufferSize;
3079 		}
3080 
3081 		// prepend the name
3082 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3083 			_truncated = true;
3084 			return buffer + bufferSize;
3085 		}
3086 
3087 		// resolve the directory node
3088 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3089 		if (nextVnode == NULL) {
3090 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3091 				vnode->mount->id, dirID);
3092 			return buffer + bufferSize;
3093 		}
3094 
3095 		vnode = nextVnode;
3096 	}
3097 }
3098 
3099 
3100 static void
3101 _dump_vnode(struct vnode* vnode, bool printPath)
3102 {
3103 	kprintf("VNODE: %p\n", vnode);
3104 	kprintf(" device:        %ld\n", vnode->device);
3105 	kprintf(" id:            %Ld\n", vnode->id);
3106 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3107 	kprintf(" private_node:  %p\n", vnode->private_node);
3108 	kprintf(" mount:         %p\n", vnode->mount);
3109 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3110 	kprintf(" covers:        %p\n", vnode->covers);
3111 	kprintf(" cache:         %p\n", vnode->cache);
3112 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3113 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3114 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3115 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3116 
3117 	_dump_advisory_locking(vnode->advisory_locking);
3118 
3119 	if (printPath) {
3120 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3121 		if (buffer != NULL) {
3122 			bool truncated;
3123 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3124 				B_PATH_NAME_LENGTH, truncated);
3125 			if (path != NULL) {
3126 				kprintf(" path:          ");
3127 				if (truncated)
3128 					kputs("<truncated>/");
3129 				kputs(path);
3130 				kputs("\n");
3131 			} else
3132 				kprintf("Failed to resolve vnode path.\n");
3133 
3134 			debug_free(buffer);
3135 		} else
3136 			kprintf("Failed to allocate memory for constructing the path.\n");
3137 	}
3138 
3139 	set_debug_variable("_node", (addr_t)vnode->private_node);
3140 	set_debug_variable("_mount", (addr_t)vnode->mount);
3141 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3142 	set_debug_variable("_covers", (addr_t)vnode->covers);
3143 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3144 }
3145 
3146 
3147 static int
3148 dump_mount(int argc, char** argv)
3149 {
3150 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3151 		kprintf("usage: %s [id|address]\n", argv[0]);
3152 		return 0;
3153 	}
3154 
3155 	uint32 id = parse_expression(argv[1]);
3156 	struct fs_mount* mount = NULL;
3157 
3158 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3159 	if (mount == NULL) {
3160 		if (IS_USER_ADDRESS(id)) {
3161 			kprintf("fs_mount not found\n");
3162 			return 0;
3163 		}
3164 		mount = (fs_mount*)id;
3165 	}
3166 
3167 	_dump_mount(mount);
3168 	return 0;
3169 }
3170 
3171 
3172 static int
3173 dump_mounts(int argc, char** argv)
3174 {
3175 	if (argc != 1) {
3176 		kprintf("usage: %s\n", argv[0]);
3177 		return 0;
3178 	}
3179 
3180 	kprintf("address     id root       covers     cookie     fs_name\n");
3181 
3182 	struct hash_iterator iterator;
3183 	struct fs_mount* mount;
3184 
3185 	hash_open(sMountsTable, &iterator);
3186 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3187 			!= NULL) {
3188 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3189 			mount->root_vnode->covers, mount->volume->private_volume,
3190 			mount->volume->file_system_name);
3191 
3192 		fs_volume* volume = mount->volume;
3193 		while (volume->super_volume != NULL) {
3194 			volume = volume->super_volume;
3195 			kprintf("                                     %p %s\n",
3196 				volume->private_volume, volume->file_system_name);
3197 		}
3198 	}
3199 
3200 	hash_close(sMountsTable, &iterator, false);
3201 	return 0;
3202 }
3203 
3204 
3205 static int
3206 dump_vnode(int argc, char** argv)
3207 {
3208 	bool printPath = false;
3209 	int argi = 1;
3210 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3211 		printPath = true;
3212 		argi++;
3213 	}
3214 
3215 	if (argi >= argc || argi + 2 < argc) {
3216 		print_debugger_command_usage(argv[0]);
3217 		return 0;
3218 	}
3219 
3220 	struct vnode* vnode = NULL;
3221 
3222 	if (argi + 1 == argc) {
3223 		vnode = (struct vnode*)parse_expression(argv[argi]);
3224 		if (IS_USER_ADDRESS(vnode)) {
3225 			kprintf("invalid vnode address\n");
3226 			return 0;
3227 		}
3228 		_dump_vnode(vnode, printPath);
3229 		return 0;
3230 	}
3231 
3232 	struct hash_iterator iterator;
3233 	dev_t device = parse_expression(argv[argi]);
3234 	ino_t id = parse_expression(argv[argi + 1]);
3235 
3236 	hash_open(sVnodeTable, &iterator);
3237 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3238 		if (vnode->id != id || vnode->device != device)
3239 			continue;
3240 
3241 		_dump_vnode(vnode, printPath);
3242 	}
3243 
3244 	hash_close(sVnodeTable, &iterator, false);
3245 	return 0;
3246 }
3247 
3248 
3249 static int
3250 dump_vnodes(int argc, char** argv)
3251 {
3252 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3253 		kprintf("usage: %s [device]\n", argv[0]);
3254 		return 0;
3255 	}
3256 
3257 	// restrict dumped nodes to a certain device if requested
3258 	dev_t device = parse_expression(argv[1]);
3259 
3260 	struct hash_iterator iterator;
3261 	struct vnode* vnode;
3262 
3263 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3264 		"flags\n");
3265 
3266 	hash_open(sVnodeTable, &iterator);
3267 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3268 		if (vnode->device != device)
3269 			continue;
3270 
3271 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3272 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3273 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3274 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3275 	}
3276 
3277 	hash_close(sVnodeTable, &iterator, false);
3278 	return 0;
3279 }
3280 
3281 
3282 static int
3283 dump_vnode_caches(int argc, char** argv)
3284 {
3285 	struct hash_iterator iterator;
3286 	struct vnode* vnode;
3287 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3289 		kprintf("usage: %s [device]\n", argv[0]);
3290 		return 0;
3291 	}
3292 
3293 	// restrict dumped nodes to a certain device if requested
3294 	dev_t device = -1;
3295 	if (argc > 1)
3296 		device = parse_expression(argv[1]);
3297 
3298 	kprintf("address    dev     inode cache          size   pages\n");
3299 
3300 	hash_open(sVnodeTable, &iterator);
3301 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3302 		if (vnode->cache == NULL)
3303 			continue;
3304 		if (device != -1 && vnode->device != device)
3305 			continue;
3306 
3307 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3308 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3309 				/ B_PAGE_SIZE, vnode->cache->page_count);
3310 	}
3311 
3312 	hash_close(sVnodeTable, &iterator, false);
3313 	return 0;
3314 }
3315 
3316 
3317 int
3318 dump_io_context(int argc, char** argv)
3319 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3321 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3322 		return 0;
3323 	}
3324 
3325 	struct io_context* context = NULL;
3326 
3327 	if (argc > 1) {
3328 		uint32 num = parse_expression(argv[1]);
3329 		if (IS_KERNEL_ADDRESS(num))
3330 			context = (struct io_context*)num;
3331 		else {
3332 			Team* team = team_get_team_struct_locked(num);
3333 			if (team == NULL) {
3334 				kprintf("could not find team with ID %ld\n", num);
3335 				return 0;
3336 			}
3337 			context = (struct io_context*)team->io_context;
3338 		}
3339 	} else
3340 		context = get_current_io_context(true);
3341 
3342 	kprintf("I/O CONTEXT: %p\n", context);
3343 	kprintf(" root vnode:\t%p\n", context->root);
3344 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3345 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3346 	kprintf(" max fds:\t%lu\n", context->table_size);
3347 
3348 	if (context->num_used_fds)
3349 		kprintf("   no.  type         ops  ref  open  mode         pos"
3350 			"      cookie\n");
3351 
3352 	for (uint32 i = 0; i < context->table_size; i++) {
3353 		struct file_descriptor* fd = context->fds[i];
3354 		if (fd == NULL)
3355 			continue;
3356 
3357 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3358 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3359 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3360 			fd->pos, fd->cookie,
3361 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3362 				? "mount" : "vnode",
3363 			fd->u.vnode);
3364 	}
3365 
3366 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3367 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3368 
3369 	set_debug_variable("_cwd", (addr_t)context->cwd);
3370 
3371 	return 0;
3372 }
3373 
3374 
3375 int
3376 dump_vnode_usage(int argc, char** argv)
3377 {
3378 	if (argc != 1) {
3379 		kprintf("usage: %s\n", argv[0]);
3380 		return 0;
3381 	}
3382 
3383 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3384 		kMaxUnusedVnodes);
3385 
3386 	struct hash_iterator iterator;
3387 	hash_open(sVnodeTable, &iterator);
3388 
3389 	uint32 count = 0;
3390 	struct vnode* vnode;
3391 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3392 		count++;
3393 	}
3394 
3395 	hash_close(sVnodeTable, &iterator, false);
3396 
3397 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3398 	return 0;
3399 }
3400 
3401 #endif	// ADD_DEBUGGER_COMMANDS
3402 
3403 /*!	Clears an iovec array of physical pages.
3404 	Returns in \a _bytes the number of bytes successfully cleared.
3405 */
3406 static status_t
3407 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3408 {
3409 	size_t bytes = *_bytes;
3410 	size_t index = 0;
3411 
3412 	while (bytes > 0) {
3413 		size_t length = min_c(vecs[index].iov_len, bytes);
3414 
3415 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3416 			length);
3417 		if (status != B_OK) {
3418 			*_bytes -= bytes;
3419 			return status;
3420 		}
3421 
		bytes -= length;
		index++;
			// move on to the next vec; if the remaining bytes didn't cover
			// the whole vec, the loop terminates anyway
	}
3424 
3425 	return B_OK;
3426 }
3427 
3428 
3429 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3430 	and calls the file system hooks to read/write the request to disk.
3431 */
3432 static status_t
3433 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3434 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3435 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3436 	bool doWrite)
3437 {
3438 	if (fileVecCount == 0) {
3439 		// There are no file vecs at this offset, so we're obviously trying
3440 		// to access the file outside of its bounds
3441 		return B_BAD_VALUE;
3442 	}
3443 
3444 	size_t numBytes = *_numBytes;
3445 	uint32 fileVecIndex;
3446 	size_t vecOffset = *_vecOffset;
3447 	uint32 vecIndex = *_vecIndex;
3448 	status_t status;
3449 	size_t size;
3450 
3451 	if (!doWrite && vecOffset == 0) {
3452 		// now directly read the data from the device
3453 		// the first file_io_vec can be read directly
3454 
3455 		if (fileVecs[0].length < numBytes)
3456 			size = fileVecs[0].length;
3457 		else
3458 			size = numBytes;
3459 
3460 		if (fileVecs[0].offset >= 0) {
3461 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3462 				&vecs[vecIndex], vecCount - vecIndex, &size);
3463 		} else {
3464 			// sparse read
3465 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3466 		}
3467 		if (status != B_OK)
3468 			return status;
3469 
3470 		// TODO: this is a work-around for buggy device drivers!
3471 		//	When our own drivers honour the length, we can:
3472 		//	a) also use this direct I/O for writes (otherwise, it would
3473 		//	   overwrite precious data)
3474 		//	b) panic if the term below is true (at least for writes)
3475 		if (size > fileVecs[0].length) {
3476 			//dprintf("warning: device driver %p doesn't respect total length "
3477 			//	"in read_pages() call!\n", ref->device);
3478 			size = fileVecs[0].length;
3479 		}
3480 
3481 		ASSERT(size <= fileVecs[0].length);
3482 
3483 		// If the file portion was contiguous, we're already done now
3484 		if (size == numBytes)
3485 			return B_OK;
3486 
3487 		// if we reached the end of the file, we can return as well
3488 		if (size != fileVecs[0].length) {
3489 			*_numBytes = size;
3490 			return B_OK;
3491 		}
3492 
3493 		fileVecIndex = 1;
3494 
3495 		// first, find out where we have to continue in our iovecs
3496 		for (; vecIndex < vecCount; vecIndex++) {
3497 			if (size < vecs[vecIndex].iov_len)
3498 				break;
3499 
3500 			size -= vecs[vecIndex].iov_len;
3501 		}
3502 
3503 		vecOffset = size;
3504 	} else {
3505 		fileVecIndex = 0;
3506 		size = 0;
3507 	}
3508 
3509 	// Too bad, let's process the rest of the file_io_vecs
3510 
3511 	size_t totalSize = size;
3512 	size_t bytesLeft = numBytes - size;
3513 
3514 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3515 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3516 		off_t fileOffset = fileVec.offset;
3517 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3518 
3519 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3520 
3521 		// process the complete fileVec
3522 		while (fileLeft > 0) {
3523 			iovec tempVecs[MAX_TEMP_IO_VECS];
3524 			uint32 tempCount = 0;
3525 
3526 			// size tracks how much of what is left of the current fileVec
3527 			// (fileLeft) has been assigned to tempVecs
3528 			size = 0;
3529 
3530 			// assign what is left of the current fileVec to the tempVecs
3531 			for (size = 0; size < fileLeft && vecIndex < vecCount
3532 					&& tempCount < MAX_TEMP_IO_VECS;) {
3533 				// try to satisfy one iovec per iteration (or as much as
3534 				// possible)
3535 
3536 				// bytes left of the current iovec
3537 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3538 				if (vecLeft == 0) {
3539 					vecOffset = 0;
3540 					vecIndex++;
3541 					continue;
3542 				}
3543 
3544 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3545 					vecIndex, vecOffset, size));
3546 
3547 				// actually available bytes
3548 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3549 
3550 				tempVecs[tempCount].iov_base
3551 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3552 				tempVecs[tempCount].iov_len = tempVecSize;
3553 				tempCount++;
3554 
3555 				size += tempVecSize;
3556 				vecOffset += tempVecSize;
3557 			}
3558 
3559 			size_t bytes = size;
3560 
3561 			if (fileOffset == -1) {
3562 				if (doWrite) {
3563 					panic("sparse write attempt: vnode %p", vnode);
3564 					status = B_IO_ERROR;
3565 				} else {
3566 					// sparse read
3567 					status = zero_pages(tempVecs, tempCount, &bytes);
3568 				}
3569 			} else if (doWrite) {
3570 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3571 					tempVecs, tempCount, &bytes);
3572 			} else {
3573 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3574 					tempVecs, tempCount, &bytes);
3575 			}
3576 			if (status != B_OK)
3577 				return status;
3578 
3579 			totalSize += bytes;
3580 			bytesLeft -= size;
3581 			if (fileOffset >= 0)
3582 				fileOffset += size;
3583 			fileLeft -= size;
3584 			//dprintf("-> file left = %Lu\n", fileLeft);
3585 
3586 			if (size != bytes || vecIndex >= vecCount) {
3587 				// there are no more bytes or iovecs, let's bail out
3588 				*_numBytes = totalSize;
3589 				return B_OK;
3590 			}
3591 		}
3592 	}
3593 
3594 	*_vecIndex = vecIndex;
3595 	*_vecOffset = vecOffset;
3596 	*_numBytes = totalSize;
3597 	return B_OK;
3598 }
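

/*!	Illustration (made-up numbers): a request for three pages backed by
	two on-disk extents. Memory and disk runs don't have to line up; the
	function slices them against each other:

	\code
	// memory: vecs[0] = { addr0, 2 * B_PAGE_SIZE }
	//         vecs[1] = { addr1, 1 * B_PAGE_SIZE }
	// disk:   fileVecs[0] = { offset 4096, length 1 * B_PAGE_SIZE }
	//         fileVecs[1] = { offset 65536, length 2 * B_PAGE_SIZE }

	// resulting hook calls for doWrite == false:
	//   read_pages() at offset 4096 into { addr0, 4096 }
	//   read_pages() at offset 65536 into { addr0 + 4096, 4096 },
	//       { addr1, 4096 }
	// a fileVec with offset -1 marks a sparse extent; it is zeroed via
	// zero_pages() instead of being read from disk
	\endcode
*/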
3599 
3600 
3601 //	#pragma mark - public API for file systems
3602 
3603 
3604 extern "C" status_t
3605 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3606 	fs_vnode_ops* ops)
3607 {
3608 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3609 		volume, volume->id, vnodeID, privateNode));
3610 
3611 	if (privateNode == NULL)
3612 		return B_BAD_VALUE;
3613 
3614 	// create the node
3615 	bool nodeCreated;
3616 	struct vnode* vnode;
3617 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3618 		nodeCreated);
3619 	if (status != B_OK)
3620 		return status;
3621 
3622 	WriteLocker nodeLocker(sVnodeLock, true);
3623 		// create_new_vnode_and_lock() has locked for us
3624 
3625 	// file system integrity check:
3626 	// test if the vnode already exists and bail out if this is the case!
3627 	if (!nodeCreated) {
3628 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3629 			volume->id, vnodeID, privateNode, vnode->private_node);
3630 		return B_ERROR;
3631 	}
3632 
3633 	vnode->private_node = privateNode;
3634 	vnode->ops = ops;
3635 	vnode->SetUnpublished(true);
3636 
3637 	TRACE(("returns: %s\n", strerror(status)));
3638 
3639 	return status;
3640 }
3641 
3642 
3643 extern "C" status_t
3644 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3645 	fs_vnode_ops* ops, int type, uint32 flags)
3646 {
3647 	FUNCTION(("publish_vnode()\n"));
3648 
3649 	WriteLocker locker(sVnodeLock);
3650 
3651 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3652 
3653 	bool nodeCreated = false;
3654 	if (vnode == NULL) {
3655 		if (privateNode == NULL)
3656 			return B_BAD_VALUE;
3657 
3658 		// create the node
3659 		locker.Unlock();
3660 			// create_new_vnode_and_lock() will re-lock for us on success
3661 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3662 			nodeCreated);
3663 		if (status != B_OK)
3664 			return status;
3665 
3666 		locker.SetTo(sVnodeLock, true);
3667 	}
3668 
3669 	if (nodeCreated) {
3670 		vnode->private_node = privateNode;
3671 		vnode->ops = ops;
3672 		vnode->SetUnpublished(true);
3673 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3674 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3675 		// already known, but not published
3676 	} else
3677 		return B_BAD_VALUE;
3678 
3679 	bool publishSpecialSubNode = false;
3680 
3681 	vnode->SetType(type);
3682 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3683 	publishSpecialSubNode = is_special_node_type(type)
3684 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3685 
3686 	status_t status = B_OK;
3687 
3688 	// create sub vnodes, if necessary
3689 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3690 		locker.Unlock();
3691 
3692 		fs_volume* subVolume = volume;
3693 		if (volume->sub_volume != NULL) {
3694 			while (status == B_OK && subVolume->sub_volume != NULL) {
3695 				subVolume = subVolume->sub_volume;
3696 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3697 					vnode);
3698 			}
3699 		}
3700 
3701 		if (status == B_OK && publishSpecialSubNode)
3702 			status = create_special_sub_node(vnode, flags);
3703 
3704 		if (status != B_OK) {
3705 			// error -- clean up the created sub vnodes
3706 			while (subVolume->super_volume != volume) {
3707 				subVolume = subVolume->super_volume;
3708 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3709 			}
3710 		}
3711 
3712 		if (status == B_OK) {
3713 			ReadLocker vnodesReadLocker(sVnodeLock);
3714 			AutoLocker<Vnode> nodeLocker(vnode);
3715 			vnode->SetBusy(false);
3716 			vnode->SetUnpublished(false);
3717 		} else {
3718 			locker.Lock();
3719 			hash_remove(sVnodeTable, vnode);
3720 			remove_vnode_from_mount_list(vnode, vnode->mount);
3721 			free(vnode);
3722 		}
3723 	} else {
3724 		// we still hold the write lock -- mark the node unbusy and published
3725 		vnode->SetBusy(false);
3726 		vnode->SetUnpublished(false);
3727 	}
3728 
3729 	TRACE(("returns: %s\n", strerror(status)));
3730 
3731 	return status;
3732 }
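

// Illustrative sketch (not part of this file): the typical two-step pattern a
// file system's create hook uses -- new_vnode() announces the node in busy,
// unpublished state, publish_vnode() makes it generally accessible. The names
// "myfs_create_node" and "gMyFSVnodeOps" are hypothetical placeholders.
#if 0
static status_t
myfs_create_node(fs_volume* volume, ino_t newID, void* privateNode)
{
	status_t status = new_vnode(volume, newID, privateNode, &gMyFSVnodeOps);
	if (status != B_OK)
		return status;

	// ... initialize the on-disk structures; on failure remove_vnode() and
	// put_vnode() would discard the still unpublished node ...

	return publish_vnode(volume, newID, privateNode, &gMyFSVnodeOps, S_IFREG,
		0);
}
#endif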
3733 
3734 
3735 extern "C" status_t
3736 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3737 {
3738 	struct vnode* vnode;
3739 
3740 	if (volume == NULL)
3741 		return B_BAD_VALUE;
3742 
3743 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3744 	if (status != B_OK)
3745 		return status;
3746 
3747 	// If this is a layered FS, we need to get the node cookie for the requested
3748 	// layer.
3749 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3750 		fs_vnode resolvedNode;
3751 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3752 			&resolvedNode);
3753 		if (status != B_OK) {
3754 			panic("get_vnode(): Failed to get super node for vnode %p, "
3755 				"volume: %p", vnode, volume);
3756 			put_vnode(vnode);
3757 			return status;
3758 		}
3759 
3760 		if (_privateNode != NULL)
3761 			*_privateNode = resolvedNode.private_node;
3762 	} else if (_privateNode != NULL)
3763 		*_privateNode = vnode->private_node;
3764 
3765 	return B_OK;
3766 }
3767 
3768 
3769 extern "C" status_t
3770 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3771 {
3772 	struct vnode* vnode;
3773 
3774 	rw_lock_read_lock(&sVnodeLock);
3775 	vnode = lookup_vnode(volume->id, vnodeID);
3776 	rw_lock_read_unlock(&sVnodeLock);
3777 
3778 	if (vnode == NULL)
3779 		return B_BAD_VALUE;
3780 
3781 	inc_vnode_ref_count(vnode);
3782 	return B_OK;
3783 }
3784 
3785 
3786 extern "C" status_t
3787 put_vnode(fs_volume* volume, ino_t vnodeID)
3788 {
3789 	struct vnode* vnode;
3790 
3791 	rw_lock_read_lock(&sVnodeLock);
3792 	vnode = lookup_vnode(volume->id, vnodeID);
3793 	rw_lock_read_unlock(&sVnodeLock);
3794 
3795 	if (vnode == NULL)
3796 		return B_BAD_VALUE;
3797 
3798 	dec_vnode_ref_count(vnode, false, true);
3799 	return B_OK;
3800 }
3801 
3802 
3803 extern "C" status_t
3804 remove_vnode(fs_volume* volume, ino_t vnodeID)
3805 {
3806 	ReadLocker locker(sVnodeLock);
3807 
3808 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3809 	if (vnode == NULL)
3810 		return B_ENTRY_NOT_FOUND;
3811 
3812 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3813 		// this vnode is in use
3814 		return B_BUSY;
3815 	}
3816 
3817 	vnode->Lock();
3818 
3819 	vnode->SetRemoved(true);
3820 	bool removeUnpublished = false;
3821 
3822 	if (vnode->IsUnpublished()) {
3823 		// prepare the vnode for deletion
3824 		removeUnpublished = true;
3825 		vnode->SetBusy(true);
3826 	}
3827 
3828 	vnode->Unlock();
3829 	locker.Unlock();
3830 
3831 	if (removeUnpublished) {
3832 		// If the vnode hasn't been published yet, we delete it here
3833 		atomic_add(&vnode->ref_count, -1);
3834 		free_vnode(vnode, true);
3835 	}
3836 
3837 	return B_OK;
3838 }
3839 
3840 
3841 extern "C" status_t
3842 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3843 {
3844 	struct vnode* vnode;
3845 
3846 	rw_lock_read_lock(&sVnodeLock);
3847 
3848 	vnode = lookup_vnode(volume->id, vnodeID);
3849 	if (vnode) {
3850 		AutoLocker<Vnode> nodeLocker(vnode);
3851 		vnode->SetRemoved(false);
3852 	}
3853 
3854 	rw_lock_read_unlock(&sVnodeLock);
3855 	return B_OK;
3856 }
3857 
3858 
3859 extern "C" status_t
3860 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3861 {
3862 	ReadLocker _(sVnodeLock);
3863 
3864 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3865 		if (_removed != NULL)
3866 			*_removed = vnode->IsRemoved();
3867 		return B_OK;
3868 	}
3869 
3870 	return B_BAD_VALUE;
3871 }
3872 
3873 
3874 extern "C" fs_volume*
3875 volume_for_vnode(fs_vnode* _vnode)
3876 {
3877 	if (_vnode == NULL)
3878 		return NULL;
3879 
3880 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3881 	return vnode->mount->volume;
3882 }
3883 
3884 
3885 #if 0
3886 extern "C" status_t
3887 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3888 	size_t* _numBytes)
3889 {
3890 	struct file_descriptor* descriptor;
3891 	struct vnode* vnode;
3892 
3893 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3894 	if (descriptor == NULL)
3895 		return B_FILE_ERROR;
3896 
3897 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3898 		count, 0, _numBytes);
3899 
3900 	put_fd(descriptor);
3901 	return status;
3902 }
3903 
3904 
3905 extern "C" status_t
3906 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3907 	size_t* _numBytes)
3908 {
3909 	struct file_descriptor* descriptor;
3910 	struct vnode* vnode;
3911 
3912 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3913 	if (descriptor == NULL)
3914 		return B_FILE_ERROR;
3915 
3916 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3917 		count, 0, _numBytes);
3918 
3919 	put_fd(descriptor);
3920 	return status;
3921 }
3922 #endif
3923 
3924 
3925 extern "C" status_t
3926 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3927 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3928 	size_t* _bytes)
3929 {
3930 	struct file_descriptor* descriptor;
3931 	struct vnode* vnode;
3932 
3933 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3934 	if (descriptor == NULL)
3935 		return B_FILE_ERROR;
3936 
3937 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3938 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3939 		false);
3940 
3941 	put_fd(descriptor);
3942 	return status;
3943 }
3944 
3945 
3946 extern "C" status_t
3947 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3948 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3949 	size_t* _bytes)
3950 {
3951 	struct file_descriptor* descriptor;
3952 	struct vnode* vnode;
3953 
3954 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3955 	if (descriptor == NULL)
3956 		return B_FILE_ERROR;
3957 
3958 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3959 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3960 		true);
3961 
3962 	put_fd(descriptor);
3963 	return status;
3964 }
3965 
3966 
3967 extern "C" status_t
3968 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3969 {
3970 	// lookup mount -- the caller is required to make sure that the mount
3971 	// won't go away
3972 	MutexLocker locker(sMountMutex);
3973 	struct fs_mount* mount = find_mount(mountID);
3974 	if (mount == NULL)
3975 		return B_BAD_VALUE;
3976 	locker.Unlock();
3977 
3978 	return mount->entry_cache.Add(dirID, name, nodeID);
3979 }
3980 
3981 
3982 extern "C" status_t
3983 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3984 {
3985 	// lookup mount -- the caller is required to make sure that the mount
3986 	// won't go away
3987 	MutexLocker locker(sMountMutex);
3988 	struct fs_mount* mount = find_mount(mountID);
3989 	if (mount == NULL)
3990 		return B_BAD_VALUE;
3991 	locker.Unlock();
3992 
3993 	return mount->entry_cache.Remove(dirID, name);
3994 }
3995 
3996 
3997 //	#pragma mark - private VFS API
3998 //	Functions the VFS exports for other parts of the kernel
3999 
4000 
4001 /*! Acquires another reference to the vnode that has to be released
4002 	by calling vfs_put_vnode().
4003 */
4004 void
4005 vfs_acquire_vnode(struct vnode* vnode)
4006 {
4007 	inc_vnode_ref_count(vnode);
4008 }
4009 
4010 
4011 /*! This is currently called from file_cache_create() only.
4012 	It's probably a temporary solution as long as devfs requires that
4013 	fs_read_pages()/fs_write_pages() are called with the standard
4014 	open cookie and not with a device cookie.
4015 	If that's done differently, remove this call; it has no other
4016 	purpose.
4017 */
4018 extern "C" status_t
4019 vfs_get_cookie_from_fd(int fd, void** _cookie)
4020 {
4021 	struct file_descriptor* descriptor;
4022 
4023 	descriptor = get_fd(get_current_io_context(true), fd);
4024 	if (descriptor == NULL)
4025 		return B_FILE_ERROR;
4026 
4027 	*_cookie = descriptor->cookie;
4028 	return B_OK;
4029 }
4030 
4031 
4032 extern "C" status_t
4033 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4034 {
4035 	*vnode = get_vnode_from_fd(fd, kernel);
4036 
4037 	if (*vnode == NULL)
4038 		return B_FILE_ERROR;
4039 
4040 	return B_OK;

4041 }
4042 
4043 
4044 extern "C" status_t
4045 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4046 {
4047 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4048 		path, kernel));
4049 
4050 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4051 	if (pathBuffer.InitCheck() != B_OK)
4052 		return B_NO_MEMORY;
4053 
4054 	char* buffer = pathBuffer.LockBuffer();
4055 	strlcpy(buffer, path, pathBuffer.BufferSize());
4056 
4057 	struct vnode* vnode;
4058 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4059 	if (status != B_OK)
4060 		return status;
4061 
4062 	*_vnode = vnode;
4063 	return B_OK;
4064 }
4065 
4066 
4067 extern "C" status_t
4068 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4069 {
4070 	struct vnode* vnode;
4071 
4072 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4073 	if (status != B_OK)
4074 		return status;
4075 
4076 	*_vnode = vnode;
4077 	return B_OK;
4078 }
4079 
4080 
4081 extern "C" status_t
4082 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4083 	const char* name, struct vnode** _vnode)
4084 {
4085 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4086 }
4087 
4088 
4089 extern "C" void
4090 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4091 {
4092 	*_mountID = vnode->device;
4093 	*_vnodeID = vnode->id;
4094 }
4095 
4096 
4097 /*!
4098 	Helper function abstracting the process of "converting" a given
4099 	vnode-pointer to a fs_vnode-pointer.
4100 	Currently only used in bindfs.
4101 */
4102 extern "C" fs_vnode*
4103 vfs_fsnode_for_vnode(struct vnode* vnode)
4104 {
4105 	return vnode;
4106 }
4107 
4108 
4109 /*!
4110 	Calls fs_open() on the given vnode and returns a new
4111 	file descriptor for it
4112 */
4113 int
4114 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4115 {
4116 	return open_vnode(vnode, openMode, kernel);
4117 }
4118 
4119 
4120 /*!	Looks up a vnode with the given mount and vnode ID.
4121 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4122 	to the node.
4123 	It's currently only used by file_cache_create().
4124 */
4125 extern "C" status_t
4126 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4127 {
4128 	rw_lock_read_lock(&sVnodeLock);
4129 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4130 	rw_lock_read_unlock(&sVnodeLock);
4131 
4132 	if (vnode == NULL)
4133 		return B_ERROR;
4134 
4135 	*_vnode = vnode;
4136 	return B_OK;
4137 }
4138 
4139 
4140 extern "C" status_t
4141 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4142 	bool traverseLeafLink, bool kernel, void** _node)
4143 {
4144 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4145 		volume, path, kernel));
4146 
4147 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4148 	if (pathBuffer.InitCheck() != B_OK)
4149 		return B_NO_MEMORY;
4150 
4151 	fs_mount* mount;
4152 	status_t status = get_mount(volume->id, &mount);
4153 	if (status != B_OK)
4154 		return status;
4155 
4156 	char* buffer = pathBuffer.LockBuffer();
4157 	strlcpy(buffer, path, pathBuffer.BufferSize());
4158 
4159 	struct vnode* vnode = mount->root_vnode;
4160 
4161 	if (buffer[0] == '/')
4162 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4163 	else {
4164 		inc_vnode_ref_count(vnode);
4165 			// vnode_path_to_vnode() releases a reference to the starting vnode
4166 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4167 			kernel, &vnode, NULL);
4168 	}
4169 
4170 	put_mount(mount);
4171 
4172 	if (status != B_OK)
4173 		return status;
4174 
4175 	if (vnode->device != volume->id) {
4176 		// wrong mount ID -- must not gain access to foreign file system nodes
4177 		put_vnode(vnode);
4178 		return B_BAD_VALUE;
4179 	}
4180 
4181 	// Use get_vnode() to resolve the cookie for the right layer.
4182 	status = get_vnode(volume, vnode->id, _node);
4183 	put_vnode(vnode);
4184 
4185 	return status;
4186 }
4187 
4188 
4189 status_t
4190 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4191 	struct stat* stat, bool kernel)
4192 {
4193 	status_t status;
4194 
4195 	if (path) {
4196 		// path given: get the stat of the node referred to by (fd, path)
4197 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4198 		if (pathBuffer.InitCheck() != B_OK)
4199 			return B_NO_MEMORY;
4200 
4201 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4202 			traverseLeafLink, stat, kernel);
4203 	} else {
4204 		// no path given: get the FD and use the FD operation
4205 		struct file_descriptor* descriptor
4206 			= get_fd(get_current_io_context(kernel), fd);
4207 		if (descriptor == NULL)
4208 			return B_FILE_ERROR;
4209 
4210 		if (descriptor->ops->fd_read_stat)
4211 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4212 		else
4213 			status = B_UNSUPPORTED;
4214 
4215 		put_fd(descriptor);
4216 	}
4217 
4218 	return status;
4219 }
4220 
4221 
4222 /*!	Finds the full path to the file that contains the module \a moduleName,
4223 	puts it into \a pathBuffer, and returns B_OK for success.
4224 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4225 	\c B_ENTRY_NOT_FOUND if no file could be found.
4226 	\a pathBuffer is clobbered in any case and must not be relied on if this
4227 	function returns unsuccessfully.
4228 	\a basePath and \a pathBuffer must not point to the same space.
4229 */
4230 status_t
4231 vfs_get_module_path(const char* basePath, const char* moduleName,
4232 	char* pathBuffer, size_t bufferSize)
4233 {
4234 	struct vnode* dir;
4235 	struct vnode* file;
4236 	status_t status;
4237 	size_t length;
4238 	char* path;
4239 
4240 	if (bufferSize == 0
4241 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4242 		return B_BUFFER_OVERFLOW;
4243 
4244 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4245 	if (status != B_OK)
4246 		return status;
4247 
4248 	// the path buffer has been clobbered by the above call
4249 	length = strlcpy(pathBuffer, basePath, bufferSize);
4250 	if (pathBuffer[length - 1] != '/')
4251 		pathBuffer[length++] = '/';
4252 
4253 	path = pathBuffer + length;
4254 	bufferSize -= length;
4255 
4256 	while (moduleName) {
4257 		char* nextPath = strchr(moduleName, '/');
4258 		if (nextPath == NULL)
4259 			length = strlen(moduleName);
4260 		else {
4261 			length = nextPath - moduleName;
4262 			nextPath++;
4263 		}
4264 
4265 		if (length + 1 >= bufferSize) {
4266 			status = B_BUFFER_OVERFLOW;
4267 			goto err;
4268 		}
4269 
4270 		memcpy(path, moduleName, length);
4271 		path[length] = '\0';
4272 		moduleName = nextPath;
4273 
4274 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4275 		if (status != B_OK) {
4276 			// vnode_path_to_vnode() has already released the reference to dir
4277 			return status;
4278 		}
4279 
4280 		if (S_ISDIR(file->Type())) {
4281 			// go to the next directory
4282 			path[length] = '/';
4283 			path[length + 1] = '\0';
4284 			path += length + 1;
4285 			bufferSize -= length + 1;
4286 
4287 			dir = file;
4288 		} else if (S_ISREG(file->Type())) {
4289 			// it's a file so it should be what we've searched for
4290 			put_vnode(file);
4291 
4292 			return B_OK;
4293 		} else {
4294 			TRACE(("vfs_get_module_path(): something is strange here: "
4295 				"0x%08lx...\n", file->Type()));
4296 			status = B_ERROR;
4297 			dir = file;
4298 			goto err;
4299 		}
4300 	}
4301 
4302 	// if we got here, the moduleName just pointed to a directory, not to
4303 	// a real module - what should we do in this case?
4304 	status = B_ENTRY_NOT_FOUND;
4305 
4306 err:
4307 	put_vnode(dir);
4308 	return status;
4309 }
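

// Example (values are illustrative only): with basePath
// "/boot/system/add-ons/kernel" and moduleName "file_systems/bfs/v1", the
// loop above appends "file_systems", then "bfs"; as soon as a component
// resolves to a regular file, that file is taken to contain the module and
// B_OK is returned -- the remaining name components (here "v1") select a
// module within that file and don't need to exist as directory entries.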
4310 
4311 
4312 /*!	\brief Normalizes a given path.
4313 
4314 	The path must refer to an existing or non-existing entry in an existing
4315 	directory, that is, after chopping off the leaf component, the remaining
4316 	path must refer to an existing directory.
4317 
4318 	The returned path will be canonical in that it will be absolute, will not
4319 	contain any "." or ".." components or duplicate occurrences of '/'s,
4320 	and none of the directory components will be symbolic links.
4321 
4322 	Any two paths referring to the same entry will result in the same
4323 	normalized path (well, that is pretty much the definition of `normalized',
4324 	isn't it :-).
4325 
4326 	\param path The path to be normalized.
4327 	\param buffer The buffer into which the normalized path will be written.
4328 		   May be the same one as \a path.
4329 	\param bufferSize The size of \a buffer.
4330 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4331 	\param kernel \c true, if the IO context of the kernel shall be used,
4332 		   otherwise that of the team this thread belongs to. Only relevant,
4333 		   if the path is relative (to get the CWD).
4334 	\return \c B_OK if everything went fine, another error code otherwise.
4335 */
4336 status_t
4337 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4338 	bool traverseLink, bool kernel)
4339 {
4340 	if (!path || !buffer || bufferSize < 1)
4341 		return B_BAD_VALUE;
4342 
4343 	if (path != buffer) {
4344 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4345 			return B_BUFFER_OVERFLOW;
4346 	}
4347 
4348 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4349 }
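

// Example usage (sketch; the input path is made up): normalization may be
// done in place, since path == buffer is explicitly allowed.
#if 0
	char buffer[B_PATH_NAME_LENGTH];
	strlcpy(buffer, "/boot/./system//lib/../bin", sizeof(buffer));
	if (vfs_normalize_path(buffer, buffer, sizeof(buffer), true, true)
			== B_OK) {
		dprintf("normalized: %s\n", buffer);
			// prints "/boot/system/bin", provided those directories exist
	}
#endif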
4350 
4351 
4352 /*!	\brief Creates a special node in the file system.
4353 
4354 	The caller gets a reference to the newly created node (which is passed
4355 	back through \a _createdVnode) and is responsible for releasing it.
4356 
4357 	\param path The path where to create the entry for the node. Can be \c NULL,
4358 		in which case the node is created without an entry in the root FS -- it
4359 		will automatically be deleted when the last reference has been released.
4360 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4361 		the target file system will just create the node with its standard
4362 		operations. Depending on the type of the node a subnode might be created
4363 		automatically, though.
4364 	\param mode The type and permissions for the node to be created.
4365 	\param flags Flags to be passed to the creating FS.
4366 	\param kernel \c true, if called in the kernel context (relevant only if
4367 		\a path is not \c NULL and not absolute).
4368 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4369 		file system creating the node, with the private data pointer and
4370 		operations for the super node. Can be \c NULL.
4371 	\param _createdVnode Pointer to pre-allocated storage where to store the
4372 		pointer to the newly created node.
4373 	\return \c B_OK, if everything went fine, another error code otherwise.
4374 */
4375 status_t
4376 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4377 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4378 	struct vnode** _createdVnode)
4379 {
4380 	struct vnode* dirNode;
4381 	char _leaf[B_FILE_NAME_LENGTH];
4382 	char* leaf = NULL;
4383 
4384 	if (path) {
4385 		// We've got a path. Get the dir vnode and the leaf name.
4386 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4387 		if (tmpPathBuffer.InitCheck() != B_OK)
4388 			return B_NO_MEMORY;
4389 
4390 		char* tmpPath = tmpPathBuffer.LockBuffer();
4391 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4392 			return B_NAME_TOO_LONG;
4393 
4394 		// get the dir vnode and the leaf name
4395 		leaf = _leaf;
4396 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4397 		if (error != B_OK)
4398 			return error;
4399 	} else {
4400 		// No path. Create the node in the root FS.
4401 		dirNode = sRoot;
4402 		inc_vnode_ref_count(dirNode);
4403 	}
4404 
4405 	VNodePutter _(dirNode);
4406 
4407 	// check support for creating special nodes
4408 	if (!HAS_FS_CALL(dirNode, create_special_node))
4409 		return B_UNSUPPORTED;
4410 
4411 	// create the node
4412 	fs_vnode superVnode;
4413 	ino_t nodeID;
4414 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4415 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4416 	if (status != B_OK)
4417 		return status;
4418 
4419 	// lookup the node
4420 	rw_lock_read_lock(&sVnodeLock);
4421 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4422 	rw_lock_read_unlock(&sVnodeLock);
4423 
4424 	if (*_createdVnode == NULL) {
4425 		panic("vfs_create_special_node(): lookup of node failed");
4426 		return B_ERROR;
4427 	}
4428 
4429 	return B_OK;
4430 }
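

// Illustrative call (sketch; the path is made up): creating a named FIFO with
// an entry in the file system -- \a mode carries both the node type and the
// permissions.
#if 0
	struct vnode* createdVnode;
	status_t status = vfs_create_special_node("/var/my_fifo", NULL,
		S_IFIFO | 0666, 0, true, NULL, &createdVnode);
	if (status == B_OK)
		put_vnode(createdVnode);
			// release the reference passed back to the caller
#endif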
4431 
4432 
4433 extern "C" void
4434 vfs_put_vnode(struct vnode* vnode)
4435 {
4436 	put_vnode(vnode);
4437 }
4438 
4439 
4440 extern "C" status_t
4441 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4442 {
4443 	// Get current working directory from io context
4444 	struct io_context* context = get_current_io_context(false);
4445 	status_t status = B_OK;
4446 
4447 	mutex_lock(&context->io_mutex);
4448 
4449 	if (context->cwd != NULL) {
4450 		*_mountID = context->cwd->device;
4451 		*_vnodeID = context->cwd->id;
4452 	} else
4453 		status = B_ERROR;
4454 
4455 	mutex_unlock(&context->io_mutex);
4456 	return status;
4457 }
4458 
4459 
4460 status_t
4461 vfs_unmount(dev_t mountID, uint32 flags)
4462 {
4463 	return fs_unmount(NULL, mountID, flags, true);
4464 }
4465 
4466 
4467 extern "C" status_t
4468 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4469 {
4470 	struct vnode* vnode;
4471 
4472 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4473 	if (status != B_OK)
4474 		return status;
4475 
4476 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4477 	put_vnode(vnode);
4478 	return B_OK;
4479 }
4480 
4481 
4482 extern "C" void
4483 vfs_free_unused_vnodes(int32 level)
4484 {
4485 	vnode_low_resource_handler(NULL,
4486 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4487 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4488 		level);
4489 }
4490 
4491 
4492 extern "C" bool
4493 vfs_can_page(struct vnode* vnode, void* cookie)
4494 {
4495 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4496 
4497 	if (HAS_FS_CALL(vnode, can_page))
4498 		return FS_CALL(vnode, can_page, cookie);
4499 	return false;
4500 }
4501 
4502 
4503 extern "C" status_t
4504 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4505 	const generic_io_vec* vecs, size_t count, uint32 flags,
4506 	generic_size_t* _numBytes)
4507 {
4508 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4509 		pos));
4510 
4511 #if VFS_PAGES_IO_TRACING
4512 	generic_size_t bytesRequested = *_numBytes;
4513 #endif
4514 
4515 	IORequest request;
4516 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4517 	if (status == B_OK) {
4518 		status = vfs_vnode_io(vnode, cookie, &request);
4519 		if (status == B_OK)
4520 			status = request.Wait();
4521 		*_numBytes = request.TransferredBytes();
4522 	}
4523 
4524 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4525 		status, *_numBytes));
4526 
4527 	return status;
4528 }
4529 
4530 
4531 extern "C" status_t
4532 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4533 	const generic_io_vec* vecs, size_t count, uint32 flags,
4534 	generic_size_t* _numBytes)
4535 {
4536 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4537 		pos));
4538 
4539 #if VFS_PAGES_IO_TRACING
4540 	generic_size_t bytesRequested = *_numBytes;
4541 #endif
4542 
4543 	IORequest request;
4544 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4545 	if (status == B_OK) {
4546 		status = vfs_vnode_io(vnode, cookie, &request);
4547 		if (status == B_OK)
4548 			status = request.Wait();
4549 		*_numBytes = request.TransferredBytes();
4550 	}
4551 
4552 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4553 		status, *_numBytes));
4554 
4555 	return status;
4556 }
4557 
4558 
4559 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4560 	created if \a allocate is \c true.
4561 	In case it's successful, it will also grab a reference to the cache
4562 	it returns.
4563 */
4564 extern "C" status_t
4565 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4566 {
4567 	if (vnode->cache != NULL) {
4568 		vnode->cache->AcquireRef();
4569 		*_cache = vnode->cache;
4570 		return B_OK;
4571 	}
4572 
4573 	rw_lock_read_lock(&sVnodeLock);
4574 	vnode->Lock();
4575 
4576 	status_t status = B_OK;
4577 
4578 	// The cache could have been created in the meantime
4579 	if (vnode->cache == NULL) {
4580 		if (allocate) {
4581 			// TODO: actually the vnode needs to be busy already here, or
4582 			//	else this won't work...
4583 			bool wasBusy = vnode->IsBusy();
4584 			vnode->SetBusy(true);
4585 
4586 			vnode->Unlock();
4587 			rw_lock_read_unlock(&sVnodeLock);
4588 
4589 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4590 
4591 			rw_lock_read_lock(&sVnodeLock);
4592 			vnode->Lock();
4593 			vnode->SetBusy(wasBusy);
4594 		} else
4595 			status = B_BAD_VALUE;
4596 	}
4597 
4598 	vnode->Unlock();
4599 	rw_lock_read_unlock(&sVnodeLock);
4600 
4601 	if (status == B_OK) {
4602 		vnode->cache->AcquireRef();
4603 		*_cache = vnode->cache;
4604 	}
4605 
4606 	return status;
4607 }
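

// Note: the two NULL tests of vnode->cache above form a double-checked lock --
// the first, unsynchronized test keeps the common case cheap, while the second
// test under sVnodeLock catches a cache created concurrently.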
4608 
4609 
4610 status_t
4611 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4612 	file_io_vec* vecs, size_t* _count)
4613 {
4614 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4615 		vnode, vecs, offset, size));
4616 
4617 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4618 }
4619 
4620 
4621 status_t
4622 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4623 {
4624 	status_t status = FS_CALL(vnode, read_stat, stat);
4625 
4626 	// fill in the st_dev and st_ino fields
4627 	if (status == B_OK) {
4628 		stat->st_dev = vnode->device;
4629 		stat->st_ino = vnode->id;
4630 		stat->st_rdev = -1;
4631 	}
4632 
4633 	return status;
4634 }
4635 
4636 
4637 status_t
4638 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4639 {
4640 	struct vnode* vnode;
4641 	status_t status = get_vnode(device, inode, &vnode, true, false);
4642 	if (status != B_OK)
4643 		return status;
4644 
4645 	status = FS_CALL(vnode, read_stat, stat);
4646 
4647 	// fill in the st_dev and st_ino fields
4648 	if (status == B_OK) {
4649 		stat->st_dev = vnode->device;
4650 		stat->st_ino = vnode->id;
4651 		stat->st_rdev = -1;
4652 	}
4653 
4654 	put_vnode(vnode);
4655 	return status;
4656 }
4657 
4658 
4659 status_t
4660 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4661 {
4662 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4663 }
4664 
4665 
4666 status_t
4667 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4668 	char* path, size_t pathLength)
4669 {
4670 	struct vnode* vnode;
4671 	status_t status;
4672 
4673 	// filter invalid leaf names
4674 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4675 		return B_BAD_VALUE;
4676 
4677 	// get the vnode matching the dir's node_ref
4678 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4679 		// special cases "." and "..": we can directly get the vnode of the
4680 		// referenced directory
4681 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4682 		leaf = NULL;
4683 	} else
4684 		status = get_vnode(device, inode, &vnode, true, false);
4685 	if (status != B_OK)
4686 		return status;
4687 
4688 	// get the directory path
4689 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4690 	put_vnode(vnode);
4691 		// we don't need the vnode anymore
4692 	if (status != B_OK)
4693 		return status;
4694 
4695 	// append the leaf name
4696 	if (leaf) {
4697 		// insert a directory separator if this is not the file system root
4698 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4699 				>= pathLength)
4700 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4701 			return B_NAME_TOO_LONG;
4702 		}
4703 	}
4704 
4705 	return B_OK;
4706 }
4707 
4708 
4709 /*!	If the given descriptor locked its vnode, that lock will be released. */
4710 void
4711 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4712 {
4713 	struct vnode* vnode = fd_vnode(descriptor);
4714 
4715 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4716 		vnode->mandatory_locked_by = NULL;
4717 }
4718 
4719 
4720 /*!	Closes all file descriptors of the specified I/O context that
4721 	have the O_CLOEXEC flag set.
4722 */
4723 void
4724 vfs_exec_io_context(io_context* context)
4725 {
4726 	uint32 i;
4727 
4728 	for (i = 0; i < context->table_size; i++) {
4729 		mutex_lock(&context->io_mutex);
4730 
4731 		struct file_descriptor* descriptor = context->fds[i];
4732 		bool remove = false;
4733 
4734 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4735 			context->fds[i] = NULL;
4736 			context->num_used_fds--;
4737 
4738 			remove = true;
4739 		}
4740 
4741 		mutex_unlock(&context->io_mutex);
4742 
4743 		if (remove) {
4744 			close_fd(descriptor);
4745 			put_fd(descriptor);
4746 		}
4747 	}
4748 }
4749 
4750 
4751 /*! Sets up a new io_context structure, and inherits the properties
4752 	of the parent io_context if one is given.
4753 */
4754 io_context*
4755 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4756 {
4757 	io_context* context = (io_context*)malloc(sizeof(io_context));
4758 	if (context == NULL)
4759 		return NULL;
4760 
4761 	TIOC(NewIOContext(context, parentContext));
4762 
4763 	memset(context, 0, sizeof(io_context));
4764 	context->ref_count = 1;
4765 
4766 	MutexLocker parentLocker;
4767 
4768 	size_t tableSize;
4769 	if (parentContext) {
4770 		parentLocker.SetTo(parentContext->io_mutex, false);
4771 		tableSize = parentContext->table_size;
4772 	} else
4773 		tableSize = DEFAULT_FD_TABLE_SIZE;
4774 
4775 	// allocate space for FDs and their close-on-exec flag
4776 	context->fds = (file_descriptor**)malloc(
4777 		sizeof(struct file_descriptor*) * tableSize
4778 		+ sizeof(struct select_sync*) * tableSize
4779 		+ (tableSize + 7) / 8);
4780 	if (context->fds == NULL) {
4781 		free(context);
4782 		return NULL;
4783 	}
4784 
4785 	context->select_infos = (select_info**)(context->fds + tableSize);
4786 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4787 
4788 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4789 		+ sizeof(struct select_sync*) * tableSize
4790 		+ (tableSize + 7) / 8);
4791 
4792 	mutex_init(&context->io_mutex, "I/O context");
4793 
4794 	// Copy all parent file descriptors
4795 
4796 	if (parentContext) {
4797 		size_t i;
4798 
4799 		mutex_lock(&sIOContextRootLock);
4800 		context->root = parentContext->root;
4801 		if (context->root)
4802 			inc_vnode_ref_count(context->root);
4803 		mutex_unlock(&sIOContextRootLock);
4804 
4805 		context->cwd = parentContext->cwd;
4806 		if (context->cwd)
4807 			inc_vnode_ref_count(context->cwd);
4808 
4809 		for (i = 0; i < tableSize; i++) {
4810 			struct file_descriptor* descriptor = parentContext->fds[i];
4811 
4812 			if (descriptor != NULL) {
4813 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4814 				if (closeOnExec && purgeCloseOnExec)
4815 					continue;
4816 
4817 				TFD(InheritFD(context, i, descriptor, parentContext));
4818 
4819 				context->fds[i] = descriptor;
4820 				context->num_used_fds++;
4821 				atomic_add(&descriptor->ref_count, 1);
4822 				atomic_add(&descriptor->open_count, 1);
4823 
4824 				if (closeOnExec)
4825 					fd_set_close_on_exec(context, i, true);
4826 			}
4827 		}
4828 
4829 		parentLocker.Unlock();
4830 	} else {
4831 		context->root = sRoot;
4832 		context->cwd = sRoot;
4833 
4834 		if (context->root)
4835 			inc_vnode_ref_count(context->root);
4836 
4837 		if (context->cwd)
4838 			inc_vnode_ref_count(context->cwd);
4839 	}
4840 
4841 	context->table_size = tableSize;
4842 
4843 	list_init(&context->node_monitors);
4844 	context->max_monitors = DEFAULT_NODE_MONITORS;
4845 
4846 	return context;
4847 }
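

// Memory layout of the combined allocation set up by vfs_new_io_context()
// above (a single malloc() serves all three tables, for a table size of T):
//
//	context->fds               -- T * sizeof(file_descriptor*)
//	context->select_infos      -- T * sizeof(select_info*)
//	context->fds_close_on_exec -- (T + 7) / 8 bytes, one bit per descriptor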
4848 
4849 
4850 static status_t
4851 vfs_free_io_context(io_context* context)
4852 {
4853 	uint32 i;
4854 
4855 	TIOC(FreeIOContext(context));
4856 
4857 	if (context->root)
4858 		put_vnode(context->root);
4859 
4860 	if (context->cwd)
4861 		put_vnode(context->cwd);
4862 
4863 	mutex_lock(&context->io_mutex);
4864 
4865 	for (i = 0; i < context->table_size; i++) {
4866 		if (struct file_descriptor* descriptor = context->fds[i]) {
4867 			close_fd(descriptor);
4868 			put_fd(descriptor);
4869 		}
4870 	}
4871 
4872 	mutex_destroy(&context->io_mutex);
4873 
4874 	remove_node_monitors(context);
4875 	free(context->fds);
4876 	free(context);
4877 
4878 	return B_OK;
4879 }
4880 
4881 
4882 void
4883 vfs_get_io_context(io_context* context)
4884 {
4885 	atomic_add(&context->ref_count, 1);
4886 }
4887 
4888 
4889 void
4890 vfs_put_io_context(io_context* context)
4891 {
4892 	if (atomic_add(&context->ref_count, -1) == 1)
4893 		vfs_free_io_context(context);
4894 }
4895 
4896 
4897 static status_t
4898 vfs_resize_fd_table(struct io_context* context, const int newSize)
4899 {
4900 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4901 		return B_BAD_VALUE;
4902 
4903 	TIOC(ResizeIOContext(context, newSize));
4904 
4905 	MutexLocker _(context->io_mutex);
4906 
4907 	int oldSize = context->table_size;
4908 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
4909 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
4910 
4911 	// If the tables shrink, make sure none of the fds being dropped are in use.
4912 	if (newSize < oldSize) {
4913 		for (int i = oldSize; i-- > newSize;) {
4914 			if (context->fds[i])
4915 				return B_BUSY;
4916 		}
4917 	}
4918 
4919 	// store pointers to the old tables
4920 	file_descriptor** oldFDs = context->fds;
4921 	select_info** oldSelectInfos = context->select_infos;
4922 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4923 
4924 	// allocate new tables
4925 	file_descriptor** newFDs = (file_descriptor**)malloc(
4926 		sizeof(struct file_descriptor*) * newSize
4927 		+ sizeof(struct select_sync*) * newSize
4928 		+ newCloseOnExecBitmapSize);
4929 	if (newFDs == NULL)
4930 		return B_NO_MEMORY;
4931 
4932 	context->fds = newFDs;
4933 	context->select_infos = (select_info**)(context->fds + newSize);
4934 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4935 	context->table_size = newSize;
4936 
4937 	// copy entries from old tables
4938 	int toCopy = min_c(oldSize, newSize);
4939 
4940 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4941 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4942 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4943 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
4944 
4945 	// clear additional entries, if the tables grow
4946 	if (newSize > oldSize) {
4947 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4948 		memset(context->select_infos + oldSize, 0,
4949 			sizeof(void*) * (newSize - oldSize));
4950 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
4951 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
4952 	}
4953 
4954 	free(oldFDs);
4955 
4956 	return B_OK;
4957 }
4958 
4959 
4960 static status_t
4961 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4962 {
4963 	status_t status = B_OK;
4964 
4965 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4966 		return B_BAD_VALUE;
4967 
4968 	mutex_lock(&context->io_mutex);
4969 
4970 	if ((size_t)newSize < context->num_monitors) {
4971 		status = B_BUSY;
4972 		goto out;
4973 	}
4974 	context->max_monitors = newSize;
4975 
4976 out:
4977 	mutex_unlock(&context->io_mutex);
4978 	return status;
4979 }
4980 
4981 
4982 status_t
4983 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
4984 	ino_t* _mountPointNodeID)
4985 {
4986 	ReadLocker nodeLocker(sVnodeLock);
4987 	MutexLocker mountLocker(sMountMutex);
4988 
4989 	struct fs_mount* mount = find_mount(mountID);
4990 	if (mount == NULL)
4991 		return B_BAD_VALUE;
4992 
4993 	Vnode* mountPoint = mount->covers_vnode;
4994 
4995 	*_mountPointMountID = mountPoint->device;
4996 	*_mountPointNodeID = mountPoint->id;
4997 
4998 	return B_OK;
4999 }
5000 
5001 
5002 status_t
5003 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5004 	ino_t coveredNodeID)
5005 {
5006 	// get the vnodes
5007 	Vnode* vnode;
5008 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5009 	if (error != B_OK)
5010 		return B_BAD_VALUE;
5011 	VNodePutter vnodePutter(vnode);
5012 
5013 	Vnode* coveredVnode;
5014 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5015 		false);
5016 	if (error != B_OK)
5017 		return B_BAD_VALUE;
5018 	VNodePutter coveredVnodePutter(coveredVnode);
5019 
5020 	// establish the covered/covering links
5021 	WriteLocker locker(sVnodeLock);
5022 
5023 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5024 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5025 		return B_BUSY;
5026 	}
5027 
5028 	vnode->covers = coveredVnode;
5029 	vnode->SetCovering(true);
5030 
5031 	coveredVnode->covered_by = vnode;
5032 	coveredVnode->SetCovered(true);
5033 
5034 	// the vnodes now reference each other
5035 	inc_vnode_ref_count(vnode);
5036 	inc_vnode_ref_count(coveredVnode);
5037 
5038 	return B_OK;
5039 }
5040 
5041 
5042 int
5043 vfs_getrlimit(int resource, struct rlimit* rlp)
5044 {
5045 	if (!rlp)
5046 		return B_BAD_ADDRESS;
5047 
5048 	switch (resource) {
5049 		case RLIMIT_NOFILE:
5050 		{
5051 			struct io_context* context = get_current_io_context(false);
5052 			MutexLocker _(context->io_mutex);
5053 
5054 			rlp->rlim_cur = context->table_size;
5055 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5056 			return 0;
5057 		}
5058 
5059 		case RLIMIT_NOVMON:
5060 		{
5061 			struct io_context* context = get_current_io_context(false);
5062 			MutexLocker _(context->io_mutex);
5063 
5064 			rlp->rlim_cur = context->max_monitors;
5065 			rlp->rlim_max = MAX_NODE_MONITORS;
5066 			return 0;
5067 		}
5068 
5069 		default:
5070 			return B_BAD_VALUE;
5071 	}
5072 }
5073 
5074 
5075 int
5076 vfs_setrlimit(int resource, const struct rlimit* rlp)
5077 {
5078 	if (!rlp)
5079 		return B_BAD_ADDRESS;
5080 
5081 	switch (resource) {
5082 		case RLIMIT_NOFILE:
5083 			/* TODO: check getuid() */
5084 			if (rlp->rlim_max != RLIM_SAVED_MAX
5085 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5086 				return B_NOT_ALLOWED;
5087 
5088 			return vfs_resize_fd_table(get_current_io_context(false),
5089 				rlp->rlim_cur);
5090 
5091 		case RLIMIT_NOVMON:
5092 			/* TODO: check getuid() */
5093 			if (rlp->rlim_max != RLIM_SAVED_MAX
5094 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5095 				return B_NOT_ALLOWED;
5096 
5097 			return vfs_resize_monitor_table(get_current_io_context(false),
5098 				rlp->rlim_cur);
5099 
5100 		default:
5101 			return B_BAD_VALUE;
5102 	}
5103 }
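

// Example (sketch): how the POSIX setrlimit(RLIMIT_NOFILE, ...) path grows a
// team's FD table through this interface.
#if 0
	struct rlimit rl;
	rl.rlim_cur = 1024;
		// new FD table size
	rl.rlim_max = MAX_FD_TABLE_SIZE;
		// must match the fixed maximum (or RLIM_SAVED_MAX)
	int result = vfs_setrlimit(RLIMIT_NOFILE, &rl);
		// on success the current team's FD table now has 1024 slots
#endif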
5104 
5105 
5106 status_t
5107 vfs_init(kernel_args* args)
5108 {
5109 	vnode::StaticInit();
5110 
5111 	if (init_fd_paths_hash_table() == false)
5112 		panic("vfs_init: error creating vnode to paths hash table\n");
5113 
5114 	struct vnode dummyVnode;
5115 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5116 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5117 	if (sVnodeTable == NULL)
5118 		panic("vfs_init: error creating vnode hash table\n");
5119 
5120 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5121 
5122 	struct fs_mount dummyMount;
5123 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5124 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5125 	if (sMountsTable == NULL)
5126 		panic("vfs_init: error creating mounts hash table\n");
5127 
5128 	node_monitor_init();
5129 
5130 	sRoot = NULL;
5131 
5132 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5133 
5134 	if (block_cache_init() != B_OK)
5135 		return B_ERROR;
5136 
5137 #ifdef ADD_DEBUGGER_COMMANDS
5138 	// add some debugger commands
5139 	add_debugger_command_etc("vnode", &dump_vnode,
5140 		"Print info about the specified vnode",
5141 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5142 		"Prints information about the vnode specified by address <vnode> or\n"
5143 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5144 		"constructed and printed. It might not be possible to construct a\n"
5145 		"complete path, though.\n",
5146 		0);
5147 	add_debugger_command("vnodes", &dump_vnodes,
5148 		"list all vnodes (from the specified device)");
5149 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5150 		"list all vnode caches");
5151 	add_debugger_command("mount", &dump_mount,
5152 		"info about the specified fs_mount");
5153 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5154 	add_debugger_command("io_context", &dump_io_context,
5155 		"info about the I/O context");
5156 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5157 		"info about vnode usage");
5158 #endif
5159 
5160 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5161 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5162 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5163 		0);
5164 
5165 	file_map_init();
5166 
5167 	return file_cache_init();
5168 }
5169 
5170 
5171 //	#pragma mark - fd_ops implementations
5172 
5173 
5174 /*!
5175 	Calls fs_open() on the given vnode and returns a new
5176 	file descriptor for it
5177 */
5178 static int
5179 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5180 {
5181 	void* cookie;
5182 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5183 	if (status != B_OK)
5184 		return status;
5185 
5186 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5187 	if (fd < 0) {
5188 		FS_CALL(vnode, close, cookie);
5189 		FS_CALL(vnode, free_cookie, cookie);
5190 	}
5191 	return fd;
5192 }
5193 
5194 
5195 /*!
5196 	Opens the node at (\a directory, \a name), creating it first if it
5197 	doesn't exist yet, and returns a new file descriptor for it
5198 */
5199 static int
5200 create_vnode(struct vnode* directory, const char* name, int openMode,
5201 	int perms, bool kernel)
5202 {
5203 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5204 	status_t status = B_ERROR;
5205 	struct vnode* vnode;
5206 	void* cookie;
5207 	ino_t newID;
5208 
5209 	// This is somewhat tricky: If the entry already exists, the FS responsible
5210 	// for the directory might not necessarily also be the one responsible for
5211 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5212 	// we can actually never call the create() hook without O_EXCL. Instead we
5213 	// try to look the entry up first. If it already exists, we just open the
5214 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5215 	// introduces a race condition, since someone else might have created the
5216 	// entry in the meantime. We hope the respective FS returns the correct
5217 	// error code, in which case we retry (up to 3 times).
5218 
5219 	for (int i = 0; i < 3 && status != B_OK; i++) {
5220 		// look the node up
5221 		status = lookup_dir_entry(directory, name, &vnode);
5222 		if (status == B_OK) {
5223 			VNodePutter putter(vnode);
5224 
5225 			if ((openMode & O_EXCL) != 0)
5226 				return B_FILE_EXISTS;
5227 
5228 			// If the node is a symlink, we have to follow it, unless
5229 			// O_NOTRAVERSE is set.
5230 			if (S_ISLNK(vnode->Type()) && traverse) {
5231 				putter.Put();
5232 				char clonedName[B_FILE_NAME_LENGTH + 1];
5233 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5234 						>= B_FILE_NAME_LENGTH) {
5235 					return B_NAME_TOO_LONG;
5236 				}
5237 
5238 				inc_vnode_ref_count(directory);
5239 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5240 					kernel, &vnode, NULL);
5241 				if (status != B_OK)
5242 					return status;
5243 
5244 				putter.SetTo(vnode);
5245 			}
5246 
5247 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5248 				put_vnode(vnode);
5249 				return B_LINK_LIMIT;
5250 			}
5251 
5252 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5253 			// on success keep the vnode reference for the FD
5254 			if (fd >= 0)
5255 				putter.Detach();
5256 
5257 			insert_fd_path(vnode, fd, kernel, directory->id, name);
5258 
5259 			return fd;
5260 		}
5261 
5262 		// it doesn't exist yet -- try to create it
5263 
5264 		if (!HAS_FS_CALL(directory, create))
5265 			return B_READ_ONLY_DEVICE;
5266 
5267 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5268 			&cookie, &newID);
5269 		if (status != B_OK
5270 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5271 			return status;
5272 		}
5273 	}
5274 
5275 	if (status != B_OK)
5276 		return status;
5277 
5278 	// the node has been created successfully
5279 
5280 	rw_lock_read_lock(&sVnodeLock);
5281 	vnode = lookup_vnode(directory->device, newID);
5282 	rw_lock_read_unlock(&sVnodeLock);
5283 
5284 	if (vnode == NULL) {
5285 		panic("vfs: fs_create() returned success but there is no vnode, "
5286 			"mount ID %ld!\n", directory->device);
5287 		return B_BAD_VALUE;
5288 	}
5289 
5290 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5291 	if (fd >= 0) {
5292 		insert_fd_path(vnode, fd, kernel, directory->id, name);
5293 		return fd;
5294 	}
5295 
5296 	status = fd;
5297 
5298 	// something went wrong, clean up
5299 
5300 	FS_CALL(vnode, close, cookie);
5301 	FS_CALL(vnode, free_cookie, cookie);
5302 	put_vnode(vnode);
5303 
5304 	FS_CALL(directory, unlink, name);
5305 
5306 	return status;
5307 }
5308 
5309 
5310 /*! Calls fs open_dir() on the given vnode and returns a new
5311 	file descriptor for it
5312 */
5313 static int
5314 open_dir_vnode(struct vnode* vnode, bool kernel)
5315 {
5316 	void* cookie;
5317 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5318 	if (status != B_OK)
5319 		return status;
5320 
5321 	// directory is opened, create a fd
5322 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5323 	if (status >= 0)
5324 		return status;
5325 
5326 	FS_CALL(vnode, close_dir, cookie);
5327 	FS_CALL(vnode, free_dir_cookie, cookie);
5328 
5329 	return status;
5330 }
5331 
5332 
5333 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5334 	file descriptor for it.
5335 	Used by attr_dir_open(), and attr_dir_open_fd().
5336 */
5337 static int
5338 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5339 {
5340 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5341 		return B_UNSUPPORTED;
5342 
5343 	void* cookie;
5344 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5345 	if (status != B_OK)
5346 		return status;
5347 
5348 	// directory is opened, create a fd
5349 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5350 		kernel);
5351 	if (status >= 0)
5352 		return status;
5353 
5354 	FS_CALL(vnode, close_attr_dir, cookie);
5355 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5356 
5357 	return status;
5358 }
5359 
5360 
5361 static int
5362 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5363 	int openMode, int perms, bool kernel)
5364 {
5365 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5366 		"kernel %d\n", name, openMode, perms, kernel));
5367 
5368 	// get directory to put the new file in
5369 	struct vnode* directory;
5370 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5371 	if (status != B_OK)
5372 		return status;
5373 
5374 	status = create_vnode(directory, name, openMode, perms, kernel);
5375 	put_vnode(directory);
5376 
5377 	return status;
5378 }
5379 
5380 
5381 static int
5382 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5383 {
5384 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5385 		openMode, perms, kernel));
5386 
5387 	// get directory to put the new file in
5388 	char name[B_FILE_NAME_LENGTH];
5389 	struct vnode* directory;
5390 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5391 		kernel);
5392 	if (status < 0)
5393 		return status;
5394 
5395 	status = create_vnode(directory, name, openMode, perms, kernel);
5396 
5397 	put_vnode(directory);
5398 	return status;
5399 }
5400 
5401 
5402 static int
5403 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5404 	int openMode, bool kernel)
5405 {
5406 	if (name == NULL || *name == '\0')
5407 		return B_BAD_VALUE;
5408 
5409 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5410 		mountID, directoryID, name, openMode));
5411 
5412 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5413 
5414 	// get the vnode matching the entry_ref
5415 	struct vnode* vnode;
5416 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5417 		kernel, &vnode);
5418 	if (status != B_OK)
5419 		return status;
5420 
5421 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5422 		put_vnode(vnode);
5423 		return B_LINK_LIMIT;
5424 	}
5425 
5426 	int newFD = open_vnode(vnode, openMode, kernel);
5427 	if (newFD >= 0) {
5428 		// The vnode reference has been transferred to the FD
5429 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5430 			directoryID, vnode->id, name);
5431 	} else
5432 		put_vnode(vnode);
5433 
5434 	insert_fd_path(vnode, newFD, kernel, directoryID, name);
5435 
5436 	return newFD;
5437 }
5438 
5439 
5440 static const char*
5441 leaf(const char* path)
5442 {
5443 	if (path == NULL)
5444 		return NULL;
5445 
5446 	int32 pathLength = strlen(path);
5447 	if (pathLength > B_FILE_NAME_LENGTH)
5448 		return NULL;
5449 	// only "/" has trailing slashes -- then we have to return the complete
5450 	// buffer, as we have to do in case there are no slashes at all
5451 	if (pathLength != 1 || path[0] != '/') {
5452 		for (int32 i = pathLength - 1; i >= 0; i--) {
5453 			if (path[i] == '/')
5454 				return path + i + 1;
5455 		}
5456 	}
5457 	return path;
5458 }
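

// Behavior examples: leaf("/a/b/c") yields "c", leaf("name") yields "name",
// and leaf("/") yields "/" itself; for paths longer than B_FILE_NAME_LENGTH
// NULL is returned.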
5459 
5460 
5461 static int
5462 file_open(int fd, char* path, int openMode, bool kernel)
5463 {
5464 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5465 
5466 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5467 		fd, path, openMode, kernel));
5468 
5469 	// extract the leaf here, because the path gets clobbered later
5470 	const char* name = leaf(path);
5471 	// get the vnode matching the vnode + path combination
5472 	struct vnode* vnode;
5473 	ino_t parentID;
5474 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5475 		&parentID, kernel);
5476 	if (status != B_OK)
5477 		return status;
5478 
5479 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5480 		put_vnode(vnode);
5481 		return B_LINK_LIMIT;
5482 	}
5483 
5484 	// open the vnode
5485 	int newFD = open_vnode(vnode, openMode, kernel);
5486 	if (newFD >= 0) {
5487 		// The vnode reference has been transferred to the FD
5488 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5489 			vnode->device, parentID, vnode->id, NULL);
5490 	} else
5491 		put_vnode(vnode);
5492 
5493 	insert_fd_path(vnode, newFD, kernel, parentID, name);
5494 
5495 	return newFD;
5496 }
5497 
5498 
5499 static status_t
5500 file_close(struct file_descriptor* descriptor)
5501 {
5502 	struct vnode* vnode = descriptor->u.vnode;
5503 	status_t status = B_OK;
5504 
5505 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5506 
5507 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5508 		vnode->id);
5509 	if (HAS_FS_CALL(vnode, close)) {
5510 		status = FS_CALL(vnode, close, descriptor->cookie);
5511 	}
5512 
5513 	if (status == B_OK) {
5514 		// remove all outstanding locks for this team
5515 		release_advisory_lock(vnode, NULL);
5516 	}
5517 	return status;
5518 }
5519 
5520 
5521 static void
5522 file_free_fd(struct file_descriptor* descriptor)
5523 {
5524 	struct vnode* vnode = descriptor->u.vnode;
5525 
5526 	if (vnode != NULL) {
5527 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5528 
5529 		remove_fd_path(descriptor);
5530 
5531 		put_vnode(vnode);
5532 	}
5533 }
5534 
5535 
5536 static status_t
5537 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5538 	size_t* length)
5539 {
5540 	struct vnode* vnode = descriptor->u.vnode;
5541 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5542 		*length));
5543 
5544 	if (S_ISDIR(vnode->Type()))
5545 		return B_IS_A_DIRECTORY;
5546 
5547 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5548 }
5549 
5550 
5551 static status_t
5552 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5553 	size_t* length)
5554 {
5555 	struct vnode* vnode = descriptor->u.vnode;
5556 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5557 
5558 	if (S_ISDIR(vnode->Type()))
5559 		return B_IS_A_DIRECTORY;
5560 	if (!HAS_FS_CALL(vnode, write))
5561 		return B_READ_ONLY_DEVICE;
5562 
5563 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5564 }
5565 
5566 
5567 static off_t
5568 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5569 {
5570 	struct vnode* vnode = descriptor->u.vnode;
5571 	off_t offset;
5572 
5573 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5574 
5575 	// some kinds of files are not seekable
5576 	switch (vnode->Type() & S_IFMT) {
5577 		case S_IFIFO:
5578 		case S_IFSOCK:
5579 			return ESPIPE;
5580 
5581 		// The Open Group Base Specs don't single out any file types besides
5582 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5583 		case S_IFREG:
5584 		case S_IFBLK:
5585 		case S_IFDIR:
5586 		case S_IFLNK:
5587 		case S_IFCHR:
5588 			break;
5589 	}
5590 
5591 	switch (seekType) {
5592 		case SEEK_SET:
5593 			offset = 0;
5594 			break;
5595 		case SEEK_CUR:
5596 			offset = descriptor->pos;
5597 			break;
5598 		case SEEK_END:
5599 		{
5600 			// stat() the node
5601 			if (!HAS_FS_CALL(vnode, read_stat))
5602 				return B_UNSUPPORTED;
5603 
5604 			struct stat stat;
5605 			status_t status = FS_CALL(vnode, read_stat, &stat);
5606 			if (status != B_OK)
5607 				return status;
5608 
5609 			offset = stat.st_size;
5610 			break;
5611 		}
5612 		default:
5613 			return B_BAD_VALUE;
5614 	}
5615 
5616 	// assumes off_t is 64 bits wide
5617 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5618 		return B_BUFFER_OVERFLOW;
5619 
5620 	pos += offset;
5621 	if (pos < 0)
5622 		return B_BAD_VALUE;
5623 
5624 	return descriptor->pos = pos;
5625 }
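

// Worked example for the overflow check above: with offset = stat.st_size
// = LONGLONG_MAX - 10 and pos = 20, LONGLONG_MAX - offset is 10 < 20, so
// "pos + offset" would wrap around and B_BUFFER_OVERFLOW is returned instead.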
5626 
5627 
5628 static status_t
5629 file_select(struct file_descriptor* descriptor, uint8 event,
5630 	struct selectsync* sync)
5631 {
5632 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5633 
5634 	struct vnode* vnode = descriptor->u.vnode;
5635 
5636 	// If the FS has no select() hook, notify select() now.
5637 	if (!HAS_FS_CALL(vnode, select))
5638 		return notify_select_event(sync, event);
5639 
5640 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5641 }
5642 
5643 
5644 static status_t
5645 file_deselect(struct file_descriptor* descriptor, uint8 event,
5646 	struct selectsync* sync)
5647 {
5648 	struct vnode* vnode = descriptor->u.vnode;
5649 
5650 	if (!HAS_FS_CALL(vnode, deselect))
5651 		return B_OK;
5652 
5653 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5654 }
5655 
5656 
5657 static status_t
5658 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5659 	bool kernel)
5660 {
5661 	struct vnode* vnode;
5662 	status_t status;
5663 
5664 	if (name == NULL || *name == '\0')
5665 		return B_BAD_VALUE;
5666 
5667 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5668 		"perms = %d)\n", mountID, parentID, name, perms));
5669 
5670 	status = get_vnode(mountID, parentID, &vnode, true, false);
5671 	if (status != B_OK)
5672 		return status;
5673 
5674 	if (HAS_FS_CALL(vnode, create_dir))
5675 		status = FS_CALL(vnode, create_dir, name, perms);
5676 	else
5677 		status = B_READ_ONLY_DEVICE;
5678 
5679 	put_vnode(vnode);
5680 	return status;
5681 }
5682 
5683 
5684 static status_t
5685 dir_create(int fd, char* path, int perms, bool kernel)
5686 {
5687 	char filename[B_FILE_NAME_LENGTH];
5688 	struct vnode* vnode;
5689 	status_t status;
5690 
5691 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5692 		kernel));
5693 
5694 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5695 	if (status < 0)
5696 		return status;
5697 
5698 	if (HAS_FS_CALL(vnode, create_dir))
5699 		status = FS_CALL(vnode, create_dir, filename, perms);
5700 	else
5701 		status = B_READ_ONLY_DEVICE;
5702 
5703 	put_vnode(vnode);
5704 	return status;
5705 }
5706 
5707 
5708 static int
5709 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5710 {
5711 	FUNCTION(("dir_open_entry_ref()\n"));
5712 
5713 	if (name && name[0] == '\0')
5714 		return B_BAD_VALUE;
5715 
5716 	// get the vnode matching the entry_ref/node_ref
5717 	struct vnode* vnode;
5718 	status_t status;
5719 	if (name) {
5720 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5721 			&vnode);
5722 	} else
5723 		status = get_vnode(mountID, parentID, &vnode, true, false);
5724 	if (status != B_OK)
5725 		return status;
5726 
5727 	int newFD = open_dir_vnode(vnode, kernel);
5728 	if (newFD >= 0) {
5729 		// The vnode reference has been transferred to the FD
5730 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5731 			vnode->id, name);
5732 	} else
5733 		put_vnode(vnode);
5734 
5735 	return newFD;
5736 }
5737 
5738 
5739 static int
5740 dir_open(int fd, char* path, bool kernel)
5741 {
5742 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5743 		kernel));
5744 
5745 	// get the vnode matching the vnode + path combination
5746 	struct vnode* vnode = NULL;
5747 	ino_t parentID;
5748 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5749 		kernel);
5750 	if (status != B_OK)
5751 		return status;
5752 
5753 	// open the dir
5754 	int newFD = open_dir_vnode(vnode, kernel);
5755 	if (newFD >= 0) {
5756 		// The vnode reference has been transferred to the FD
5757 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5758 			parentID, vnode->id, NULL);
5759 	} else
5760 		put_vnode(vnode);
5761 
5762 	return newFD;
5763 }
5764 
5765 
5766 static status_t
5767 dir_close(struct file_descriptor* descriptor)
5768 {
5769 	struct vnode* vnode = descriptor->u.vnode;
5770 
5771 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5772 
5773 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5774 		vnode->id);
5775 	if (HAS_FS_CALL(vnode, close_dir))
5776 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5777 
5778 	return B_OK;
5779 }
5780 
5781 
5782 static void
5783 dir_free_fd(struct file_descriptor* descriptor)
5784 {
5785 	struct vnode* vnode = descriptor->u.vnode;
5786 
5787 	if (vnode != NULL) {
5788 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5789 		put_vnode(vnode);
5790 	}
5791 }
5792 
5793 
5794 static status_t
5795 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5796 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5797 {
5798 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5799 		bufferSize, _count);
5800 }
5801 
5802 
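/*!	Fixes up a dirent as delivered by a file system for consumption by
	userland: fills in the parent device/node IDs and, where mount boundaries
	are involved, rewrites d_dev/d_ino to name the vnode actually visible at
	that point in the hierarchy (the covering vnode, or, for the ".." entry
	of a mount's root directory, the parent of the covered vnode).
*/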
5803 static status_t
5804 fix_dirent(struct vnode* parent, struct dirent* entry,
5805 	struct io_context* ioContext)
5806 {
5807 	// set d_pdev and d_pino
5808 	entry->d_pdev = parent->device;
5809 	entry->d_pino = parent->id;
5810 
5811 	// If this is the ".." entry and the directory is covering another
5812 	// vnode, we need to replace d_dev and d_ino with the actual values.
5813 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5814 		// Make sure the IO context root is not bypassed.
5815 		if (parent == ioContext->root) {
5816 			entry->d_dev = parent->device;
5817 			entry->d_ino = parent->id;
5818 		} else {
5819 			inc_vnode_ref_count(parent);
5820 				// vnode_path_to_vnode() puts the node
5821 
5822 			// ".." is guaranteed not to be clobbered by this call
5823 			struct vnode* vnode;
5824 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5825 				ioContext, &vnode, NULL);
5826 
5827 			if (status == B_OK) {
5828 				entry->d_dev = vnode->device;
5829 				entry->d_ino = vnode->id;
5830 				put_vnode(vnode);
5831 			}
5832 		}
5833 	} else {
5834 		// resolve covered vnodes
5835 		ReadLocker _(&sVnodeLock);
5836 
5837 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5838 		if (vnode != NULL && vnode->covered_by != NULL) {
5839 			do {
5840 				vnode = vnode->covered_by;
5841 			} while (vnode->covered_by != NULL);
5842 
5843 			entry->d_dev = vnode->device;
5844 			entry->d_ino = vnode->id;
5845 		}
5846 	}
5847 
5848 	return B_OK;
5849 }
5850 
5851 
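// Reads directory entries via the file system's read_dir() hook and fixes
// each of them up with fix_dirent(). The entries are walked via d_reclen, so
// the FS has to set that field correctly for every dirent it returns.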
5852 static status_t
5853 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5854 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5855 {
5856 	if (!HAS_FS_CALL(vnode, read_dir))
5857 		return B_UNSUPPORTED;
5858 
5859 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5860 		_count);
5861 	if (error != B_OK)
5862 		return error;
5863 
5864 	// we need to adjust the read dirents
5865 	uint32 count = *_count;
5866 	for (uint32 i = 0; i < count; i++) {
5867 		error = fix_dirent(vnode, buffer, ioContext);
5868 		if (error != B_OK)
5869 			return error;
5870 
5871 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5872 	}
5873 
5874 	return error;
5875 }
5876 
5877 
5878 static status_t
5879 dir_rewind(struct file_descriptor* descriptor)
5880 {
5881 	struct vnode* vnode = descriptor->u.vnode;
5882 
5883 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5884 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5885 	}
5886 
5887 	return B_UNSUPPORTED;
5888 }
5889 
5890 
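// The normalization of the trailing path component done below works like
// this:
//	"name/"   -> "name"
//	"name/."  -> "name"
//	"name//." -> "name"
// while paths ending in "..", or collapsing to just "." or "..", are refused
// with B_NOT_ALLOWED instead of being resolved.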
5891 static status_t
5892 dir_remove(int fd, char* path, bool kernel)
5893 {
5894 	char name[B_FILE_NAME_LENGTH];
5895 	struct vnode* directory;
5896 	status_t status;
5897 
5898 	if (path != NULL) {
5899 		// we need to make sure our path name doesn't end in "/", ".",
5900 		// or ".."
5901 		char* lastSlash;
5902 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5903 			char* leaf = lastSlash + 1;
5904 			if (!strcmp(leaf, ".."))
5905 				return B_NOT_ALLOWED;
5906 
5907 			// omit multiple slashes
5908 			while (lastSlash > path && lastSlash[-1] == '/')
5909 				lastSlash--;
5910 
5911 			if (leaf[0]
5912 				&& strcmp(leaf, ".")) {
5913 				break;
5914 			}
5915 			// "name/" -> "name", or "name/." -> "name"
5916 			lastSlash[0] = '\0';
5917 		}
5918 
5919 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5920 			return B_NOT_ALLOWED;
5921 	}
5922 
5923 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5924 	if (status != B_OK)
5925 		return status;
5926 
5927 	if (HAS_FS_CALL(directory, remove_dir))
5928 		status = FS_CALL(directory, remove_dir, name);
5929 	else
5930 		status = B_READ_ONLY_DEVICE;
5931 
5932 	put_vnode(directory);
5933 	return status;
5934 }
5935 
5936 
5937 static status_t
5938 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5939 	size_t length)
5940 {
5941 	struct vnode* vnode = descriptor->u.vnode;
5942 
5943 	if (HAS_FS_CALL(vnode, ioctl))
5944 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5945 
5946 	return B_DEV_INVALID_IOCTL;
5947 }
5948 
5949 
5950 static status_t
5951 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5952 {
5953 	struct flock flock;
5954 
5955 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5956 		fd, op, argument, kernel ? "kernel" : "user"));
5957 
5958 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5959 		fd);
5960 	if (descriptor == NULL)
5961 		return B_FILE_ERROR;
5962 
5963 	struct vnode* vnode = fd_vnode(descriptor);
5964 
5965 	status_t status = B_OK;
5966 
5967 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5968 		if (descriptor->type != FDTYPE_FILE)
5969 			status = B_BAD_VALUE;
5970 		else if (user_memcpy(&flock, (struct flock*)argument,
5971 				sizeof(struct flock)) != B_OK)
5972 			status = B_BAD_ADDRESS;
5973 
5974 		if (status != B_OK) {
5975 			put_fd(descriptor);
5976 			return status;
5977 		}
5978 	}
5979 
5980 	switch (op) {
5981 		case F_SETFD:
5982 		{
5983 			// Set the file descriptor flags
5984 			struct io_context* context = get_current_io_context(kernel);
5985 
5986 			// FD_CLOEXEC is the only flag available at this time
5987 			mutex_lock(&context->io_mutex);
5988 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5989 			mutex_unlock(&context->io_mutex);
5990 
5991 			status = B_OK;
5992 			break;
5993 		}
5994 
5995 		case F_GETFD:
5996 		{
5997 			struct io_context* context = get_current_io_context(kernel);
5998 
5999 			// Get file descriptor flags
6000 			mutex_lock(&context->io_mutex);
6001 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6002 			mutex_unlock(&context->io_mutex);
6003 			break;
6004 		}
6005 
6006 		case F_SETFL:
6007 			// Set file descriptor open mode
6008 
6009 			// we only accept changes to O_APPEND and O_NONBLOCK
6010 			argument &= O_APPEND | O_NONBLOCK;
6011 			if (descriptor->ops->fd_set_flags != NULL) {
6012 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6013 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6014 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6015 					(int)argument);
6016 			} else
6017 				status = B_UNSUPPORTED;
6018 
6019 			if (status == B_OK) {
6020 				// update this descriptor's open_mode field
6021 				descriptor->open_mode = (descriptor->open_mode
6022 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6023 			}
6024 
6025 			break;
6026 
6027 		case F_GETFL:
6028 			// Get file descriptor open mode
6029 			status = descriptor->open_mode;
6030 			break;
6031 
6032 		case F_DUPFD:
6033 		{
6034 			struct io_context* context = get_current_io_context(kernel);
6035 
6036 			status = new_fd_etc(context, descriptor, (int)argument);
6037 			if (status >= 0) {
6038 				mutex_lock(&context->io_mutex);
6039 				fd_set_close_on_exec(context, status, false);
6040 				mutex_unlock(&context->io_mutex);
6041 
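				// POSIX requires the duplicate to start with FD_CLOEXEC
				// cleared - hence `status` (the new FD) above, not `fd`. The
				// FD table now holds another pointer to this descriptor, so
				// one more reference is acquired.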
6042 				atomic_add(&descriptor->ref_count, 1);
6043 			}
6044 			break;
6045 		}
6046 
6047 		case F_GETLK:
6048 			if (vnode != NULL) {
6049 				status = get_advisory_lock(vnode, &flock);
6050 				if (status == B_OK) {
6051 					// copy back flock structure
6052 					status = user_memcpy((struct flock*)argument, &flock,
6053 						sizeof(struct flock));
6054 				}
6055 			} else
6056 				status = B_BAD_VALUE;
6057 			break;
6058 
6059 		case F_SETLK:
6060 		case F_SETLKW:
6061 			status = normalize_flock(descriptor, &flock);
6062 			if (status != B_OK)
6063 				break;
6064 
6065 			if (vnode == NULL) {
6066 				status = B_BAD_VALUE;
6067 			} else if (flock.l_type == F_UNLCK) {
6068 				status = release_advisory_lock(vnode, &flock);
6069 			} else {
6070 				// the open mode must match the lock type
6071 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6072 						&& flock.l_type == F_WRLCK)
6073 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6074 						&& flock.l_type == F_RDLCK))
6075 					status = B_FILE_ERROR;
6076 				else {
6077 					status = acquire_advisory_lock(vnode, -1,
6078 						&flock, op == F_SETLKW);
6079 				}
6080 			}
6081 			break;
6082 
6083 		// ToDo: add support for more ops?
6084 
6085 		default:
6086 			status = B_BAD_VALUE;
6087 	}
6088 
6089 	put_fd(descriptor);
6090 	return status;
6091 }
6092 
6093 
6094 static status_t
6095 common_sync(int fd, bool kernel)
6096 {
6097 	struct file_descriptor* descriptor;
6098 	struct vnode* vnode;
6099 	status_t status;
6100 
6101 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6102 
6103 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6104 	if (descriptor == NULL)
6105 		return B_FILE_ERROR;
6106 
6107 	if (HAS_FS_CALL(vnode, fsync))
6108 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6109 	else
6110 		status = B_UNSUPPORTED;
6111 
6112 	put_fd(descriptor);
6113 	return status;
6114 }
6115 
6116 
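// Locks a node exclusively on behalf of one descriptor (BeOS-style "node
// locking"): atomic_pointer_test_and_set() stores the descriptor in
// vnode->mandatory_locked_by only if that field is still NULL and returns the
// previous value, so a non-NULL result means another descriptor already holds
// the lock.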
6117 static status_t
6118 common_lock_node(int fd, bool kernel)
6119 {
6120 	struct file_descriptor* descriptor;
6121 	struct vnode* vnode;
6122 
6123 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6124 	if (descriptor == NULL)
6125 		return B_FILE_ERROR;
6126 
6127 	status_t status = B_OK;
6128 
6129 	// We need to set the lock atomically - someone else
6130 	// might set it at the same time
6131 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6132 			(file_descriptor*)NULL) != NULL)
6133 		status = B_BUSY;
6134 
6135 	put_fd(descriptor);
6136 	return status;
6137 }
6138 
6139 
6140 static status_t
6141 common_unlock_node(int fd, bool kernel)
6142 {
6143 	struct file_descriptor* descriptor;
6144 	struct vnode* vnode;
6145 
6146 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6147 	if (descriptor == NULL)
6148 		return B_FILE_ERROR;
6149 
6150 	status_t status = B_OK;
6151 
6152 	// We need to clear the lock atomically - someone else
6153 	// might change it at the same time
6154 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6155 			(file_descriptor*)NULL, descriptor) != descriptor)
6156 		status = B_BAD_VALUE;
6157 
6158 	put_fd(descriptor);
6159 	return status;
6160 }
6161 
6162 
6163 static status_t
6164 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6165 	bool kernel)
6166 {
6167 	struct vnode* vnode;
6168 	status_t status;
6169 
6170 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6171 	if (status != B_OK)
6172 		return status;
6173 
6174 	if (HAS_FS_CALL(vnode, read_symlink))
6175 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6176 	else
6177 		status = B_BAD_VALUE;
6178 
6179 	put_vnode(vnode);
6180 	return status;
6181 }
6182 
6183 
6184 static status_t
6185 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6186 	bool kernel)
6187 {
6188 	// path validity checks have to be in the calling function!
6189 	char name[B_FILE_NAME_LENGTH];
6190 	struct vnode* vnode;
6191 	status_t status;
6192 
6193 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6194 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6195 
6196 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6197 	if (status != B_OK)
6198 		return status;
6199 
6200 	if (HAS_FS_CALL(vnode, create_symlink))
6201 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6202 	else {
6203 		status = HAS_FS_CALL(vnode, write)
6204 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6205 	}
6206 
6207 	put_vnode(vnode);
6208 
6209 	return status;
6210 }
6211 
6212 
6213 static status_t
6214 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6215 	bool traverseLeafLink, bool kernel)
6216 {
6217 	// path validity checks have to be in the calling function!
6218 
6219 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6220 		toPath, kernel));
6221 
6222 	char name[B_FILE_NAME_LENGTH];
6223 	struct vnode* directory;
6224 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6225 		kernel);
6226 	if (status != B_OK)
6227 		return status;
6228 
6229 	struct vnode* vnode;
6230 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6231 		kernel);
6232 	if (status != B_OK)
6233 		goto err;
6234 
6235 	if (directory->mount != vnode->mount) {
6236 		status = B_CROSS_DEVICE_LINK;
6237 		goto err1;
6238 	}
6239 
6240 	if (HAS_FS_CALL(directory, link))
6241 		status = FS_CALL(directory, link, name, vnode);
6242 	else
6243 		status = B_READ_ONLY_DEVICE;
6244 
6245 err1:
6246 	put_vnode(vnode);
6247 err:
6248 	put_vnode(directory);
6249 
6250 	return status;
6251 }
6252 
6253 
6254 static status_t
6255 common_unlink(int fd, char* path, bool kernel)
6256 {
6257 	char filename[B_FILE_NAME_LENGTH];
6258 	struct vnode* vnode;
6259 	status_t status;
6260 
6261 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6262 		kernel));
6263 
6264 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6265 	if (status < 0)
6266 		return status;
6267 
6268 	if (HAS_FS_CALL(vnode, unlink))
6269 		status = FS_CALL(vnode, unlink, filename);
6270 	else
6271 		status = B_READ_ONLY_DEVICE;
6272 
6273 	put_vnode(vnode);
6274 
6275 	return status;
6276 }
6277 
6278 
6279 static status_t
6280 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6281 {
6282 	struct vnode* vnode;
6283 	status_t status;
6284 
6285 	// TODO: honor effectiveUserGroup argument
6286 
6287 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6288 	if (status != B_OK)
6289 		return status;
6290 
6291 	if (HAS_FS_CALL(vnode, access))
6292 		status = FS_CALL(vnode, access, mode);
6293 	else
6294 		status = B_OK;
6295 
6296 	put_vnode(vnode);
6297 
6298 	return status;
6299 }
6300 
6301 
6302 static status_t
6303 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6304 {
6305 	struct vnode* fromVnode;
6306 	struct vnode* toVnode;
6307 	char fromName[B_FILE_NAME_LENGTH];
6308 	char toName[B_FILE_NAME_LENGTH];
6309 	status_t status;
6310 
6311 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6312 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6313 
6314 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6315 	if (status != B_OK)
6316 		return status;
6317 
6318 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6319 	if (status != B_OK)
6320 		goto err1;
6321 
6322 	if (fromVnode->device != toVnode->device) {
6323 		status = B_CROSS_DEVICE_LINK;
6324 		goto err2;
6325 	}
6326 
6327 	if (fromName[0] == '\0' || toName[0] == '\0'
6328 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6329 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6330 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6331 		status = B_BAD_VALUE;
6332 		goto err2;
6333 	}
6334 
6335 	if (HAS_FS_CALL(fromVnode, rename))
6336 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6337 	else
6338 		status = B_READ_ONLY_DEVICE;
6339 
6340 err2:
6341 	put_vnode(toVnode);
6342 err1:
6343 	put_vnode(fromVnode);
6344 
6345 	return status;
6346 }
6347 
6348 
6349 static status_t
6350 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6351 {
6352 	struct vnode* vnode = descriptor->u.vnode;
6353 
6354 	FUNCTION(("common_read_stat: stat %p\n", stat));
6355 
6356 	// TODO: remove this once all file systems properly set them!
6357 	stat->st_crtim.tv_nsec = 0;
6358 	stat->st_ctim.tv_nsec = 0;
6359 	stat->st_mtim.tv_nsec = 0;
6360 	stat->st_atim.tv_nsec = 0;
6361 
6362 	status_t status = FS_CALL(vnode, read_stat, stat);
6363 
6364 	// fill in the st_dev and st_ino fields
6365 	if (status == B_OK) {
6366 		stat->st_dev = vnode->device;
6367 		stat->st_ino = vnode->id;
6368 		stat->st_rdev = -1;
6369 	}
6370 
6371 	return status;
6372 }
6373 
6374 
6375 static status_t
6376 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6377 	int statMask)
6378 {
6379 	struct vnode* vnode = descriptor->u.vnode;
6380 
6381 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6382 		vnode, stat, statMask));
6383 
6384 	if (!HAS_FS_CALL(vnode, write_stat))
6385 		return B_READ_ONLY_DEVICE;
6386 
6387 	return FS_CALL(vnode, write_stat, stat, statMask);
6388 }
6389 
6390 
6391 static status_t
6392 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6393 	struct stat* stat, bool kernel)
6394 {
6395 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6396 		stat));
6397 
6398 	struct vnode* vnode;
6399 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6400 		NULL, kernel);
6401 	if (status != B_OK)
6402 		return status;
6403 
6404 	status = FS_CALL(vnode, read_stat, stat);
6405 
6406 	// fill in the st_dev and st_ino fields
6407 	if (status == B_OK) {
6408 		stat->st_dev = vnode->device;
6409 		stat->st_ino = vnode->id;
6410 		stat->st_rdev = -1;
6411 	}
6412 
6413 	put_vnode(vnode);
6414 	return status;
6415 }
6416 
6417 
6418 static status_t
6419 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6420 	const struct stat* stat, int statMask, bool kernel)
6421 {
6422 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6423 		"kernel %d\n", fd, path, stat, statMask, kernel));
6424 
6425 	struct vnode* vnode;
6426 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6427 		NULL, kernel);
6428 	if (status != B_OK)
6429 		return status;
6430 
6431 	if (HAS_FS_CALL(vnode, write_stat))
6432 		status = FS_CALL(vnode, write_stat, stat, statMask);
6433 	else
6434 		status = B_READ_ONLY_DEVICE;
6435 
6436 	put_vnode(vnode);
6437 
6438 	return status;
6439 }
6440 
6441 
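// The functions below back file descriptors on attribute directories, i.e.
// the per-node listing of extended attributes used by the fs_attr API.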
6442 static int
6443 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6444 {
6445 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6446 		kernel));
6447 
6448 	struct vnode* vnode;
6449 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6450 		NULL, kernel);
6451 	if (status != B_OK)
6452 		return status;
6453 
6454 	status = open_attr_dir_vnode(vnode, kernel);
6455 	if (status < 0)
6456 		put_vnode(vnode);
6457 
6458 	return status;
6459 }
6460 
6461 
6462 static status_t
6463 attr_dir_close(struct file_descriptor* descriptor)
6464 {
6465 	struct vnode* vnode = descriptor->u.vnode;
6466 
6467 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6468 
6469 	if (HAS_FS_CALL(vnode, close_attr_dir))
6470 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6471 
6472 	return B_OK;
6473 }
6474 
6475 
6476 static void
6477 attr_dir_free_fd(struct file_descriptor* descriptor)
6478 {
6479 	struct vnode* vnode = descriptor->u.vnode;
6480 
6481 	if (vnode != NULL) {
6482 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6483 		put_vnode(vnode);
6484 	}
6485 }
6486 
6487 
6488 static status_t
6489 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6490 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6491 {
6492 	struct vnode* vnode = descriptor->u.vnode;
6493 
6494 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6495 
6496 	if (HAS_FS_CALL(vnode, read_attr_dir))
6497 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6498 			bufferSize, _count);
6499 
6500 	return B_UNSUPPORTED;
6501 }
6502 
6503 
6504 static status_t
6505 attr_dir_rewind(struct file_descriptor* descriptor)
6506 {
6507 	struct vnode* vnode = descriptor->u.vnode;
6508 
6509 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6510 
6511 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6512 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6513 
6514 	return B_UNSUPPORTED;
6515 }
6516 
6517 
6518 static int
6519 attr_create(int fd, char* path, const char* name, uint32 type,
6520 	int openMode, bool kernel)
6521 {
6522 	if (name == NULL || *name == '\0')
6523 		return B_BAD_VALUE;
6524 
6525 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6526 	struct vnode* vnode;
6527 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6528 		kernel);
6529 	if (status != B_OK)
6530 		return status;
6531 
6532 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6533 		status = B_LINK_LIMIT;
6534 		goto err;
6535 	}
6536 
6537 	if (!HAS_FS_CALL(vnode, create_attr)) {
6538 		status = B_READ_ONLY_DEVICE;
6539 		goto err;
6540 	}
6541 
6542 	void* cookie;
6543 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6544 	if (status != B_OK)
6545 		goto err;
6546 
6547 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6548 	if (fd >= 0)
6549 		return fd;
6550 
6551 	status = fd;
6552 
6553 	FS_CALL(vnode, close_attr, cookie);
6554 	FS_CALL(vnode, free_attr_cookie, cookie);
6555 
6556 	FS_CALL(vnode, remove_attr, name);
6557 
6558 err:
6559 	put_vnode(vnode);
6560 
6561 	return status;
6562 }
6563 
6564 
6565 static int
6566 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6567 {
6568 	if (name == NULL || *name == '\0')
6569 		return B_BAD_VALUE;
6570 
6571 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6572 	struct vnode* vnode;
6573 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6574 		kernel);
6575 	if (status != B_OK)
6576 		return status;
6577 
6578 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6579 		status = B_LINK_LIMIT;
6580 		goto err;
6581 	}
6582 
6583 	if (!HAS_FS_CALL(vnode, open_attr)) {
6584 		status = B_UNSUPPORTED;
6585 		goto err;
6586 	}
6587 
6588 	void* cookie;
6589 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6590 	if (status != B_OK)
6591 		goto err;
6592 
6593 	// now we only need a file descriptor for this attribute and we're done
6594 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6595 	if (fd >= 0)
6596 		return fd;
6597 
6598 	status = fd;
6599 
6600 	FS_CALL(vnode, close_attr, cookie);
6601 	FS_CALL(vnode, free_attr_cookie, cookie);
6602 
6603 err:
6604 	put_vnode(vnode);
6605 
6606 	return status;
6607 }
6608 
6609 
6610 static status_t
6611 attr_close(struct file_descriptor* descriptor)
6612 {
6613 	struct vnode* vnode = descriptor->u.vnode;
6614 
6615 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6616 
6617 	if (HAS_FS_CALL(vnode, close_attr))
6618 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6619 
6620 	return B_OK;
6621 }
6622 
6623 
6624 static void
6625 attr_free_fd(struct file_descriptor* descriptor)
6626 {
6627 	struct vnode* vnode = descriptor->u.vnode;
6628 
6629 	if (vnode != NULL) {
6630 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6631 		put_vnode(vnode);
6632 	}
6633 }
6634 
6635 
6636 static status_t
6637 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6638 	size_t* length)
6639 {
6640 	struct vnode* vnode = descriptor->u.vnode;
6641 
6642 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6643 		*length));
6644 
6645 	if (!HAS_FS_CALL(vnode, read_attr))
6646 		return B_UNSUPPORTED;
6647 
6648 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6649 }
6650 
6651 
6652 static status_t
6653 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6654 	size_t* length)
6655 {
6656 	struct vnode* vnode = descriptor->u.vnode;
6657 
6658 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6659 	if (!HAS_FS_CALL(vnode, write_attr))
6660 		return B_UNSUPPORTED;
6661 
6662 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6663 }
6664 
6665 
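// Mirrors file_seek(), except that SEEK_END has to stat the attribute itself
// via read_attr_stat() to learn its size.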
6666 static off_t
6667 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6668 {
6669 	off_t offset;
6670 
6671 	switch (seekType) {
6672 		case SEEK_SET:
6673 			offset = 0;
6674 			break;
6675 		case SEEK_CUR:
6676 			offset = descriptor->pos;
6677 			break;
6678 		case SEEK_END:
6679 		{
6680 			struct vnode* vnode = descriptor->u.vnode;
6681 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6682 				return B_UNSUPPORTED;
6683 
6684 			struct stat stat;
6685 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6686 				&stat);
6687 			if (status != B_OK)
6688 				return status;
6689 
6690 			offset = stat.st_size;
6691 			break;
6692 		}
6693 		default:
6694 			return B_BAD_VALUE;
6695 	}
6696 
6697 	// assumes off_t is 64 bits wide
6698 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6699 		return B_BUFFER_OVERFLOW;
6700 
6701 	pos += offset;
6702 	if (pos < 0)
6703 		return B_BAD_VALUE;
6704 
6705 	return descriptor->pos = pos;
6706 }
6707 
6708 
6709 static status_t
6710 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6711 {
6712 	struct vnode* vnode = descriptor->u.vnode;
6713 
6714 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6715 
6716 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6717 		return B_UNSUPPORTED;
6718 
6719 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6720 }
6721 
6722 
6723 static status_t
6724 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6725 	int statMask)
6726 {
6727 	struct vnode* vnode = descriptor->u.vnode;
6728 
6729 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6730 
6731 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6732 		return B_READ_ONLY_DEVICE;
6733 
6734 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6735 }
6736 
6737 
6738 static status_t
6739 attr_remove(int fd, const char* name, bool kernel)
6740 {
6741 	struct file_descriptor* descriptor;
6742 	struct vnode* vnode;
6743 	status_t status;
6744 
6745 	if (name == NULL || *name == '\0')
6746 		return B_BAD_VALUE;
6747 
6748 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6749 		kernel));
6750 
6751 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6752 	if (descriptor == NULL)
6753 		return B_FILE_ERROR;
6754 
6755 	if (HAS_FS_CALL(vnode, remove_attr))
6756 		status = FS_CALL(vnode, remove_attr, name);
6757 	else
6758 		status = B_READ_ONLY_DEVICE;
6759 
6760 	put_fd(descriptor);
6761 
6762 	return status;
6763 }
6764 
6765 
6766 static status_t
6767 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6768 	bool kernel)
6769 {
6770 	struct file_descriptor* fromDescriptor;
6771 	struct file_descriptor* toDescriptor;
6772 	struct vnode* fromVnode;
6773 	struct vnode* toVnode;
6774 	status_t status;
6775 
6776 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6777 		|| *toName == '\0')
6778 		return B_BAD_VALUE;
6779 
6780 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6781 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6782 
6783 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6784 	if (fromDescriptor == NULL)
6785 		return B_FILE_ERROR;
6786 
6787 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6788 	if (toDescriptor == NULL) {
6789 		status = B_FILE_ERROR;
6790 		goto err;
6791 	}
6792 
6793 	// are the files on the same volume?
6794 	if (fromVnode->device != toVnode->device) {
6795 		status = B_CROSS_DEVICE_LINK;
6796 		goto err1;
6797 	}
6798 
6799 	if (HAS_FS_CALL(fromVnode, rename_attr))
6800 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6801 	else
6802 		status = B_READ_ONLY_DEVICE;
6803 
6804 err1:
6805 	put_fd(toDescriptor);
6806 err:
6807 	put_fd(fromDescriptor);
6808 
6809 	return status;
6810 }
6811 
6812 
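// Unlike file and attribute descriptors, index (and query) descriptors
// reference the mount itself (descriptor->u.mount) rather than a vnode, since
// indices are a per-volume concept - hence the FS_MOUNT_CALL()s below.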
6813 static int
6814 index_dir_open(dev_t mountID, bool kernel)
6815 {
6816 	struct fs_mount* mount;
6817 	void* cookie;
6818 
6819 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6820 
6821 	status_t status = get_mount(mountID, &mount);
6822 	if (status != B_OK)
6823 		return status;
6824 
6825 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6826 		status = B_UNSUPPORTED;
6827 		goto error;
6828 	}
6829 
6830 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6831 	if (status != B_OK)
6832 		goto error;
6833 
6834 	// get fd for the index directory
6835 	int fd;
6836 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6837 	if (fd >= 0)
6838 		return fd;
6839 
6840 	// something went wrong
6841 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6842 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6843 
6844 	status = fd;
6845 
6846 error:
6847 	put_mount(mount);
6848 	return status;
6849 }
6850 
6851 
6852 static status_t
6853 index_dir_close(struct file_descriptor* descriptor)
6854 {
6855 	struct fs_mount* mount = descriptor->u.mount;
6856 
6857 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6858 
6859 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6860 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6861 
6862 	return B_OK;
6863 }
6864 
6865 
6866 static void
6867 index_dir_free_fd(struct file_descriptor* descriptor)
6868 {
6869 	struct fs_mount* mount = descriptor->u.mount;
6870 
6871 	if (mount != NULL) {
6872 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6873 		put_mount(mount);
6874 	}
6875 }
6876 
6877 
6878 static status_t
6879 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6880 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6881 {
6882 	struct fs_mount* mount = descriptor->u.mount;
6883 
6884 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6885 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6886 			bufferSize, _count);
6887 	}
6888 
6889 	return B_UNSUPPORTED;
6890 }
6891 
6892 
6893 static status_t
6894 index_dir_rewind(struct file_descriptor* descriptor)
6895 {
6896 	struct fs_mount* mount = descriptor->u.mount;
6897 
6898 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6899 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6900 
6901 	return B_UNSUPPORTED;
6902 }
6903 
6904 
6905 static status_t
6906 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6907 	bool kernel)
6908 {
6909 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6910 		name, kernel));
6911 
6912 	struct fs_mount* mount;
6913 	status_t status = get_mount(mountID, &mount);
6914 	if (status != B_OK)
6915 		return status;
6916 
6917 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6918 		status = B_READ_ONLY_DEVICE;
6919 		goto out;
6920 	}
6921 
6922 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6923 
6924 out:
6925 	put_mount(mount);
6926 	return status;
6927 }
6928 
6929 
6930 #if 0
6931 static status_t
6932 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6933 {
6934 	struct vnode* vnode = descriptor->u.vnode;
6935 
6936 	// ToDo: currently unused!
6937 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6938 	if (!HAS_FS_CALL(vnode, read_index_stat))
6939 		return B_UNSUPPORTED;
6940 
6941 	return B_UNSUPPORTED;
6942 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6943 }
6944 
6945 
6946 static void
6947 index_free_fd(struct file_descriptor* descriptor)
6948 {
6949 	struct vnode* vnode = descriptor->u.vnode;
6950 
6951 	if (vnode != NULL) {
6952 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6953 		put_vnode(vnode);
6954 	}
6955 }
6956 #endif
6957 
6958 
6959 static status_t
6960 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6961 	bool kernel)
6962 {
6963 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6964 		name, kernel));
6965 
6966 	struct fs_mount* mount;
6967 	status_t status = get_mount(mountID, &mount);
6968 	if (status != B_OK)
6969 		return status;
6970 
6971 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6972 		status = B_UNSUPPORTED;
6973 		goto out;
6974 	}
6975 
6976 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6977 
6978 out:
6979 	put_mount(mount);
6980 	return status;
6981 }
6982 
6983 
6984 static status_t
6985 index_remove(dev_t mountID, const char* name, bool kernel)
6986 {
6987 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6988 		name, kernel));
6989 
6990 	struct fs_mount* mount;
6991 	status_t status = get_mount(mountID, &mount);
6992 	if (status != B_OK)
6993 		return status;
6994 
6995 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6996 		status = B_READ_ONLY_DEVICE;
6997 		goto out;
6998 	}
6999 
7000 	status = FS_MOUNT_CALL(mount, remove_index, name);
7001 
7002 out:
7003 	put_mount(mount);
7004 	return status;
7005 }
7006 
7007 
7008 /*!	TODO: the query FS API is still pretty much the same as in R5.
7009 		It would be nice if the FS received some more kernel support
7010 		for queries.
7011 		For example, query parsing should be moved into the kernel.
7012 */
7013 static int
7014 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7015 	int32 token, bool kernel)
7016 {
7017 	struct fs_mount* mount;
7018 	void* cookie;
7019 
7020 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
7021 		query, kernel));
7022 
7023 	status_t status = get_mount(device, &mount);
7024 	if (status != B_OK)
7025 		return status;
7026 
7027 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7028 		status = B_UNSUPPORTED;
7029 		goto error;
7030 	}
7031 
7032 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7033 		&cookie);
7034 	if (status != B_OK)
7035 		goto error;
7036 
7037 	// get fd for the query
7038 	int fd;
7039 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7040 	if (fd >= 0)
7041 		return fd;
7042 
7043 	status = fd;
7044 
7045 	// something went wrong
7046 	FS_MOUNT_CALL(mount, close_query, cookie);
7047 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7048 
7049 error:
7050 	put_mount(mount);
7051 	return status;
7052 }
7053 
7054 
7055 static status_t
7056 query_close(struct file_descriptor* descriptor)
7057 {
7058 	struct fs_mount* mount = descriptor->u.mount;
7059 
7060 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7061 
7062 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7063 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7064 
7065 	return B_OK;
7066 }
7067 
7068 
7069 static void
7070 query_free_fd(struct file_descriptor* descriptor)
7071 {
7072 	struct fs_mount* mount = descriptor->u.mount;
7073 
7074 	if (mount != NULL) {
7075 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7076 		put_mount(mount);
7077 	}
7078 }
7079 
7080 
7081 static status_t
7082 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7083 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7084 {
7085 	struct fs_mount* mount = descriptor->u.mount;
7086 
7087 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7088 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7089 			bufferSize, _count);
7090 	}
7091 
7092 	return B_UNSUPPORTED;
7093 }
7094 
7095 
7096 static status_t
7097 query_rewind(struct file_descriptor* descriptor)
7098 {
7099 	struct fs_mount* mount = descriptor->u.mount;
7100 
7101 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7102 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7103 
7104 	return B_UNSUPPORTED;
7105 }
7106 
7107 
7108 //	#pragma mark - General File System functions
7109 
7110 
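/*!	Mounts the file system \a fsName on the directory \a path, backed by
	\a device, if given. If \a fsName is NULL, it is determined from the
	partition the disk device manager finds for \a device. \a fsName may
	also specify a stack of layered file systems, in which case one
	fs_volume is created and mounted per layer.
	\return the dev_t of the new mount, or an error code.
*/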
7111 static dev_t
7112 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7113 	const char* args, bool kernel)
7114 {
7115 	struct ::fs_mount* mount;
7116 	status_t status = B_OK;
7117 	fs_volume* volume = NULL;
7118 	int32 layer = 0;
7119 	Vnode* coveredNode = NULL;
7120 
7121 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
7122 
7123 	// The path is always safe, we just have to make sure that fsName is at
7124 	// least superficially valid - we can't make any assumptions about args.
7125 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7126 	// We'll get it from the DDM later.
7127 	if (fsName == NULL) {
7128 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7129 			return B_BAD_VALUE;
7130 	} else if (fsName[0] == '\0')
7131 		return B_BAD_VALUE;
7132 
7133 	RecursiveLocker mountOpLocker(sMountOpLock);
7134 
7135 	// Helper to delete a newly created file device on failure.
7136 	// Not exactly beautiful, but helps to keep the code below cleaner.
7137 	struct FileDeviceDeleter {
7138 		FileDeviceDeleter() : id(-1) {}
7139 		~FileDeviceDeleter()
7140 		{
7141 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7142 		}
7143 
7144 		partition_id id;
7145 	} fileDeviceDeleter;
7146 
7147 	// If the file system is not a "virtual" one, the device argument should
7148 	// point to a real file/device (if given at all).
7149 	// get the partition
7150 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7151 	KPartition* partition = NULL;
7152 	KPath normalizedDevice;
7153 	bool newlyCreatedFileDevice = false;
7154 
7155 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7156 		// normalize the device path
7157 		status = normalizedDevice.SetTo(device, true);
7158 		if (status != B_OK)
7159 			return status;
7160 
7161 		// get a corresponding partition from the DDM
7162 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7163 		if (partition == NULL) {
7164 			// Partition not found: This either means, the user supplied
7165 			// an invalid path, or the path refers to an image file. We try
7166 			// to let the DDM create a file device for the path.
7167 			partition_id deviceID = ddm->CreateFileDevice(
7168 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7169 			if (deviceID >= 0) {
7170 				partition = ddm->RegisterPartition(deviceID);
7171 				if (newlyCreatedFileDevice)
7172 					fileDeviceDeleter.id = deviceID;
7173 			}
7174 		}
7175 
7176 		if (!partition) {
7177 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7178 				normalizedDevice.Path()));
7179 			return B_ENTRY_NOT_FOUND;
7180 		}
7181 
7182 		device = normalizedDevice.Path();
7183 			// correct path to file device
7184 	}
7185 	PartitionRegistrar partitionRegistrar(partition, true);
7186 
7187 	// Write lock the partition's device. For the time being, we keep the lock
7188 	// until we're done mounting -- not nice, but it ensures that no one
7189 	// is interfering.
7190 	// TODO: Just mark the partition busy while mounting!
7191 	KDiskDevice* diskDevice = NULL;
7192 	if (partition) {
7193 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7194 		if (!diskDevice) {
7195 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7196 			return B_ERROR;
7197 		}
7198 	}
7199 
7200 	DeviceWriteLocker writeLocker(diskDevice, true);
7201 		// this takes over the write lock acquired before
7202 
7203 	if (partition != NULL) {
7204 		// make sure that the partition is not busy
7205 		if (partition->IsBusy()) {
7206 			TRACE(("fs_mount(): Partition is busy.\n"));
7207 			return B_BUSY;
7208 		}
7209 
7210 		// if no FS name had been supplied, we get it from the partition
7211 		if (fsName == NULL) {
7212 			KDiskSystem* diskSystem = partition->DiskSystem();
7213 			if (!diskSystem) {
7214 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7215 					"recognize it.\n"));
7216 				return B_BAD_VALUE;
7217 			}
7218 
7219 			if (!diskSystem->IsFileSystem()) {
7220 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7221 					"partitioning system.\n"));
7222 				return B_BAD_VALUE;
7223 			}
7224 
7225 			// The disk system name will not change, and the KDiskSystem
7226 			// object will not go away while the disk device is locked (and
7227 			// the partition has a reference to it), so this is safe.
7228 			fsName = diskSystem->Name();
7229 		}
7230 	}
7231 
7232 	mount = new(std::nothrow) (struct ::fs_mount);
7233 	if (mount == NULL)
7234 		return B_NO_MEMORY;
7235 
7236 	mount->device_name = strdup(device);
7237 		// "device" can be NULL
7238 
7239 	status = mount->entry_cache.Init();
7240 	if (status != B_OK)
7241 		goto err1;
7242 
7243 	// initialize structure
7244 	mount->id = sNextMountID++;
7245 	mount->partition = NULL;
7246 	mount->root_vnode = NULL;
7247 	mount->covers_vnode = NULL;
7248 	mount->unmounting = false;
7249 	mount->owns_file_device = false;
7250 	mount->volume = NULL;
7251 
7252 	// build up the volume(s)
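	// One fs_volume is allocated per layer; the volumes are chained via
	// their super_volume/sub_volume links, and mount->volume ends up pointing
	// to the volume created last, which is also the first one whose mount()
	// hook is called below.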
7253 	while (true) {
7254 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7255 		if (layerFSName == NULL) {
7256 			if (layer == 0) {
7257 				status = B_NO_MEMORY;
7258 				goto err1;
7259 			}
7260 
7261 			break;
7262 		}
7263 
7264 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7265 		if (volume == NULL) {
7266 			status = B_NO_MEMORY;
7267 			free(layerFSName);
7268 			goto err1;
7269 		}
7270 
7271 		volume->id = mount->id;
7272 		volume->partition = partition != NULL ? partition->ID() : -1;
7273 		volume->layer = layer++;
7274 		volume->private_volume = NULL;
7275 		volume->ops = NULL;
7276 		volume->sub_volume = NULL;
7277 		volume->super_volume = NULL;
7278 		volume->file_system = NULL;
7279 		volume->file_system_name = NULL;
7280 
7281 		volume->file_system_name = get_file_system_name(layerFSName);
7282 		if (volume->file_system_name == NULL) {
7283 			status = B_NO_MEMORY;
7284 			free(layerFSName);
7285 			free(volume);
7286 			goto err1;
7287 		}
7288 
7289 		volume->file_system = get_file_system(layerFSName);
7290 		if (volume->file_system == NULL) {
7291 			status = B_DEVICE_NOT_FOUND;
7292 			free(layerFSName);
7293 			free(volume->file_system_name);
7294 			free(volume);
7295 			goto err1;
7296 		}
7297 
7298 		if (mount->volume == NULL)
7299 			mount->volume = volume;
7300 		else {
7301 			volume->super_volume = mount->volume;
7302 			mount->volume->sub_volume = volume;
7303 			mount->volume = volume;
7304 		}
7305 	}
7306 
7307 	// insert mount struct into list before we call FS's mount() function
7308 	// so that vnodes can be created for this mount
7309 	mutex_lock(&sMountMutex);
7310 	hash_insert(sMountsTable, mount);
7311 	mutex_unlock(&sMountMutex);
7312 
7313 	ino_t rootID;
7314 
7315 	if (!sRoot) {
7316 		// we haven't mounted anything yet
7317 		if (strcmp(path, "/") != 0) {
7318 			status = B_ERROR;
7319 			goto err2;
7320 		}
7321 
7322 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7323 			args, &rootID);
7324 		if (status != 0)
7325 			goto err2;
7326 	} else {
7327 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7328 		if (status != B_OK)
7329 			goto err2;
7330 
7331 		mount->covers_vnode = coveredNode;
7332 
7333 		// make sure coveredNode is a directory
7334 		if (!S_ISDIR(coveredNode->Type())) {
7335 			status = B_NOT_A_DIRECTORY;
7336 			goto err3;
7337 		}
7338 
7339 		if (coveredNode->IsCovered()) {
7340 			// this is already a covered vnode
7341 			status = B_BUSY;
7342 			goto err3;
7343 		}
7344 
7345 		// mount it/them
7346 		fs_volume* volume = mount->volume;
7347 		while (volume) {
7348 			status = volume->file_system->mount(volume, device, flags, args,
7349 				&rootID);
7350 			if (status != B_OK) {
7351 				if (volume->sub_volume)
7352 					goto err4;
7353 				goto err3;
7354 			}
7355 
7356 			volume = volume->super_volume;
7357 		}
7358 
7359 		volume = mount->volume;
7360 		while (volume) {
7361 			if (volume->ops->all_layers_mounted != NULL)
7362 				volume->ops->all_layers_mounted(volume);
7363 			volume = volume->super_volume;
7364 		}
7365 	}
7366 
7367 	// the root node is supposed to be owned by the file system - it must
7368 	// exist at this point
7369 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7370 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7371 		panic("fs_mount: file system does not own its root node!\n");
7372 		status = B_ERROR;
7373 		goto err4;
7374 	}
7375 
7376 	// set up the links between the root vnode and the vnode it covers
7377 	rw_lock_write_lock(&sVnodeLock);
7378 	if (coveredNode != NULL) {
7379 		if (coveredNode->IsCovered()) {
7380 			// the vnode is covered now
7381 			status = B_BUSY;
7382 			rw_lock_write_unlock(&sVnodeLock);
7383 			goto err4;
7384 		}
7385 
7386 		mount->root_vnode->covers = coveredNode;
7387 		mount->root_vnode->SetCovering(true);
7388 
7389 		coveredNode->covered_by = mount->root_vnode;
7390 		coveredNode->SetCovered(true);
7391 	}
7392 	rw_lock_write_unlock(&sVnodeLock);
7393 
7394 	if (!sRoot) {
7395 		sRoot = mount->root_vnode;
7396 		mutex_lock(&sIOContextRootLock);
7397 		get_current_io_context(true)->root = sRoot;
7398 		mutex_unlock(&sIOContextRootLock);
7399 		inc_vnode_ref_count(sRoot);
7400 	}
7401 
7402 	// supply the partition (if any) with the mount cookie and mark it mounted
7403 	if (partition) {
7404 		partition->SetMountCookie(mount->volume->private_volume);
7405 		partition->SetVolumeID(mount->id);
7406 
7407 		// keep a partition reference as long as the partition is mounted
7408 		partitionRegistrar.Detach();
7409 		mount->partition = partition;
7410 		mount->owns_file_device = newlyCreatedFileDevice;
7411 		fileDeviceDeleter.id = -1;
7412 	}
7413 
7414 	notify_mount(mount->id,
7415 		coveredNode != NULL ? coveredNode->device : -1,
7416 		coveredNode != NULL ? coveredNode->id : -1);
7417 
7418 	return mount->id;
7419 
7420 err4:
7421 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7422 err3:
7423 	if (coveredNode != NULL)
7424 		put_vnode(coveredNode);
7425 err2:
7426 	mutex_lock(&sMountMutex);
7427 	hash_remove(sMountsTable, mount);
7428 	mutex_unlock(&sMountMutex);
7429 err1:
7430 	delete mount;
7431 
7432 	return status;
7433 }
7434 
7435 
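/*!	Unmounts the volume given by \a mountID, or by \a path, which then has
	to name the volume's mount point. Unmounting fails with B_BUSY as long as
	any of the mount's vnodes is still in use; if B_FORCE_UNMOUNT is set in
	\a flags, all open file descriptors on the mount are forcibly disconnected
	first and the unmount is retried.
*/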
7436 static status_t
7437 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7438 {
7439 	struct fs_mount* mount;
7440 	status_t err;
7441 
7442 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7443 		kernel));
7444 
7445 	struct vnode* pathVnode = NULL;
7446 	if (path != NULL) {
7447 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7448 		if (err != B_OK)
7449 			return B_ENTRY_NOT_FOUND;
7450 	}
7451 
7452 	RecursiveLocker mountOpLocker(sMountOpLock);
7453 
7454 	// this lock is not strictly necessary, but is taken in case of KDEBUG
7455 	// to keep the ASSERT in find_mount() working.
7456 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7457 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7458 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7459 	if (mount == NULL) {
7460 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7461 			pathVnode);
7462 	}
7463 
7464 	if (path != NULL) {
7465 		put_vnode(pathVnode);
7466 
7467 		if (mount->root_vnode != pathVnode) {
7468 			// not the mount point
7469 			return B_BAD_VALUE;
7470 		}
7471 	}
7472 
7473 	// if the volume is associated with a partition, lock the device of the
7474 	// partition as long as we are unmounting
7475 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7476 	KPartition* partition = mount->partition;
7477 	KDiskDevice* diskDevice = NULL;
7478 	if (partition != NULL) {
7479 		if (partition->Device() == NULL) {
7480 			dprintf("fs_unmount(): There is no device!\n");
7481 			return B_ERROR;
7482 		}
7483 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7484 		if (!diskDevice) {
7485 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7486 			return B_ERROR;
7487 		}
7488 	}
7489 	DeviceWriteLocker writeLocker(diskDevice, true);
7490 
7491 	// make sure that the partition is not busy
7492 	if (partition != NULL) {
7493 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7494 			TRACE(("fs_unmount(): Partition is busy.\n"));
7495 			return B_BUSY;
7496 		}
7497 	}
7498 
7499 	// grab the vnode master mutex to keep someone from creating
7500 	// a vnode while we're figuring out if we can continue
7501 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7502 
7503 	bool disconnectedDescriptors = false;
7504 
7505 	while (true) {
7506 		bool busy = false;
7507 
7508 		// cycle through the list of vnodes associated with this mount and
7509 		// make sure none of them is busy or still referenced
7510 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7511 		while (struct vnode* vnode = iterator.Next()) {
7512 			if (vnode->IsBusy()) {
7513 				busy = true;
7514 				break;
7515 			}
7516 
7517 			// check the vnode's ref count -- subtract additional references for
7518 			// covering
7519 			int32 refCount = vnode->ref_count;
7520 			if (vnode->covers != NULL)
7521 				refCount--;
7522 			if (vnode->covered_by != NULL)
7523 				refCount--;
7524 
7525 			if (refCount != 0) {
7526 				// there are still vnodes in use on this mount, so we cannot
7527 				// unmount yet
7528 				busy = true;
7529 				break;
7530 			}
7531 		}
7532 
7533 		if (!busy)
7534 			break;
7535 
7536 		if ((flags & B_FORCE_UNMOUNT) == 0)
7537 			return B_BUSY;
7538 
7539 		if (disconnectedDescriptors) {
7540 			// wait a bit until the last access is finished, and then try again
7541 			vnodesWriteLocker.Unlock();
7542 			snooze(100000);
7543 			// TODO: if there is some kind of bug that prevents the ref counts
7544 			// from getting back to zero, this will fall into an endless loop...
7545 			vnodesWriteLocker.Lock();
7546 			continue;
7547 		}
7548 
7549 		// the file system is still busy - but we're forced to unmount it,
7550 		// so let's disconnect all open file descriptors
7551 
7552 		mount->unmounting = true;
7553 			// prevent new vnodes from being created
7554 
7555 		vnodesWriteLocker.Unlock();
7556 
7557 		disconnect_mount_or_vnode_fds(mount, NULL);
7558 		disconnectedDescriptors = true;
7559 
7560 		vnodesWriteLocker.Lock();
7561 	}
7562 
7563 	// We can safely continue. Mark all of the vnodes busy and this mount
7564 	// structure in unmounting state. Also undo the vnode covers/covered_by
7565 	// links.
7566 	mount->unmounting = true;
7567 
7568 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7569 	while (struct vnode* vnode = iterator.Next()) {
7570 		// Remove all covers/covered_by links from other mounts' nodes to this
7571 		// vnode and adjust the node ref count accordingly. We will release the
7572 		// references to the external vnodes below.
7573 		if (Vnode* coveredNode = vnode->covers) {
7574 			if (Vnode* coveringNode = vnode->covered_by) {
7575 				// We have both covered and covering vnodes, so just remove us
7576 				// from the chain.
7577 				coveredNode->covered_by = coveringNode;
7578 				coveringNode->covers = coveredNode;
7579 				vnode->ref_count -= 2;
7580 
7581 				vnode->covered_by = NULL;
7582 				vnode->covers = NULL;
7583 				vnode->SetCovering(false);
7584 				vnode->SetCovered(false);
7585 			} else {
7586 				// We only have a covered vnode. Remove its link to us.
7587 				coveredNode->covered_by = NULL;
7588 				coveredNode->SetCovered(false);
7589 				vnode->ref_count--;
7590 
7591 				// If the other node is an external vnode, we keep its link
7592 				// around so we can put the reference later on. Otherwise
7593 				// we get rid of it right now.
7594 				if (coveredNode->mount == mount) {
7595 					vnode->covers = NULL;
7596 					coveredNode->ref_count--;
7597 				}
7598 			}
7599 		} else if (Vnode* coveringNode = vnode->covered_by) {
7600 			// We only have a covering vnode. Remove its link to us.
7601 			coveringNode->covers = NULL;
7602 			coveringNode->SetCovering(false);
7603 			vnode->ref_count--;
7604 
7605 			// If the other node is an external vnode, we keep its link
7606 			// around so we can put the reference later on. Otherwise
7607 			// we get rid of it right now.
7608 			if (coveringNode->mount == mount) {
7609 				vnode->covered_by = NULL;
7610 				coveringNode->ref_count--;
7611 			}
7612 		}
7613 
7614 		vnode->SetBusy(true);
7615 		vnode_to_be_freed(vnode);
7616 	}
7617 
7618 	vnodesWriteLocker.Unlock();
7619 
7620 	// Free all vnodes associated with this mount.
7621 	// They will be removed from the mount list by free_vnode(), so
7622 	// we don't have to do that here.
7623 	while (struct vnode* vnode = mount->vnodes.Head()) {
7624 		// Put the references to external covered/covering vnodes we kept above.
7625 		if (Vnode* coveredNode = vnode->covers)
7626 			put_vnode(coveredNode);
7627 		if (Vnode* coveringNode = vnode->covered_by)
7628 			put_vnode(coveringNode);
7629 
7630 		free_vnode(vnode, false);
7631 	}
7632 
7633 	// remove the mount structure from the hash table
7634 	mutex_lock(&sMountMutex);
7635 	hash_remove(sMountsTable, mount);
7636 	mutex_unlock(&sMountMutex);
7637 
7638 	mountOpLocker.Unlock();
7639 
7640 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7641 	notify_unmount(mount->id);
7642 
7643 	// dereference the partition and mark it unmounted
7644 	if (partition) {
7645 		partition->SetVolumeID(-1);
7646 		partition->SetMountCookie(NULL);
7647 
7648 		if (mount->owns_file_device)
7649 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7650 		partition->Unregister();
7651 	}
7652 
7653 	delete mount;
7654 	return B_OK;
7655 }
7656 
7657 
7658 static status_t
7659 fs_sync(dev_t device)
7660 {
7661 	struct fs_mount* mount;
7662 	status_t status = get_mount(device, &mount);
7663 	if (status != B_OK)
7664 		return status;
7665 
7666 	struct vnode marker;
7667 	memset(&marker, 0, sizeof(marker));
7668 	marker.SetBusy(true);
7669 	marker.SetRemoved(true);
7670 
7671 	// First, synchronize all file caches
7672 
7673 	while (true) {
7674 		WriteLocker locker(sVnodeLock);
7675 			// Note: That's the easy way, which is probably OK for sync(),
7676 			// since it's a relatively rare call and doesn't need to allow for
7677 			// a lot of concurrency. Using a read lock would be possible, but
7678 			// also more involved, since we would have to lock the individual
7679 			// nodes and take care of the locking order, which we might not
7680 			// want to do while holding fs_mount::rlock.
7681 
7682 		// synchronize access to vnode list
7683 		recursive_lock_lock(&mount->rlock);
7684 
7685 		struct vnode* vnode;
7686 		if (!marker.IsRemoved()) {
7687 			vnode = mount->vnodes.GetNext(&marker);
7688 			mount->vnodes.Remove(&marker);
7689 			marker.SetRemoved(true);
7690 		} else
7691 			vnode = mount->vnodes.First();
7692 
7693 		while (vnode != NULL && (vnode->cache == NULL
7694 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7695 			// TODO: we could track writes (and writable mapped vnodes)
7696 			//	and have a simple flag that we could test for here
7697 			vnode = mount->vnodes.GetNext(vnode);
7698 		}
7699 
7700 		if (vnode != NULL) {
7701 			// insert marker vnode again
7702 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7703 			marker.SetRemoved(false);
7704 		}
7705 
7706 		recursive_lock_unlock(&mount->rlock);
7707 
7708 		if (vnode == NULL)
7709 			break;
7710 
7711 		vnode = lookup_vnode(mount->id, vnode->id);
7712 		if (vnode == NULL || vnode->IsBusy())
7713 			continue;
7714 
7715 		if (vnode->ref_count == 0) {
7716 			// this vnode has been unused before
7717 			vnode_used(vnode);
7718 		}
7719 		inc_vnode_ref_count(vnode);
7720 
7721 		locker.Unlock();
7722 
7723 		if (vnode->cache != NULL && !vnode->IsRemoved())
7724 			vnode->cache->WriteModified();
7725 
7726 		put_vnode(vnode);
7727 	}
7728 
7729 	// And then, let the file systems do their synchronizing work
7730 
7731 	if (HAS_FS_MOUNT_CALL(mount, sync))
7732 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7733 
7734 	put_mount(mount);
7735 	return status;
7736 }
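

/* Illustrative sketch (not part of the build): the marker-element pattern
 * fs_sync() uses above, reduced to its core. A placeholder node is kept in
 * the list so that iteration can resume at the right position after the lock
 * has been dropped for the actual (possibly blocking) work. The Item type
 * and Process() are hypothetical; the list calls mirror the ones used above.
 *
 *	\code
 *	Item marker;
 *	marker.SetRemoved(true);		// "marker is not in the list"
 *
 *	while (true) {
 *		lock.Lock();
 *
 *		Item* item = marker.IsRemoved()
 *			? list.First() : list.GetNext(&marker);
 *		if (!marker.IsRemoved()) {
 *			list.Remove(&marker);
 *			marker.SetRemoved(true);
 *		}
 *
 *		if (item != NULL) {
 *			// remember our position right behind the current item
 *			list.Insert(list.GetNext(item), &marker);
 *			marker.SetRemoved(false);
 *		}
 *
 *		lock.Unlock();
 *
 *		if (item == NULL)
 *			break;
 *
 *		Process(item);	// may block; the list may change in the meantime
 *	}
 *	\endcode
 */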
7737 
7738 
7739 static status_t
7740 fs_read_info(dev_t device, struct fs_info* info)
7741 {
7742 	struct fs_mount* mount;
7743 	status_t status = get_mount(device, &mount);
7744 	if (status != B_OK)
7745 		return status;
7746 
7747 	memset(info, 0, sizeof(struct fs_info));
7748 
7749 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7750 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7751 
7752 	// fill in info the file system doesn't (have to) know about
7753 	if (status == B_OK) {
7754 		info->dev = mount->id;
7755 		info->root = mount->root_vnode->id;
7756 
7757 		fs_volume* volume = mount->volume;
7758 		while (volume->super_volume != NULL)
7759 			volume = volume->super_volume;
7760 
7761 		strlcpy(info->fsh_name, volume->file_system_name,
7762 			sizeof(info->fsh_name));
7763 		if (mount->device_name != NULL) {
7764 			strlcpy(info->device_name, mount->device_name,
7765 				sizeof(info->device_name));
7766 		}
7767 	}
7768 
7769 	// Even if the call is not supported by the file system, the caller
7770 	// still gets the parts that we filled in ourselves.
7771 
7772 	put_mount(mount);
7773 	return status;
7774 }
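

/* Illustrative usage sketch (not compiled here): reading the info of a
 * mounted volume through the kernel mirror syscall declared below. The
 * \c volumeID variable is hypothetical.
 *
 *	\code
 *	fs_info info;
 *	if (_kern_read_fs_info(volumeID, &info) == B_OK) {
 *		dprintf("volume %ld: \"%s\" (%s)\n", info.dev, info.volume_name,
 *			info.fsh_name);
 *	}
 *	\endcode
 */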
7775 
7776 
7777 static status_t
7778 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7779 {
7780 	struct fs_mount* mount;
7781 	status_t status = get_mount(device, &mount);
7782 	if (status != B_OK)
7783 		return status;
7784 
7785 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7786 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7787 	else
7788 		status = B_READ_ONLY_DEVICE;
7789 
7790 	put_mount(mount);
7791 	return status;
7792 }
7793 
7794 
7795 static dev_t
7796 fs_next_device(int32* _cookie)
7797 {
7798 	struct fs_mount* mount = NULL;
7799 	dev_t device = *_cookie;
7800 
7801 	mutex_lock(&sMountMutex);
7802 
7803 	// Since device IDs are assigned sequentially, this algorithm
7804 	// works well enough. It makes sure that the device list
7805 	// returned is sorted, and that no device is skipped when an
7806 	// already visited device has been unmounted.
7807 
7808 	while (device < sNextMountID) {
7809 		mount = find_mount(device++);
7810 		if (mount != NULL && mount->volume->private_volume != NULL)
7811 			break;
7812 	}
7813 
7814 	*_cookie = device;
7815 
7816 	if (mount != NULL)
7817 		device = mount->id;
7818 	else
7819 		device = B_BAD_VALUE;
7820 
7821 	mutex_unlock(&sMountMutex);
7822 
7823 	return device;
7824 }
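

/* Illustrative usage sketch: enumerating all mounted volumes with the cookie
 * interface. The cookie starts at 0 and is advanced by fs_next_device();
 * B_BAD_VALUE signals the end of the (sorted) device list.
 *
 *	\code
 *	int32 cookie = 0;
 *	dev_t device;
 *	while ((device = fs_next_device(&cookie)) >= 0)
 *		dprintf("mounted volume with ID %ld\n", device);
 *	\endcode
 */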
7825 
7826 
7827 ssize_t
7828 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7829 	void *buffer, size_t readBytes)
7830 {
7831 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7832 	if (attrFD < 0)
7833 		return attrFD;
7834 
7835 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7836 
7837 	_kern_close(attrFD);
7838 
7839 	return bytesRead;
7840 }
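

/* Illustrative usage sketch: reading a small attribute of an open file with
 * the helper above. The attribute name is only an example; a negative return
 * value is an error code, otherwise it is the number of bytes read.
 *
 *	\code
 *	char buffer[256];
 *	ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
 *		buffer, sizeof(buffer) - 1);
 *	if (bytesRead >= 0)
 *		buffer[bytesRead] = '\0';
 *	\endcode
 */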
7841 
7842 
7843 static status_t
7844 get_cwd(char* buffer, size_t size, bool kernel)
7845 {
7846 	// Get current working directory from io context
7847 	struct io_context* context = get_current_io_context(kernel);
7848 	status_t status;
7849 
7850 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7851 
7852 	mutex_lock(&context->io_mutex);
7853 
7854 	struct vnode* vnode = context->cwd;
7855 	if (vnode)
7856 		inc_vnode_ref_count(vnode);
7857 
7858 	mutex_unlock(&context->io_mutex);
7859 
7860 	if (vnode) {
7861 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7862 		put_vnode(vnode);
7863 	} else
7864 		status = B_ERROR;
7865 
7866 	return status;
7867 }
7868 
7869 
7870 static status_t
7871 set_cwd(int fd, char* path, bool kernel)
7872 {
7873 	struct io_context* context;
7874 	struct vnode* vnode = NULL;
7875 	struct vnode* oldDirectory;
7876 	status_t status;
7877 
7878 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7879 
7880 	// Get vnode for passed path, and bail if it failed
7881 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7882 	if (status < 0)
7883 		return status;
7884 
7885 	if (!S_ISDIR(vnode->Type())) {
7886 		// nope, can't cwd to here
7887 		status = B_NOT_A_DIRECTORY;
7888 		goto err;
7889 	}
7890 
7891 	// Get current io context and lock
7892 	context = get_current_io_context(kernel);
7893 	mutex_lock(&context->io_mutex);
7894 
7895 	// save the old current working directory first
7896 	oldDirectory = context->cwd;
7897 	context->cwd = vnode;
7898 
7899 	mutex_unlock(&context->io_mutex);
7900 
7901 	if (oldDirectory)
7902 		put_vnode(oldDirectory);
7903 
7904 	return B_NO_ERROR;
7905 
7906 err:
7907 	put_vnode(vnode);
7908 	return status;
7909 }
7910 
7911 
7912 //	#pragma mark - kernel mirrored syscalls
7913 
7914 
7915 dev_t
7916 _kern_mount(const char* path, const char* device, const char* fsName,
7917 	uint32 flags, const char* args, size_t argsLength)
7918 {
7919 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7920 	if (pathBuffer.InitCheck() != B_OK)
7921 		return B_NO_MEMORY;
7922 
7923 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7924 }
7925 
7926 
7927 status_t
7928 _kern_unmount(const char* path, uint32 flags)
7929 {
7930 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7931 	if (pathBuffer.InitCheck() != B_OK)
7932 		return B_NO_MEMORY;
7933 
7934 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7935 }
7936 
7937 
7938 status_t
7939 _kern_read_fs_info(dev_t device, struct fs_info* info)
7940 {
7941 	if (info == NULL)
7942 		return B_BAD_VALUE;
7943 
7944 	return fs_read_info(device, info);
7945 }
7946 
7947 
7948 status_t
7949 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7950 {
7951 	if (info == NULL)
7952 		return B_BAD_VALUE;
7953 
7954 	return fs_write_info(device, info, mask);
7955 }
7956 
7957 
7958 status_t
7959 _kern_sync(void)
7960 {
7961 	// Note: _kern_sync() is also called from _user_sync()
7962 	int32 cookie = 0;
7963 	dev_t device;
7964 	while ((device = next_dev(&cookie)) >= 0) {
7965 		status_t status = fs_sync(device);
7966 		if (status != B_OK && status != B_BAD_VALUE) {
7967 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7968 				strerror(status));
7969 		}
7970 	}
7971 
7972 	return B_OK;
7973 }
7974 
7975 
7976 dev_t
7977 _kern_next_device(int32* _cookie)
7978 {
7979 	return fs_next_device(_cookie);
7980 }
7981 
7982 
7983 status_t
7984 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7985 	size_t infoSize)
7986 {
7987 	if (infoSize != sizeof(fd_info))
7988 		return B_BAD_VALUE;
7989 
7990 	// get the team
7991 	Team* team = Team::Get(teamID);
7992 	if (team == NULL)
7993 		return B_BAD_TEAM_ID;
7994 	BReference<Team> teamReference(team, true);
7995 
7996 	// now that we have a team reference, its I/O context won't go away
7997 	io_context* context = team->io_context;
7998 	MutexLocker contextLocker(context->io_mutex);
7999 
8000 	uint32 slot = *_cookie;
8001 
8002 	struct file_descriptor* descriptor;
8003 	while (slot < context->table_size
8004 		&& (descriptor = context->fds[slot]) == NULL) {
8005 		slot++;
8006 	}
8007 
8008 	if (slot >= context->table_size)
8009 		return B_ENTRY_NOT_FOUND;
8010 
8011 	info->number = slot;
8012 	info->open_mode = descriptor->open_mode;
8013 
8014 	struct vnode* vnode = fd_vnode(descriptor);
8015 	if (vnode != NULL) {
8016 		info->device = vnode->device;
8017 		info->node = vnode->id;
8018 	} else if (descriptor->u.mount != NULL) {
8019 		info->device = descriptor->u.mount->id;
8020 		info->node = -1;
8021 	}
8022 
8023 	*_cookie = slot + 1;
8024 	return B_OK;
8025 }
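

/* Illustrative usage sketch: walking all open descriptors of a team with the
 * cookie interface above. The cookie is the next table slot to inspect and
 * starts at 0; B_ENTRY_NOT_FOUND marks the end of the FD table. \c teamID is
 * hypothetical.
 *
 *	\code
 *	uint32 cookie = 0;
 *	fd_info info;
 *	while (_kern_get_next_fd_info(teamID, &cookie, &info,
 *			sizeof(info)) == B_OK) {
 *		dprintf("fd %ld: device %ld, node %lld\n", info.number, info.device,
 *			info.node);
 *	}
 *	\endcode
 */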
8026 
8027 
8028 int
8029 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8030 	int perms)
8031 {
8032 	if ((openMode & O_CREAT) != 0) {
8033 		return file_create_entry_ref(device, inode, name, openMode, perms,
8034 			true);
8035 	}
8036 
8037 	return file_open_entry_ref(device, inode, name, openMode, true);
8038 }
8039 
8040 
8041 /*!	\brief Opens a node specified by a FD + path pair.
8042 
8043 	At least one of \a fd and \a path must be specified.
8044 	If only \a fd is given, the function opens the node identified by this
8045 	FD. If only a path is given, this path is opened. If both are given and
8046 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8047 	of the directory (!) identified by \a fd.
8048 
8049 	\param fd The FD. May be < 0.
8050 	\param path The absolute or relative path. May be \c NULL.
8051 	\param openMode The open mode.
8052 	\return A FD referring to the newly opened node, or an error code,
8053 			if an error occurs.
8054 */
8055 int
8056 _kern_open(int fd, const char* path, int openMode, int perms)
8057 {
8058 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8059 	if (pathBuffer.InitCheck() != B_OK)
8060 		return B_NO_MEMORY;
8061 
8062 	if (openMode & O_CREAT)
8063 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8064 
8065 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8066 }
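

/* Illustrative sketch: the three ways the FD + path pair documented above
 * can address a node. \c dirFD is assumed to be an open directory.
 *
 *	\code
 *	int fd1 = _kern_open(-1, "/boot/home/file", O_RDONLY, 0);
 *		// absolute path, FD unused
 *	int fd2 = _kern_open(dirFD, "file", O_RDONLY, 0);
 *		// path relative to dirFD
 *	int fd3 = _kern_open(dirFD, "/boot/home/file", O_RDONLY, 0);
 *		// absolute path again: dirFD is ignored
 *	\endcode
 */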
8067 
8068 
8069 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8070 
8071 	The supplied name may be \c NULL, in which case the directory identified
8072 	by \a device and \a inode will be opened. Otherwise \a device and
8073 	\a inode identify the parent directory of the directory to be opened
8074 	and \a name its entry name.
8075 
8076 	\param device If \a name is specified the ID of the device the parent
8077 		   directory of the directory to be opened resides on, otherwise
8078 		   the device of the directory itself.
8079 	\param inode If \a name is specified the node ID of the parent
8080 		   directory of the directory to be opened, otherwise the node ID
8081 		   of the directory itself.
8082 	\param name The entry name of the directory to be opened. If \c NULL,
8083 		   the \a device + \a inode pair identify the node to be opened.
8084 	\return The FD of the newly opened directory or an error code, if
8085 			something went wrong.
8086 */
8087 int
8088 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8089 {
8090 	return dir_open_entry_ref(device, inode, name, true);
8091 }
8092 
8093 
8094 /*!	\brief Opens a directory specified by a FD + path pair.
8095 
8096 	At least one of \a fd and \a path must be specified.
8097 	If only \a fd is given, the function opens the directory identified by this
8098 	FD. If only a path is given, this path is opened. If both are given and
8099 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8100 	of the directory (!) identified by \a fd.
8101 
8102 	\param fd The FD. May be < 0.
8103 	\param path The absolute or relative path. May be \c NULL.
8104 	\return A FD referring to the newly opened directory, or an error code,
8105 			if an error occurs.
8106 */
8107 int
8108 _kern_open_dir(int fd, const char* path)
8109 {
8110 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8111 	if (pathBuffer.InitCheck() != B_OK)
8112 		return B_NO_MEMORY;
8113 
8114 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8115 }
8116 
8117 
8118 status_t
8119 _kern_fcntl(int fd, int op, uint32 argument)
8120 {
8121 	return common_fcntl(fd, op, argument, true);
8122 }
8123 
8124 
8125 status_t
8126 _kern_fsync(int fd)
8127 {
8128 	return common_sync(fd, true);
8129 }
8130 
8131 
8132 status_t
8133 _kern_lock_node(int fd)
8134 {
8135 	return common_lock_node(fd, true);
8136 }
8137 
8138 
8139 status_t
8140 _kern_unlock_node(int fd)
8141 {
8142 	return common_unlock_node(fd, true);
8143 }
8144 
8145 
8146 status_t
8147 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8148 	int perms)
8149 {
8150 	return dir_create_entry_ref(device, inode, name, perms, true);
8151 }
8152 
8153 
8154 /*!	\brief Creates a directory specified by a FD + path pair.
8155 
8156 	\a path must always be specified (it contains the name of the new directory
8157 	at least). If only a path is given, this path identifies the location at
8158 	which the directory shall be created. If both \a fd and \a path are given
8159 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8160 	of the directory (!) identified by \a fd.
8161 
8162 	\param fd The FD. May be < 0.
8163 	\param path The absolute or relative path. Must not be \c NULL.
8164 	\param perms The access permissions the new directory shall have.
8165 	\return \c B_OK, if the directory has been created successfully, another
8166 			error code otherwise.
8167 */
8168 status_t
8169 _kern_create_dir(int fd, const char* path, int perms)
8170 {
8171 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8172 	if (pathBuffer.InitCheck() != B_OK)
8173 		return B_NO_MEMORY;
8174 
8175 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8176 }
8177 
8178 
8179 status_t
8180 _kern_remove_dir(int fd, const char* path)
8181 {
8182 	if (path) {
8183 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8184 		if (pathBuffer.InitCheck() != B_OK)
8185 			return B_NO_MEMORY;
8186 
8187 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8188 	}
8189 
8190 	return dir_remove(fd, NULL, true);
8191 }
8192 
8193 
8194 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8195 
8196 	At least one of \a fd and \a path must be specified.
8197 	If only \a fd is given, the symlink to be read is the node
8198 	identified by this FD. If only a path is given, this path identifies the
8199 	symlink to be read. If both are given and the path is absolute, \a fd is
8200 	ignored; a relative path is reckoned off of the directory (!) identified
8201 	by \a fd.
8202 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8203 	will still be updated to reflect the required buffer size.
8204 
8205 	\param fd The FD. May be < 0.
8206 	\param path The absolute or relative path. May be \c NULL.
8207 	\param buffer The buffer into which the contents of the symlink shall be
8208 		   written.
8209 	\param _bufferSize A pointer to the size of the supplied buffer.
8210 	\return \c B_OK (\a _bufferSize is set to the link's length) or an error code.
8211 */
8212 status_t
8213 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8214 {
8215 	if (path) {
8216 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8217 		if (pathBuffer.InitCheck() != B_OK)
8218 			return B_NO_MEMORY;
8219 
8220 		return common_read_link(fd, pathBuffer.LockBuffer(),
8221 			buffer, _bufferSize, true);
8222 	}
8223 
8224 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8225 }
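

/* Illustrative usage sketch: reading a symlink and reacting to
 * B_BUFFER_OVERFLOW, in which case \a _bufferSize reports the size that
 * would have been needed.
 *
 *	\code
 *	char buffer[B_PATH_NAME_LENGTH];
 *	size_t bufferSize = sizeof(buffer);
 *	status_t status = _kern_read_link(-1, "/boot/home/link", buffer,
 *		&bufferSize);
 *	if (status == B_BUFFER_OVERFLOW) {
 *		// bufferSize now holds the required buffer size
 *	}
 *	\endcode
 */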
8226 
8227 
8228 /*!	\brief Creates a symlink specified by a FD + path pair.
8229 
8230 	\a path must always be specified (it contains the name of the new symlink
8231 	at least). If only a path is given, this path identifies the location at
8232 	which the symlink shall be created. If both \a fd and \a path are given and
8233 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8234 	of the directory (!) identified by \a fd.
8235 
8236 	\param fd The FD. May be < 0.
8237 	\param path The absolute or relative path. Must not be \c NULL.
8238 	\param mode The access permissions the new symlink shall have.
8239 	\return \c B_OK, if the symlink has been created successfully, another
8240 			error code otherwise.
8241 */
8242 status_t
8243 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8244 {
8245 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8246 	if (pathBuffer.InitCheck() != B_OK)
8247 		return B_NO_MEMORY;
8248 
8249 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8250 		toPath, mode, true);
8251 }
8252 
8253 
8254 status_t
8255 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8256 	bool traverseLeafLink)
8257 {
8258 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8259 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8260 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8261 		return B_NO_MEMORY;
8262 
8263 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8264 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8265 }
8266 
8267 
8268 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8269 
8270 	\a path must always be specified (it contains at least the name of the entry
8271 	to be deleted). If only a path is given, this path identifies the entry
8272 	directly. If both \a fd and \a path are given and the path is absolute,
8273 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8274 	identified by \a fd.
8275 
8276 	\param fd The FD. May be < 0.
8277 	\param path The absolute or relative path. Must not be \c NULL.
8278 	\return \c B_OK, if the entry has been removed successfully, another
8279 			error code otherwise.
8280 */
8281 status_t
8282 _kern_unlink(int fd, const char* path)
8283 {
8284 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8285 	if (pathBuffer.InitCheck() != B_OK)
8286 		return B_NO_MEMORY;
8287 
8288 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8289 }
8290 
8291 
8292 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8293 		   by another FD + path pair.
8294 
8295 	\a oldPath and \a newPath must always be specified (they contain at least
8296 	the name of the entry). If only a path is given, this path identifies the
8297 	entry directly. If both a FD and a path are given and the path is absolute,
8298 	the FD is ignored; a relative path is reckoned off of the directory (!)
8299 	identified by the respective FD.
8300 
8301 	\param oldFD The FD of the old location. May be < 0.
8302 	\param oldPath The absolute or relative path of the old location. Must not
8303 		   be \c NULL.
8304 	\param newFD The FD of the new location. May be < 0.
8305 	\param newPath The absolute or relative path of the new location. Must not
8306 		   be \c NULL.
8307 	\return \c B_OK, if the entry has been moved successfully, another
8308 			error code otherwise.
8309 */
8310 status_t
8311 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8312 {
8313 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8314 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8315 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8316 		return B_NO_MEMORY;
8317 
8318 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8319 		newFD, newPathBuffer.LockBuffer(), true);
8320 }
8321 
8322 
8323 status_t
8324 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8325 {
8326 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8327 	if (pathBuffer.InitCheck() != B_OK)
8328 		return B_NO_MEMORY;
8329 
8330 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8331 		true);
8332 }
8333 
8334 
8335 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8336 
8337 	If only \a fd is given, the stat operation associated with the type
8338 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8339 	given, this path identifies the entry for whose node to retrieve the
8340 	stat data. If both \a fd and \a path are given and the path is absolute,
8341 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8342 	identified by \a fd and specifies the entry whose stat data shall be
8343 	retrieved.
8344 
8345 	\param fd The FD. May be < 0.
8346 	\param path The absolute or relative path. May be \c NULL.
8347 	\param traverseLeafLink If \a path is given, \c true specifies that the
8348 		   function shall not stick to symlinks, but traverse them.
8349 	\param stat The buffer the stat data shall be written into.
8350 	\param statSize The size of the supplied stat buffer.
8351 	\return \c B_OK, if the stat data have been read successfully, another
8352 			error code otherwise.
8353 */
8354 status_t
8355 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8356 	struct stat* stat, size_t statSize)
8357 {
8358 	struct stat completeStat;
8359 	struct stat* originalStat = NULL;
8360 	status_t status;
8361 
8362 	if (statSize > sizeof(struct stat))
8363 		return B_BAD_VALUE;
8364 
8365 	// this supports different stat extensions
8366 	if (statSize < sizeof(struct stat)) {
8367 		originalStat = stat;
8368 		stat = &completeStat;
8369 	}
8370 
8371 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8372 
8373 	if (status == B_OK && originalStat != NULL)
8374 		memcpy(originalStat, stat, statSize);
8375 
8376 	return status;
8377 }
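

/* Illustrative note: the \a statSize parameter exists so that callers built
 * against an older, smaller struct stat keep working -- the kernel fills a
 * complete structure and copies only the leading \a statSize bytes back. A
 * current caller simply passes the full size:
 *
 *	\code
 *	struct stat st;
 *	status_t status = _kern_read_stat(fd, NULL, false, &st, sizeof(st));
 *	\endcode
 */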
8378 
8379 
8380 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8381 
8382 	If only \a fd is given, the stat operation associated with the type
8383 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8384 	given, this path identifies the entry for whose node to write the
8385 	stat data. If both \a fd and \a path are given and the path is absolute,
8386 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8387 	identified by \a fd and specifies the entry whose stat data shall be
8388 	written.
8389 
8390 	\param fd The FD. May be < 0.
8391 	\param path The absolute or relative path. May be \c NULL.
8392 	\param traverseLeafLink If \a path is given, \c true specifies that the
8393 		   function shall not stick to symlinks, but traverse them.
8394 	\param stat The buffer containing the stat data to be written.
8395 	\param statSize The size of the supplied stat buffer.
8396 	\param statMask A mask specifying which parts of the stat data shall be
8397 		   written.
8398 	\return \c B_OK, if the stat data have been written successfully,
8399 			another error code otherwise.
8400 */
8401 status_t
8402 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8403 	const struct stat* stat, size_t statSize, int statMask)
8404 {
8405 	struct stat completeStat;
8406 
8407 	if (statSize > sizeof(struct stat))
8408 		return B_BAD_VALUE;
8409 
8410 	// this supports different stat extensions
8411 	if (statSize < sizeof(struct stat)) {
8412 		memset((uint8*)&completeStat + statSize, 0,
8413 			sizeof(struct stat) - statSize);
8414 		memcpy(&completeStat, stat, statSize);
8415 		stat = &completeStat;
8416 	}
8417 
8418 	status_t status;
8419 
8420 	if (path) {
8421 		// path given: write the stat of the node referred to by (fd, path)
8422 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8423 		if (pathBuffer.InitCheck() != B_OK)
8424 			return B_NO_MEMORY;
8425 
8426 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8427 			traverseLeafLink, stat, statMask, true);
8428 	} else {
8429 		// no path given: get the FD and use the FD operation
8430 		struct file_descriptor* descriptor
8431 			= get_fd(get_current_io_context(true), fd);
8432 		if (descriptor == NULL)
8433 			return B_FILE_ERROR;
8434 
8435 		if (descriptor->ops->fd_write_stat)
8436 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8437 		else
8438 			status = B_UNSUPPORTED;
8439 
8440 		put_fd(descriptor);
8441 	}
8442 
8443 	return status;
8444 }
8445 
8446 
8447 int
8448 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8449 {
8450 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8451 	if (pathBuffer.InitCheck() != B_OK)
8452 		return B_NO_MEMORY;
8453 
8454 	if (path != NULL)
8455 		pathBuffer.SetTo(path);
8456 
8457 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8458 		traverseLeafLink, true);
8459 }
8460 
8461 
8462 int
8463 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8464 	int openMode)
8465 {
8466 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8467 	if (pathBuffer.InitCheck() != B_OK)
8468 		return B_NO_MEMORY;
8469 
8470 	if ((openMode & O_CREAT) != 0) {
8471 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8472 			true);
8473 	}
8474 
8475 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8476 }
8477 
8478 
8479 status_t
8480 _kern_remove_attr(int fd, const char* name)
8481 {
8482 	return attr_remove(fd, name, true);
8483 }
8484 
8485 
8486 status_t
8487 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8488 	const char* toName)
8489 {
8490 	return attr_rename(fromFile, fromName, toFile, toName, true);
8491 }
8492 
8493 
8494 int
8495 _kern_open_index_dir(dev_t device)
8496 {
8497 	return index_dir_open(device, true);
8498 }
8499 
8500 
8501 status_t
8502 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8503 {
8504 	return index_create(device, name, type, flags, true);
8505 }
8506 
8507 
8508 status_t
8509 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8510 {
8511 	return index_name_read_stat(device, name, stat, true);
8512 }
8513 
8514 
8515 status_t
8516 _kern_remove_index(dev_t device, const char* name)
8517 {
8518 	return index_remove(device, name, true);
8519 }
8520 
8521 
8522 status_t
8523 _kern_getcwd(char* buffer, size_t size)
8524 {
8525 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8526 
8527 	// Call vfs to get current working directory
8528 	return get_cwd(buffer, size, true);
8529 }
8530 
8531 
8532 status_t
8533 _kern_setcwd(int fd, const char* path)
8534 {
8535 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8536 	if (pathBuffer.InitCheck() != B_OK)
8537 		return B_NO_MEMORY;
8538 
8539 	if (path != NULL)
8540 		pathBuffer.SetTo(path);
8541 
8542 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8543 }
8544 
8545 
8546 //	#pragma mark - userland syscalls
8547 
8548 
8549 dev_t
8550 _user_mount(const char* userPath, const char* userDevice,
8551 	const char* userFileSystem, uint32 flags, const char* userArgs,
8552 	size_t argsLength)
8553 {
8554 	char fileSystem[B_FILE_NAME_LENGTH];
8555 	KPath path, device;
8556 	char* args = NULL;
8557 	status_t status;
8558 
8559 	if (!IS_USER_ADDRESS(userPath)
8560 		|| !IS_USER_ADDRESS(userFileSystem)
8561 		|| !IS_USER_ADDRESS(userDevice))
8562 		return B_BAD_ADDRESS;
8563 
8564 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8565 		return B_NO_MEMORY;
8566 
8567 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8568 		return B_BAD_ADDRESS;
8569 
8570 	if (userFileSystem != NULL
8571 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8572 		return B_BAD_ADDRESS;
8573 
8574 	if (userDevice != NULL
8575 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8576 			< B_OK)
8577 		return B_BAD_ADDRESS;
8578 
8579 	if (userArgs != NULL && argsLength > 0) {
8580 		// this is a safety restriction
8581 		if (argsLength >= 65536)
8582 			return B_NAME_TOO_LONG;
8583 
8584 		args = (char*)malloc(argsLength + 1);
8585 		if (args == NULL)
8586 			return B_NO_MEMORY;
8587 
8588 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8589 			free(args);
8590 			return B_BAD_ADDRESS;
8591 		}
8592 	}
8593 	path.UnlockBuffer();
8594 	device.UnlockBuffer();
8595 
8596 	status = fs_mount(path.LockBuffer(),
8597 		userDevice != NULL ? device.Path() : NULL,
8598 		userFileSystem ? fileSystem : NULL, flags, args, false);
8599 
8600 	free(args);
8601 	return status;
8602 }
8603 
8604 
8605 status_t
8606 _user_unmount(const char* userPath, uint32 flags)
8607 {
8608 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8609 	if (pathBuffer.InitCheck() != B_OK)
8610 		return B_NO_MEMORY;
8611 
8612 	char* path = pathBuffer.LockBuffer();
8613 
8614 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8615 		return B_BAD_ADDRESS;
8616 
8617 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8618 }
8619 
8620 
8621 status_t
8622 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8623 {
8624 	struct fs_info info;
8625 	status_t status;
8626 
8627 	if (userInfo == NULL)
8628 		return B_BAD_VALUE;
8629 
8630 	if (!IS_USER_ADDRESS(userInfo))
8631 		return B_BAD_ADDRESS;
8632 
8633 	status = fs_read_info(device, &info);
8634 	if (status != B_OK)
8635 		return status;
8636 
8637 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8638 		return B_BAD_ADDRESS;
8639 
8640 	return B_OK;
8641 }
8642 
8643 
8644 status_t
8645 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8646 {
8647 	struct fs_info info;
8648 
8649 	if (userInfo == NULL)
8650 		return B_BAD_VALUE;
8651 
8652 	if (!IS_USER_ADDRESS(userInfo)
8653 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8654 		return B_BAD_ADDRESS;
8655 
8656 	return fs_write_info(device, &info, mask);
8657 }
8658 
8659 
8660 dev_t
8661 _user_next_device(int32* _userCookie)
8662 {
8663 	int32 cookie;
8664 	dev_t device;
8665 
8666 	if (!IS_USER_ADDRESS(_userCookie)
8667 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8668 		return B_BAD_ADDRESS;
8669 
8670 	device = fs_next_device(&cookie);
8671 
8672 	if (device >= B_OK) {
8673 		// update user cookie
8674 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8675 			return B_BAD_ADDRESS;
8676 	}
8677 
8678 	return device;
8679 }
8680 
8681 
8682 status_t
8683 _user_sync(void)
8684 {
8685 	return _kern_sync();
8686 }
8687 
8688 
8689 status_t
8690 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8691 	size_t infoSize)
8692 {
8693 	struct fd_info info;
8694 	uint32 cookie;
8695 
8696 	// only root can do this (or should root's group be enough?)
8697 	if (geteuid() != 0)
8698 		return B_NOT_ALLOWED;
8699 
8700 	if (infoSize != sizeof(fd_info))
8701 		return B_BAD_VALUE;
8702 
8703 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8704 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8705 		return B_BAD_ADDRESS;
8706 
8707 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8708 	if (status != B_OK)
8709 		return status;
8710 
8711 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8712 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8713 		return B_BAD_ADDRESS;
8714 
8715 	return status;
8716 }
8717 
8718 
8719 status_t
8720 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8721 	char* userPath, size_t pathLength)
8722 {
8723 	if (!IS_USER_ADDRESS(userPath))
8724 		return B_BAD_ADDRESS;
8725 
8726 	KPath path(B_PATH_NAME_LENGTH + 1);
8727 	if (path.InitCheck() != B_OK)
8728 		return B_NO_MEMORY;
8729 
8730 	// copy the leaf name onto the stack
8731 	char stackLeaf[B_FILE_NAME_LENGTH];
8732 	if (leaf) {
8733 		if (!IS_USER_ADDRESS(leaf))
8734 			return B_BAD_ADDRESS;
8735 
8736 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8737 		if (length < 0)
8738 			return length;
8739 		if (length >= B_FILE_NAME_LENGTH)
8740 			return B_NAME_TOO_LONG;
8741 
8742 		leaf = stackLeaf;
8743 	}
8744 
8745 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8746 		path.LockBuffer(), path.BufferSize());
8747 	if (status != B_OK)
8748 		return status;
8749 
8750 	path.UnlockBuffer();
8751 
8752 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8753 	if (length < 0)
8754 		return length;
8755 	if (length >= (int)pathLength)
8756 		return B_BUFFER_OVERFLOW;
8757 
8758 	return B_OK;
8759 }
8760 
8761 
8762 status_t
8763 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8764 {
8765 	if (userPath == NULL || buffer == NULL)
8766 		return B_BAD_VALUE;
8767 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8768 		return B_BAD_ADDRESS;
8769 
8770 	// copy path from userland
8771 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8772 	if (pathBuffer.InitCheck() != B_OK)
8773 		return B_NO_MEMORY;
8774 	char* path = pathBuffer.LockBuffer();
8775 
8776 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8777 		return B_BAD_ADDRESS;
8778 
8779 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8780 		false);
8781 	if (error != B_OK)
8782 		return error;
8783 
8784 	// copy back to userland
8785 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8786 	if (len < 0)
8787 		return len;
8788 	if (len >= B_PATH_NAME_LENGTH)
8789 		return B_BUFFER_OVERFLOW;
8790 
8791 	return B_OK;
8792 }
8793 
8794 
8795 int
8796 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8797 	int openMode, int perms)
8798 {
8799 	char name[B_FILE_NAME_LENGTH];
8800 
8801 	if (userName == NULL || device < 0 || inode < 0)
8802 		return B_BAD_VALUE;
8803 	if (!IS_USER_ADDRESS(userName)
8804 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8805 		return B_BAD_ADDRESS;
8806 
8807 	if ((openMode & O_CREAT) != 0) {
8808 		return file_create_entry_ref(device, inode, name, openMode, perms,
8809 		 false);
8810 	}
8811 
8812 	return file_open_entry_ref(device, inode, name, openMode, false);
8813 }
8814 
8815 
8816 int
8817 _user_open(int fd, const char* userPath, int openMode, int perms)
8818 {
8819 	KPath path(B_PATH_NAME_LENGTH + 1);
8820 	if (path.InitCheck() != B_OK)
8821 		return B_NO_MEMORY;
8822 
8823 	char* buffer = path.LockBuffer();
8824 
8825 	if (!IS_USER_ADDRESS(userPath)
8826 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8827 		return B_BAD_ADDRESS;
8828 
8829 	if ((openMode & O_CREAT) != 0)
8830 		return file_create(fd, buffer, openMode, perms, false);
8831 
8832 	return file_open(fd, buffer, openMode, false);
8833 }
8834 
8835 
8836 int
8837 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8838 {
8839 	if (userName != NULL) {
8840 		char name[B_FILE_NAME_LENGTH];
8841 
8842 		if (!IS_USER_ADDRESS(userName)
8843 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8844 			return B_BAD_ADDRESS;
8845 
8846 		return dir_open_entry_ref(device, inode, name, false);
8847 	}
8848 	return dir_open_entry_ref(device, inode, NULL, false);
8849 }
8850 
8851 
8852 int
8853 _user_open_dir(int fd, const char* userPath)
8854 {
8855 	if (userPath == NULL)
8856 		return dir_open(fd, NULL, false);
8857 
8858 	KPath path(B_PATH_NAME_LENGTH + 1);
8859 	if (path.InitCheck() != B_OK)
8860 		return B_NO_MEMORY;
8861 
8862 	char* buffer = path.LockBuffer();
8863 
8864 	if (!IS_USER_ADDRESS(userPath)
8865 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8866 		return B_BAD_ADDRESS;
8867 
8868 	return dir_open(fd, buffer, false);
8869 }
8870 
8871 
8872 /*!	\brief Opens a directory's parent directory and returns the entry name
8873 		   of the former.
8874 
8875 	Aside from the fact that it returns the directory's entry name, this
8876 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It is
8877 	exactly equivalent if \a userName is \c NULL.
8878 
8879 	If a name buffer is supplied and the name does not fit the buffer, the
8880 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8881 
8882 	\param fd A FD referring to a directory.
8883 	\param userName Buffer the directory's entry name shall be written into.
8884 		   May be \c NULL.
8885 	\param nameLength Size of the name buffer.
8886 	\return The file descriptor of the opened parent directory, if everything
8887 			went fine, an error code otherwise.
8888 */
8889 int
8890 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8891 {
8892 	bool kernel = false;
8893 
8894 	if (userName && !IS_USER_ADDRESS(userName))
8895 		return B_BAD_ADDRESS;
8896 
8897 	// open the parent dir
8898 	int parentFD = dir_open(fd, (char*)"..", kernel);
8899 	if (parentFD < 0)
8900 		return parentFD;
8901 	FDCloser fdCloser(parentFD, kernel);
8902 
8903 	if (userName) {
8904 		// get the vnodes
8905 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8906 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8907 		VNodePutter parentVNodePutter(parentVNode);
8908 		VNodePutter dirVNodePutter(dirVNode);
8909 		if (!parentVNode || !dirVNode)
8910 			return B_FILE_ERROR;
8911 
8912 		// get the vnode name
8913 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8914 		struct dirent* buffer = (struct dirent*)_buffer;
8915 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8916 			sizeof(_buffer), get_current_io_context(false));
8917 		if (status != B_OK)
8918 			return status;
8919 
8920 		// copy the name to the userland buffer
8921 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8922 		if (len < 0)
8923 			return len;
8924 		if (len >= (int)nameLength)
8925 			return B_BUFFER_OVERFLOW;
8926 	}
8927 
8928 	return fdCloser.Detach();
8929 }
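

/* Illustrative usage sketch: opening the parent of an open directory and
 * retrieving the name under which that directory appears in it. Kernel-side
 * names are used for brevity; a real caller reaches this through the
 * corresponding libroot syscall wrapper, and \c dirFD is hypothetical.
 *
 *	\code
 *	char name[B_FILE_NAME_LENGTH];
 *	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
 *	if (parentFD >= 0) {
 *		// parentFD refers to the parent; name holds dirFD's entry name
 *	}
 *	\endcode
 */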
8930 
8931 
8932 status_t
8933 _user_fcntl(int fd, int op, uint32 argument)
8934 {
8935 	status_t status = common_fcntl(fd, op, argument, false);
8936 	if (op == F_SETLKW)
8937 		syscall_restart_handle_post(status);
8938 
8939 	return status;
8940 }
8941 
8942 
8943 status_t
8944 _user_fsync(int fd)
8945 {
8946 	return common_sync(fd, false);
8947 }
8948 
8949 
8950 status_t
8951 _user_flock(int fd, int operation)
8952 {
8953 	FUNCTION(("_user_flock(fd = %d, operation = %d)\n", fd, operation));
8954 
8955 	// Check if the operation is valid
8956 	switch (operation & ~LOCK_NB) {
8957 		case LOCK_UN:
8958 		case LOCK_SH:
8959 		case LOCK_EX:
8960 			break;
8961 
8962 		default:
8963 			return B_BAD_VALUE;
8964 	}
8965 
8966 	struct file_descriptor* descriptor;
8967 	struct vnode* vnode;
8968 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8969 	if (descriptor == NULL)
8970 		return B_FILE_ERROR;
8971 
8972 	if (descriptor->type != FDTYPE_FILE) {
8973 		put_fd(descriptor);
8974 		return B_BAD_VALUE;
8975 	}
8976 
8977 	struct flock flock;
8978 	flock.l_start = 0;
8979 	flock.l_len = OFF_MAX;
8980 	flock.l_whence = 0;
8981 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8982 
8983 	status_t status;
8984 	if ((operation & LOCK_UN) != 0)
8985 		status = release_advisory_lock(vnode, &flock);
8986 	else {
8987 		status = acquire_advisory_lock(vnode,
8988 			thread_get_current_thread()->team->session_id, &flock,
8989 			(operation & LOCK_NB) == 0);
8990 	}
8991 
8992 	syscall_restart_handle_post(status);
8993 
8994 	put_fd(descriptor);
8995 	return status;
8996 }
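

/* Illustrative sketch: what a whole-file flock(fd, LOCK_EX) request amounts
 * to in the code above -- an advisory exclusive lock spanning the entire
 * file:
 *
 *	\code
 *	struct flock flock;
 *	flock.l_start = 0;
 *	flock.l_len = OFF_MAX;		// covers the whole file
 *	flock.l_whence = 0;
 *	flock.l_type = F_WRLCK;		// LOCK_SH would map to F_RDLCK instead
 *	\endcode
 */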
8997 
8998 
8999 status_t
9000 _user_lock_node(int fd)
9001 {
9002 	return common_lock_node(fd, false);
9003 }
9004 
9005 
9006 status_t
9007 _user_unlock_node(int fd)
9008 {
9009 	return common_unlock_node(fd, false);
9010 }
9011 
9012 
9013 status_t
9014 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9015 	int perms)
9016 {
9017 	char name[B_FILE_NAME_LENGTH];
9018 	status_t status;
9019 
9020 	if (!IS_USER_ADDRESS(userName))
9021 		return B_BAD_ADDRESS;
9022 
9023 	status = user_strlcpy(name, userName, sizeof(name));
9024 	if (status < 0)
9025 		return status;
9026 
9027 	return dir_create_entry_ref(device, inode, name, perms, false);
9028 }
9029 
9030 
9031 status_t
9032 _user_create_dir(int fd, const char* userPath, int perms)
9033 {
9034 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9035 	if (pathBuffer.InitCheck() != B_OK)
9036 		return B_NO_MEMORY;
9037 
9038 	char* path = pathBuffer.LockBuffer();
9039 
9040 	if (!IS_USER_ADDRESS(userPath)
9041 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9042 		return B_BAD_ADDRESS;
9043 
9044 	return dir_create(fd, path, perms, false);
9045 }
9046 
9047 
9048 status_t
9049 _user_remove_dir(int fd, const char* userPath)
9050 {
9051 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9052 	if (pathBuffer.InitCheck() != B_OK)
9053 		return B_NO_MEMORY;
9054 
9055 	char* path = pathBuffer.LockBuffer();
9056 
9057 	if (userPath != NULL) {
9058 		if (!IS_USER_ADDRESS(userPath)
9059 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9060 			return B_BAD_ADDRESS;
9061 	}
9062 
9063 	return dir_remove(fd, userPath ? path : NULL, false);
9064 }
9065 
9066 
9067 status_t
9068 _user_read_link(int fd, const char* userPath, char* userBuffer,
9069 	size_t* userBufferSize)
9070 {
9071 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9072 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9073 		return B_NO_MEMORY;
9074 
9075 	size_t bufferSize;
9076 
9077 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9078 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9079 		return B_BAD_ADDRESS;
9080 
9081 	char* path = pathBuffer.LockBuffer();
9082 	char* buffer = linkBuffer.LockBuffer();
9083 
9084 	if (userPath) {
9085 		if (!IS_USER_ADDRESS(userPath)
9086 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9087 			return B_BAD_ADDRESS;
9088 
9089 		if (bufferSize > B_PATH_NAME_LENGTH)
9090 			bufferSize = B_PATH_NAME_LENGTH;
9091 	}
9092 
9093 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9094 		&bufferSize, false);
9095 
9096 	// we also update the bufferSize in case of errors
9097 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9098 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9099 		return B_BAD_ADDRESS;
9100 
9101 	if (status != B_OK)
9102 		return status;
9103 
9104 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9105 		return B_BAD_ADDRESS;
9106 
9107 	return B_OK;
9108 }
9109 
9110 
9111 status_t
9112 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9113 	int mode)
9114 {
9115 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9116 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9117 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9118 		return B_NO_MEMORY;
9119 
9120 	char* path = pathBuffer.LockBuffer();
9121 	char* toPath = toPathBuffer.LockBuffer();
9122 
9123 	if (!IS_USER_ADDRESS(userPath)
9124 		|| !IS_USER_ADDRESS(userToPath)
9125 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9126 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9127 		return B_BAD_ADDRESS;
9128 
9129 	return common_create_symlink(fd, path, toPath, mode, false);
9130 }
9131 
9132 
9133 status_t
9134 _user_create_link(int pathFD, const char* userPath, int toFD,
9135 	const char* userToPath, bool traverseLeafLink)
9136 {
9137 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9138 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9139 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9140 		return B_NO_MEMORY;
9141 
9142 	char* path = pathBuffer.LockBuffer();
9143 	char* toPath = toPathBuffer.LockBuffer();
9144 
9145 	if (!IS_USER_ADDRESS(userPath)
9146 		|| !IS_USER_ADDRESS(userToPath)
9147 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9148 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9149 		return B_BAD_ADDRESS;
9150 
9151 	status_t status = check_path(toPath);
9152 	if (status != B_OK)
9153 		return status;
9154 
9155 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9156 		false);
9157 }
9158 
9159 
9160 status_t
9161 _user_unlink(int fd, const char* userPath)
9162 {
9163 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9164 	if (pathBuffer.InitCheck() != B_OK)
9165 		return B_NO_MEMORY;
9166 
9167 	char* path = pathBuffer.LockBuffer();
9168 
9169 	if (!IS_USER_ADDRESS(userPath)
9170 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9171 		return B_BAD_ADDRESS;
9172 
9173 	return common_unlink(fd, path, false);
9174 }
9175 
9176 
9177 status_t
9178 _user_rename(int oldFD, const char* userOldPath, int newFD,
9179 	const char* userNewPath)
9180 {
9181 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9182 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9183 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9184 		return B_NO_MEMORY;
9185 
9186 	char* oldPath = oldPathBuffer.LockBuffer();
9187 	char* newPath = newPathBuffer.LockBuffer();
9188 
9189 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9190 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9191 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9192 		return B_BAD_ADDRESS;
9193 
9194 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9195 }
9196 
9197 
9198 status_t
9199 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9200 {
9201 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9202 	if (pathBuffer.InitCheck() != B_OK)
9203 		return B_NO_MEMORY;
9204 
9205 	char* path = pathBuffer.LockBuffer();
9206 
9207 	if (!IS_USER_ADDRESS(userPath)
9208 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9209 		return B_BAD_ADDRESS;
9210 	}
9211 
9212 	// split into directory vnode and filename path
9213 	char filename[B_FILE_NAME_LENGTH];
9214 	struct vnode* dir;
9215 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9216 	if (status != B_OK)
9217 		return status;
9218 
9219 	VNodePutter _(dir);
9220 
9221 	// the underlying FS needs to support creating FIFOs
9222 	if (!HAS_FS_CALL(dir, create_special_node))
9223 		return B_UNSUPPORTED;
9224 
9225 	// create the entry	-- the FIFO sub node is set up automatically
9226 	fs_vnode superVnode;
9227 	ino_t nodeID;
9228 	status = FS_CALL(dir, create_special_node, filename, NULL,
9229 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9230 
9231 	// create_special_node() acquired a reference for us that we don't need.
9232 	if (status == B_OK)
9233 		put_vnode(dir->mount->volume, nodeID);
9234 
9235 	return status;
9236 }
9237 
9238 
9239 status_t
9240 _user_create_pipe(int* userFDs)
9241 {
9242 	// rootfs should support creating FIFOs, but let's be sure
9243 	if (!HAS_FS_CALL(sRoot, create_special_node))
9244 		return B_UNSUPPORTED;
9245 
9246 	// create the node	-- the FIFO sub node is set up automatically
9247 	fs_vnode superVnode;
9248 	ino_t nodeID;
9249 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9250 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9251 	if (status != B_OK)
9252 		return status;
9253 
9254 	// We've got one reference to the node and need another one.
9255 	struct vnode* vnode;
9256 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9257 	if (status != B_OK) {
9258 		// that should not happen
9259 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9260 			sRoot->mount->id, nodeID);
9261 		return status;
9262 	}
9263 
9264 	// Everything looks good so far. Open two FDs, one for reading and one
9265 	// for writing.
9266 	int fds[2];
9267 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9268 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9269 
9270 	FDCloser closer0(fds[0], false);
9271 	FDCloser closer1(fds[1], false);
9272 
9273 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9274 
9275 	// copy FDs to userland
9276 	if (status == B_OK) {
9277 		if (!IS_USER_ADDRESS(userFDs)
9278 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9279 			status = B_BAD_ADDRESS;
9280 		}
9281 	}
9282 
9283 	// keep FDs, if everything went fine
9284 	if (status == B_OK) {
9285 		closer0.Detach();
9286 		closer1.Detach();
9287 	}
9288 
9289 	return status;
9290 }
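

/* Illustrative usage sketch: the userland contract of the syscall above. On
 * success the caller gets two FDs referring to the same anonymous FIFO,
 * fds[0] opened O_RDONLY and fds[1] opened O_WRONLY:
 *
 *	\code
 *	int fds[2];
 *	if (pipe(fds) == 0) {	// ends up in _user_create_pipe()
 *		write(fds[1], "x", 1);
 *		char byte;
 *		read(fds[0], &byte, 1);
 *	}
 *	\endcode
 */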
9291 
9292 
9293 status_t
9294 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9295 {
9296 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9297 	if (pathBuffer.InitCheck() != B_OK)
9298 		return B_NO_MEMORY;
9299 
9300 	char* path = pathBuffer.LockBuffer();
9301 
9302 	if (!IS_USER_ADDRESS(userPath)
9303 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9304 		return B_BAD_ADDRESS;
9305 
9306 	return common_access(fd, path, mode, effectiveUserGroup, false);
9307 }
9308 
9309 
9310 status_t
9311 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9312 	struct stat* userStat, size_t statSize)
9313 {
9314 	struct stat stat;
9315 	status_t status;
9316 
9317 	if (statSize > sizeof(struct stat))
9318 		return B_BAD_VALUE;
9319 
9320 	if (!IS_USER_ADDRESS(userStat))
9321 		return B_BAD_ADDRESS;
9322 
9323 	if (userPath) {
9324 		// path given: get the stat of the node referred to by (fd, path)
9325 		if (!IS_USER_ADDRESS(userPath))
9326 			return B_BAD_ADDRESS;
9327 
9328 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9329 		if (pathBuffer.InitCheck() != B_OK)
9330 			return B_NO_MEMORY;
9331 
9332 		char* path = pathBuffer.LockBuffer();
9333 
9334 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9335 		if (length < B_OK)
9336 			return length;
9337 		if (length >= B_PATH_NAME_LENGTH)
9338 			return B_NAME_TOO_LONG;
9339 
9340 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9341 	} else {
9342 		// no path given: get the FD and use the FD operation
9343 		struct file_descriptor* descriptor
9344 			= get_fd(get_current_io_context(false), fd);
9345 		if (descriptor == NULL)
9346 			return B_FILE_ERROR;
9347 
9348 		if (descriptor->ops->fd_read_stat)
9349 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9350 		else
9351 			status = B_UNSUPPORTED;
9352 
9353 		put_fd(descriptor);
9354 	}
9355 
9356 	if (status != B_OK)
9357 		return status;
9358 
9359 	return user_memcpy(userStat, &stat, statSize);
9360 }
9361 
9362 
9363 status_t
9364 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9365 	const struct stat* userStat, size_t statSize, int statMask)
9366 {
9367 	if (statSize > sizeof(struct stat))
9368 		return B_BAD_VALUE;
9369 
9370 	struct stat stat;
9371 
9372 	if (!IS_USER_ADDRESS(userStat)
9373 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9374 		return B_BAD_ADDRESS;
9375 
9376 	// clear additional stat fields
9377 	if (statSize < sizeof(struct stat))
9378 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9379 
9380 	status_t status;
9381 
9382 	if (userPath) {
9383 		// path given: write the stat of the node referred to by (fd, path)
9384 		if (!IS_USER_ADDRESS(userPath))
9385 			return B_BAD_ADDRESS;
9386 
9387 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9388 		if (pathBuffer.InitCheck() != B_OK)
9389 			return B_NO_MEMORY;
9390 
9391 		char* path = pathBuffer.LockBuffer();
9392 
9393 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9394 		if (length < B_OK)
9395 			return length;
9396 		if (length >= B_PATH_NAME_LENGTH)
9397 			return B_NAME_TOO_LONG;
9398 
9399 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9400 			statMask, false);
9401 	} else {
9402 		// no path given: get the FD and use the FD operation
9403 		struct file_descriptor* descriptor
9404 			= get_fd(get_current_io_context(false), fd);
9405 		if (descriptor == NULL)
9406 			return B_FILE_ERROR;
9407 
9408 		if (descriptor->ops->fd_write_stat) {
9409 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9410 				statMask);
9411 		} else
9412 			status = B_UNSUPPORTED;
9413 
9414 		put_fd(descriptor);
9415 	}
9416 
9417 	return status;
9418 }
9419 
9420 
9421 int
9422 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9423 {
9424 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9425 	if (pathBuffer.InitCheck() != B_OK)
9426 		return B_NO_MEMORY;
9427 
9428 	char* path = pathBuffer.LockBuffer();
9429 
9430 	if (userPath != NULL) {
9431 		if (!IS_USER_ADDRESS(userPath)
9432 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9433 			return B_BAD_ADDRESS;
9434 	}
9435 
9436 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9437 }
9438 
9439 
9440 ssize_t
9441 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9442 	size_t readBytes)
9443 {
9444 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9445 	if (attr < 0)
9446 		return attr;
9447 
9448 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9449 	_user_close(attr);
9450 
9451 	return bytes;
9452 }
9453 
9454 
9455 ssize_t
9456 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9457 	const void* buffer, size_t writeBytes)
9458 {
9459 	// Try to support the BeOS-typical truncation semantics as well as the
9460 	// position argument.
9461 	int attr = attr_create(fd, NULL, attribute, type,
9462 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9463 	if (attr < 0)
9464 		return attr;
9465 
9466 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9467 	_user_close(attr);
9468 
9469 	return bytes;
9470 }
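

/* Illustrative sketch of the BeOS-style semantics implemented above, using
 * the public fs_write_attr() API (the attribute name is only an example): a
 * write at position 0 recreates the attribute (O_TRUNC), while a write at a
 * non-zero position patches it in place.
 *
 *	\code
 *	fs_write_attr(fd, "MAIL:subject", B_STRING_TYPE, 0, "hello", 6);
 *		// attribute now has exactly 6 bytes
 *	fs_write_attr(fd, "MAIL:subject", B_STRING_TYPE, 2, "y", 1);
 *		// one byte patched at offset 2; size stays 6
 *	\endcode
 */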
9471 
9472 
9473 status_t
9474 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9475 {
9476 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9477 	if (attr < 0)
9478 		return attr;
9479 
9480 	struct file_descriptor* descriptor
9481 		= get_fd(get_current_io_context(false), attr);
9482 	if (descriptor == NULL) {
9483 		_user_close(attr);
9484 		return B_FILE_ERROR;
9485 	}
9486 
9487 	struct stat stat;
9488 	status_t status;
9489 	if (descriptor->ops->fd_read_stat)
9490 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9491 	else
9492 		status = B_UNSUPPORTED;
9493 
9494 	put_fd(descriptor);
9495 	_user_close(attr);
9496 
9497 	if (status == B_OK) {
9498 		attr_info info;
9499 		info.type = stat.st_type;
9500 		info.size = stat.st_size;
9501 
9502 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9503 			return B_BAD_ADDRESS;
9504 	}
9505 
9506 	return status;
9507 }
9508 
9509 
9510 int
9511 _user_open_attr(int fd, const char* userPath, const char* userName,
9512 	uint32 type, int openMode)
9513 {
9514 	char name[B_FILE_NAME_LENGTH];
9515 
9516 	if (!IS_USER_ADDRESS(userName)
9517 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9518 		return B_BAD_ADDRESS;
9519 
9520 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9521 	if (pathBuffer.InitCheck() != B_OK)
9522 		return B_NO_MEMORY;
9523 
9524 	char* path = pathBuffer.LockBuffer();
9525 
9526 	if (userPath != NULL) {
9527 		if (!IS_USER_ADDRESS(userPath)
9528 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9529 			return B_BAD_ADDRESS;
9530 	}
9531 
9532 	if ((openMode & O_CREAT) != 0) {
9533 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9534 			false);
9535 	}
9536 
9537 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9538 }
9539 
9540 
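/*!	\brief Removes attribute \a userName from the node referred to by
	\a fd.
*/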
9541 status_t
9542 _user_remove_attr(int fd, const char* userName)
9543 {
9544 	char name[B_FILE_NAME_LENGTH];
9545 
9546 	if (!IS_USER_ADDRESS(userName)
9547 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9548 		return B_BAD_ADDRESS;
9549 
9550 	return attr_remove(fd, name, false);
9551 }
9552 
9553 
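/*!	\brief Renames an attribute.

	Attribute \a userFromName of the node referred to by \a fromFile is
	renamed to \a userToName on the node referred to by \a toFile.
*/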
9554 status_t
9555 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9556 	const char* userToName)
9557 {
9558 	if (!IS_USER_ADDRESS(userFromName)
9559 		|| !IS_USER_ADDRESS(userToName))
9560 		return B_BAD_ADDRESS;
9561 
9562 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9563 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9564 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9565 		return B_NO_MEMORY;
9566 
9567 	char* fromName = fromNameBuffer.LockBuffer();
9568 	char* toName = toNameBuffer.LockBuffer();
9569 
9570 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9571 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9572 		return B_BAD_ADDRESS;
9573 
9574 	return attr_rename(fromFile, fromName, toFile, toName, false);
9575 }
9576 
9577 
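/*!	\brief Opens the index directory of the volume identified by
	\a device.
*/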
9578 int
9579 _user_open_index_dir(dev_t device)
9580 {
9581 	return index_dir_open(device, false);
9582 }
9583 
9584 
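/*!	\brief Creates index \a userName of the given \a type on volume
	\a device, passing \a flags on to the file system.
*/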
9585 status_t
9586 _user_create_index(dev_t device, const char* userName, uint32 type,
9587 	uint32 flags)
9588 {
9589 	char name[B_FILE_NAME_LENGTH];
9590 
9591 	if (!IS_USER_ADDRESS(userName)
9592 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9593 		return B_BAD_ADDRESS;
9594 
9595 	return index_create(device, name, type, flags, false);
9596 }
9597 
9598 
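/*!	\brief Reads the stat of index \a userName on volume \a device and
	copies it into \a userStat.
*/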
9599 status_t
9600 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9601 {
9602 	char name[B_FILE_NAME_LENGTH];
9603 	struct stat stat;
9604 	status_t status;
9605 
9606 	if (!IS_USER_ADDRESS(userName)
9607 		|| !IS_USER_ADDRESS(userStat)
9608 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9609 		return B_BAD_ADDRESS;
9610 
9611 	status = index_name_read_stat(device, name, &stat, false);
9612 	if (status == B_OK) {
9613 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9614 			return B_BAD_ADDRESS;
9615 	}
9616 
9617 	return status;
9618 }
9619 
9620 
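/*!	\brief Removes index \a userName from volume \a device.
*/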
9621 status_t
9622 _user_remove_index(dev_t device, const char* userName)
9623 {
9624 	char name[B_FILE_NAME_LENGTH];
9625 
9626 	if (!IS_USER_ADDRESS(userName)
9627 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9628 		return B_BAD_ADDRESS;
9629 
9630 	return index_remove(device, name, false);
9631 }
9632 
9633 
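/*!	\brief Copies the current working directory of the calling team into
	\a userBuffer, which is \a size bytes large. \a size is capped at
	kMaxPathLength.
*/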
9634 status_t
9635 _user_getcwd(char* userBuffer, size_t size)
9636 {
9637 	if (size == 0)
9638 		return B_BAD_VALUE;
9639 	if (!IS_USER_ADDRESS(userBuffer))
9640 		return B_BAD_ADDRESS;
9641 
9642 	if (size > kMaxPathLength)
9643 		size = kMaxPathLength;
9644 
9645 	KPath pathBuffer(size);
9646 	if (pathBuffer.InitCheck() != B_OK)
9647 		return B_NO_MEMORY;
9648 
	TRACE(("user_getcwd: buf %p, %lu\n", userBuffer, size));
9650 
9651 	char* path = pathBuffer.LockBuffer();
9652 
9653 	status_t status = get_cwd(path, size, false);
9654 	if (status != B_OK)
9655 		return status;
9656 
9657 	// Copy back the result
9658 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9659 		return B_BAD_ADDRESS;
9660 
9661 	return status;
9662 }
9663 
9664 
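/*!	\brief Sets the current working directory to the node identified
	fd+path style via \a fd and \a userPath (a NULL path means \a fd
	refers to the directory itself).
*/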
9665 status_t
9666 _user_setcwd(int fd, const char* userPath)
9667 {
9668 	TRACE(("user_setcwd: path = %p\n", userPath));
9669 
9670 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9671 	if (pathBuffer.InitCheck() != B_OK)
9672 		return B_NO_MEMORY;
9673 
9674 	char* path = pathBuffer.LockBuffer();
9675 
9676 	if (userPath != NULL) {
9677 		if (!IS_USER_ADDRESS(userPath)
9678 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9679 			return B_BAD_ADDRESS;
9680 	}
9681 
9682 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9683 }
9684 
9685 
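/*!	\brief chroot() backend: makes the node at \a userPath the root of
	the calling team's I/O context. Only a team with an effective user ID
	of 0 is allowed to do this.
*/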
9686 status_t
9687 _user_change_root(const char* userPath)
9688 {
9689 	// only root is allowed to chroot()
9690 	if (geteuid() != 0)
9691 		return B_NOT_ALLOWED;
9692 
9693 	// alloc path buffer
9694 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9695 	if (pathBuffer.InitCheck() != B_OK)
9696 		return B_NO_MEMORY;
9697 
	// copy userland path to kernel; unlike for the other path syscalls a
	// NULL path is not meaningful here, and would leave the buffer
	// uninitialized for the path_to_vnode() call below
	if (userPath == NULL)
		return B_BAD_VALUE;

	char* path = pathBuffer.LockBuffer();
	if (!IS_USER_ADDRESS(userPath)
		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;
9705 
9706 	// get the vnode
9707 	struct vnode* vnode;
9708 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9709 	if (status != B_OK)
9710 		return status;
9711 
9712 	// set the new root
9713 	struct io_context* context = get_current_io_context(false);
9714 	mutex_lock(&sIOContextRootLock);
9715 	struct vnode* oldRoot = context->root;
9716 	context->root = vnode;
9717 	mutex_unlock(&sIOContextRootLock);
9718 
9719 	put_vnode(oldRoot);
9720 
9721 	return B_OK;
9722 }
9723 
9724 
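/*!	\brief Opens a query on volume \a device using the predicate string
	\a userQuery. \a flags, \a port, and \a token are passed on to the
	file system (for live query update messages). Returns a new FD for
	reading the query results, or an error code.
*/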
9725 int
9726 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9727 	uint32 flags, port_id port, int32 token)
9728 {
9729 	char* query;
9730 
	if (device < 0 || userQuery == NULL || queryLength == 0)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userQuery))
		return B_BAD_ADDRESS;

	// safety restriction: keep the kernel allocation for the query string
	// within reasonable bounds
	if (queryLength >= 65536)
		return B_NAME_TOO_LONG;
9737 
9738 	query = (char*)malloc(queryLength + 1);
9739 	if (query == NULL)
9740 		return B_NO_MEMORY;
9741 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9742 		free(query);
9743 		return B_BAD_ADDRESS;
9744 	}
9745 
9746 	int fd = query_open(device, query, flags, port, token, false);
9747 
9748 	free(query);
9749 	return fd;
9750 }
9751 
9752 
9753 #include "vfs_request_io.cpp"
9754