xref: /haiku/src/system/kernel/fs/vfs.cpp (revision d06cbe081b7ea043aea2012359744091de6d604d)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
108 
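// Usage sketch (illustrative only, not compiled in): FS_CALL() dispatches
// through the vnode's fs_vnode_ops table, passing the owning volume first.
// With KDEBUG, a missing hook panics instead of jumping through NULL.
#if 0
static status_t
example_read_stat(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;

	// non-KDEBUG expansion:
	// vnode->ops->read_stat(vnode->mount->volume, vnode, stat)
	return FS_CALL(vnode, read_stat, stat);
}
#endif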
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd()); it does not depend
112 	// on PATH_MAX.
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount is mounted and it is ensured that it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount), (read) access
128 	to those fields is always safe, even without additional locking. Moreover,
129 	while mounted, the mount holds a reference to the root_vnode->covers vnode,
130 	thus making the access path vnode->mount->root_vnode->covers->mount->...
131 	safe if a reference to vnode is held (note that for the root mount
132 	root_vnode->covers is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is, locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields of the fs_mount structures in sMountsTable that are immutable
222 	  after initialization will not be modified.
223 
224 	The thread trying to lock the lock must not hold sVnodeLock or
225 	sMountMutex.
226 */
227 static recursive_lock sMountOpLock;
228 
229 /*!	\brief Guards sVnodeTable.
230 
231 	The holder is allowed read/write access to sVnodeTable and to
232 	any unbusy vnode in that table, except for the immutable fields (device,
233 	id, private_node, mount), to which only read-only access is allowed.
234 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
235 	well as the busy, removed, and unused flags and the vnode's type, can also
236 	be write accessed when holding a read lock to sVnodeLock *and* having the
237 	vnode locked. Write access to covered_by and covers requires write locking
238 	sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not hold this lock when calling create_sem(), as this might call
242 	vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
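
// Sketch of the protocol described above for writing one of a vnode's
// mutable flags (illustrative only; "vnode" is a placeholder):
#if 0
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);
	vnode->SetRemoved(true);
		// mutable flag access: read lock on sVnodeLock plus the vnode's
		// own lock
}
#endif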
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
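
// Typical FDCloser use (illustrative sketch; "fd", "kernel" and the error
// condition are placeholders):
#if 0
FDCloser fdCloser(fd, kernel);
if (somethingFailed)
	return B_ERROR;
		// fdCloser closes the descriptor via _kern_close()/_user_close()
fdCloser.Detach();
	// success -- keep the descriptor open and hand it to the caller
return fd;
#endif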
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags,
549 		generic_size_t bytesRequested, status_t status,
550 		generic_size_t bytesTransferred)
551 		:
552 		fVnode(vnode),
553 		fMountID(vnode->mount->id),
554 		fNodeID(vnode->id),
555 		fCookie(cookie),
556 		fPos(pos),
557 		fCount(count),
558 		fFlags(flags),
559 		fBytesRequested(bytesRequested),
560 		fStatus(status),
561 		fBytesTransferred(bytesTransferred)
562 	{
563 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
564 			sizeof(generic_io_vec) * count, false);
565 	}
566 
567 	void AddDump(TraceOutput& out, const char* mode)
568 	{
569 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
570 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
571 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
572 			(uint64)fBytesRequested);
573 
574 		if (fVecs != NULL) {
575 			for (uint32 i = 0; i < fCount; i++) {
576 				if (i > 0)
577 					out.Print(", ");
578 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
579 					(uint64)fVecs[i].length);
580 			}
581 		}
582 
583 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
584 			"transferred: %" B_PRIu64, fFlags, fStatus,
585 			(uint64)fBytesTransferred);
586 	}
587 
588 protected:
589 	struct vnode*	fVnode;
590 	dev_t			fMountID;
591 	ino_t			fNodeID;
592 	void*			fCookie;
593 	off_t			fPos;
594 	generic_io_vec*	fVecs;
595 	uint32			fCount;
596 	uint32			fFlags;
597 	generic_size_t	fBytesRequested;
598 	status_t		fStatus;
599 	generic_size_t	fBytesTransferred;
600 };
601 
602 
603 class ReadPages : public PagesIOTraceEntry {
604 public:
605 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
606 		const generic_io_vec* vecs, uint32 count, uint32 flags,
607 		generic_size_t bytesRequested, status_t status,
608 		generic_size_t bytesTransferred)
609 		:
610 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
611 			bytesRequested, status, bytesTransferred)
612 	{
613 		Initialized();
614 	}
615 
616 	virtual void AddDump(TraceOutput& out)
617 	{
618 		PagesIOTraceEntry::AddDump(out, "read");
619 	}
620 };
621 
622 
623 class WritePages : public PagesIOTraceEntry {
624 public:
625 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
626 		const generic_io_vec* vecs, uint32 count, uint32 flags,
627 		generic_size_t bytesRequested, status_t status,
628 		generic_size_t bytesTransferred)
629 		:
630 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
631 			bytesRequested, status, bytesTransferred)
632 	{
633 		Initialized();
634 	}
635 
636 	virtual void AddDump(TraceOutput& out)
637 	{
638 		PagesIOTraceEntry::AddDump(out, "write");
639 	}
640 };
641 
642 }	// namespace VFSPagesIOTracing
643 
644 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
645 #else
646 #	define TPIO(x) ;
647 #endif	// VFS_PAGES_IO_TRACING
648 
649 
650 static int
651 mount_compare(void* _m, const void* _key)
652 {
653 	struct fs_mount* mount = (fs_mount*)_m;
654 	const dev_t* id = (dev_t*)_key;
655 
656 	if (mount->id == *id)
657 		return 0;
658 
659 	return -1;
660 }
661 
662 
663 static uint32
664 mount_hash(void* _m, const void* _key, uint32 range)
665 {
666 	struct fs_mount* mount = (fs_mount*)_m;
667 	const dev_t* id = (dev_t*)_key;
668 
669 	if (mount)
670 		return mount->id % range;
671 
672 	return (uint32)*id % range;
673 }
674 
675 
676 /*! Finds the mounted device (the fs_mount structure) with the given ID.
677 	Note, you must hold the sMountMutex lock when you call this function.
678 */
679 static struct fs_mount*
680 find_mount(dev_t id)
681 {
682 	ASSERT_LOCKED_MUTEX(&sMountMutex);
683 
684 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
685 }
686 
687 
688 static status_t
689 get_mount(dev_t id, struct fs_mount** _mount)
690 {
691 	struct fs_mount* mount;
692 
693 	ReadLocker nodeLocker(sVnodeLock);
694 	MutexLocker mountLocker(sMountMutex);
695 
696 	mount = find_mount(id);
697 	if (mount == NULL)
698 		return B_BAD_VALUE;
699 
700 	struct vnode* rootNode = mount->root_vnode;
701 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
702 		|| rootNode->ref_count == 0) {
703 		// might have been called during a mount/unmount operation
704 		return B_BUSY;
705 	}
706 
707 	inc_vnode_ref_count(rootNode);
708 	*_mount = mount;
709 	return B_OK;
710 }
711 
712 
713 static void
714 put_mount(struct fs_mount* mount)
715 {
716 	if (mount)
717 		put_vnode(mount->root_vnode);
718 }
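
// Pairing sketch for get_mount()/put_mount() (illustrative; "id" is a
// placeholder): the root vnode reference obtained by get_mount() keeps the
// file system from being unmounted until put_mount() releases it.
#if 0
struct fs_mount* mount;
status_t status = get_mount(id, &mount);
if (status != B_OK)
	return status;
// ... work with mount ...
put_mount(mount);
#endif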
719 
720 
721 /*!	Tries to open the specified file system module.
722 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
723 	Returns a pointer to the file system module interface, or NULL if it
724 	could not open the module.
725 */
726 static file_system_module_info*
727 get_file_system(const char* fsName)
728 {
729 	char name[B_FILE_NAME_LENGTH];
730 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
731 		// construct module name if we didn't get one
732 		// (we currently support only one API)
733 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
734 		fsName = NULL;
735 	}
736 
737 	file_system_module_info* info;
738 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
739 		return NULL;
740 
741 	return info;
742 }
743 
744 
745 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
746 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
747 	The name is allocated for you, and you have to free() it when you're
748 	done with it.
749 	Returns NULL if the required memory is not available.
750 */
751 static char*
752 get_file_system_name(const char* fsName)
753 {
754 	const size_t length = strlen("file_systems/");
755 
756 	if (strncmp(fsName, "file_systems/", length)) {
757 		// the name already seems to be the module's file name
758 		return strdup(fsName);
759 	}
760 
761 	fsName += length;
762 	const char* end = strchr(fsName, '/');
763 	if (end == NULL) {
764 		// this doesn't seem to be a valid name, but well...
765 		return strdup(fsName);
766 	}
767 
768 	// cut off the trailing /v1
769 
770 	char* name = (char*)malloc(end + 1 - fsName);
771 	if (name == NULL)
772 		return NULL;
773 
774 	strlcpy(name, fsName, end + 1 - fsName);
775 	return name;
776 }
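
// Illustrative examples of the two accepted forms:
#if 0
char* shortName = get_file_system_name("file_systems/bfs/v1");	// -> "bfs"
char* sameName = get_file_system_name("bfs");	// -> "bfs"
free(shortName);
free(sameName);
#endif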
777 
778 
779 /*!	Accepts a list of file system names separated by a colon, one for each
780 	layer and returns the file system name for the specified layer.
781 	The name is allocated for you, and you have to free() it when you're
782 	done with it.
783 	Returns NULL if the required memory is not available or if there is no
784 	name for the specified layer.
785 */
786 static char*
787 get_file_system_name_for_layer(const char* fsNames, int32 layer)
788 {
789 	while (layer >= 0) {
790 		const char* end = strchr(fsNames, ':');
791 		if (end == NULL) {
792 			if (layer == 0)
793 				return strdup(fsNames);
794 			return NULL;
795 		}
796 
797 		if (layer == 0) {
798 			size_t length = end - fsNames + 1;
799 			char* result = (char*)malloc(length);
800 			if (result != NULL) strlcpy(result, fsNames, length);
801 			return result;
802 		}
803 
804 		fsNames = end + 1;
805 		layer--;
806 	}
807 
808 	return NULL;
809 }
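
// Layer lookup sketch (illustrative; the names are placeholders):
#if 0
char* first = get_file_system_name_for_layer("ext2fs:write_overlay", 0);
	// -> "ext2fs"
char* second = get_file_system_name_for_layer("ext2fs:write_overlay", 1);
	// -> "write_overlay"
char* none = get_file_system_name_for_layer("ext2fs:write_overlay", 2);
	// -> NULL
free(first);
free(second);
#endif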
810 
811 
812 static int
813 vnode_compare(void* _vnode, const void* _key)
814 {
815 	struct vnode* vnode = (struct vnode*)_vnode;
816 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
817 
818 	if (vnode->device == key->device && vnode->id == key->vnode)
819 		return 0;
820 
821 	return -1;
822 }
823 
824 
825 static uint32
826 vnode_hash(void* _vnode, const void* _key, uint32 range)
827 {
828 	struct vnode* vnode = (struct vnode*)_vnode;
829 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
830 
831 #define VHASH(mountid, vnodeid) \
832 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
833 
834 	if (vnode != NULL)
835 		return VHASH(vnode->device, vnode->id) % range;
836 
837 	return VHASH(key->device, key->vnode) % range;
838 
839 #undef VHASH
840 }
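
// Worked example of VHASH (illustrative numbers): for device 5 and vnode ID
// 0x100000002, the high half (0x1) plus the low half (0x2) gives 0x3; XORed
// with the device ID this yields 6, so the bucket is 6 % range.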
841 
842 
843 static void
844 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
845 {
846 	RecursiveLocker _(mount->rlock);
847 	mount->vnodes.Add(vnode);
848 }
849 
850 
851 static void
852 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
853 {
854 	RecursiveLocker _(mount->rlock);
855 	mount->vnodes.Remove(vnode);
856 }
857 
858 
859 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
860 
861 	The caller must hold the sVnodeLock (read lock at least).
862 
863 	\param mountID the mount ID.
864 	\param vnodeID the node ID.
865 
866 	\return The vnode structure, if it was found in the hash table, \c NULL
867 			otherwise.
868 */
869 static struct vnode*
870 lookup_vnode(dev_t mountID, ino_t vnodeID)
871 {
872 	struct vnode_hash_key key;
873 
874 	key.device = mountID;
875 	key.vnode = vnodeID;
876 
877 	return (vnode*)hash_lookup(sVnodeTable, &key);
878 }
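
// Caller-side sketch (illustrative; "mountID"/"vnodeID" are placeholders):
// lookups are only valid while sVnodeLock is held at least read locked.
#if 0
ReadLocker locker(sVnodeLock);
struct vnode* vnode = lookup_vnode(mountID, vnodeID);
if (vnode != NULL && !vnode->IsBusy()) {
	// the vnode may be inspected while the lock is held
}
#endif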
879 
880 
881 /*!	Creates a new vnode with the given mount and node ID.
882 	If the node already exists, it is returned instead and no new node is
883 	created. In either case -- but not if an error occurs -- the function write
884 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
885 	error the lock is not held on return.
886 
887 	\param mountID The mount ID.
888 	\param vnodeID The vnode ID.
889 	\param _vnode Will be set to the new vnode on success.
890 	\param _nodeCreated Will be set to \c true when the returned vnode has
891 		been newly created, \c false when it already existed. Will not be
892 		changed on error.
893 	\return \c B_OK, when the vnode was successfully created and inserted or
894 		a node with the given ID was found, \c B_NO_MEMORY or
895 		\c B_ENTRY_NOT_FOUND on error.
896 */
897 static status_t
898 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
899 	bool& _nodeCreated)
900 {
901 	FUNCTION(("create_new_vnode_and_lock()\n"));
902 
903 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
904 	if (vnode == NULL)
905 		return B_NO_MEMORY;
906 
907 	// initialize basic values
908 	memset(vnode, 0, sizeof(struct vnode));
909 	vnode->device = mountID;
910 	vnode->id = vnodeID;
911 	vnode->ref_count = 1;
912 	vnode->SetBusy(true);
913 
914 	// look up the node -- it might have been added by someone else in the
915 	// meantime
916 	rw_lock_write_lock(&sVnodeLock);
917 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
918 	if (existingVnode != NULL) {
919 		free(vnode);
920 		_vnode = existingVnode;
921 		_nodeCreated = false;
922 		return B_OK;
923 	}
924 
925 	// get the mount structure
926 	mutex_lock(&sMountMutex);
927 	vnode->mount = find_mount(mountID);
928 	if (!vnode->mount || vnode->mount->unmounting) {
929 		mutex_unlock(&sMountMutex);
930 		rw_lock_write_unlock(&sVnodeLock);
931 		free(vnode);
932 		return B_ENTRY_NOT_FOUND;
933 	}
934 
935 	// add the vnode to the mount's node list and the hash table
936 	hash_insert(sVnodeTable, vnode);
937 	add_vnode_to_mount_list(vnode, vnode->mount);
938 
939 	mutex_unlock(&sMountMutex);
940 
941 	_vnode = vnode;
942 	_nodeCreated = true;
943 
944 	// keep the vnode lock locked
945 	return B_OK;
946 }
947 
948 
949 /*!	Frees the vnode and all resources it has acquired, and removes
950 	it from the vnode hash as well as from its mount structure.
951 	Will also make sure that any cache modifications are written back.
952 */
953 static void
954 free_vnode(struct vnode* vnode, bool reenter)
955 {
956 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
957 		vnode);
958 
959 	// write back any changes in this vnode's cache -- but only
960 	// if the vnode won't be deleted, in which case the changes
961 	// will be discarded
962 
963 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
964 		FS_CALL_NO_PARAMS(vnode, fsync);
965 
966 	// Note: If this vnode has a cache attached, there will still be two
967 	// references to that cache at this point. The last one belongs to the vnode
968 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
969 	// cache. Each but the last reference to a cache also includes a reference
970 	// to the vnode. The file cache, however, released its reference (cf.
971 	// file_cache_create()), so that this vnode's ref count had the chance to
972 	// drop to 0 at all. Deleting the file cache now will cause the next to last
973 	// cache reference to be released, which will also release a (no longer
974 	// existing) vnode reference. To avoid problems, we set the vnode's ref
975 	// count, so that it will neither become negative nor 0.
976 	vnode->ref_count = 2;
977 
978 	if (!vnode->IsUnpublished()) {
979 		if (vnode->IsRemoved())
980 			FS_CALL(vnode, remove_vnode, reenter);
981 		else
982 			FS_CALL(vnode, put_vnode, reenter);
983 	}
984 
985 	// If the vnode has a VMCache attached, make sure that it won't try to get
986 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
987 	// long as the vnode is busy and in the hash, that won't happen, but as
988 	// soon as we've removed it from the hash, it could reload the vnode -- with
989 	// a new cache attached!
990 	if (vnode->cache != NULL)
991 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
992 
993 	// The file system has removed the resources of the vnode now, so we can
994 	// make it available again (by removing the busy vnode from the hash).
995 	rw_lock_write_lock(&sVnodeLock);
996 	hash_remove(sVnodeTable, vnode);
997 	rw_lock_write_unlock(&sVnodeLock);
998 
999 	// if we have a VMCache attached, remove it
1000 	if (vnode->cache)
1001 		vnode->cache->ReleaseRef();
1002 
1003 	vnode->cache = NULL;
1004 
1005 	remove_vnode_from_mount_list(vnode, vnode->mount);
1006 
1007 	free(vnode);
1008 }
1009 
1010 
1011 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1012 	if the counter dropped to 0.
1013 
1014 	The caller must, of course, own a reference to the vnode to call this
1015 	function.
1016 	The caller must not hold the sVnodeLock or the sMountMutex.
1017 
1018 	\param vnode the vnode.
1019 	\param alwaysFree don't move this vnode into the unused list, but really
1020 		   delete it if possible.
1021 	\param reenter \c true, if this function is called (indirectly) from within
1022 		   a file system. This will be passed to file system hooks only.
1023 	\return \c B_OK, if everything went fine, an error code otherwise.
1024 */
1025 static status_t
1026 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1027 {
1028 	ReadLocker locker(sVnodeLock);
1029 	AutoLocker<Vnode> nodeLocker(vnode);
1030 
1031 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1032 
1033 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1034 
1035 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1036 		vnode->ref_count));
1037 
1038 	if (oldRefCount != 1)
1039 		return B_OK;
1040 
1041 	if (vnode->IsBusy())
1042 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1043 
1044 	bool freeNode = false;
1045 	bool freeUnusedNodes = false;
1046 
1047 	// Just insert the vnode into an unused list if we don't need
1048 	// to delete it
1049 	if (vnode->IsRemoved() || alwaysFree) {
1050 		vnode_to_be_freed(vnode);
1051 		vnode->SetBusy(true);
1052 		freeNode = true;
1053 	} else
1054 		freeUnusedNodes = vnode_unused(vnode);
1055 
1056 	nodeLocker.Unlock();
1057 	locker.Unlock();
1058 
1059 	if (freeNode)
1060 		free_vnode(vnode, reenter);
1061 	else if (freeUnusedNodes)
1062 		free_unused_vnodes();
1063 
1064 	return B_OK;
1065 }
1066 
1067 
1068 /*!	\brief Increments the reference counter of the given vnode.
1069 
1070 	The caller must make sure that the node isn't deleted while this function
1071 	is called. This can be done either:
1072 	- by ensuring that a reference to the node exists and remains in existence,
1073 	  or
1074 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1075 	  or by holding sVnodeLock write locked.
1076 
1077 	In the second case the caller is responsible for dealing with the ref count
1078 	0 -> 1 transition. That is: 1. this function must not be invoked when the
1079 	node is busy in the first place, and 2. vnode_used() must be called for the
1080 	node.
1081 
1082 	\param vnode the vnode.
1083 */
1084 static void
1085 inc_vnode_ref_count(struct vnode* vnode)
1086 {
1087 	atomic_add(&vnode->ref_count, 1);
1088 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1089 		vnode->ref_count));
1090 }
1091 
1092 
1093 static bool
1094 is_special_node_type(int type)
1095 {
1096 	// at the moment only FIFOs are supported
1097 	return S_ISFIFO(type);
1098 }
1099 
1100 
1101 static status_t
1102 create_special_sub_node(struct vnode* vnode, uint32 flags)
1103 {
1104 	if (S_ISFIFO(vnode->Type()))
1105 		return create_fifo_vnode(vnode->mount->volume, vnode);
1106 
1107 	return B_BAD_VALUE;
1108 }
1109 
1110 
1111 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1112 
1113 	If the node is not yet in memory, it will be loaded.
1114 
1115 	The caller must not hold the sVnodeLock or the sMountMutex.
1116 
1117 	\param mountID the mount ID.
1118 	\param vnodeID the node ID.
1119 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1120 		   retrieved vnode structure shall be written.
1121 	\param reenter \c true, if this function is called (indirectly) from within
1122 		   a file system.
1123 	\return \c B_OK, if everything went fine, an error code otherwise.
1124 */
1125 static status_t
1126 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1127 	int reenter)
1128 {
1129 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1130 		mountID, vnodeID, _vnode));
1131 
1132 	rw_lock_read_lock(&sVnodeLock);
1133 
1134 	int32 tries = 2000;
1135 		// try for 10 secs
1136 restart:
1137 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1138 	AutoLocker<Vnode> nodeLocker(vnode);
1139 
1140 	if (vnode && vnode->IsBusy()) {
1141 		nodeLocker.Unlock();
1142 		rw_lock_read_unlock(&sVnodeLock);
1143 		if (!canWait || --tries < 0) {
1144 			// vnode doesn't seem to become unbusy
1145 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is not becoming unbusy!\n",
1146 				mountID, vnodeID);
1147 			return B_BUSY;
1148 		}
1149 		snooze(5000); // 5 ms
1150 		rw_lock_read_lock(&sVnodeLock);
1151 		goto restart;
1152 	}
1153 
1154 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1155 
1156 	status_t status;
1157 
1158 	if (vnode) {
1159 		if (vnode->ref_count == 0) {
1160 			// this vnode has been unused before
1161 			vnode_used(vnode);
1162 		}
1163 		inc_vnode_ref_count(vnode);
1164 
1165 		nodeLocker.Unlock();
1166 		rw_lock_read_unlock(&sVnodeLock);
1167 	} else {
1168 		// we need to create a new vnode and read it in
1169 		rw_lock_read_unlock(&sVnodeLock);
1170 			// unlock -- create_new_vnode_and_lock() write-locks on success
1171 		bool nodeCreated;
1172 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1173 			nodeCreated);
1174 		if (status != B_OK)
1175 			return status;
1176 
1177 		if (!nodeCreated) {
1178 			rw_lock_read_lock(&sVnodeLock);
1179 			rw_lock_write_unlock(&sVnodeLock);
1180 			goto restart;
1181 		}
1182 
1183 		rw_lock_write_unlock(&sVnodeLock);
1184 
1185 		int type;
1186 		uint32 flags;
1187 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1188 			&flags, reenter);
1189 		if (status == B_OK && vnode->private_node == NULL)
1190 			status = B_BAD_VALUE;
1191 
1192 		bool gotNode = status == B_OK;
1193 		bool publishSpecialSubNode = false;
1194 		if (gotNode) {
1195 			vnode->SetType(type);
1196 			publishSpecialSubNode = is_special_node_type(type)
1197 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1198 		}
1199 
1200 		if (gotNode && publishSpecialSubNode)
1201 			status = create_special_sub_node(vnode, flags);
1202 
1203 		if (status != B_OK) {
1204 			if (gotNode)
1205 				FS_CALL(vnode, put_vnode, reenter);
1206 
1207 			rw_lock_write_lock(&sVnodeLock);
1208 			hash_remove(sVnodeTable, vnode);
1209 			remove_vnode_from_mount_list(vnode, vnode->mount);
1210 			rw_lock_write_unlock(&sVnodeLock);
1211 
1212 			free(vnode);
1213 			return status;
1214 		}
1215 
1216 		rw_lock_read_lock(&sVnodeLock);
1217 		vnode->Lock();
1218 
1219 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1220 		vnode->SetBusy(false);
1221 
1222 		vnode->Unlock();
1223 		rw_lock_read_unlock(&sVnodeLock);
1224 	}
1225 
1226 	TRACE(("get_vnode: returning %p\n", vnode));
1227 
1228 	*_vnode = vnode;
1229 	return B_OK;
1230 }
1231 
1232 
1233 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1234 	if the counter dropped to 0.
1235 
1236 	The caller must, of course, own a reference to the vnode to call this
1237 	function.
1238 	The caller must not hold the sVnodeLock or the sMountMutex.
1239 
1240 	\param vnode the vnode.
1241 */
1242 static inline void
1243 put_vnode(struct vnode* vnode)
1244 {
1245 	dec_vnode_ref_count(vnode, false, false);
1246 }
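
// Reference counting sketch (illustrative; "mountID"/"vnodeID" are
// placeholders): every successful get_vnode() must be balanced by a
// put_vnode(); VNodePutter does this automatically on scope exit.
#if 0
struct vnode* vnode;
status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
if (status != B_OK)
	return status;
VNodePutter putter(vnode);
	// the reference is released when "putter" goes out of scope
#endif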
1247 
1248 
1249 static void
1250 free_unused_vnodes(int32 level)
1251 {
1252 	unused_vnodes_check_started();
1253 
1254 	if (level == B_NO_LOW_RESOURCE) {
1255 		unused_vnodes_check_done();
1256 		return;
1257 	}
1258 
1259 	flush_hot_vnodes();
1260 
1261 	// determine how many nodes to free
1262 	uint32 count = 1;
1263 	{
1264 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1265 
1266 		switch (level) {
1267 			case B_LOW_RESOURCE_NOTE:
1268 				count = sUnusedVnodes / 100;
1269 				break;
1270 			case B_LOW_RESOURCE_WARNING:
1271 				count = sUnusedVnodes / 10;
1272 				break;
1273 			case B_LOW_RESOURCE_CRITICAL:
1274 				count = sUnusedVnodes;
1275 				break;
1276 		}
1277 
1278 		if (count > sUnusedVnodes)
1279 			count = sUnusedVnodes;
1280 	}
1281 
1282 	// Write back the modified pages of some unused vnodes and free them.
1283 
1284 	for (uint32 i = 0; i < count; i++) {
1285 		ReadLocker vnodesReadLocker(sVnodeLock);
1286 
1287 		// get the first node
1288 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1289 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1290 			&sUnusedVnodeList);
1291 		unusedVnodesLocker.Unlock();
1292 
1293 		if (vnode == NULL)
1294 			break;
1295 
1296 		// lock the node
1297 		AutoLocker<Vnode> nodeLocker(vnode);
1298 
1299 		// Check whether the node is still unused -- since we only append to the
1300 		// tail of the unused queue, the vnode should still be at its head.
1301 		// Alternatively we could check its ref count for 0 and its busy flag,
1302 		// but if the node is no longer at the head of the queue, it means it
1303 		// has been touched in the meantime, i.e. it is no longer the least
1304 		// recently used unused vnode, so we'd rather not free it.
1305 		unusedVnodesLocker.Lock();
1306 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1307 			continue;
1308 		unusedVnodesLocker.Unlock();
1309 
1310 		ASSERT(!vnode->IsBusy());
1311 
1312 		// grab a reference
1313 		inc_vnode_ref_count(vnode);
1314 		vnode_used(vnode);
1315 
1316 		// write back changes and free the node
1317 		nodeLocker.Unlock();
1318 		vnodesReadLocker.Unlock();
1319 
1320 		if (vnode->cache != NULL)
1321 			vnode->cache->WriteModified();
1322 
1323 		dec_vnode_ref_count(vnode, true, false);
1324 			// this should free the vnode when it's still unused
1325 	}
1326 
1327 	unused_vnodes_check_done();
1328 }
1329 
1330 
1331 /*!	Gets the vnode the given vnode is covering.
1332 
1333 	The caller must have \c sVnodeLock read-locked at least.
1334 
1335 	The function returns a reference to the retrieved vnode (if any), which
1336 	the caller is responsible for releasing.
1337 
1338 	\param vnode The vnode whose covered node shall be returned.
1339 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1340 		vnode.
1341 */
1342 static inline Vnode*
1343 get_covered_vnode_locked(Vnode* vnode)
1344 {
1345 	if (Vnode* coveredNode = vnode->covers) {
1346 		while (coveredNode->covers != NULL)
1347 			coveredNode = coveredNode->covers;
1348 
1349 		inc_vnode_ref_count(coveredNode);
1350 		return coveredNode;
1351 	}
1352 
1353 	return NULL;
1354 }
1355 
1356 
1357 /*!	Gets the vnode the given vnode is covering.
1358 
1359 	The caller must not hold \c sVnodeLock. Note that this implies a race
1360 	condition, since the situation can change at any time.
1361 
1362 	The function returns a reference to the retrieved vnode (if any), which
1363 	the caller is responsible for releasing.
1364 
1365 	\param vnode The vnode whose covered node shall be returned.
1366 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1367 		vnode.
1368 */
1369 static inline Vnode*
1370 get_covered_vnode(Vnode* vnode)
1371 {
1372 	if (!vnode->IsCovering())
1373 		return NULL;
1374 
1375 	ReadLocker vnodeReadLocker(sVnodeLock);
1376 	return get_covered_vnode_locked(vnode);
1377 }
1378 
1379 
1380 /*!	Gets the vnode the given vnode is covered by.
1381 
1382 	The caller must have \c sVnodeLock read-locked at least.
1383 
1384 	The function returns a reference to the retrieved vnode (if any), which
1385 	the caller is responsible for releasing.
1386 
1387 	\param vnode The vnode whose covering node shall be returned.
1388 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1389 		any vnode.
1390 */
1391 static Vnode*
1392 get_covering_vnode_locked(Vnode* vnode)
1393 {
1394 	if (Vnode* coveringNode = vnode->covered_by) {
1395 		while (coveringNode->covered_by != NULL)
1396 			coveringNode = coveringNode->covered_by;
1397 
1398 		inc_vnode_ref_count(coveringNode);
1399 		return coveringNode;
1400 	}
1401 
1402 	return NULL;
1403 }
1404 
1405 
1406 /*!	Gets the vnode the given vnode is covered by.
1407 
1408 	The caller must not hold \c sVnodeLock. Note that this implies a race
1409 	condition, since the situation can change at any time.
1410 
1411 	The function returns a reference to the retrieved vnode (if any), which
1412 	the caller is responsible for releasing.
1413 
1414 	\param vnode The vnode whose covering node shall be returned.
1415 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1416 		any vnode.
1417 */
1418 static inline Vnode*
1419 get_covering_vnode(Vnode* vnode)
1420 {
1421 	if (!vnode->IsCovered())
1422 		return NULL;
1423 
1424 	ReadLocker vnodeReadLocker(sVnodeLock);
1425 	return get_covering_vnode_locked(vnode);
1426 }
1427 
1428 
1429 static void
1430 free_unused_vnodes()
1431 {
1432 	free_unused_vnodes(
1433 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1434 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1435 }
1436 
1437 
1438 static void
1439 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1440 {
1441 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1442 
1443 	free_unused_vnodes(level);
1444 }
1445 
1446 
1447 static inline void
1448 put_advisory_locking(struct advisory_locking* locking)
1449 {
1450 	release_sem(locking->lock);
1451 }
1452 
1453 
1454 /*!	Returns the advisory_locking object of the \a vnode in case it
1455 	has one, and locks it.
1456 	You have to call put_advisory_locking() when you're done with
1457 	it.
1458 	Note, you must not have the vnode mutex locked when calling
1459 	this function.
1460 */
1461 static struct advisory_locking*
1462 get_advisory_locking(struct vnode* vnode)
1463 {
1464 	rw_lock_read_lock(&sVnodeLock);
1465 	vnode->Lock();
1466 
1467 	struct advisory_locking* locking = vnode->advisory_locking;
1468 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1469 
1470 	vnode->Unlock();
1471 	rw_lock_read_unlock(&sVnodeLock);
1472 
1473 	if (lock >= 0)
1474 		lock = acquire_sem(lock);
1475 	if (lock < 0) {
1476 		// This means the locking has been deleted in the meantime
1477 		// or had never existed in the first place - otherwise, we
1478 		// would get the lock at some point.
1479 		return NULL;
1480 	}
1481 
1482 	return locking;
1483 }
1484 
1485 
1486 /*!	Creates a locked advisory_locking object, and attaches it to the
1487 	given \a vnode.
1488 	Returns B_OK in case of success; even if the vnode got such an
1489 	object from someone else in the meantime, you'll still get this
1490 	one locked then.
1491 */
1492 static status_t
1493 create_advisory_locking(struct vnode* vnode)
1494 {
1495 	if (vnode == NULL)
1496 		return B_FILE_ERROR;
1497 
1498 	ObjectDeleter<advisory_locking> lockingDeleter;
1499 	struct advisory_locking* locking = NULL;
1500 
1501 	while (get_advisory_locking(vnode) == NULL) {
1502 		// no locking object set on the vnode yet, create one
1503 		if (locking == NULL) {
1504 			locking = new(std::nothrow) advisory_locking;
1505 			if (locking == NULL)
1506 				return B_NO_MEMORY;
1507 			lockingDeleter.SetTo(locking);
1508 
1509 			locking->wait_sem = create_sem(0, "advisory lock");
1510 			if (locking->wait_sem < 0)
1511 				return locking->wait_sem;
1512 
1513 			locking->lock = create_sem(0, "advisory locking");
1514 			if (locking->lock < 0)
1515 				return locking->lock;
1516 		}
1517 
1518 		// set our newly created locking object
1519 		ReadLocker _(sVnodeLock);
1520 		AutoLocker<Vnode> nodeLocker(vnode);
1521 		if (vnode->advisory_locking == NULL) {
1522 			vnode->advisory_locking = locking;
1523 			lockingDeleter.Detach();
1524 			return B_OK;
1525 		}
1526 	}
1527 
1528 	// The vnode already had a locking object. That's just as well.
1529 
1530 	return B_OK;
1531 }
1532 
1533 
1534 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1535 	with the advisory_lock \a lock.
1536 */
1537 static bool
1538 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1539 {
1540 	if (flock == NULL)
1541 		return true;
1542 
1543 	return lock->start <= flock->l_start - 1 + flock->l_len
1544 		&& lock->end >= flock->l_start;
1545 }
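
// Worked example (illustrative): an advisory_lock covering [0, 99]
// intersects a flock with l_start = 50 and l_len = 100, since
// 0 <= 50 - 1 + 100 and 99 >= 50 both hold.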
1546 
1547 
1548 /*!	Tests whether acquiring a lock would block.
1549 */
1550 static status_t
1551 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1552 {
1553 	flock->l_type = F_UNLCK;
1554 
1555 	struct advisory_locking* locking = get_advisory_locking(vnode);
1556 	if (locking == NULL)
1557 		return B_OK;
1558 
1559 	team_id team = team_get_current_team_id();
1560 
1561 	LockList::Iterator iterator = locking->locks.GetIterator();
1562 	while (iterator.HasNext()) {
1563 		struct advisory_lock* lock = iterator.Next();
1564 
1565 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1566 			// locks do overlap
1567 			if (flock->l_type != F_RDLCK || !lock->shared) {
1568 				// collision
1569 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1570 				flock->l_whence = SEEK_SET;
1571 				flock->l_start = lock->start;
1572 				flock->l_len = lock->end - lock->start + 1;
1573 				flock->l_pid = lock->team;
1574 				break;
1575 			}
1576 		}
1577 	}
1578 
1579 	put_advisory_locking(locking);
1580 	return B_OK;
1581 }
1582 
1583 
1584 /*!	Removes the specified lock, or all locks of the calling team
1585 	if \a flock is NULL.
1586 */
1587 static status_t
1588 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1589 {
1590 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1591 
1592 	struct advisory_locking* locking = get_advisory_locking(vnode);
1593 	if (locking == NULL)
1594 		return B_OK;
1595 
1596 	// TODO: use the thread ID instead??
1597 	team_id team = team_get_current_team_id();
1598 	pid_t session = thread_get_current_thread()->team->session_id;
1599 
1600 	// find matching lock entries
1601 
1602 	LockList::Iterator iterator = locking->locks.GetIterator();
1603 	while (iterator.HasNext()) {
1604 		struct advisory_lock* lock = iterator.Next();
1605 		bool removeLock = false;
1606 
1607 		if (lock->session == session)
1608 			removeLock = true;
1609 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1610 			bool endsBeyond = false;
1611 			bool startsBefore = false;
1612 			if (flock != NULL) {
1613 				startsBefore = lock->start < flock->l_start;
1614 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1615 			}
1616 
1617 			if (!startsBefore && !endsBeyond) {
1618 				// lock is completely contained in flock
1619 				removeLock = true;
1620 			} else if (startsBefore && !endsBeyond) {
1621 				// cut the end of the lock
1622 				lock->end = flock->l_start - 1;
1623 			} else if (!startsBefore && endsBeyond) {
1624 				// cut the start of the lock
1625 				lock->start = flock->l_start + flock->l_len;
1626 			} else {
1627 				// divide the lock into two locks
1628 				struct advisory_lock* secondLock = (struct advisory_lock*)
1629 					malloc(sizeof(struct advisory_lock));
1630 				if (secondLock == NULL) {
1631 					// TODO: we should probably revert the locks we already
1632 					// changed... (ie. allocate upfront)
1633 					put_advisory_locking(locking);
1634 					return B_NO_MEMORY;
1635 				}
1636 
1637 				secondLock->team = lock->team;
1638 				secondLock->session = lock->session;
1639 				// values must already be normalized when getting here
1640 				secondLock->start = flock->l_start + flock->l_len;
1641 				secondLock->end = lock->end;
1642 				secondLock->shared = lock->shared;
1643 				lock->end = flock->l_start - 1;
1644 					// only cut the first lock after its end has been copied
1644 
1645 				locking->locks.Add(secondLock);
1646 			}
1647 		}
1648 
1649 		if (removeLock) {
1650 			// this lock is no longer used
1651 			iterator.Remove();
1652 			free(lock);
1653 		}
1654 	}
1655 
1656 	bool removeLocking = locking->locks.IsEmpty();
1657 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1658 
1659 	put_advisory_locking(locking);
1660 
1661 	if (removeLocking) {
1662 		// We can remove the whole advisory locking structure; it's no
1663 		// longer used
1664 		locking = get_advisory_locking(vnode);
1665 		if (locking != NULL) {
1666 			ReadLocker locker(sVnodeLock);
1667 			AutoLocker<Vnode> nodeLocker(vnode);
1668 
1669 			// the locking could have been changed in the mean time
1670 			if (locking->locks.IsEmpty()) {
1671 				vnode->advisory_locking = NULL;
1672 				nodeLocker.Unlock();
1673 				locker.Unlock();
1674 
1675 				// we've detached the locking from the vnode, so we can
1676 				// safely delete it
1677 				delete locking;
1678 			} else {
1679 				// the locking is in use again
1680 				nodeLocker.Unlock();
1681 				locker.Unlock();
1682 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1683 			}
1684 		}
1685 	}
1686 
1687 	return B_OK;
1688 }
1689 
1690 
1691 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1692 	will wait for the lock to become available if there are any collisions
1693 	(if \a wait is \c false, it fails with B_WOULD_BLOCK or B_PERMISSION_DENIED).
1694 
1695 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1696 	BSD flock() semantics are used, that is, all children can unlock the file
1697 	in question (we even allow parents to remove the lock, though, but that
1698 	seems to be in line with what the BSDs are doing).
1699 */
1700 static status_t
1701 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1702 	bool wait)
1703 {
1704 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1705 		vnode, flock, wait ? "yes" : "no"));
1706 
1707 	bool shared = flock->l_type == F_RDLCK;
1708 	status_t status = B_OK;
1709 
1710 	// TODO: do deadlock detection!
1711 
1712 	struct advisory_locking* locking;
1713 
1714 	while (true) {
1715 		// if this vnode has an advisory_locking structure attached,
1716 		// lock that one and search for any colliding file lock
1717 		status = create_advisory_locking(vnode);
1718 		if (status != B_OK)
1719 			return status;
1720 
1721 		locking = vnode->advisory_locking;
1722 		team_id team = team_get_current_team_id();
1723 		sem_id waitForLock = -1;
1724 
1725 		// test for collisions
1726 		LockList::Iterator iterator = locking->locks.GetIterator();
1727 		while (iterator.HasNext()) {
1728 			struct advisory_lock* lock = iterator.Next();
1729 
1730 			// TODO: locks from the same team might be joinable!
1731 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1732 				// locks do overlap
1733 				if (!shared || !lock->shared) {
1734 					// we need to wait
1735 					waitForLock = locking->wait_sem;
1736 					break;
1737 				}
1738 			}
1739 		}
1740 
1741 		if (waitForLock < 0)
1742 			break;
1743 
1744 		// We need to wait. Do that or fail now, if we've been asked not to.
1745 
1746 		if (!wait) {
1747 			put_advisory_locking(locking);
1748 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1749 		}
1750 
1751 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1752 			B_CAN_INTERRUPT, 0);
1753 		if (status != B_OK && status != B_BAD_SEM_ID)
1754 			return status;
1755 
1756 		// We have been notified, but we need to re-lock the locking object. So
1757 		// go another round...
1758 	}
1759 
1760 	// install new lock
1761 
1762 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1763 		sizeof(struct advisory_lock));
1764 	if (lock == NULL) {
1765 		put_advisory_locking(locking);
1766 		return B_NO_MEMORY;
1767 	}
1768 
1769 	lock->team = team_get_current_team_id();
1770 	lock->session = session;
1771 	// values must already be normalized when getting here
1772 	lock->start = flock->l_start;
1773 	lock->end = flock->l_start - 1 + flock->l_len;
1774 	lock->shared = shared;
1775 
1776 	locking->locks.Add(lock);
1777 	put_advisory_locking(locking);
1778 
1779 	return status;
1780 }
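
// Acquisition sketch (illustrative; "vnode" is a placeholder): take an
// exclusive POSIX lock on the first 100 bytes, waiting for collisions to
// resolve. The flock must already be normalized (cf. normalize_flock()).
#if 0
struct flock flock;
flock.l_type = F_WRLCK;
flock.l_whence = SEEK_SET;
flock.l_start = 0;
flock.l_len = 100;
status_t status = acquire_advisory_lock(vnode, -1, &flock, true);
	// session == -1 selects POSIX semantics
#endif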
1781 
1782 
1783 /*!	Normalizes the \a flock structure to make it easier to compare the
1784 	structure with others. The l_start and l_len fields are set to absolute
1785 	values according to the l_whence field.
1786 */
1787 static status_t
1788 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1789 {
1790 	switch (flock->l_whence) {
1791 		case SEEK_SET:
1792 			break;
1793 		case SEEK_CUR:
1794 			flock->l_start += descriptor->pos;
1795 			break;
1796 		case SEEK_END:
1797 		{
1798 			struct vnode* vnode = descriptor->u.vnode;
1799 			struct stat stat;
1800 			status_t status;
1801 
1802 			if (!HAS_FS_CALL(vnode, read_stat))
1803 				return B_UNSUPPORTED;
1804 
1805 			status = FS_CALL(vnode, read_stat, &stat);
1806 			if (status != B_OK)
1807 				return status;
1808 
1809 			flock->l_start += stat.st_size;
1810 			break;
1811 		}
1812 		default:
1813 			return B_BAD_VALUE;
1814 	}
1815 
1816 	if (flock->l_start < 0)
1817 		flock->l_start = 0;
1818 	if (flock->l_len == 0)
1819 		flock->l_len = OFF_MAX;
1820 
1821 	// don't let the offset and length overflow
1822 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1823 		flock->l_len = OFF_MAX - flock->l_start;
1824 
1825 	if (flock->l_len < 0) {
1826 		// a negative length reverses the region
1827 		flock->l_start += flock->l_len;
1828 		flock->l_len = -flock->l_len;
1829 	}
1830 
1831 	return B_OK;
1832 }
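
// Worked example (illustrative): with descriptor->pos == 1000, a flock of
// { l_whence = SEEK_CUR, l_start = -200, l_len = 100 } normalizes to
// l_start == 800, l_len == 100, i.e. the byte range [800, 899].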
1833 
1834 
1835 static void
1836 replace_vnode_if_disconnected(struct fs_mount* mount,
1837 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1838 	struct vnode* fallBack, bool lockRootLock)
1839 {
1840 	struct vnode* givenVnode = vnode;
1841 	bool vnodeReplaced = false;
1842 
1843 	ReadLocker vnodeReadLocker(sVnodeLock);
1844 
1845 	if (lockRootLock)
1846 		mutex_lock(&sIOContextRootLock);
1847 
1848 	while (vnode != NULL && vnode->mount == mount
1849 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1850 		if (vnode->covers != NULL) {
1851 			// redirect the vnode to the covered vnode
1852 			vnode = vnode->covers;
1853 		} else
1854 			vnode = fallBack;
1855 
1856 		vnodeReplaced = true;
1857 	}
1858 
1859 	// If we've replaced the node, grab a reference for the new one.
1860 	if (vnodeReplaced && vnode != NULL)
1861 		inc_vnode_ref_count(vnode);
1862 
1863 	if (lockRootLock)
1864 		mutex_unlock(&sIOContextRootLock);
1865 
1866 	vnodeReadLocker.Unlock();
1867 
1868 	if (vnodeReplaced)
1869 		put_vnode(givenVnode);
1870 }
1871 
1872 
1873 /*!	Disconnects all file descriptors that are associated with the
1874 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1875 	\a mount object.
1876 
1877 	Note, after you've called this function, there might still be ongoing
1878 	accesses -- they won't be interrupted if they were already in progress.
1879 	However, any subsequent access will fail.
1880 
1881 	This is not a cheap function and should be used with care and rarely.
1882 	TODO: there is currently no means to stop a blocking read/write!
1883 */
1884 static void
1885 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1886 	struct vnode* vnodeToDisconnect)
1887 {
1888 	// iterate over all teams and peek into their file descriptors
1889 	TeamListIterator teamIterator;
1890 	while (Team* team = teamIterator.Next()) {
1891 		BReference<Team> teamReference(team, true);
1892 
1893 		// lock the I/O context
1894 		io_context* context = team->io_context;
1895 		MutexLocker contextLocker(context->io_mutex);
1896 
1897 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1898 			sRoot, true);
1899 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1900 			sRoot, false);
1901 
1902 		for (uint32 i = 0; i < context->table_size; i++) {
1903 			if (struct file_descriptor* descriptor = context->fds[i]) {
1904 				inc_fd_ref_count(descriptor);
1905 
1906 				// if this descriptor points at this mount, we
1907 				// need to disconnect it to be able to unmount
1908 				struct vnode* vnode = fd_vnode(descriptor);
1909 				if (vnodeToDisconnect != NULL) {
1910 					if (vnode == vnodeToDisconnect)
1911 						disconnect_fd(descriptor);
1912 				} else if ((vnode != NULL && vnode->mount == mount)
1913 					|| (vnode == NULL && descriptor->u.mount == mount))
1914 					disconnect_fd(descriptor);
1915 
1916 				put_fd(descriptor);
1917 			}
1918 		}
1919 	}
1920 }
1921 
1922 
1923 /*!	\brief Gets the root node of the current IO context.
1924 	If \a kernel is \c true, the kernel IO context will be used.
1925 	The caller obtains a reference to the returned node.
1926 */
1927 struct vnode*
1928 get_root_vnode(bool kernel)
1929 {
1930 	if (!kernel) {
1931 		// Get the root of the current IO context
1932 		struct io_context* context = get_current_io_context(kernel);
1933 
1934 		mutex_lock(&sIOContextRootLock);
1935 
1936 		struct vnode* root = context->root;
1937 		if (root != NULL)
1938 			inc_vnode_ref_count(root);
1939 
1940 		mutex_unlock(&sIOContextRootLock);
1941 
1942 		if (root != NULL)
1943 			return root;
1944 
1945 		// That should never happen.
1946 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1947 			"have a root\n", team_get_current_team_id());
1948 	}
1949 
1950 	inc_vnode_ref_count(sRoot);
1951 	return sRoot;
1952 }
1953 
1954 
1955 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1956 
1957 	Given an arbitrary vnode (identified by mount and node ID), the function
1958 	checks, whether the vnode is covered by another vnode. If it is, the
1959 	function returns the mount and node ID of the covering vnode. Otherwise
1960 	it simply returns the supplied mount and node ID.
1961 
1962 	In case of error (e.g. the supplied node could not be found) the variables
1963 	for storing the resolved mount and node ID remain untouched and an error
1964 	code is returned.
1965 
1966 	\param mountID The mount ID of the vnode in question.
1967 	\param nodeID The node ID of the vnode in question.
1968 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1969 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1970 	\return
1971 	- \c B_OK, if everything went fine,
1972 	- another error code, if something went wrong.
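
	A minimal usage sketch (the device and node IDs are hypothetical):
	\code
	dev_t coveringDevice;
	ino_t coveringNodeID;
	if (vfs_resolve_vnode_to_covering_vnode(device, nodeID, &coveringDevice,
			&coveringNodeID) == B_OK) {
		// coveringDevice/coveringNodeID now identify the covering vnode
		// (e.g. the root of a volume mounted on that directory), or the
		// original vnode, if it isn't covered
	}
	\endcode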
1973 */
1974 status_t
1975 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1976 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1977 {
1978 	// get the node
1979 	struct vnode* node;
1980 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1981 	if (error != B_OK)
1982 		return error;
1983 
1984 	// resolve the node
1985 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1986 		put_vnode(node);
1987 		node = coveringNode;
1988 	}
1989 
1990 	// set the return values
1991 	*resolvedMountID = node->device;
1992 	*resolvedNodeID = node->id;
1993 
1994 	put_vnode(node);
1995 
1996 	return B_OK;
1997 }
1998 
1999 
2000 /*!	\brief Gets the directory path and leaf name for a given path.
2001 
2002 	The supplied \a path is transformed to refer to the directory part of
2003 	the entry identified by the original path, and into the buffer \a filename
2004 	the leaf name of the original entry is written.
2005 	Neither the returned path nor the leaf name can be expected to be
2006 	canonical.
2007 
2008 	\param path The path to be analyzed. Must be able to store at least one
2009 		   additional character.
2010 	\param filename The buffer into which the leaf name will be written.
2011 		   Must be of size B_FILE_NAME_LENGTH at least.
2012 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2013 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2014 		   if the given path name is empty.
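
	For example (a sketch with hypothetical buffer contents): the path
	"/boot/home/Desktop" is transformed into "/boot/home/." with "Desktop"
	written to \a filename, while "/boot/home/" becomes "/boot/." with
	"home" as the leaf name.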
2015 */
2016 static status_t
2017 get_dir_path_and_leaf(char* path, char* filename)
2018 {
2019 	if (*path == '\0')
2020 		return B_ENTRY_NOT_FOUND;
2021 
2022 	char* last = strrchr(path, '/');
2023 		// '/' are not allowed in file names!
2024 
2025 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2026 
2027 	if (last == NULL) {
2028 		// this path is a single segment with no '/' in it
2029 		// e.g. "foo"
2030 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2031 			return B_NAME_TOO_LONG;
2032 
2033 		strcpy(path, ".");
2034 	} else {
2035 		last++;
2036 		if (last[0] == '\0') {
2037 			// special case: the path ends in one or more '/' - remove them
2038 			while (*--last == '/' && last != path);
2039 			last[1] = '\0';
2040 
2041 			if (last == path && last[0] == '/') {
2042 				// This path points to the root of the file system
2043 				strcpy(filename, ".");
2044 				return B_OK;
2045 			}
2046 			for (; last != path && *(last - 1) != '/'; last--);
2047 				// rewind to the start of the leaf before the '/'
2048 		}
2049 
2050 		// normal leaf: replace the leaf portion of the path with a '.'
2051 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2052 			return B_NAME_TOO_LONG;
2053 
2054 		last[0] = '.';
2055 		last[1] = '\0';
2056 	}
2057 	return B_OK;
2058 }
2059 
2060 
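/*!	Resolves an entry ref (\a mountID, \a directoryID, \a name) to a vnode,
	by letting vnode_path_to_vnode() treat the name as a single-component
	path. On success the caller owns a reference to the vnode returned in
	\a _vnode.
*/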
2061 static status_t
2062 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2063 	bool traverse, bool kernel, struct vnode** _vnode)
2064 {
2065 	char clonedName[B_FILE_NAME_LENGTH + 1];
2066 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2067 		return B_NAME_TOO_LONG;
2068 
2069 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2070 	struct vnode* directory;
2071 
2072 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2073 	if (status < 0)
2074 		return status;
2075 
2076 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2077 		_vnode, NULL);
2078 }
2079 
2080 
2081 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2082 	and returns the respective vnode.
2083 	On success a reference to the vnode is acquired for the caller.
2084 */
2085 static status_t
2086 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2087 {
2088 	ino_t id;
2089 
2090 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2091 		return get_vnode(dir->device, id, _vnode, true, false);
2092 
2093 	status_t status = FS_CALL(dir, lookup, name, &id);
2094 	if (status != B_OK)
2095 		return status;
2096 
2097 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2098 	// hold a reference and just need to look the node up.
2099 	rw_lock_read_lock(&sVnodeLock);
2100 	*_vnode = lookup_vnode(dir->device, id);
2101 	rw_lock_read_unlock(&sVnodeLock);
2102 
2103 	if (*_vnode == NULL) {
2104 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2105 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2106 		return B_ENTRY_NOT_FOUND;
2107 	}
2108 
2109 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2110 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2111 //		(*_vnode)->mount->id, (*_vnode)->id);
2112 
2113 	return B_OK;
2114 }
2115 
2116 
2117 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2118 	\a path must not be NULL.
2119 	If it returns successfully, \a path contains the name of the last path
2120 	component. This function clobbers the buffer pointed to by \a path only
2121 	if it contains more than one component.
2122 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2123 	it is successful or not!
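
	A typical invocation therefore looks like this (a sketch; \c dir and
	\c mutablePath are hypothetical):
	\code
	inc_vnode_ref_count(dir);
		// vnode_path_to_vnode() always decrements the ref count
	struct vnode* vnode;
	status_t status = vnode_path_to_vnode(dir, mutablePath, true, 0,
		get_current_io_context(kernel), &vnode, NULL);
	\endcode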
2124 */
2125 static status_t
2126 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2127 	int count, struct io_context* ioContext, struct vnode** _vnode,
2128 	ino_t* _parentID)
2129 {
2130 	status_t status = B_OK;
2131 	ino_t lastParentID = vnode->id;
2132 
2133 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2134 
2135 	if (path == NULL) {
2136 		put_vnode(vnode);
2137 		return B_BAD_VALUE;
2138 	}
2139 
2140 	if (*path == '\0') {
2141 		put_vnode(vnode);
2142 		return B_ENTRY_NOT_FOUND;
2143 	}
2144 
2145 	while (true) {
2146 		struct vnode* nextVnode;
2147 		char* nextPath;
2148 
2149 		TRACE(("vnode_path_to_vnode: top of loop. path = %p '%s'\n", path,
2150 			path));
2151 
2152 		// done?
2153 		if (path[0] == '\0')
2154 			break;
2155 
2156 		// walk to find the next path component ("path" will point to a single
2157 		// path component), and filter out multiple slashes
2158 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2159 				nextPath++);
2160 
2161 		if (*nextPath == '/') {
2162 			*nextPath = '\0';
2163 			do
2164 				nextPath++;
2165 			while (*nextPath == '/');
2166 		}
2167 
2168 		// If the path component is "..": if we are at a covering vnode,
2169 		// move to the covered vnode, so the ".." is passed to the underlying
2170 		// file system. Also prevent escaping the root of the IO context.
2171 		if (strcmp("..", path) == 0) {
2172 			if (vnode == ioContext->root) {
2173 				// Attempted prison break! Keep it contained.
2174 				path = nextPath;
2175 				continue;
2176 			}
2177 
2178 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2179 				nextVnode = coveredVnode;
2180 				put_vnode(vnode);
2181 				vnode = nextVnode;
2182 			}
2183 		}
2184 
2185 		// check if vnode is really a directory
2186 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2187 			status = B_NOT_A_DIRECTORY;
2188 
2189 		// Check if we have the right to search the current directory vnode.
2190 		// If a file system doesn't have the access() function, we assume that
2191 		// searching a directory is always allowed
2192 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2193 			status = FS_CALL(vnode, access, X_OK);
2194 
2195 		// Tell the filesystem to get the vnode of this path component (if we
2196 		// got the permission from the call above)
2197 		if (status == B_OK)
2198 			status = lookup_dir_entry(vnode, path, &nextVnode);
2199 
2200 		if (status != B_OK) {
2201 			put_vnode(vnode);
2202 			return status;
2203 		}
2204 
2205 		// If the new node is a symbolic link, resolve it (if we've been told
2206 		// to do it)
2207 		if (S_ISLNK(nextVnode->Type())
2208 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2209 			size_t bufferSize;
2210 			char* buffer;
2211 
2212 			TRACE(("traverse link\n"));
2213 
2214 			// it's not exactly nice style using goto in this way, but hey,
2215 			// it works :-/
2216 			if (count + 1 > B_MAX_SYMLINKS) {
2217 				status = B_LINK_LIMIT;
2218 				goto resolve_link_error;
2219 			}
2220 
2221 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2222 			if (buffer == NULL) {
2223 				status = B_NO_MEMORY;
2224 				goto resolve_link_error;
2225 			}
2226 
2227 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2228 				bufferSize--;
2229 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2230 				// null-terminate
2231 				if (status >= 0)
2232 					buffer[bufferSize] = '\0';
2233 			} else
2234 				status = B_BAD_VALUE;
2235 
2236 			if (status != B_OK) {
2237 				free(buffer);
2238 
2239 		resolve_link_error:
2240 				put_vnode(vnode);
2241 				put_vnode(nextVnode);
2242 
2243 				return status;
2244 			}
2245 			put_vnode(nextVnode);
2246 
2247 			// Check if we start from the root directory or the current
2248 			// directory ("vnode" still points to that one).
2249 			// Cut off all leading slashes if it's the root directory
2250 			path = buffer;
2251 			bool absoluteSymlink = false;
2252 			if (path[0] == '/') {
2253 				// we don't need the old directory anymore
2254 				put_vnode(vnode);
2255 
2256 				while (*++path == '/')
2257 					;
2258 
2259 				mutex_lock(&sIOContextRootLock);
2260 				vnode = ioContext->root;
2261 				inc_vnode_ref_count(vnode);
2262 				mutex_unlock(&sIOContextRootLock);
2263 
2264 				absoluteSymlink = true;
2265 			}
2266 
2267 			inc_vnode_ref_count(vnode);
2268 				// balance the next recursion - we will decrement the
2269 				// ref_count of the vnode, no matter if we succeeded or not
2270 
2271 			if (absoluteSymlink && *path == '\0') {
2272 				// symlink was just "/"
2273 				nextVnode = vnode;
2274 			} else {
2275 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2276 					ioContext, &nextVnode, &lastParentID);
2277 			}
2278 
2279 			free(buffer);
2280 
2281 			if (status != B_OK) {
2282 				put_vnode(vnode);
2283 				return status;
2284 			}
2285 		} else
2286 			lastParentID = vnode->id;
2287 
2288 		// decrease the ref count on the old dir we just looked up into
2289 		put_vnode(vnode);
2290 
2291 		path = nextPath;
2292 		vnode = nextVnode;
2293 
2294 		// see if we hit a covered node
2295 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2296 			put_vnode(vnode);
2297 			vnode = coveringNode;
2298 		}
2299 	}
2300 
2301 	*_vnode = vnode;
2302 	if (_parentID)
2303 		*_parentID = lastParentID;
2304 
2305 	return B_OK;
2306 }
2307 
2308 
2309 static status_t
2310 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2311 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2312 {
2313 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2314 		get_current_io_context(kernel), _vnode, _parentID);
2315 }
2316 
2317 
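/*!	Resolves \a path -- absolute, or relative to the IO context's current
	working directory -- to a vnode. On success the caller owns a reference
	to the vnode returned in \a _vnode; \a path is clobbered in the process.
*/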
2318 static status_t
2319 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2320 	ino_t* _parentID, bool kernel)
2321 {
2322 	struct vnode* start = NULL;
2323 
2324 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2325 
2326 	if (!path)
2327 		return B_BAD_VALUE;
2328 
2329 	if (*path == '\0')
2330 		return B_ENTRY_NOT_FOUND;
2331 
2332 	// figure out if we need to start at root or at cwd
2333 	if (*path == '/') {
2334 		if (sRoot == NULL) {
2335 			// we're a bit early, aren't we?
2336 			return B_ERROR;
2337 		}
2338 
2339 		while (*++path == '/')
2340 			;
2341 		start = get_root_vnode(kernel);
2342 
2343 		if (*path == '\0') {
2344 			*_vnode = start;
2345 			return B_OK;
2346 		}
2347 
2348 	} else {
2349 		struct io_context* context = get_current_io_context(kernel);
2350 
2351 		mutex_lock(&context->io_mutex);
2352 		start = context->cwd;
2353 		if (start != NULL)
2354 			inc_vnode_ref_count(start);
2355 		mutex_unlock(&context->io_mutex);
2356 
2357 		if (start == NULL)
2358 			return B_ERROR;
2359 	}
2360 
2361 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2362 		_parentID);
2363 }
2364 
2365 
2366 /*! Returns the vnode of the directory given by all but the last segment
2367 	of the path, and writes the last segment into \a filename.
2368 	The path buffer must be able to store at least one additional character.
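
	E.g. (a sketch) for the path "/boot/home/Desktop", \a _vnode is set to
	the vnode of "/boot/home" and "Desktop" is written to \a filename.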
2369 */
2370 static status_t
2371 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2372 	bool kernel)
2373 {
2374 	status_t status = get_dir_path_and_leaf(path, filename);
2375 	if (status != B_OK)
2376 		return status;
2377 
2378 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2379 }
2380 
2381 
2382 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2383 		   to by a FD + path pair.
2384 
2385 	\a path must be given in either case. \a fd might be omitted, in which
2386 	case \a path is either an absolute path or one relative to the current
2387 	directory. If both are supplied and \a path is relative, it is reckoned
2388 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2389 	is ignored.
2390 
2391 	The caller has the responsibility to call put_vnode() on the returned
2392 	directory vnode.
2393 
2394 	\param fd The FD. May be < 0.
2395 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2396 	       is modified by this function. It must have at least room for a
2397 	       string one character longer than the path it contains.
2398 	\param _vnode A pointer to a variable the directory vnode shall be written
2399 		   into.
2400 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2401 		   the leaf name of the specified entry will be written.
2402 	\param kernel \c true, if invoked from inside the kernel, \c false if
2403 		   invoked from userland.
2404 	\return \c B_OK, if everything went fine, another error code otherwise.
2405 */
2406 static status_t
2407 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2408 	char* filename, bool kernel)
2409 {
2410 	if (!path)
2411 		return B_BAD_VALUE;
2412 	if (*path == '\0')
2413 		return B_ENTRY_NOT_FOUND;
2414 	if (fd < 0)
2415 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2416 
2417 	status_t status = get_dir_path_and_leaf(path, filename);
2418 	if (status != B_OK)
2419 		return status;
2420 
2421 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2422 }
2423 
2424 
2425 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2426 		   to by a vnode + path pair.
2427 
2428 	\a path must be given in either case. \a vnode might be omitted, in which
2429 	case \a path is either an absolute path or one relative to the current
2430 	directory. If both are supplied and \a path is relative, it is reckoned
2431 	off of the directory referred to by \a vnode. If \a path is absolute,
2432 	\a vnode is ignored.
2433 
2434 	The caller has the responsibility to call put_vnode() on the returned
2435 	directory vnode.
2436 
2437 	\param vnode The vnode. May be \c NULL.
2438 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2439 	       is modified by this function. It must have at least room for a
2440 	       string one character longer than the path it contains.
2441 	\param _vnode A pointer to a variable the directory vnode shall be written
2442 		   into.
2443 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2444 		   the leaf name of the specified entry will be written.
2445 	\param kernel \c true, if invoked from inside the kernel, \c false if
2446 		   invoked from userland.
2447 	\return \c B_OK, if everything went fine, another error code otherwise.
2448 */
2449 static status_t
2450 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2451 	struct vnode** _vnode, char* filename, bool kernel)
2452 {
2453 	if (!path)
2454 		return B_BAD_VALUE;
2455 	if (*path == '\0')
2456 		return B_ENTRY_NOT_FOUND;
2457 	if (vnode == NULL || path[0] == '/')
2458 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2459 
2460 	status_t status = get_dir_path_and_leaf(path, filename);
2461 	if (status != B_OK)
2462 		return status;
2463 
2464 	inc_vnode_ref_count(vnode);
2465 		// vnode_path_to_vnode() always decrements the ref count
2466 
2467 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2468 }
2469 
2470 
2471 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2472 */
2473 static status_t
2474 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2475 	size_t bufferSize, struct io_context* ioContext)
2476 {
2477 	if (bufferSize < sizeof(struct dirent))
2478 		return B_BAD_VALUE;
2479 
2480 	// See if the vnode is covering another vnode and move to the covered
2481 	// vnode so we get the underlying file system
2482 	VNodePutter vnodePutter;
2483 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2484 		vnode = coveredVnode;
2485 		vnodePutter.SetTo(vnode);
2486 	}
2487 
2488 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2489 		// The FS supports getting the name of a vnode.
2490 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2491 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2492 			return B_OK;
2493 	}
2494 
2495 	// The FS doesn't support getting the name of a vnode. So we search the
2496 	// parent directory for the vnode, if the caller let us.
2497 
2498 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2499 		return B_UNSUPPORTED;
2500 
2501 	void* cookie;
2502 
2503 	status_t status = FS_CALL(parent, open_dir, &cookie);
2504 	if (status >= B_OK) {
2505 		while (true) {
2506 			uint32 num = 1;
2507 			// We use the FS hook directly instead of dir_read(), since we don't
2508 			// want the entries to be fixed. We have already resolved vnode to
2509 			// the covered node.
2510 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2511 				&num);
2512 			if (status != B_OK)
2513 				break;
2514 			if (num == 0) {
2515 				status = B_ENTRY_NOT_FOUND;
2516 				break;
2517 			}
2518 
2519 			if (vnode->id == buffer->d_ino) {
2520 				// found correct entry!
2521 				break;
2522 			}
2523 		}
2524 
2525 		FS_CALL(parent, close_dir, cookie);
2526 		FS_CALL(parent, free_dir_cookie, cookie);
2527 	}
2528 	return status;
2529 }
2530 
2531 
2532 static status_t
2533 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2534 	size_t nameSize, bool kernel)
2535 {
2536 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2537 	struct dirent* dirent = (struct dirent*)buffer;
2538 
2539 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2540 		get_current_io_context(kernel));
2541 	if (status != B_OK)
2542 		return status;
2543 
2544 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2545 		return B_BUFFER_OVERFLOW;
2546 
2547 	return B_OK;
2548 }
2549 
2550 
2551 /*!	Gets the full path to a given directory vnode.
2552 	It uses the get_vnode_name() FS hook to get the name of a vnode; if a
2553 	file system doesn't support this hook, it will fall back to iterating
2554 	through the parent directory to get the name of the child.
2555 
2556 	To protect against loops, it supports a maximum tree depth
2557 	of 256 levels.
2558 
2559 	Note that the path may no longer be correct by the time this function
2560 	returns! It doesn't use any locking to ensure the returned path is
2561 	correct, as paths aren't safe anyway: a file's path can change at any time.
2562 
2563 	It might be a good idea, though, to check if the returned path exists
2564 	in the calling function (it's not done here for efficiency reasons).
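
	A minimal usage sketch (\c directoryVnode and \c kernel are
	hypothetical):
	\code
	char pathBuffer[B_PATH_NAME_LENGTH];
	status_t status = dir_vnode_to_path(directoryVnode, pathBuffer,
		sizeof(pathBuffer), kernel);
	\endcode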
2565 */
2566 static status_t
2567 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2568 	bool kernel)
2569 {
2570 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2571 
2572 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2573 		return B_BAD_VALUE;
2574 
2575 	if (!S_ISDIR(vnode->Type()))
2576 		return B_NOT_A_DIRECTORY;
2577 
2578 	char* path = buffer;
2579 	int32 insert = bufferSize;
2580 	int32 maxLevel = 256;
2581 	int32 length;
2582 	status_t status = B_OK;
2583 	struct io_context* ioContext = get_current_io_context(kernel);
2584 
2585 	// we don't use get_vnode() here because this call is more
2586 	// efficient and does all we need from get_vnode()
2587 	inc_vnode_ref_count(vnode);
2588 
2589 	path[--insert] = '\0';
2590 		// the path is filled right to left
2591 
2592 	while (true) {
2593 		// If the node is the context's root, bail out. Otherwise resolve mount
2594 		// points.
2595 		if (vnode == ioContext->root)
2596 			break;
2597 
2598 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2599 			put_vnode(vnode);
2600 			vnode = coveredVnode;
2601 		}
2602 
2603 		// lookup the parent vnode
2604 		struct vnode* parentVnode;
2605 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2606 		if (status != B_OK)
2607 			goto out;
2608 
2609 		if (parentVnode == vnode) {
2610 			// The caller apparently got their hands on a node outside of their
2611 			// context's root. Now we've hit the global root.
2612 			put_vnode(parentVnode);
2613 			break;
2614 		}
2615 
2616 		// get the node's name
2617 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2618 			// also used for fs_read_dir()
2619 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2620 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2621 			sizeof(nameBuffer), ioContext);
2622 
2623 		// release the current vnode, we only need its parent from now on
2624 		put_vnode(vnode);
2625 		vnode = parentVnode;
2626 
2627 		if (status != B_OK)
2628 			goto out;
2629 
2630 		// TODO: add an explicit check for loops in about 10 levels to do
2631 		// real loop detection
2632 
2633 		// don't go deeper than 'maxLevel' to prevent loops
2634 		if (maxLevel-- < 0) {
2635 			status = B_LINK_LIMIT;
2636 			goto out;
2637 		}
2638 
2639 		// add the name in front of the current path
2640 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2641 		length = strlen(name);
2642 		insert -= length;
2643 		if (insert <= 0) {
2644 			status = B_RESULT_NOT_REPRESENTABLE;
2645 			goto out;
2646 		}
2647 		memcpy(path + insert, name, length);
2648 		path[--insert] = '/';
2649 	}
2650 
2651 	// the root dir will result in an empty path: fix it
2652 	if (path[insert] == '\0')
2653 		path[--insert] = '/';
2654 
2655 	TRACE(("  path is: %s\n", path + insert));
2656 
2657 	// move the path to the start of the buffer
2658 	length = bufferSize - insert;
2659 	memmove(buffer, path + insert, length);
2660 
2661 out:
2662 	put_vnode(vnode);
2663 	return status;
2664 }
2665 
2666 
2667 /*!	Checks the length of every path component, and adds a '.'
2668 	if the path ends in a slash.
2669 	The given path buffer must be able to store at least one
2670 	additional character.
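
	For example (a sketch): "/boot/home" is left untouched, "/boot/home/"
	becomes "/boot/home/.", and a path containing a single component longer
	than \c B_FILE_NAME_LENGTH characters fails with \c B_NAME_TOO_LONG.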
2671 */
2672 static status_t
2673 check_path(char* to)
2674 {
2675 	int32 length = 0;
2676 
2677 	// check length of every path component
2678 
2679 	while (*to) {
2680 		char* begin;
2681 		if (*to == '/')
2682 			to++, length++;
2683 
2684 		begin = to;
2685 		while (*to != '/' && *to)
2686 			to++, length++;
2687 
2688 		if (to - begin > B_FILE_NAME_LENGTH)
2689 			return B_NAME_TOO_LONG;
2690 	}
2691 
2692 	if (length == 0)
2693 		return B_ENTRY_NOT_FOUND;
2694 
2695 	// complete path if there is a slash at the end
2696 
2697 	if (*(to - 1) == '/') {
2698 		if (length > B_PATH_NAME_LENGTH - 2)
2699 			return B_NAME_TOO_LONG;
2700 
2701 		to[0] = '.';
2702 		to[1] = '\0';
2703 	}
2704 
2705 	return B_OK;
2706 }
2707 
2708 
2709 static struct file_descriptor*
2710 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2711 {
2712 	struct file_descriptor* descriptor
2713 		= get_fd(get_current_io_context(kernel), fd);
2714 	if (descriptor == NULL)
2715 		return NULL;
2716 
2717 	struct vnode* vnode = fd_vnode(descriptor);
2718 	if (vnode == NULL) {
2719 		put_fd(descriptor);
2720 		return NULL;
2721 	}
2722 
2723 	// ToDo: when we can close a file descriptor at any point, investigate
2724 	//	if this is still valid to do (accessing the vnode without ref_count
2725 	//	or locking)
2726 	*_vnode = vnode;
2727 	return descriptor;
2728 }
2729 
2730 
2731 static struct vnode*
2732 get_vnode_from_fd(int fd, bool kernel)
2733 {
2734 	struct file_descriptor* descriptor;
2735 	struct vnode* vnode;
2736 
2737 	descriptor = get_fd(get_current_io_context(kernel), fd);
2738 	if (descriptor == NULL)
2739 		return NULL;
2740 
2741 	vnode = fd_vnode(descriptor);
2742 	if (vnode != NULL)
2743 		inc_vnode_ref_count(vnode);
2744 
2745 	put_fd(descriptor);
2746 	return vnode;
2747 }
2748 
2749 
2750 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2751 	only the path will be considered. In this case, the \a path must not be
2752 	NULL.
2753 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2754 	and should be NULL for files.
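
	A minimal usage sketch (\c fd, \c mutablePath, and \c kernel are
	hypothetical):
	\code
	struct vnode* vnode;
	ino_t parentID;
	status_t status = fd_and_path_to_vnode(fd, mutablePath, true, &vnode,
		&parentID, kernel);
	if (status == B_OK)
		put_vnode(vnode);
			// release the reference once done with the node
	\endcode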
2755 */
2756 static status_t
2757 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2758 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2759 {
2760 	if (fd < 0 && !path)
2761 		return B_BAD_VALUE;
2762 
2763 	if (path != NULL && *path == '\0')
2764 		return B_ENTRY_NOT_FOUND;
2765 
2766 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2767 		// no FD or absolute path
2768 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2769 	}
2770 
2771 	// FD only, or FD + relative path
2772 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2773 	if (!vnode)
2774 		return B_FILE_ERROR;
2775 
2776 	if (path != NULL) {
2777 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2778 			_vnode, _parentID);
2779 	}
2780 
2781 	// there is no relative path to take into account
2782 
2783 	*_vnode = vnode;
2784 	if (_parentID)
2785 		*_parentID = -1;
2786 
2787 	return B_OK;
2788 }
2789 
2790 
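/*!	Allocates a new file descriptor of the given \a type, lets it refer to
	\a vnode (or, for the mount based types, \a mount), stores \a cookie
	and \a openMode in it, and publishes it in the current IO context. The
	close-on-exec flag is set according to \c O_CLOEXEC in \a openMode.
	Returns the FD number, or an error code. Creating file/directory
	descriptors for a mandatorily locked vnode is refused with \c B_BUSY.
*/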
2791 static int
2792 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2793 	void* cookie, int openMode, bool kernel)
2794 {
2795 	struct file_descriptor* descriptor;
2796 	int fd;
2797 
2798 	// If the vnode is locked, we don't allow creating a new file/directory
2799 	// file_descriptor for it
2800 	if (vnode && vnode->mandatory_locked_by != NULL
2801 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2802 		return B_BUSY;
2803 
2804 	descriptor = alloc_fd();
2805 	if (!descriptor)
2806 		return B_NO_MEMORY;
2807 
2808 	if (vnode)
2809 		descriptor->u.vnode = vnode;
2810 	else
2811 		descriptor->u.mount = mount;
2812 	descriptor->cookie = cookie;
2813 
2814 	switch (type) {
2815 		// vnode types
2816 		case FDTYPE_FILE:
2817 			descriptor->ops = &sFileOps;
2818 			break;
2819 		case FDTYPE_DIR:
2820 			descriptor->ops = &sDirectoryOps;
2821 			break;
2822 		case FDTYPE_ATTR:
2823 			descriptor->ops = &sAttributeOps;
2824 			break;
2825 		case FDTYPE_ATTR_DIR:
2826 			descriptor->ops = &sAttributeDirectoryOps;
2827 			break;
2828 
2829 		// mount types
2830 		case FDTYPE_INDEX_DIR:
2831 			descriptor->ops = &sIndexDirectoryOps;
2832 			break;
2833 		case FDTYPE_QUERY:
2834 			descriptor->ops = &sQueryOps;
2835 			break;
2836 
2837 		default:
2838 			panic("get_new_fd() called with unknown type %d\n", type);
2839 			break;
2840 	}
2841 	descriptor->type = type;
2842 	descriptor->open_mode = openMode;
2843 
2844 	io_context* context = get_current_io_context(kernel);
2845 	fd = new_fd(context, descriptor);
2846 	if (fd < 0) {
2847 		free(descriptor);
2848 		return B_NO_MORE_FDS;
2849 	}
2850 
2851 	mutex_lock(&context->io_mutex);
2852 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2853 	mutex_unlock(&context->io_mutex);
2854 
2855 	return fd;
2856 }
2857 
2858 
2859 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2860 	vfs_normalize_path(). See there for more documentation.
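
	For example (a sketch with hypothetical contents): assuming the
	intermediate directories exist and no symlinks are involved, a \a path
	of "/boot/home//Desktop/../config" is rewritten in place to
	"/boot/home/config".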
2861 */
2862 static status_t
2863 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2864 {
2865 	VNodePutter dirPutter;
2866 	struct vnode* dir = NULL;
2867 	status_t error;
2868 
2869 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2870 		// get dir vnode + leaf name
2871 		struct vnode* nextDir;
2872 		char leaf[B_FILE_NAME_LENGTH];
2873 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2874 		if (error != B_OK)
2875 			return error;
2876 
2877 		dir = nextDir;
2878 		strcpy(path, leaf);
2879 		dirPutter.SetTo(dir);
2880 
2881 		// get file vnode, if we shall resolve links
2882 		bool fileExists = false;
2883 		struct vnode* fileVnode;
2884 		VNodePutter fileVnodePutter;
2885 		if (traverseLink) {
2886 			inc_vnode_ref_count(dir);
2887 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2888 					NULL) == B_OK) {
2889 				fileVnodePutter.SetTo(fileVnode);
2890 				fileExists = true;
2891 			}
2892 		}
2893 
2894 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2895 			// we're done -- construct the path
2896 			bool hasLeaf = true;
2897 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2898 				// special cases "." and ".." -- get the dir, forget the leaf
2899 				inc_vnode_ref_count(dir);
2900 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2901 					&nextDir, NULL);
2902 				if (error != B_OK)
2903 					return error;
2904 				dir = nextDir;
2905 				dirPutter.SetTo(dir);
2906 				hasLeaf = false;
2907 			}
2908 
2909 			// get the directory path
2910 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2911 			if (error != B_OK)
2912 				return error;
2913 
2914 			// append the leaf name
2915 			if (hasLeaf) {
2916 				// insert a directory separator if this is not the file system
2917 				// root
2918 				if ((strcmp(path, "/") != 0
2919 					&& strlcat(path, "/", pathSize) >= pathSize)
2920 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2921 					return B_NAME_TOO_LONG;
2922 				}
2923 			}
2924 
2925 			return B_OK;
2926 		}
2927 
2928 		// read link
2929 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2930 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2931 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2932 			if (error != B_OK)
2933 				return error;
2934 			path[bufferSize] = '\0';
2935 		} else
2936 			return B_BAD_VALUE;
2937 	}
2938 
2939 	return B_LINK_LIMIT;
2940 }
2941 
2942 
2943 #ifdef ADD_DEBUGGER_COMMANDS
2944 
2945 
2946 static void
2947 _dump_advisory_locking(advisory_locking* locking)
2948 {
2949 	if (locking == NULL)
2950 		return;
2951 
2952 	kprintf("   lock:        %" B_PRId32 "\n", locking->lock);
2953 	kprintf("   wait_sem:    %" B_PRId32 "\n", locking->wait_sem);
2954 
2955 	int32 index = 0;
2956 	LockList::Iterator iterator = locking->locks.GetIterator();
2957 	while (iterator.HasNext()) {
2958 		struct advisory_lock* lock = iterator.Next();
2959 
2960 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2961 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2962 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2963 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2964 	}
2965 }
2966 
2967 
2968 static void
2969 _dump_mount(struct fs_mount* mount)
2970 {
2971 	kprintf("MOUNT: %p\n", mount);
2972 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
2973 	kprintf(" device_name:   %s\n", mount->device_name);
2974 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2975 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2976 	kprintf(" partition:     %p\n", mount->partition);
2977 	kprintf(" lock:          %p\n", &mount->rlock);
2978 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2979 		mount->owns_file_device ? " owns_file_device" : "");
2980 
2981 	fs_volume* volume = mount->volume;
2982 	while (volume != NULL) {
2983 		kprintf(" volume %p:\n", volume);
2984 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
2985 		kprintf("  private_volume:   %p\n", volume->private_volume);
2986 		kprintf("  ops:              %p\n", volume->ops);
2987 		kprintf("  file_system:      %p\n", volume->file_system);
2988 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2989 		volume = volume->super_volume;
2990 	}
2991 
2992 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2993 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2994 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
2995 	set_debug_variable("_partition", (addr_t)mount->partition);
2996 }
2997 
2998 
2999 static bool
3000 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3001 	const char* name)
3002 {
3003 	bool insertSlash = buffer[bufferSize] != '\0';
3004 	size_t nameLength = strlen(name);
3005 
3006 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3007 		return false;
3008 
3009 	if (insertSlash)
3010 		buffer[--bufferSize] = '/';
3011 
3012 	bufferSize -= nameLength;
3013 	memcpy(buffer + bufferSize, name, nameLength);
3014 
3015 	return true;
3016 }
3017 
3018 
3019 static bool
3020 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3021 	ino_t nodeID)
3022 {
3023 	if (bufferSize == 0)
3024 		return false;
3025 
3026 	bool insertSlash = buffer[bufferSize] != '\0';
3027 	if (insertSlash)
3028 		buffer[--bufferSize] = '/';
3029 
3030 	size_t size = snprintf(buffer, bufferSize,
3031 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3032 	if (size > bufferSize) {
3033 		if (insertSlash)
3034 			bufferSize++;
3035 		return false;
3036 	}
3037 
3038 	if (size < bufferSize)
3039 		memmove(buffer + bufferSize - size, buffer, size);
3040 
3041 	bufferSize -= size;
3042 	return true;
3043 }
3044 
3045 
3046 static char*
3047 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3048 	bool& _truncated)
3049 {
3050 	// null-terminate the path
3051 	buffer[--bufferSize] = '\0';
3052 
3053 	while (true) {
3054 		while (vnode->covers != NULL)
3055 			vnode = vnode->covers;
3056 
3057 		if (vnode == sRoot) {
3058 			_truncated = bufferSize == 0;
3059 			if (!_truncated)
3060 				buffer[--bufferSize] = '/';
3061 			return buffer + bufferSize;
3062 		}
3063 
3064 		// resolve the name
3065 		ino_t dirID;
3066 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3067 			vnode->id, dirID);
3068 		if (name == NULL) {
3069 			// Failed to resolve the name -- prepend "<dev,node>/".
3070 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3071 				vnode->mount->id, vnode->id);
3072 			return buffer + bufferSize;
3073 		}
3074 
3075 		// prepend the name
3076 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3077 			_truncated = true;
3078 			return buffer + bufferSize;
3079 		}
3080 
3081 		// resolve the directory node
3082 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3083 		if (nextVnode == NULL) {
3084 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3085 				vnode->mount->id, dirID);
3086 			return buffer + bufferSize;
3087 		}
3088 
3089 		vnode = nextVnode;
3090 	}
3091 }
3092 
3093 
3094 static void
3095 _dump_vnode(struct vnode* vnode, bool printPath)
3096 {
3097 	kprintf("VNODE: %p\n", vnode);
3098 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3099 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3100 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3101 	kprintf(" private_node:  %p\n", vnode->private_node);
3102 	kprintf(" mount:         %p\n", vnode->mount);
3103 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3104 	kprintf(" covers:        %p\n", vnode->covers);
3105 	kprintf(" cache:         %p\n", vnode->cache);
3106 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3107 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3108 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3109 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3110 
3111 	_dump_advisory_locking(vnode->advisory_locking);
3112 
3113 	if (printPath) {
3114 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3115 		if (buffer != NULL) {
3116 			bool truncated;
3117 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3118 				B_PATH_NAME_LENGTH, truncated);
3119 			if (path != NULL) {
3120 				kprintf(" path:          ");
3121 				if (truncated)
3122 					kputs("<truncated>/");
3123 				kputs(path);
3124 				kputs("\n");
3125 			} else
3126 				kprintf("Failed to resolve vnode path.\n");
3127 
3128 			debug_free(buffer);
3129 		} else
3130 			kprintf("Failed to allocate memory for constructing the path.\n");
3131 	}
3132 
3133 	set_debug_variable("_node", (addr_t)vnode->private_node);
3134 	set_debug_variable("_mount", (addr_t)vnode->mount);
3135 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3136 	set_debug_variable("_covers", (addr_t)vnode->covers);
3137 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3138 }
3139 
3140 
3141 static int
3142 dump_mount(int argc, char** argv)
3143 {
3144 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3145 		kprintf("usage: %s [id|address]\n", argv[0]);
3146 		return 0;
3147 	}
3148 
3149 	ulong val = parse_expression(argv[1]);
3150 	uint32 id = val;
3151 
3152 	struct fs_mount* mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3153 	if (mount == NULL) {
3154 		if (IS_USER_ADDRESS(id)) {
3155 			kprintf("fs_mount not found\n");
3156 			return 0;
3157 		}
3158 		mount = (fs_mount*)val;
3159 	}
3160 
3161 	_dump_mount(mount);
3162 	return 0;
3163 }
3164 
3165 
3166 static int
3167 dump_mounts(int argc, char** argv)
3168 {
3169 	if (argc != 1) {
3170 		kprintf("usage: %s\n", argv[0]);
3171 		return 0;
3172 	}
3173 
3174 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3175 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3176 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3177 
3178 	struct hash_iterator iterator;
3179 	struct fs_mount* mount;
3180 
3181 	hash_open(sMountsTable, &iterator);
3182 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3183 			!= NULL) {
3184 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3185 			mount->root_vnode->covers, mount->volume->private_volume,
3186 			mount->volume->file_system_name);
3187 
3188 		fs_volume* volume = mount->volume;
3189 		while (volume->super_volume != NULL) {
3190 			volume = volume->super_volume;
3191 			kprintf("                                     %p %s\n",
3192 				volume->private_volume, volume->file_system_name);
3193 		}
3194 	}
3195 
3196 	hash_close(sMountsTable, &iterator, false);
3197 	return 0;
3198 }
3199 
3200 
3201 static int
3202 dump_vnode(int argc, char** argv)
3203 {
3204 	bool printPath = false;
3205 	int argi = 1;
3206 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3207 		printPath = true;
3208 		argi++;
3209 	}
3210 
3211 	if (argi >= argc || argi + 2 < argc) {
3212 		print_debugger_command_usage(argv[0]);
3213 		return 0;
3214 	}
3215 
3216 	struct vnode* vnode = NULL;
3217 
3218 	if (argi + 1 == argc) {
3219 		vnode = (struct vnode*)parse_expression(argv[argi]);
3220 		if (IS_USER_ADDRESS(vnode)) {
3221 			kprintf("invalid vnode address\n");
3222 			return 0;
3223 		}
3224 		_dump_vnode(vnode, printPath);
3225 		return 0;
3226 	}
3227 
3228 	struct hash_iterator iterator;
3229 	dev_t device = parse_expression(argv[argi]);
3230 	ino_t id = parse_expression(argv[argi + 1]);
3231 
3232 	hash_open(sVnodeTable, &iterator);
3233 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3234 		if (vnode->id != id || vnode->device != device)
3235 			continue;
3236 
3237 		_dump_vnode(vnode, printPath);
3238 	}
3239 
3240 	hash_close(sVnodeTable, &iterator, false);
3241 	return 0;
3242 }
3243 
3244 
3245 static int
3246 dump_vnodes(int argc, char** argv)
3247 {
3248 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3249 		kprintf("usage: %s [device]\n", argv[0]);
3250 		return 0;
3251 	}
3252 
3253 	// restrict dumped nodes to a certain device if requested
3254 	dev_t device = parse_expression(argv[1]);
3255 
3256 	struct hash_iterator iterator;
3257 	struct vnode* vnode;
3258 
3259 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3260 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3261 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3262 
3263 	hash_open(sVnodeTable, &iterator);
3264 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3265 		if (vnode->device != device)
3266 			continue;
3267 
3268 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3269 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3270 			vnode->private_node, vnode->advisory_locking,
3271 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3272 			vnode->IsUnpublished() ? "u" : "-");
3273 	}
3274 
3275 	hash_close(sVnodeTable, &iterator, false);
3276 	return 0;
3277 }
3278 
3279 
3280 static int
3281 dump_vnode_caches(int argc, char** argv)
3282 {
3283 	struct hash_iterator iterator;
3284 	struct vnode* vnode;
3285 
3286 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3287 		kprintf("usage: %s [device]\n", argv[0]);
3288 		return 0;
3289 	}
3290 
3291 	// restrict dumped nodes to a certain device if requested
3292 	dev_t device = -1;
3293 	if (argc > 1)
3294 		device = parse_expression(argv[1]);
3295 
3296 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3297 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3298 
3299 	hash_open(sVnodeTable, &iterator);
3300 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3301 		if (vnode->cache == NULL)
3302 			continue;
3303 		if (device != -1 && vnode->device != device)
3304 			continue;
3305 
3306 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3307 			vnode, vnode->device, vnode->id, vnode->cache,
3308 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3309 			vnode->cache->page_count);
3310 	}
3311 
3312 	hash_close(sVnodeTable, &iterator, false);
3313 	return 0;
3314 }
3315 
3316 
3317 int
3318 dump_io_context(int argc, char** argv)
3319 {
3320 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3321 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3322 		return 0;
3323 	}
3324 
3325 	struct io_context* context = NULL;
3326 
3327 	if (argc > 1) {
3328 		ulong num = parse_expression(argv[1]);
3329 		if (IS_KERNEL_ADDRESS(num))
3330 			context = (struct io_context*)num;
3331 		else {
3332 			Team* team = team_get_team_struct_locked(num);
3333 			if (team == NULL) {
3334 				kprintf("could not find team with ID %lu\n", num);
3335 				return 0;
3336 			}
3337 			context = (struct io_context*)team->io_context;
3338 		}
3339 	} else
3340 		context = get_current_io_context(true);
3341 
3342 	kprintf("I/O CONTEXT: %p\n", context);
3343 	kprintf(" root vnode:\t%p\n", context->root);
3344 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3345 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3346 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3347 
3348 	if (context->num_used_fds) {
3349 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3350 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3351 	}
3352 
3353 	for (uint32 i = 0; i < context->table_size; i++) {
3354 		struct file_descriptor* fd = context->fds[i];
3355 		if (fd == NULL)
3356 			continue;
3357 
3358 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3359 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3360 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3361 			fd->pos, fd->cookie,
3362 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3363 				? "mount" : "vnode",
3364 			fd->u.vnode);
3365 	}
3366 
3367 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3368 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3369 
3370 	set_debug_variable("_cwd", (addr_t)context->cwd);
3371 
3372 	return 0;
3373 }
3374 
3375 
3376 int
3377 dump_vnode_usage(int argc, char** argv)
3378 {
3379 	if (argc != 1) {
3380 		kprintf("usage: %s\n", argv[0]);
3381 		return 0;
3382 	}
3383 
3384 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3385 		sUnusedVnodes, kMaxUnusedVnodes);
3386 
3387 	struct hash_iterator iterator;
3388 	hash_open(sVnodeTable, &iterator);
3389 
3390 	uint32 count = 0;
3391 	struct vnode* vnode;
3392 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3393 		count++;
3394 	}
3395 
3396 	hash_close(sVnodeTable, &iterator, false);
3397 
3398 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3399 		count - sUnusedVnodes);
3400 	return 0;
3401 }
3402 
3403 #endif	// ADD_DEBUGGER_COMMANDS
3404 
3405 
3406 /*!	Clears memory specified by an iovec array.
3407 */
3408 static void
3409 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3410 {
3411 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3412 		size_t length = std::min(vecs[i].iov_len, bytes);
3413 		memset(vecs[i].iov_base, 0, length);
3414 		bytes -= length;
3415 	}
3416 }
3417 
3418 
3419 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3420 	and calls the file system hooks to read/write the request to disk.
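
	For example (a sketch with hypothetical vectors): a 1024 byte read held
	in two 512 byte iovecs, mapped by the file vecs {offset 1000, length
	256} and {offset 4096, length 768}, results in one transfer of 256
	bytes at offset 1000 and another of 768 bytes at offset 4096, the
	latter split across the iovec boundary. A file vec offset of -1 denotes
	a sparse region: reads are zero-filled, writes are refused.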
3421 */
3422 static status_t
3423 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3424 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3425 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3426 	bool doWrite)
3427 {
3428 	if (fileVecCount == 0) {
3429 		// There are no file vecs at this offset, so we're obviously trying
3430 		// to access the file outside of its bounds
3431 		return B_BAD_VALUE;
3432 	}
3433 
3434 	size_t numBytes = *_numBytes;
3435 	uint32 fileVecIndex;
3436 	size_t vecOffset = *_vecOffset;
3437 	uint32 vecIndex = *_vecIndex;
3438 	status_t status;
3439 	size_t size;
3440 
3441 	if (!doWrite && vecOffset == 0) {
3442 		// now directly read the data from the device
3443 		// the first file_io_vec can be read directly
3444 
3445 		if (fileVecs[0].length < (off_t)numBytes)
3446 			size = fileVecs[0].length;
3447 		else
3448 			size = numBytes;
3449 
3450 		if (fileVecs[0].offset >= 0) {
3451 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3452 				&vecs[vecIndex], vecCount - vecIndex, &size);
3453 		} else {
3454 			// sparse read
3455 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3456 			status = B_OK;
3457 		}
3458 		if (status != B_OK)
3459 			return status;
3460 
3461 		// TODO: this is a work-around for buggy device drivers!
3462 		//	When our own drivers honour the length, we can:
3463 		//	a) also use this direct I/O for writes (otherwise, it would
3464 		//	   overwrite precious data)
3465 		//	b) panic if the term below is true (at least for writes)
3466 		if ((off_t)size > fileVecs[0].length) {
3467 			//dprintf("warning: device driver %p doesn't respect total length "
3468 			//	"in read_pages() call!\n", ref->device);
3469 			size = fileVecs[0].length;
3470 		}
3471 
3472 		ASSERT((off_t)size <= fileVecs[0].length);
3473 
3474 		// If the file portion was contiguous, we're already done now
3475 		if (size == numBytes)
3476 			return B_OK;
3477 
3478 		// if we reached the end of the file, we can return as well
3479 		if ((off_t)size != fileVecs[0].length) {
3480 			*_numBytes = size;
3481 			return B_OK;
3482 		}
3483 
3484 		fileVecIndex = 1;
3485 
3486 		// first, find out where we have to continue in our iovecs
3487 		for (; vecIndex < vecCount; vecIndex++) {
3488 			if (size < vecs[vecIndex].iov_len)
3489 				break;
3490 
3491 			size -= vecs[vecIndex].iov_len;
3492 		}
3493 
3494 		vecOffset = size;
3495 	} else {
3496 		fileVecIndex = 0;
3497 		size = 0;
3498 	}
3499 
3500 	// Too bad, let's process the rest of the file_io_vecs
3501 
3502 	size_t totalSize = size;
3503 	size_t bytesLeft = numBytes - size;
3504 
3505 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3506 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3507 		off_t fileOffset = fileVec.offset;
3508 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3509 
3510 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3511 			fileLeft));
3512 
3513 		// process the complete fileVec
3514 		while (fileLeft > 0) {
3515 			iovec tempVecs[MAX_TEMP_IO_VECS];
3516 			uint32 tempCount = 0;
3517 
3518 			// size tracks how much of what is left of the current fileVec
3519 			// (fileLeft) has been assigned to tempVecs
3520 			size = 0;
3521 
3522 			// assign what is left of the current fileVec to the tempVecs
3523 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3524 					&& tempCount < MAX_TEMP_IO_VECS;) {
3525 				// try to satisfy one iovec per iteration (or as much as
3526 				// possible)
3527 
3528 				// bytes left of the current iovec
3529 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3530 				if (vecLeft == 0) {
3531 					vecOffset = 0;
3532 					vecIndex++;
3533 					continue;
3534 				}
3535 
3536 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3537 					vecIndex, vecOffset, size));
3538 
3539 				// actually available bytes
3540 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3541 
3542 				tempVecs[tempCount].iov_base
3543 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3544 				tempVecs[tempCount].iov_len = tempVecSize;
3545 				tempCount++;
3546 
3547 				size += tempVecSize;
3548 				vecOffset += tempVecSize;
3549 			}
3550 
3551 			size_t bytes = size;
3552 
3553 			if (fileOffset == -1) {
3554 				if (doWrite) {
3555 					panic("sparse write attempt: vnode %p", vnode);
3556 					status = B_IO_ERROR;
3557 				} else {
3558 					// sparse read
3559 					zero_iovecs(tempVecs, tempCount, bytes);
3560 					status = B_OK;
3561 				}
3562 			} else if (doWrite) {
3563 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3564 					tempVecs, tempCount, &bytes);
3565 			} else {
3566 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3567 					tempVecs, tempCount, &bytes);
3568 			}
3569 			if (status != B_OK)
3570 				return status;
3571 
3572 			totalSize += bytes;
3573 			bytesLeft -= size;
3574 			if (fileOffset >= 0)
3575 				fileOffset += size;
3576 			fileLeft -= size;
3577 			//dprintf("-> file left = %Lu\n", fileLeft);
3578 
3579 			if (size != bytes || vecIndex >= vecCount) {
3580 				// there are no more bytes or iovecs, let's bail out
3581 				*_numBytes = totalSize;
3582 				return B_OK;
3583 			}
3584 		}
3585 	}
3586 
3587 	*_vecIndex = vecIndex;
3588 	*_vecOffset = vecOffset;
3589 	*_numBytes = totalSize;
3590 	return B_OK;
3591 }
3592 
3593 
3594 //	#pragma mark - public API for file systems
3595 
3596 
3597 extern "C" status_t
3598 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3599 	fs_vnode_ops* ops)
3600 {
3601 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3602 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3603 
3604 	if (privateNode == NULL)
3605 		return B_BAD_VALUE;
3606 
3607 	// create the node
3608 	bool nodeCreated;
3609 	struct vnode* vnode;
3610 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3611 		nodeCreated);
3612 	if (status != B_OK)
3613 		return status;
3614 
3615 	WriteLocker nodeLocker(sVnodeLock, true);
3616 		// create_new_vnode_and_lock() has locked for us
3617 
3618 	// file system integrity check:
3619 	// test if the vnode already exists and bail out if this is the case!
3620 	if (!nodeCreated) {
3621 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3622 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3623 			vnode->private_node);
3624 		return B_ERROR;
3625 	}
3626 
3627 	vnode->private_node = privateNode;
3628 	vnode->ops = ops;
3629 	vnode->SetUnpublished(true);
3630 
3631 	TRACE(("returns: %s\n", strerror(status)));
3632 
3633 	return status;
3634 }
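
// A typical creation sequence in a file system looks like this (a hedged
// sketch; "inode" and "gInodeOps" are hypothetical):
//
//	status_t status = new_vnode(volume, inode->ID(), inode, &gInodeOps);
//	if (status == B_OK) {
//		status = publish_vnode(volume, inode->ID(), inode, &gInodeOps,
//			inode->Mode() & S_IFMT, 0);
//	}
//
// Until publish_vnode() is called, the node remains unpublished and
// invisible to the rest of the VFS.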
3635 
3636 
3637 extern "C" status_t
3638 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3639 	fs_vnode_ops* ops, int type, uint32 flags)
3640 {
3641 	FUNCTION(("publish_vnode()\n"));
3642 
3643 	WriteLocker locker(sVnodeLock);
3644 
3645 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3646 
3647 	bool nodeCreated = false;
3648 	if (vnode == NULL) {
3649 		if (privateNode == NULL)
3650 			return B_BAD_VALUE;
3651 
3652 		// create the node
3653 		locker.Unlock();
3654 			// create_new_vnode_and_lock() will re-lock for us on success
3655 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3656 			nodeCreated);
3657 		if (status != B_OK)
3658 			return status;
3659 
3660 		locker.SetTo(sVnodeLock, true);
3661 	}
3662 
3663 	if (nodeCreated) {
3664 		vnode->private_node = privateNode;
3665 		vnode->ops = ops;
3666 		vnode->SetUnpublished(true);
3667 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3668 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3669 		// already known, but not published
3670 	} else
3671 		return B_BAD_VALUE;
3672 
3673 	bool publishSpecialSubNode = false;
3674 
3675 	vnode->SetType(type);
3676 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3677 	publishSpecialSubNode = is_special_node_type(type)
3678 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3679 
3680 	status_t status = B_OK;
3681 
3682 	// create sub vnodes, if necessary
3683 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3684 		locker.Unlock();
3685 
3686 		fs_volume* subVolume = volume;
3687 		if (volume->sub_volume != NULL) {
3688 			while (status == B_OK && subVolume->sub_volume != NULL) {
3689 				subVolume = subVolume->sub_volume;
3690 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3691 					vnode);
3692 			}
3693 		}
3694 
3695 		if (status == B_OK && publishSpecialSubNode)
3696 			status = create_special_sub_node(vnode, flags);
3697 
3698 		if (status != B_OK) {
3699 			// error -- clean up the created sub vnodes
3700 			while (subVolume->super_volume != volume) {
3701 				subVolume = subVolume->super_volume;
3702 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3703 			}
3704 		}
3705 
3706 		if (status == B_OK) {
3707 			ReadLocker vnodesReadLocker(sVnodeLock);
3708 			AutoLocker<Vnode> nodeLocker(vnode);
3709 			vnode->SetBusy(false);
3710 			vnode->SetUnpublished(false);
3711 		} else {
3712 			locker.Lock();
3713 			hash_remove(sVnodeTable, vnode);
3714 			remove_vnode_from_mount_list(vnode, vnode->mount);
3715 			free(vnode);
3716 		}
3717 	} else {
3718 		// we still hold the write lock -- mark the node unbusy and published
3719 		vnode->SetBusy(false);
3720 		vnode->SetUnpublished(false);
3721 	}
3722 
3723 	TRACE(("returns: %s\n", strerror(status)));
3724 
3725 	return status;
3726 }
3727 
3728 
3729 extern "C" status_t
3730 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3731 {
3732 	struct vnode* vnode;
3733 
3734 	if (volume == NULL)
3735 		return B_BAD_VALUE;
3736 
3737 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3738 	if (status != B_OK)
3739 		return status;
3740 
3741 	// If this is a layered FS, we need to get the node cookie for the requested
3742 	// layer.
3743 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3744 		fs_vnode resolvedNode;
3745 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3746 			&resolvedNode);
3747 		if (status != B_OK) {
3748 			panic("get_vnode(): Failed to get super node for vnode %p, "
3749 				"volume: %p", vnode, volume);
3750 			put_vnode(vnode);
3751 			return status;
3752 		}
3753 
3754 		if (_privateNode != NULL)
3755 			*_privateNode = resolvedNode.private_node;
3756 	} else if (_privateNode != NULL)
3757 		*_privateNode = vnode->private_node;
3758 
3759 	return B_OK;
3760 }
3761 
3762 
3763 extern "C" status_t
3764 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3765 {
3766 	struct vnode* vnode;
3767 
3768 	rw_lock_read_lock(&sVnodeLock);
3769 	vnode = lookup_vnode(volume->id, vnodeID);
3770 	rw_lock_read_unlock(&sVnodeLock);
3771 
3772 	if (vnode == NULL)
3773 		return B_BAD_VALUE;
3774 
3775 	inc_vnode_ref_count(vnode);
3776 	return B_OK;
3777 }
3778 
3779 
3780 extern "C" status_t
3781 put_vnode(fs_volume* volume, ino_t vnodeID)
3782 {
3783 	struct vnode* vnode;
3784 
3785 	rw_lock_read_lock(&sVnodeLock);
3786 	vnode = lookup_vnode(volume->id, vnodeID);
3787 	rw_lock_read_unlock(&sVnodeLock);
3788 
3789 	if (vnode == NULL)
3790 		return B_BAD_VALUE;
3791 
3792 	dec_vnode_ref_count(vnode, false, true);
3793 	return B_OK;
3794 }
3795 
3796 
3797 extern "C" status_t
3798 remove_vnode(fs_volume* volume, ino_t vnodeID)
3799 {
3800 	ReadLocker locker(sVnodeLock);
3801 
3802 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3803 	if (vnode == NULL)
3804 		return B_ENTRY_NOT_FOUND;
3805 
3806 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3807 		// this vnode is in use
3808 		return B_BUSY;
3809 	}
3810 
3811 	vnode->Lock();
3812 
3813 	vnode->SetRemoved(true);
3814 	bool removeUnpublished = false;
3815 
3816 	if (vnode->IsUnpublished()) {
3817 		// prepare the vnode for deletion
3818 		removeUnpublished = true;
3819 		vnode->SetBusy(true);
3820 	}
3821 
3822 	vnode->Unlock();
3823 	locker.Unlock();
3824 
3825 	if (removeUnpublished) {
3826 		// If the vnode hasn't been published yet, we delete it here
3827 		atomic_add(&vnode->ref_count, -1);
3828 		free_vnode(vnode, true);
3829 	}
3830 
3831 	return B_OK;
3832 }
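

/*	Illustrative sketch: an FS unlink hook usually just removes the directory
	entry and then calls remove_vnode(), so the node is actually destroyed
	only once the last reference to it has been put. The "my_"-prefixed names
	are hypothetical.
*/
#if 0
static status_t
my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
{
	ino_t id;
	status_t status = my_fs_remove_entry(dir, name, &id);
	if (status != B_OK)
		return status;

	// the VFS calls the FS's remove_vnode() hook once the last reference
	// to the node is gone
	return remove_vnode(volume, id);
}
#endif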
3833 
3834 
3835 extern "C" status_t
3836 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3837 {
3838 	struct vnode* vnode;
3839 
3840 	rw_lock_read_lock(&sVnodeLock);
3841 
3842 	vnode = lookup_vnode(volume->id, vnodeID);
3843 	if (vnode) {
3844 		AutoLocker<Vnode> nodeLocker(vnode);
3845 		vnode->SetRemoved(false);
3846 	}
3847 
3848 	rw_lock_read_unlock(&sVnodeLock);
3849 	return B_OK;
3850 }
3851 
3852 
3853 extern "C" status_t
3854 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3855 {
3856 	ReadLocker _(sVnodeLock);
3857 
3858 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3859 		if (_removed != NULL)
3860 			*_removed = vnode->IsRemoved();
3861 		return B_OK;
3862 	}
3863 
3864 	return B_BAD_VALUE;
3865 }
3866 
3867 
3868 extern "C" fs_volume*
3869 volume_for_vnode(fs_vnode* _vnode)
3870 {
3871 	if (_vnode == NULL)
3872 		return NULL;
3873 
3874 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3875 	return vnode->mount->volume;
3876 }
3877 
3878 
3879 #if 0
3880 extern "C" status_t
3881 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3882 	size_t* _numBytes)
3883 {
3884 	struct file_descriptor* descriptor;
3885 	struct vnode* vnode;
3886 
3887 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3888 	if (descriptor == NULL)
3889 		return B_FILE_ERROR;
3890 
3891 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3892 		count, 0, _numBytes);
3893 
3894 	put_fd(descriptor);
3895 	return status;
3896 }
3897 
3898 
3899 extern "C" status_t
3900 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3901 	size_t* _numBytes)
3902 {
3903 	struct file_descriptor* descriptor;
3904 	struct vnode* vnode;
3905 
3906 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3907 	if (descriptor == NULL)
3908 		return B_FILE_ERROR;
3909 
3910 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3911 		count, 0, _numBytes);
3912 
3913 	put_fd(descriptor);
3914 	return status;
3915 }
3916 #endif
3917 
3918 
3919 extern "C" status_t
3920 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3921 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3922 	size_t* _bytes)
3923 {
3924 	struct file_descriptor* descriptor;
3925 	struct vnode* vnode;
3926 
3927 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3928 	if (descriptor == NULL)
3929 		return B_FILE_ERROR;
3930 
3931 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3932 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3933 		false);
3934 
3935 	put_fd(descriptor);
3936 	return status;
3937 }
3938 
3939 
3940 extern "C" status_t
3941 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3942 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3943 	size_t* _bytes)
3944 {
3945 	struct file_descriptor* descriptor;
3946 	struct vnode* vnode;
3947 
3948 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3949 	if (descriptor == NULL)
3950 		return B_FILE_ERROR;
3951 
3952 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3953 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3954 		true);
3955 
3956 	put_fd(descriptor);
3957 	return status;
3958 }
3959 
3960 
3961 extern "C" status_t
3962 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3963 {
3964 	// look up the mount -- the caller is required to make sure that the mount
3965 	// won't go away
3966 	MutexLocker locker(sMountMutex);
3967 	struct fs_mount* mount = find_mount(mountID);
3968 	if (mount == NULL)
3969 		return B_BAD_VALUE;
3970 	locker.Unlock();
3971 
3972 	return mount->entry_cache.Add(dirID, name, nodeID);
3973 }
3974 
3975 
3976 extern "C" status_t
3977 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3978 {
3979 	// look up the mount -- the caller is required to make sure that the mount
3980 	// won't go away
3981 	MutexLocker locker(sMountMutex);
3982 	struct fs_mount* mount = find_mount(mountID);
3983 	if (mount == NULL)
3984 		return B_BAD_VALUE;
3985 	locker.Unlock();
3986 
3987 	return mount->entry_cache.Remove(dirID, name);
3988 }
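

/*	Illustrative sketch: a file system can seed the entry cache after a
	successful lookup and must invalidate the entry when it is removed or
	renamed. The variables are assumed to be in scope.
*/
#if 0
// after resolving "name" in directory "directoryID" to node "nodeID":
entry_cache_add(volume->id, directoryID, name, nodeID);

// after removing the entry again:
entry_cache_remove(volume->id, directoryID, name);
#endif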
3989 
3990 
3991 //	#pragma mark - private VFS API
3992 //	Functions the VFS exports for other parts of the kernel
3993 
3994 
3995 /*! Acquires another reference to the vnode that has to be released
3996 	by calling vfs_put_vnode().
3997 */
3998 void
3999 vfs_acquire_vnode(struct vnode* vnode)
4000 {
4001 	inc_vnode_ref_count(vnode);
4002 }
4003 
4004 
4005 /*! This is currently called from file_cache_create() only.
4006 	It's probably a temporary solution as long as devfs requires that
4007 	fs_read_pages()/fs_write_pages() are called with the standard
4008 	open cookie and not with a device cookie.
4009 	If that's done differently, remove this call; it has no other
4010 	purpose.
4011 */
4012 extern "C" status_t
4013 vfs_get_cookie_from_fd(int fd, void** _cookie)
4014 {
4015 	struct file_descriptor* descriptor;
4016 
4017 	descriptor = get_fd(get_current_io_context(true), fd);
4018 	if (descriptor == NULL)
4019 		return B_FILE_ERROR;
4020 
4021 	*_cookie = descriptor->cookie;
4022 	return B_OK;
4023 }
4024 
4025 
4026 extern "C" status_t
4027 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4028 {
4029 	*vnode = get_vnode_from_fd(fd, kernel);
4030 
4031 	if (*vnode == NULL)
4032 		return B_FILE_ERROR;
4033 
4034 	return B_OK;
4035 }
4036 
4037 
4038 extern "C" status_t
4039 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4040 {
4041 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4042 		path, kernel));
4043 
4044 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4045 	if (pathBuffer.InitCheck() != B_OK)
4046 		return B_NO_MEMORY;
4047 
4048 	char* buffer = pathBuffer.LockBuffer();
4049 	strlcpy(buffer, path, pathBuffer.BufferSize());
4050 
4051 	struct vnode* vnode;
4052 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4053 	if (status != B_OK)
4054 		return status;
4055 
4056 	*_vnode = vnode;
4057 	return B_OK;
4058 }
4059 
4060 
4061 extern "C" status_t
4062 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4063 {
4064 	struct vnode* vnode;
4065 
4066 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4067 	if (status != B_OK)
4068 		return status;
4069 
4070 	*_vnode = vnode;
4071 	return B_OK;
4072 }
4073 
4074 
4075 extern "C" status_t
4076 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4077 	const char* name, struct vnode** _vnode)
4078 {
4079 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4080 }
4081 
4082 
4083 extern "C" void
4084 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4085 {
4086 	*_mountID = vnode->device;
4087 	*_vnodeID = vnode->id;
4088 }
4089 
4090 
4091 /*!
4092 	Helper function abstracting the process of "converting" a given
4093 	vnode-pointer to a fs_vnode-pointer.
4094 	Currently only used in bindfs.
4095 */
4096 extern "C" fs_vnode*
4097 vfs_fsnode_for_vnode(struct vnode* vnode)
4098 {
4099 	return vnode;
4100 }
4101 
4102 
4103 /*!
4104 	Calls fs_open() on the given vnode and returns a new
4105 	file descriptor for it
4106 */
4107 int
4108 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4109 {
4110 	return open_vnode(vnode, openMode, kernel);
4111 }
4112 
4113 
4114 /*!	Looks up a vnode with the given mount and vnode ID.
4115 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4116 	to the node.
4117 	It's currently only used by file_cache_create().
4118 */
4119 extern "C" status_t
4120 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4121 {
4122 	rw_lock_read_lock(&sVnodeLock);
4123 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4124 	rw_lock_read_unlock(&sVnodeLock);
4125 
4126 	if (vnode == NULL)
4127 		return B_ERROR;
4128 
4129 	*_vnode = vnode;
4130 	return B_OK;
4131 }
4132 
4133 
4134 extern "C" status_t
4135 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4136 	bool traverseLeafLink, bool kernel, void** _node)
4137 {
4138 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4139 		volume, path, kernel));
4140 
4141 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4142 	if (pathBuffer.InitCheck() != B_OK)
4143 		return B_NO_MEMORY;
4144 
4145 	fs_mount* mount;
4146 	status_t status = get_mount(volume->id, &mount);
4147 	if (status != B_OK)
4148 		return status;
4149 
4150 	char* buffer = pathBuffer.LockBuffer();
4151 	strlcpy(buffer, path, pathBuffer.BufferSize());
4152 
4153 	struct vnode* vnode = mount->root_vnode;
4154 
4155 	if (buffer[0] == '/')
4156 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4157 	else {
4158 		inc_vnode_ref_count(vnode);
4159 			// vnode_path_to_vnode() releases a reference to the starting vnode
4160 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4161 			kernel, &vnode, NULL);
4162 	}
4163 
4164 	put_mount(mount);
4165 
4166 	if (status != B_OK)
4167 		return status;
4168 
4169 	if (vnode->device != volume->id) {
4170 		// wrong mount ID - must not gain access to foreign file system nodes
4171 		put_vnode(vnode);
4172 		return B_BAD_VALUE;
4173 	}
4174 
4175 	// Use get_vnode() to resolve the cookie for the right layer.
4176 	status = get_vnode(volume, vnode->id, _node);
4177 	put_vnode(vnode);
4178 
4179 	return status;
4180 }
4181 
4182 
4183 status_t
4184 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4185 	struct stat* stat, bool kernel)
4186 {
4187 	status_t status;
4188 
4189 	if (path) {
4190 		// path given: get the stat of the node referred to by (fd, path)
4191 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4192 		if (pathBuffer.InitCheck() != B_OK)
4193 			return B_NO_MEMORY;
4194 
4195 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4196 			traverseLeafLink, stat, kernel);
4197 	} else {
4198 		// no path given: get the FD and use the FD operation
4199 		struct file_descriptor* descriptor
4200 			= get_fd(get_current_io_context(kernel), fd);
4201 		if (descriptor == NULL)
4202 			return B_FILE_ERROR;
4203 
4204 		if (descriptor->ops->fd_read_stat)
4205 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4206 		else
4207 			status = B_UNSUPPORTED;
4208 
4209 		put_fd(descriptor);
4210 	}
4211 
4212 	return status;
4213 }
4214 
4215 
4216 /*!	Finds the full path to the file that contains the module \a moduleName,
4217 	puts it into \a pathBuffer, and returns B_OK for success.
4218 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4219 	\c B_ENTRY_NOT_FOUND if no file could be found.
4220 	\a pathBuffer is clobbered in any case and must not be relied on if this
4221 	function returns unsuccessfully.
4222 	\a basePath and \a pathBuffer must not point to the same space.
4223 */
4224 status_t
4225 vfs_get_module_path(const char* basePath, const char* moduleName,
4226 	char* pathBuffer, size_t bufferSize)
4227 {
4228 	struct vnode* dir;
4229 	struct vnode* file;
4230 	status_t status;
4231 	size_t length;
4232 	char* path;
4233 
4234 	if (bufferSize == 0
4235 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4236 		return B_BUFFER_OVERFLOW;
4237 
4238 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4239 	if (status != B_OK)
4240 		return status;
4241 
4242 	// the path buffer has been clobbered by the above call
4243 	length = strlcpy(pathBuffer, basePath, bufferSize);
4244 	if (pathBuffer[length - 1] != '/')
4245 		pathBuffer[length++] = '/';
4246 
4247 	path = pathBuffer + length;
4248 	bufferSize -= length;
4249 
4250 	while (moduleName) {
4251 		char* nextPath = strchr(moduleName, '/');
4252 		if (nextPath == NULL)
4253 			length = strlen(moduleName);
4254 		else {
4255 			length = nextPath - moduleName;
4256 			nextPath++;
4257 		}
4258 
4259 		if (length + 1 >= bufferSize) {
4260 			status = B_BUFFER_OVERFLOW;
4261 			goto err;
4262 		}
4263 
4264 		memcpy(path, moduleName, length);
4265 		path[length] = '\0';
4266 		moduleName = nextPath;
4267 
4268 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4269 		if (status != B_OK) {
4270 			// vnode_path_to_vnode() has already released the reference to dir
4271 			return status;
4272 		}
4273 
4274 		if (S_ISDIR(file->Type())) {
4275 			// go to the next directory
4276 			path[length] = '/';
4277 			path[length + 1] = '\0';
4278 			path += length + 1;
4279 			bufferSize -= length + 1;
4280 
4281 			dir = file;
4282 		} else if (S_ISREG(file->Type())) {
4283 			// it's a file, so it should be what we were searching for
4284 			put_vnode(file);
4285 
4286 			return B_OK;
4287 		} else {
4288 			TRACE(("vfs_get_module_path(): something is strange here: "
4289 				"0x%08" B_PRIx32 "...\n", file->Type()));
4290 			status = B_ERROR;
4291 			dir = file;
4292 			goto err;
4293 		}
4294 	}
4295 
4296 	// if we got here, the moduleName just pointed to a directory, not to
4297 	// a real module - what should we do in this case?
4298 	status = B_ENTRY_NOT_FOUND;
4299 
4300 err:
4301 	put_vnode(dir);
4302 	return status;
4303 }
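

/*	Illustrative sketch: resolving the file that contains a module. The base
	path and module name below are made up for the example.
*/
#if 0
char path[B_PATH_NAME_LENGTH];
status_t status = vfs_get_module_path("/boot/system/add-ons/kernel",
	"bus_managers/my_bus/v1", path, sizeof(path));
if (status == B_OK)
	dprintf("module file: %s\n", path);
#endif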
4304 
4305 
4306 /*!	\brief Normalizes a given path.
4307 
4308 	The path must refer to an existing or non-existing entry in an existing
4309 	directory, that is chopping off the leaf component the remaining path must
4310 	refer to an existing directory.
4311 
4312 	The returned path will be canonical in that it will be absolute, will not
4313 	contain any "." or ".." components or duplicate occurrences of '/'s,
4314 	and none of the directory components will be symbolic links.
4315 
4316 	Any two paths referring to the same entry will result in the same
4317 	normalized path (well, that is pretty much the definition of `normalized',
4318 	isn't it :-).
4319 
4320 	\param path The path to be normalized.
4321 	\param buffer The buffer into which the normalized path will be written.
4322 		   May be the same one as \a path.
4323 	\param bufferSize The size of \a buffer.
4324 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4325 	\param kernel \c true, if the IO context of the kernel shall be used,
4326 		   otherwise that of the team this thread belongs to. Only relevant,
4327 		   if the path is relative (to get the CWD).
4328 	\return \c B_OK if everything went fine, another error code otherwise.
4329 */
4330 status_t
4331 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4332 	bool traverseLink, bool kernel)
4333 {
4334 	if (!path || !buffer || bufferSize < 1)
4335 		return B_BAD_VALUE;
4336 
4337 	if (path != buffer) {
4338 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4339 			return B_BUFFER_OVERFLOW;
4340 	}
4341 
4342 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4343 }
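

/*	Illustrative sketch: normalization may be done in place, since \a buffer
	may equal \a path. Assuming "/boot/home" exists, the redundant components
	below collapse to "/boot/home/Desktop".
*/
#if 0
char path[B_PATH_NAME_LENGTH];
strlcpy(path, "/boot//home/../home/./Desktop", sizeof(path));
status_t status = vfs_normalize_path(path, path, sizeof(path), true, true);
	// on success, "path" holds the canonical absolute path
#endif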
4344 
4345 
4346 /*!	\brief Creates a special node in the file system.
4347 
4348 	The caller gets a reference to the newly created node (which is passed
4349 	back through \a _createdVnode) and is responsible for releasing it.
4350 
4351 	\param path The path where to create the entry for the node. Can be \c NULL,
4352 		in which case the node is created without an entry in the root FS -- it
4353 		will automatically be deleted when the last reference has been released.
4354 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4355 		the target file system will just create the node with its standard
4356 		operations. Depending on the type of the node a subnode might be created
4357 		automatically, though.
4358 	\param mode The type and permissions for the node to be created.
4359 	\param flags Flags to be passed to the creating FS.
4360 	\param kernel \c true, if called in the kernel context (relevant only if
4361 		\a path is not \c NULL and not absolute).
4362 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4363 		file system creating the node, with the private data pointer and
4364 		operations for the super node. Can be \c NULL.
4365 	\param _createdVnode Pointer to pre-allocated storage where to store the
4366 		pointer to the newly created node.
4367 	\return \c B_OK, if everything went fine, another error code otherwise.
4368 */
4369 status_t
4370 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4371 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4372 	struct vnode** _createdVnode)
4373 {
4374 	struct vnode* dirNode;
4375 	char _leaf[B_FILE_NAME_LENGTH];
4376 	char* leaf = NULL;
4377 
4378 	if (path) {
4379 		// We've got a path. Get the dir vnode and the leaf name.
4380 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4381 		if (tmpPathBuffer.InitCheck() != B_OK)
4382 			return B_NO_MEMORY;
4383 
4384 		char* tmpPath = tmpPathBuffer.LockBuffer();
4385 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4386 			return B_NAME_TOO_LONG;
4387 
4388 		// get the dir vnode and the leaf name
4389 		leaf = _leaf;
4390 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4391 		if (error != B_OK)
4392 			return error;
4393 	} else {
4394 		// No path. Create the node in the root FS.
4395 		dirNode = sRoot;
4396 		inc_vnode_ref_count(dirNode);
4397 	}
4398 
4399 	VNodePutter _(dirNode);
4400 
4401 	// check support for creating special nodes
4402 	if (!HAS_FS_CALL(dirNode, create_special_node))
4403 		return B_UNSUPPORTED;
4404 
4405 	// create the node
4406 	fs_vnode superVnode;
4407 	ino_t nodeID;
4408 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4409 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4410 	if (status != B_OK)
4411 		return status;
4412 
4413 	// lookup the node
4414 	rw_lock_read_lock(&sVnodeLock);
4415 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4416 	rw_lock_read_unlock(&sVnodeLock);
4417 
4418 	if (*_createdVnode == NULL) {
4419 		panic("vfs_create_special_node(): lookup of node failed");
4420 		return B_ERROR;
4421 	}
4422 
4423 	return B_OK;
4424 }
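

/*	Illustrative sketch: creating a FIFO with an entry in the root FS. The
	path is made up; the reference passed back must be released by the caller.
*/
#if 0
struct vnode* createdVnode;
status_t status = vfs_create_special_node("/var/my_fifo", NULL,
	S_IFIFO | 0666, 0, true, NULL, &createdVnode);
if (status == B_OK)
	put_vnode(createdVnode);
#endif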
4425 
4426 
4427 extern "C" void
4428 vfs_put_vnode(struct vnode* vnode)
4429 {
4430 	put_vnode(vnode);
4431 }
4432 
4433 
4434 extern "C" status_t
4435 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4436 {
4437 	// Get current working directory from io context
4438 	struct io_context* context = get_current_io_context(false);
4439 	status_t status = B_OK;
4440 
4441 	mutex_lock(&context->io_mutex);
4442 
4443 	if (context->cwd != NULL) {
4444 		*_mountID = context->cwd->device;
4445 		*_vnodeID = context->cwd->id;
4446 	} else
4447 		status = B_ERROR;
4448 
4449 	mutex_unlock(&context->io_mutex);
4450 	return status;
4451 }
4452 
4453 
4454 status_t
4455 vfs_unmount(dev_t mountID, uint32 flags)
4456 {
4457 	return fs_unmount(NULL, mountID, flags, true);
4458 }
4459 
4460 
4461 extern "C" status_t
4462 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4463 {
4464 	struct vnode* vnode;
4465 
4466 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4467 	if (status != B_OK)
4468 		return status;
4469 
4470 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4471 	put_vnode(vnode);
4472 	return B_OK;
4473 }
4474 
4475 
4476 extern "C" void
4477 vfs_free_unused_vnodes(int32 level)
4478 {
4479 	vnode_low_resource_handler(NULL,
4480 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4481 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4482 		level);
4483 }
4484 
4485 
4486 extern "C" bool
4487 vfs_can_page(struct vnode* vnode, void* cookie)
4488 {
4489 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4490 
4491 	if (HAS_FS_CALL(vnode, can_page))
4492 		return FS_CALL(vnode, can_page, cookie);
4493 	return false;
4494 }
4495 
4496 
4497 extern "C" status_t
4498 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4499 	const generic_io_vec* vecs, size_t count, uint32 flags,
4500 	generic_size_t* _numBytes)
4501 {
4502 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4503 		vecs, pos));
4504 
4505 #if VFS_PAGES_IO_TRACING
4506 	generic_size_t bytesRequested = *_numBytes;
4507 #endif
4508 
4509 	IORequest request;
4510 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4511 	if (status == B_OK) {
4512 		status = vfs_vnode_io(vnode, cookie, &request);
4513 		if (status == B_OK)
4514 			status = request.Wait();
4515 		*_numBytes = request.TransferredBytes();
4516 	}
4517 
4518 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4519 		status, *_numBytes));
4520 
4521 	return status;
4522 }
4523 
4524 
4525 extern "C" status_t
4526 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4527 	const generic_io_vec* vecs, size_t count, uint32 flags,
4528 	generic_size_t* _numBytes)
4529 {
4530 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4531 		vecs, pos));
4532 
4533 #if VFS_PAGES_IO_TRACING
4534 	generic_size_t bytesRequested = *_numBytes;
4535 #endif
4536 
4537 	IORequest request;
4538 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4539 	if (status == B_OK) {
4540 		status = vfs_vnode_io(vnode, cookie, &request);
4541 		if (status == B_OK)
4542 			status = request.Wait();
4543 		*_numBytes = request.TransferredBytes();
4544 	}
4545 
4546 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4547 		status, *_numBytes));
4548 
4549 	return status;
4550 }
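

/*	Illustrative sketch: a single-vec page read. With B_PHYSICAL_IO_REQUEST
	in \a flags, generic_io_vec::base is interpreted as a physical address.
	The vnode, cookie, and address are assumed to come from the caller.
*/
#if 0
generic_io_vec vec;
vec.base = physicalAddress;
vec.length = B_PAGE_SIZE;

generic_size_t bytes = vec.length;
status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1,
	B_PHYSICAL_IO_REQUEST, &bytes);
	// on success, "bytes" holds the number of bytes actually transferred
#endif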
4551 
4552 
4553 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4554 	created if \a allocate is \c true.
4555 	In case it's successful, it will also grab a reference to the cache
4556 	it returns.
4557 */
4558 extern "C" status_t
4559 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4560 {
4561 	if (vnode->cache != NULL) {
4562 		vnode->cache->AcquireRef();
4563 		*_cache = vnode->cache;
4564 		return B_OK;
4565 	}
4566 
4567 	rw_lock_read_lock(&sVnodeLock);
4568 	vnode->Lock();
4569 
4570 	status_t status = B_OK;
4571 
4572 	// The cache could have been created in the meantime
4573 	if (vnode->cache == NULL) {
4574 		if (allocate) {
4575 			// TODO: actually the vnode needs to be busy already here, or
4576 			//	else this won't work...
4577 			bool wasBusy = vnode->IsBusy();
4578 			vnode->SetBusy(true);
4579 
4580 			vnode->Unlock();
4581 			rw_lock_read_unlock(&sVnodeLock);
4582 
4583 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4584 
4585 			rw_lock_read_lock(&sVnodeLock);
4586 			vnode->Lock();
4587 			vnode->SetBusy(wasBusy);
4588 		} else
4589 			status = B_BAD_VALUE;
4590 	}
4591 
4592 	vnode->Unlock();
4593 	rw_lock_read_unlock(&sVnodeLock);
4594 
4595 	if (status == B_OK) {
4596 		vnode->cache->AcquireRef();
4597 		*_cache = vnode->cache;
4598 	}
4599 
4600 	return status;
4601 }
4602 
4603 
4604 status_t
4605 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4606 	file_io_vec* vecs, size_t* _count)
4607 {
4608 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4609 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4610 
4611 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4612 }
4613 
4614 
4615 status_t
4616 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4617 {
4618 	status_t status = FS_CALL(vnode, read_stat, stat);
4619 
4620 	// fill in the st_dev and st_ino fields
4621 	if (status == B_OK) {
4622 		stat->st_dev = vnode->device;
4623 		stat->st_ino = vnode->id;
4624 		stat->st_rdev = -1;
4625 	}
4626 
4627 	return status;
4628 }
4629 
4630 
4631 status_t
4632 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4633 {
4634 	struct vnode* vnode;
4635 	status_t status = get_vnode(device, inode, &vnode, true, false);
4636 	if (status != B_OK)
4637 		return status;
4638 
4639 	status = FS_CALL(vnode, read_stat, stat);
4640 
4641 	// fill in the st_dev and st_ino fields
4642 	if (status == B_OK) {
4643 		stat->st_dev = vnode->device;
4644 		stat->st_ino = vnode->id;
4645 		stat->st_rdev = -1;
4646 	}
4647 
4648 	put_vnode(vnode);
4649 	return status;
4650 }
4651 
4652 
4653 status_t
4654 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4655 {
4656 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4657 }
4658 
4659 
4660 status_t
4661 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4662 	bool kernel, char* path, size_t pathLength)
4663 {
4664 	struct vnode* vnode;
4665 	status_t status;
4666 
4667 	// filter invalid leaf names
4668 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4669 		return B_BAD_VALUE;
4670 
4671 	// get the vnode matching the dir's node_ref
4672 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4673 		// special cases "." and "..": we can directly get the vnode of the
4674 		// referenced directory
4675 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4676 		leaf = NULL;
4677 	} else
4678 		status = get_vnode(device, inode, &vnode, true, false);
4679 	if (status != B_OK)
4680 		return status;
4681 
4682 	// get the directory path
4683 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4684 	put_vnode(vnode);
4685 		// we don't need the vnode anymore
4686 	if (status != B_OK)
4687 		return status;
4688 
4689 	// append the leaf name
4690 	if (leaf) {
4691 		// insert a directory separator if this is not the file system root
4692 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4693 				>= pathLength)
4694 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4695 			return B_NAME_TOO_LONG;
4696 		}
4697 	}
4698 
4699 	return B_OK;
4700 }
4701 
4702 
4703 /*!	If the given descriptor locked its vnode, that lock will be released. */
4704 void
4705 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4706 {
4707 	struct vnode* vnode = fd_vnode(descriptor);
4708 
4709 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4710 		vnode->mandatory_locked_by = NULL;
4711 }
4712 
4713 
4714 /*!	Closes all file descriptors of the specified I/O context that
4715 	have the O_CLOEXEC flag set.
4716 */
4717 void
4718 vfs_exec_io_context(io_context* context)
4719 {
4720 	uint32 i;
4721 
4722 	for (i = 0; i < context->table_size; i++) {
4723 		mutex_lock(&context->io_mutex);
4724 
4725 		struct file_descriptor* descriptor = context->fds[i];
4726 		bool remove = false;
4727 
4728 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4729 			context->fds[i] = NULL;
4730 			context->num_used_fds--;
4731 
4732 			remove = true;
4733 		}
4734 
4735 		mutex_unlock(&context->io_mutex);
4736 
4737 		if (remove) {
4738 			close_fd(descriptor);
4739 			put_fd(descriptor);
4740 		}
4741 	}
4742 }
4743 
4744 
4745 /*! Sets up a new io_context structure, and inherits the properties
4746 	of the parent io_context if one is given.
4747 */
4748 io_context*
4749 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4750 {
4751 	io_context* context = (io_context*)malloc(sizeof(io_context));
4752 	if (context == NULL)
4753 		return NULL;
4754 
4755 	TIOC(NewIOContext(context, parentContext));
4756 
4757 	memset(context, 0, sizeof(io_context));
4758 	context->ref_count = 1;
4759 
4760 	MutexLocker parentLocker;
4761 
4762 	size_t tableSize;
4763 	if (parentContext) {
4764 		parentLocker.SetTo(parentContext->io_mutex, false);
4765 		tableSize = parentContext->table_size;
4766 	} else
4767 		tableSize = DEFAULT_FD_TABLE_SIZE;
4768 
4769 	// allocate space for FDs and their close-on-exec flag
4770 	context->fds = (file_descriptor**)malloc(
4771 		sizeof(struct file_descriptor*) * tableSize
4772 		+ sizeof(struct select_sync*) * tableSize
4773 		+ (tableSize + 7) / 8);
4774 	if (context->fds == NULL) {
4775 		free(context);
4776 		return NULL;
4777 	}
4778 
4779 	context->select_infos = (select_info**)(context->fds + tableSize);
4780 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4781 
4782 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4783 		+ sizeof(struct select_sync*) * tableSize
4784 		+ (tableSize + 7) / 8);
4785 
4786 	mutex_init(&context->io_mutex, "I/O context");
4787 
4788 	// Copy all parent file descriptors
4789 
4790 	if (parentContext) {
4791 		size_t i;
4792 
4793 		mutex_lock(&sIOContextRootLock);
4794 		context->root = parentContext->root;
4795 		if (context->root)
4796 			inc_vnode_ref_count(context->root);
4797 		mutex_unlock(&sIOContextRootLock);
4798 
4799 		context->cwd = parentContext->cwd;
4800 		if (context->cwd)
4801 			inc_vnode_ref_count(context->cwd);
4802 
4803 		for (i = 0; i < tableSize; i++) {
4804 			struct file_descriptor* descriptor = parentContext->fds[i];
4805 
4806 			if (descriptor != NULL) {
4807 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4808 				if (closeOnExec && purgeCloseOnExec)
4809 					continue;
4810 
4811 				TFD(InheritFD(context, i, descriptor, parentContext));
4812 
4813 				context->fds[i] = descriptor;
4814 				context->num_used_fds++;
4815 				atomic_add(&descriptor->ref_count, 1);
4816 				atomic_add(&descriptor->open_count, 1);
4817 
4818 				if (closeOnExec)
4819 					fd_set_close_on_exec(context, i, true);
4820 			}
4821 		}
4822 
4823 		parentLocker.Unlock();
4824 	} else {
4825 		context->root = sRoot;
4826 		context->cwd = sRoot;
4827 
4828 		if (context->root)
4829 			inc_vnode_ref_count(context->root);
4830 
4831 		if (context->cwd)
4832 			inc_vnode_ref_count(context->cwd);
4833 	}
4834 
4835 	context->table_size = tableSize;
4836 
4837 	list_init(&context->node_monitors);
4838 	context->max_monitors = DEFAULT_NODE_MONITORS;
4839 
4840 	return context;
4841 }
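

/*	Illustrative sketch: the FD table, the select-info table, and the
	close-on-exec bitmap live back to back in the single allocation made
	above, sized as follows.
*/
#if 0
size_t n = context->table_size;
size_t fdBytes = sizeof(file_descriptor*) * n;
size_t selectBytes = sizeof(select_info*) * n;
size_t bitmapBytes = (n + 7) / 8;
	// one close-on-exec bit per FD, rounded up to whole bytes;
	// total allocation: fdBytes + selectBytes + bitmapBytes
#endif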
4842 
4843 
4844 static status_t
4845 vfs_free_io_context(io_context* context)
4846 {
4847 	uint32 i;
4848 
4849 	TIOC(FreeIOContext(context));
4850 
4851 	if (context->root)
4852 		put_vnode(context->root);
4853 
4854 	if (context->cwd)
4855 		put_vnode(context->cwd);
4856 
4857 	mutex_lock(&context->io_mutex);
4858 
4859 	for (i = 0; i < context->table_size; i++) {
4860 		if (struct file_descriptor* descriptor = context->fds[i]) {
4861 			close_fd(descriptor);
4862 			put_fd(descriptor);
4863 		}
4864 	}
4865 
4866 	mutex_destroy(&context->io_mutex);
4867 
4868 	remove_node_monitors(context);
4869 	free(context->fds);
4870 	free(context);
4871 
4872 	return B_OK;
4873 }
4874 
4875 
4876 void
4877 vfs_get_io_context(io_context* context)
4878 {
4879 	atomic_add(&context->ref_count, 1);
4880 }
4881 
4882 
4883 void
4884 vfs_put_io_context(io_context* context)
4885 {
4886 	if (atomic_add(&context->ref_count, -1) == 1)
4887 		vfs_free_io_context(context);
4888 }
4889 
4890 
4891 static status_t
4892 vfs_resize_fd_table(struct io_context* context, const int newSize)
4893 {
4894 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4895 		return B_BAD_VALUE;
4896 
4897 	TIOC(ResizeIOContext(context, newSize));
4898 
4899 	MutexLocker _(context->io_mutex);
4900 
4901 	int oldSize = context->table_size;
4902 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4903 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4904 
4905 	// If the tables shrink, make sure none of the fds being dropped are in use.
4906 	if (newSize < oldSize) {
4907 		for (int i = oldSize; i-- > newSize;) {
4908 			if (context->fds[i])
4909 				return B_BUSY;
4910 		}
4911 	}
4912 
4913 	// store pointers to the old tables
4914 	file_descriptor** oldFDs = context->fds;
4915 	select_info** oldSelectInfos = context->select_infos;
4916 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4917 
4918 	// allocate new tables
4919 	file_descriptor** newFDs = (file_descriptor**)malloc(
4920 		sizeof(struct file_descriptor*) * newSize
4921 		+ sizeof(struct select_sync*) * newSize
4922 		+ newCloseOnExitBitmapSize);
4923 	if (newFDs == NULL)
4924 		return B_NO_MEMORY;
4925 
4926 	context->fds = newFDs;
4927 	context->select_infos = (select_info**)(context->fds + newSize);
4928 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4929 	context->table_size = newSize;
4930 
4931 	// copy entries from old tables
4932 	int toCopy = min_c(oldSize, newSize);
4933 
4934 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4935 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4936 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4937 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4938 
4939 	// clear additional entries, if the tables grow
4940 	if (newSize > oldSize) {
4941 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4942 		memset(context->select_infos + oldSize, 0,
4943 			sizeof(void*) * (newSize - oldSize));
4944 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4945 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4946 	}
4947 
4948 	free(oldFDs);
4949 
4950 	return B_OK;
4951 }
4952 
4953 
4954 static status_t
4955 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4956 {
4957 	int	status = B_OK;
4958 
4959 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4960 		return B_BAD_VALUE;
4961 
4962 	mutex_lock(&context->io_mutex);
4963 
4964 	if ((size_t)newSize < context->num_monitors) {
4965 		status = B_BUSY;
4966 		goto out;
4967 	}
4968 	context->max_monitors = newSize;
4969 
4970 out:
4971 	mutex_unlock(&context->io_mutex);
4972 	return status;
4973 }
4974 
4975 
4976 status_t
4977 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
4978 	ino_t* _mountPointNodeID)
4979 {
4980 	ReadLocker nodeLocker(sVnodeLock);
4981 	MutexLocker mountLocker(sMountMutex);
4982 
4983 	struct fs_mount* mount = find_mount(mountID);
4984 	if (mount == NULL)
4985 		return B_BAD_VALUE;
4986 
4987 	Vnode* mountPoint = mount->covers_vnode;
4988 
4989 	*_mountPointMountID = mountPoint->device;
4990 	*_mountPointNodeID = mountPoint->id;
4991 
4992 	return B_OK;
4993 }
4994 
4995 
4996 status_t
4997 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
4998 	ino_t coveredNodeID)
4999 {
5000 	// get the vnodes
5001 	Vnode* vnode;
5002 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5003 	if (error != B_OK)
5004 		return B_BAD_VALUE;
5005 	VNodePutter vnodePutter(vnode);
5006 
5007 	Vnode* coveredVnode;
5008 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5009 		false);
5010 	if (error != B_OK)
5011 		return B_BAD_VALUE;
5012 	VNodePutter coveredVnodePutter(coveredVnode);
5013 
5014 	// establish the covered/covering links
5015 	WriteLocker locker(sVnodeLock);
5016 
5017 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5018 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5019 		return B_BUSY;
5020 	}
5021 
5022 	vnode->covers = coveredVnode;
5023 	vnode->SetCovering(true);
5024 
5025 	coveredVnode->covered_by = vnode;
5026 	coveredVnode->SetCovered(true);
5027 
5028 	// the vnodes do now reference each other
5029 	inc_vnode_ref_count(vnode);
5030 	inc_vnode_ref_count(coveredVnode);
5031 
5032 	return B_OK;
5033 }
5034 
5035 
5036 int
5037 vfs_getrlimit(int resource, struct rlimit* rlp)
5038 {
5039 	if (!rlp)
5040 		return B_BAD_ADDRESS;
5041 
5042 	switch (resource) {
5043 		case RLIMIT_NOFILE:
5044 		{
5045 			struct io_context* context = get_current_io_context(false);
5046 			MutexLocker _(context->io_mutex);
5047 
5048 			rlp->rlim_cur = context->table_size;
5049 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5050 			return 0;
5051 		}
5052 
5053 		case RLIMIT_NOVMON:
5054 		{
5055 			struct io_context* context = get_current_io_context(false);
5056 			MutexLocker _(context->io_mutex);
5057 
5058 			rlp->rlim_cur = context->max_monitors;
5059 			rlp->rlim_max = MAX_NODE_MONITORS;
5060 			return 0;
5061 		}
5062 
5063 		default:
5064 			return B_BAD_VALUE;
5065 	}
5066 }
5067 
5068 
5069 int
5070 vfs_setrlimit(int resource, const struct rlimit* rlp)
5071 {
5072 	if (!rlp)
5073 		return B_BAD_ADDRESS;
5074 
5075 	switch (resource) {
5076 		case RLIMIT_NOFILE:
5077 			/* TODO: check getuid() */
5078 			if (rlp->rlim_max != RLIM_SAVED_MAX
5079 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5080 				return B_NOT_ALLOWED;
5081 
5082 			return vfs_resize_fd_table(get_current_io_context(false),
5083 				rlp->rlim_cur);
5084 
5085 		case RLIMIT_NOVMON:
5086 			/* TODO: check getuid() */
5087 			if (rlp->rlim_max != RLIM_SAVED_MAX
5088 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5089 				return B_NOT_ALLOWED;
5090 
5091 			return vfs_resize_monitor_table(get_current_io_context(false),
5092 				rlp->rlim_cur);
5093 
5094 		default:
5095 			return B_BAD_VALUE;
5096 	}
5097 }
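

/*	Illustrative sketch: from userland these limits are reached via the
	standard setrlimit() interface; growing RLIMIT_NOFILE resizes this
	team's FD table.
*/
#if 0
struct rlimit rl;
rl.rlim_cur = 512;				// request 512 FD table slots
rl.rlim_max = RLIM_SAVED_MAX;	// leave the hard limit untouched
setrlimit(RLIMIT_NOFILE, &rl);
#endif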
5098 
5099 
5100 status_t
5101 vfs_init(kernel_args* args)
5102 {
5103 	vnode::StaticInit();
5104 
5105 	struct vnode dummyVnode;
5106 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5107 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5108 	if (sVnodeTable == NULL)
5109 		panic("vfs_init: error creating vnode hash table\n");
5110 
5111 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5112 
5113 	struct fs_mount dummyMount;
5114 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5115 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5116 	if (sMountsTable == NULL)
5117 		panic("vfs_init: error creating mounts hash table\n");
5118 
5119 	node_monitor_init();
5120 
5121 	sRoot = NULL;
5122 
5123 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5124 
5125 	if (block_cache_init() != B_OK)
5126 		return B_ERROR;
5127 
5128 #ifdef ADD_DEBUGGER_COMMANDS
5129 	// add some debugger commands
5130 	add_debugger_command_etc("vnode", &dump_vnode,
5131 		"Print info about the specified vnode",
5132 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5133 		"Prints information about the vnode specified by address <vnode> or\n"
5134 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5135 		"constructed and printed. It might not be possible to construct a\n"
5136 		"complete path, though.\n",
5137 		0);
5138 	add_debugger_command("vnodes", &dump_vnodes,
5139 		"list all vnodes (from the specified device)");
5140 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5141 		"list all vnode caches");
5142 	add_debugger_command("mount", &dump_mount,
5143 		"info about the specified fs_mount");
5144 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5145 	add_debugger_command("io_context", &dump_io_context,
5146 		"info about the I/O context");
5147 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5148 		"info about vnode usage");
5149 #endif
5150 
5151 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5152 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5153 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5154 		0);
5155 
5156 	fifo_init();
5157 	file_map_init();
5158 
5159 	return file_cache_init();
5160 }
5161 
5162 
5163 //	#pragma mark - fd_ops implementations
5164 
5165 
5166 /*!
5167 	Calls fs_open() on the given vnode and returns a new
5168 	file descriptor for it
5169 */
5170 static int
5171 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5172 {
5173 	void* cookie;
5174 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5175 	if (status != B_OK)
5176 		return status;
5177 
5178 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5179 	if (fd < 0) {
5180 		FS_CALL(vnode, close, cookie);
5181 		FS_CALL(vnode, free_cookie, cookie);
5182 	}
5183 	return fd;
5184 }
5185 
5186 
5187 /*!
5188 	Creates the entry \a name in the given \a directory (or opens the existing
5189 	entry, unless O_EXCL is given) and returns a new file descriptor for it
5190 */
5191 static int
5192 create_vnode(struct vnode* directory, const char* name, int openMode,
5193 	int perms, bool kernel)
5194 {
5195 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5196 	status_t status = B_ERROR;
5197 	struct vnode* vnode;
5198 	void* cookie;
5199 	ino_t newID;
5200 
5201 	// This is somewhat tricky: If the entry already exists, the FS responsible
5202 	// for the directory might not necessarily also be the one responsible for
5203 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5204 	// we can actually never call the create() hook without O_EXCL. Instead we
5205 	// try to look the entry up first. If it already exists, we just open the
5206 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5207 	// introduces a race condition, since someone else might have created the
5208 	// entry in the meantime. In that case we hope the respective FS returns
5209 	// the correct error code, and we retry (up to 3 times).
5210 
5211 	for (int i = 0; i < 3 && status != B_OK; i++) {
5212 		// look the node up
5213 		status = lookup_dir_entry(directory, name, &vnode);
5214 		if (status == B_OK) {
5215 			VNodePutter putter(vnode);
5216 
5217 			if ((openMode & O_EXCL) != 0)
5218 				return B_FILE_EXISTS;
5219 
5220 			// If the node is a symlink, we have to follow it, unless
5221 			// O_NOTRAVERSE is set.
5222 			if (S_ISLNK(vnode->Type()) && traverse) {
5223 				putter.Put();
5224 				char clonedName[B_FILE_NAME_LENGTH + 1];
5225 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5226 						>= B_FILE_NAME_LENGTH) {
5227 					return B_NAME_TOO_LONG;
5228 				}
5229 
5230 				inc_vnode_ref_count(directory);
5231 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5232 					kernel, &vnode, NULL);
5233 				if (status != B_OK)
5234 					return status;
5235 
5236 				putter.SetTo(vnode);
5237 			}
5238 
5239 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5240 				return B_LINK_LIMIT;
5241 
5242 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5243 			// on success keep the vnode reference for the FD
5244 			if (fd >= 0)
5245 				putter.Detach();
5246 
5247 			return fd;
5248 		}
5249 
5250 		// it doesn't exist yet -- try to create it
5251 
5252 		if (!HAS_FS_CALL(directory, create))
5253 			return B_READ_ONLY_DEVICE;
5254 
5255 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5256 			&cookie, &newID);
5257 		if (status != B_OK
5258 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5259 			return status;
5260 		}
5261 	}
5262 
5263 	if (status != B_OK)
5264 		return status;
5265 
5266 	// the node has been created successfully
5267 
5268 	rw_lock_read_lock(&sVnodeLock);
5269 	vnode = lookup_vnode(directory->device, newID);
5270 	rw_lock_read_unlock(&sVnodeLock);
5271 
5272 	if (vnode == NULL) {
5273 		panic("vfs: fs_create() returned success but there is no vnode, "
5274 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5275 		return B_BAD_VALUE;
5276 	}
5277 
5278 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5279 	if (fd >= 0)
5280 		return fd;
5281 
5282 	status = fd;
5283 
5284 	// something went wrong, clean up
5285 
5286 	FS_CALL(vnode, close, cookie);
5287 	FS_CALL(vnode, free_cookie, cookie);
5288 	put_vnode(vnode);
5289 
5290 	FS_CALL(directory, unlink, name);
5291 
5292 	return status;
5293 }
5294 
5295 
5296 /*! Calls fs open_dir() on the given vnode and returns a new
5297 	file descriptor for it
5298 */
5299 static int
5300 open_dir_vnode(struct vnode* vnode, bool kernel)
5301 {
5302 	void* cookie;
5303 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5304 	if (status != B_OK)
5305 		return status;
5306 
5307 	// directory is opened, create a fd
5308 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5309 	if (status >= 0)
5310 		return status;
5311 
5312 	FS_CALL(vnode, close_dir, cookie);
5313 	FS_CALL(vnode, free_dir_cookie, cookie);
5314 
5315 	return status;
5316 }
5317 
5318 
5319 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5320 	file descriptor for it.
5321 	Used by attr_dir_open() and attr_dir_open_fd().
5322 */
5323 static int
5324 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5325 {
5326 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5327 		return B_UNSUPPORTED;
5328 
5329 	void* cookie;
5330 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5331 	if (status != B_OK)
5332 		return status;
5333 
5334 	// directory is opened, create a fd
5335 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5336 		kernel);
5337 	if (status >= 0)
5338 		return status;
5339 
5340 	FS_CALL(vnode, close_attr_dir, cookie);
5341 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5342 
5343 	return status;
5344 }
5345 
5346 
5347 static int
5348 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5349 	int openMode, int perms, bool kernel)
5350 {
5351 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5352 		"kernel %d\n", name, openMode, perms, kernel));
5353 
5354 	// get directory to put the new file in
5355 	struct vnode* directory;
5356 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5357 	if (status != B_OK)
5358 		return status;
5359 
5360 	status = create_vnode(directory, name, openMode, perms, kernel);
5361 	put_vnode(directory);
5362 
5363 	return status;
5364 }
5365 
5366 
5367 static int
5368 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5369 {
5370 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5371 		openMode, perms, kernel));
5372 
5373 	// get directory to put the new file in
5374 	char name[B_FILE_NAME_LENGTH];
5375 	struct vnode* directory;
5376 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5377 		kernel);
5378 	if (status < 0)
5379 		return status;
5380 
5381 	status = create_vnode(directory, name, openMode, perms, kernel);
5382 
5383 	put_vnode(directory);
5384 	return status;
5385 }
5386 
5387 
5388 static int
5389 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5390 	int openMode, bool kernel)
5391 {
5392 	if (name == NULL || *name == '\0')
5393 		return B_BAD_VALUE;
5394 
5395 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5396 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5397 
5398 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5399 
5400 	// get the vnode matching the entry_ref
5401 	struct vnode* vnode;
5402 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5403 		kernel, &vnode);
5404 	if (status != B_OK)
5405 		return status;
5406 
5407 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5408 		put_vnode(vnode);
5409 		return B_LINK_LIMIT;
5410 	}
5411 
5412 	int newFD = open_vnode(vnode, openMode, kernel);
5413 	if (newFD >= 0) {
5414 		// The vnode reference has been transferred to the FD
5415 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5416 			directoryID, vnode->id, name);
5417 	} else
5418 		put_vnode(vnode);
5419 
5420 	return newFD;
5421 }
5422 
5423 
5424 static int
5425 file_open(int fd, char* path, int openMode, bool kernel)
5426 {
5427 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5428 
5429 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5430 		fd, path, openMode, kernel));
5431 
5432 	// get the vnode matching the vnode + path combination
5433 	struct vnode* vnode;
5434 	ino_t parentID;
5435 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5436 		&parentID, kernel);
5437 	if (status != B_OK)
5438 		return status;
5439 
5440 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5441 		put_vnode(vnode);
5442 		return B_LINK_LIMIT;
5443 	}
5444 
5445 	// open the vnode
5446 	int newFD = open_vnode(vnode, openMode, kernel);
5447 	if (newFD >= 0) {
5448 		// The vnode reference has been transferred to the FD
5449 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5450 			vnode->device, parentID, vnode->id, NULL);
5451 	} else
5452 		put_vnode(vnode);
5453 
5454 	return newFD;
5455 }
5456 
5457 
5458 static status_t
5459 file_close(struct file_descriptor* descriptor)
5460 {
5461 	struct vnode* vnode = descriptor->u.vnode;
5462 	status_t status = B_OK;
5463 
5464 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5465 
5466 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5467 		vnode->id);
5468 	if (HAS_FS_CALL(vnode, close)) {
5469 		status = FS_CALL(vnode, close, descriptor->cookie);
5470 	}
5471 
5472 	if (status == B_OK) {
5473 		// remove all outstanding locks for this team
5474 		if (HAS_FS_CALL(vnode, release_lock))
5475 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5476 		else
5477 			status = release_advisory_lock(vnode, NULL);
5478 	}
5479 	return status;
5480 }
5481 
5482 
5483 static void
5484 file_free_fd(struct file_descriptor* descriptor)
5485 {
5486 	struct vnode* vnode = descriptor->u.vnode;
5487 
5488 	if (vnode != NULL) {
5489 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5490 		put_vnode(vnode);
5491 	}
5492 }
5493 
5494 
5495 static status_t
5496 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5497 	size_t* length)
5498 {
5499 	struct vnode* vnode = descriptor->u.vnode;
5500 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5501 		pos, length, *length));
5502 
5503 	if (S_ISDIR(vnode->Type()))
5504 		return B_IS_A_DIRECTORY;
5505 
5506 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5507 }
5508 
5509 
5510 static status_t
5511 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5512 	size_t* length)
5513 {
5514 	struct vnode* vnode = descriptor->u.vnode;
5515 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5516 		length));
5517 
5518 	if (S_ISDIR(vnode->Type()))
5519 		return B_IS_A_DIRECTORY;
5520 	if (!HAS_FS_CALL(vnode, write))
5521 		return B_READ_ONLY_DEVICE;
5522 
5523 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5524 }
5525 
5526 
5527 static off_t
5528 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5529 {
5530 	struct vnode* vnode = descriptor->u.vnode;
5531 	off_t offset;
5532 
5533 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5534 		seekType));
5535 
5536 	// some kinds of files are not seekable
5537 	switch (vnode->Type() & S_IFMT) {
5538 		case S_IFIFO:
5539 		case S_IFSOCK:
5540 			return ESPIPE;
5541 
5542 		// The Open Group Base Specs don't single out any file types besides
5543 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5544 		case S_IFREG:
5545 		case S_IFBLK:
5546 		case S_IFDIR:
5547 		case S_IFLNK:
5548 		case S_IFCHR:
5549 			break;
5550 	}
5551 
5552 	switch (seekType) {
5553 		case SEEK_SET:
5554 			offset = 0;
5555 			break;
5556 		case SEEK_CUR:
5557 			offset = descriptor->pos;
5558 			break;
5559 		case SEEK_END:
5560 		{
5561 			// stat() the node
5562 			if (!HAS_FS_CALL(vnode, read_stat))
5563 				return B_UNSUPPORTED;
5564 
5565 			struct stat stat;
5566 			status_t status = FS_CALL(vnode, read_stat, &stat);
5567 			if (status != B_OK)
5568 				return status;
5569 
5570 			offset = stat.st_size;
5571 			break;
5572 		}
5573 		default:
5574 			return B_BAD_VALUE;
5575 	}
5576 
5577 	// assumes off_t is 64 bits wide
5578 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5579 		return B_BUFFER_OVERFLOW;
5580 
5581 	pos += offset;
5582 	if (pos < 0)
5583 		return B_BAD_VALUE;
5584 
5585 	return descriptor->pos = pos;
5586 }
5587 
5588 
5589 static status_t
5590 file_select(struct file_descriptor* descriptor, uint8 event,
5591 	struct selectsync* sync)
5592 {
5593 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5594 
5595 	struct vnode* vnode = descriptor->u.vnode;
5596 
5597 	// If the FS has no select() hook, notify select() now.
5598 	if (!HAS_FS_CALL(vnode, select))
5599 		return notify_select_event(sync, event);
5600 
5601 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5602 }
5603 
5604 
5605 static status_t
5606 file_deselect(struct file_descriptor* descriptor, uint8 event,
5607 	struct selectsync* sync)
5608 {
5609 	struct vnode* vnode = descriptor->u.vnode;
5610 
5611 	if (!HAS_FS_CALL(vnode, deselect))
5612 		return B_OK;
5613 
5614 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5615 }
5616 
5617 
5618 static status_t
5619 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5620 	bool kernel)
5621 {
5622 	struct vnode* vnode;
5623 	status_t status;
5624 
5625 	if (name == NULL || *name == '\0')
5626 		return B_BAD_VALUE;
5627 
5628 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5629 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5630 
5631 	status = get_vnode(mountID, parentID, &vnode, true, false);
5632 	if (status != B_OK)
5633 		return status;
5634 
5635 	if (HAS_FS_CALL(vnode, create_dir))
5636 		status = FS_CALL(vnode, create_dir, name, perms);
5637 	else
5638 		status = B_READ_ONLY_DEVICE;
5639 
5640 	put_vnode(vnode);
5641 	return status;
5642 }
5643 
5644 
5645 static status_t
5646 dir_create(int fd, char* path, int perms, bool kernel)
5647 {
5648 	char filename[B_FILE_NAME_LENGTH];
5649 	struct vnode* vnode;
5650 	status_t status;
5651 
5652 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5653 		kernel));
5654 
5655 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5656 	if (status < 0)
5657 		return status;
5658 
5659 	if (HAS_FS_CALL(vnode, create_dir)) {
5660 		status = FS_CALL(vnode, create_dir, filename, perms);
5661 	} else
5662 		status = B_READ_ONLY_DEVICE;
5663 
5664 	put_vnode(vnode);
5665 	return status;
5666 }
5667 
5668 
5669 static int
5670 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5671 {
5672 	FUNCTION(("dir_open_entry_ref()\n"));
5673 
5674 	if (name && name[0] == '\0')
5675 		return B_BAD_VALUE;
5676 
5677 	// get the vnode matching the entry_ref/node_ref
5678 	struct vnode* vnode;
5679 	status_t status;
5680 	if (name) {
5681 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5682 			&vnode);
5683 	} else
5684 		status = get_vnode(mountID, parentID, &vnode, true, false);
5685 	if (status != B_OK)
5686 		return status;
5687 
5688 	int newFD = open_dir_vnode(vnode, kernel);
5689 	if (newFD >= 0) {
5690 		// The vnode reference has been transferred to the FD
5691 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5692 			vnode->id, name);
5693 	} else
5694 		put_vnode(vnode);
5695 
5696 	return newFD;
5697 }
5698 
5699 
5700 static int
5701 dir_open(int fd, char* path, bool kernel)
5702 {
5703 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5704 		kernel));
5705 
5706 	// get the vnode matching the vnode + path combination
5707 	struct vnode* vnode = NULL;
5708 	ino_t parentID;
5709 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5710 		kernel);
5711 	if (status != B_OK)
5712 		return status;
5713 
5714 	// open the dir
5715 	int newFD = open_dir_vnode(vnode, kernel);
5716 	if (newFD >= 0) {
5717 		// The vnode reference has been transferred to the FD
5718 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5719 			parentID, vnode->id, NULL);
5720 	} else
5721 		put_vnode(vnode);
5722 
5723 	return newFD;
5724 }
5725 
5726 
5727 static status_t
5728 dir_close(struct file_descriptor* descriptor)
5729 {
5730 	struct vnode* vnode = descriptor->u.vnode;
5731 
5732 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5733 
5734 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5735 		vnode->id);
5736 	if (HAS_FS_CALL(vnode, close_dir))
5737 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5738 
5739 	return B_OK;
5740 }
5741 
5742 
5743 static void
5744 dir_free_fd(struct file_descriptor* descriptor)
5745 {
5746 	struct vnode* vnode = descriptor->u.vnode;
5747 
5748 	if (vnode != NULL) {
5749 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5750 		put_vnode(vnode);
5751 	}
5752 }
5753 
5754 
5755 static status_t
5756 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5757 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5758 {
5759 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5760 		bufferSize, _count);
5761 }
5762 
5763 
5764 static status_t
5765 fix_dirent(struct vnode* parent, struct dirent* entry,
5766 	struct io_context* ioContext)
5767 {
5768 	// set d_pdev and d_pino
5769 	entry->d_pdev = parent->device;
5770 	entry->d_pino = parent->id;
5771 
	// If this is the ".." entry and the directory is covering another
	// vnode, we need to replace d_dev and d_ino with the actual values.
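	// Illustrative example: for the root directory of a mounted volume the
	// FS reports its own root node for "..". Resolving ".." via
	// vnode_path_to_vnode() below follows the covers link instead, so the
	// caller sees the parent of the mount point in the global namespace.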
5774 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5775 		// Make sure the IO context root is not bypassed.
5776 		if (parent == ioContext->root) {
5777 			entry->d_dev = parent->device;
5778 			entry->d_ino = parent->id;
5779 		} else {
5780 			inc_vnode_ref_count(parent);
5781 				// vnode_path_to_vnode() puts the node
5782 
5783 			// ".." is guaranteed not to be clobbered by this call
5784 			struct vnode* vnode;
5785 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5786 				ioContext, &vnode, NULL);
5787 
5788 			if (status == B_OK) {
5789 				entry->d_dev = vnode->device;
5790 				entry->d_ino = vnode->id;
5791 				put_vnode(vnode);
5792 			}
5793 		}
5794 	} else {
5795 		// resolve covered vnodes
5796 		ReadLocker _(&sVnodeLock);
5797 
5798 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5799 		if (vnode != NULL && vnode->covered_by != NULL) {
5800 			do {
5801 				vnode = vnode->covered_by;
5802 			} while (vnode->covered_by != NULL);
5803 
5804 			entry->d_dev = vnode->device;
5805 			entry->d_ino = vnode->id;
5806 		}
5807 	}
5808 
5809 	return B_OK;
5810 }
5811 
5812 
5813 static status_t
5814 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5815 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5816 {
5817 	if (!HAS_FS_CALL(vnode, read_dir))
5818 		return B_UNSUPPORTED;
5819 
5820 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5821 		_count);
5822 	if (error != B_OK)
5823 		return error;
5824 
	// We need to adjust the read dirents: the buffer holds *_count packed
	// entries, each followed by the next at its d_reclen offset.
5826 	uint32 count = *_count;
5827 	for (uint32 i = 0; i < count; i++) {
5828 		error = fix_dirent(vnode, buffer, ioContext);
5829 		if (error != B_OK)
5830 			return error;
5831 
5832 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5833 	}
5834 
5835 	return error;
5836 }
5837 
5838 
5839 static status_t
5840 dir_rewind(struct file_descriptor* descriptor)
5841 {
5842 	struct vnode* vnode = descriptor->u.vnode;
5843 
5844 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5845 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5846 	}
5847 
5848 	return B_UNSUPPORTED;
5849 }
5850 
5851 
5852 static status_t
5853 dir_remove(int fd, char* path, bool kernel)
5854 {
5855 	char name[B_FILE_NAME_LENGTH];
5856 	struct vnode* directory;
5857 	status_t status;
5858 
5859 	if (path != NULL) {
		// we need to make sure our path name doesn't end with "/", ".",
		// or ".."
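		// Illustrative examples: "a/b//" and "a/b/." both reduce to "a/b",
		// while "a/b/.." is refused with B_NOT_ALLOWED right away.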
5862 		char* lastSlash;
5863 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5864 			char* leaf = lastSlash + 1;
5865 			if (!strcmp(leaf, ".."))
5866 				return B_NOT_ALLOWED;
5867 
			// skip over a run of adjacent slashes
5869 			while (lastSlash > path && lastSlash[-1] == '/')
5870 				lastSlash--;
5871 
			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
				break;
5876 			// "name/" -> "name", or "name/." -> "name"
5877 			lastSlash[0] = '\0';
5878 		}
5879 
5880 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5881 			return B_NOT_ALLOWED;
5882 	}
5883 
5884 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5885 	if (status != B_OK)
5886 		return status;
5887 
5888 	if (HAS_FS_CALL(directory, remove_dir))
5889 		status = FS_CALL(directory, remove_dir, name);
5890 	else
5891 		status = B_READ_ONLY_DEVICE;
5892 
5893 	put_vnode(directory);
5894 	return status;
5895 }
5896 
5897 
5898 static status_t
5899 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5900 	size_t length)
5901 {
5902 	struct vnode* vnode = descriptor->u.vnode;
5903 
5904 	if (HAS_FS_CALL(vnode, ioctl))
5905 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5906 
5907 	return B_DEV_INVALID_IOCTL;
5908 }
5909 
5910 
5911 static status_t
5912 common_fcntl(int fd, int op, size_t argument, bool kernel)
5913 {
5914 	struct flock flock;
5915 
5916 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5917 		fd, op, argument, kernel ? "kernel" : "user"));
5918 
5919 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5920 		fd);
5921 	if (descriptor == NULL)
5922 		return B_FILE_ERROR;
5923 
5924 	struct vnode* vnode = fd_vnode(descriptor);
5925 
5926 	status_t status = B_OK;
5927 
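	// For the record-locking ops the argument is a userland pointer to a
	// struct flock that we copy in up front; all other ops treat it as a
	// plain integer.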
5928 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5929 		if (descriptor->type != FDTYPE_FILE)
5930 			status = B_BAD_VALUE;
5931 		else if (user_memcpy(&flock, (struct flock*)argument,
5932 				sizeof(struct flock)) != B_OK)
5933 			status = B_BAD_ADDRESS;
5934 
5935 		if (status != B_OK) {
5936 			put_fd(descriptor);
5937 			return status;
5938 		}
5939 	}
5940 
5941 	switch (op) {
5942 		case F_SETFD:
5943 		{
			// Set file descriptor flags
			struct io_context* context = get_current_io_context(kernel);

			// FD_CLOEXEC is the only flag available at this time
5948 			mutex_lock(&context->io_mutex);
5949 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5950 			mutex_unlock(&context->io_mutex);
5951 
5952 			status = B_OK;
5953 			break;
5954 		}
5955 
5956 		case F_GETFD:
5957 		{
5958 			struct io_context* context = get_current_io_context(kernel);
5959 
5960 			// Get file descriptor flags
5961 			mutex_lock(&context->io_mutex);
5962 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5963 			mutex_unlock(&context->io_mutex);
5964 			break;
5965 		}
5966 
5967 		case F_SETFL:
5968 			// Set file descriptor open mode
5969 
5970 			// we only accept changes to O_APPEND and O_NONBLOCK
5971 			argument &= O_APPEND | O_NONBLOCK;
5972 			if (descriptor->ops->fd_set_flags != NULL) {
5973 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5974 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5975 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5976 					(int)argument);
5977 			} else
5978 				status = B_UNSUPPORTED;
5979 
5980 			if (status == B_OK) {
5981 				// update this descriptor's open_mode field
5982 				descriptor->open_mode = (descriptor->open_mode
5983 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5984 			}
5985 
5986 			break;
5987 
5988 		case F_GETFL:
5989 			// Get file descriptor open mode
5990 			status = descriptor->open_mode;
5991 			break;
5992 
5993 		case F_DUPFD:
5994 		{
5995 			struct io_context* context = get_current_io_context(kernel);
5996 
5997 			status = new_fd_etc(context, descriptor, (int)argument);
5998 			if (status >= 0) {
5999 				mutex_lock(&context->io_mutex);
				fd_set_close_on_exec(context, status, false);
					// POSIX: the new descriptor starts with FD_CLOEXEC clear
6001 				mutex_unlock(&context->io_mutex);
6002 
6003 				atomic_add(&descriptor->ref_count, 1);
6004 			}
6005 			break;
6006 		}
6007 
6008 		case F_GETLK:
6009 			if (vnode != NULL) {
6010 				struct flock normalizedLock;
6011 
6012 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6013 				status = normalize_flock(descriptor, &normalizedLock);
6014 				if (status != B_OK)
6015 					break;
6016 
6017 				if (HAS_FS_CALL(vnode, test_lock)) {
6018 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6019 						&normalizedLock);
6020 				} else
6021 					status = test_advisory_lock(vnode, &normalizedLock);
6022 				if (status == B_OK) {
6023 					if (normalizedLock.l_type == F_UNLCK) {
6024 						// no conflicting lock found, copy back the same struct
6025 						// we were given except change type to F_UNLCK
6026 						flock.l_type = F_UNLCK;
6027 						status = user_memcpy((struct flock*)argument, &flock,
6028 							sizeof(struct flock));
6029 					} else {
6030 						// a conflicting lock was found, copy back its range and
6031 						// type
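						// (normalize_flock() presumably encodes "to EOF" as
						// l_len == OFF_MAX; translate that back to the POSIX
						// convention of l_len == 0 before copying out.)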
6032 						if (normalizedLock.l_len == OFF_MAX)
6033 							normalizedLock.l_len = 0;
6034 
6035 						status = user_memcpy((struct flock*)argument,
6036 							&normalizedLock, sizeof(struct flock));
6037 					}
6038 				}
6039 			} else
6040 				status = B_BAD_VALUE;
6041 			break;
6042 
6043 		case F_SETLK:
6044 		case F_SETLKW:
6045 			status = normalize_flock(descriptor, &flock);
6046 			if (status != B_OK)
6047 				break;
6048 
6049 			if (vnode == NULL) {
6050 				status = B_BAD_VALUE;
6051 			} else if (flock.l_type == F_UNLCK) {
6052 				if (HAS_FS_CALL(vnode, release_lock)) {
6053 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6054 						&flock);
6055 				} else
6056 					status = release_advisory_lock(vnode, &flock);
6057 			} else {
6058 				// the open mode must match the lock type
6059 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6060 						&& flock.l_type == F_WRLCK)
6061 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6062 						&& flock.l_type == F_RDLCK))
6063 					status = B_FILE_ERROR;
6064 				else {
6065 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6066 						status = FS_CALL(vnode, acquire_lock,
6067 							descriptor->cookie, &flock, op == F_SETLKW);
6068 					} else {
6069 						status = acquire_advisory_lock(vnode, -1,
6070 							&flock, op == F_SETLKW);
6071 					}
6072 				}
6073 			}
6074 			break;
6075 
		// TODO: add support for more ops?
6077 
6078 		default:
6079 			status = B_BAD_VALUE;
6080 	}
6081 
6082 	put_fd(descriptor);
6083 	return status;
6084 }
6085 
6086 
6087 static status_t
6088 common_sync(int fd, bool kernel)
6089 {
6090 	struct file_descriptor* descriptor;
6091 	struct vnode* vnode;
6092 	status_t status;
6093 
6094 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6095 
6096 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6097 	if (descriptor == NULL)
6098 		return B_FILE_ERROR;
6099 
6100 	if (HAS_FS_CALL(vnode, fsync))
6101 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6102 	else
6103 		status = B_UNSUPPORTED;
6104 
6105 	put_fd(descriptor);
6106 	return status;
6107 }
6108 
6109 
6110 static status_t
6111 common_lock_node(int fd, bool kernel)
6112 {
6113 	struct file_descriptor* descriptor;
6114 	struct vnode* vnode;
6115 
6116 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6117 	if (descriptor == NULL)
6118 		return B_FILE_ERROR;
6119 
6120 	status_t status = B_OK;
6121 
	// We need to set the lock holder atomically - someone
	// else might set one at the same time
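	// atomic_pointer_test_and_set(&ptr, newValue, testAgainst) only stores
	// newValue if ptr still equals testAgainst and always returns the
	// previous value - so a non-NULL result means some other descriptor
	// already holds the mandatory lock.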
6124 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6125 			(file_descriptor*)NULL) != NULL)
6126 		status = B_BUSY;
6127 
6128 	put_fd(descriptor);
6129 	return status;
6130 }
6131 
6132 
6133 static status_t
6134 common_unlock_node(int fd, bool kernel)
6135 {
6136 	struct file_descriptor* descriptor;
6137 	struct vnode* vnode;
6138 
6139 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6140 	if (descriptor == NULL)
6141 		return B_FILE_ERROR;
6142 
6143 	status_t status = B_OK;
6144 
	// We need to clear the lock holder atomically - someone
	// else might change it at the same time
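	// Swap back to NULL only if we are the current holder; any other
	// previous value means this descriptor doesn't own the lock.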
6147 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6148 			(file_descriptor*)NULL, descriptor) != descriptor)
6149 		status = B_BAD_VALUE;
6150 
6151 	put_fd(descriptor);
6152 	return status;
6153 }
6154 
6155 
6156 static status_t
6157 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6158 	bool kernel)
6159 {
6160 	struct vnode* vnode;
6161 	status_t status;
6162 
6163 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6164 	if (status != B_OK)
6165 		return status;
6166 
6167 	if (HAS_FS_CALL(vnode, read_symlink)) {
6168 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6169 	} else
6170 		status = B_BAD_VALUE;
6171 
6172 	put_vnode(vnode);
6173 	return status;
6174 }
6175 
6176 
6177 static status_t
6178 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6179 	bool kernel)
6180 {
6181 	// path validity checks have to be in the calling function!
6182 	char name[B_FILE_NAME_LENGTH];
6183 	struct vnode* vnode;
6184 	status_t status;
6185 
6186 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6187 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6188 
6189 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6190 	if (status != B_OK)
6191 		return status;
6192 
6193 	if (HAS_FS_CALL(vnode, create_symlink))
6194 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6195 	else {
6196 		status = HAS_FS_CALL(vnode, write)
6197 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6198 	}
6199 
6200 	put_vnode(vnode);
6201 
6202 	return status;
6203 }
6204 
6205 
6206 static status_t
6207 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6208 	bool traverseLeafLink, bool kernel)
6209 {
6210 	// path validity checks have to be in the calling function!
6211 
6212 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6213 		toPath, kernel));
6214 
6215 	char name[B_FILE_NAME_LENGTH];
6216 	struct vnode* directory;
6217 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6218 		kernel);
6219 	if (status != B_OK)
6220 		return status;
6221 
6222 	struct vnode* vnode;
6223 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6224 		kernel);
6225 	if (status != B_OK)
6226 		goto err;
6227 
6228 	if (directory->mount != vnode->mount) {
6229 		status = B_CROSS_DEVICE_LINK;
6230 		goto err1;
6231 	}
6232 
6233 	if (HAS_FS_CALL(directory, link))
6234 		status = FS_CALL(directory, link, name, vnode);
6235 	else
6236 		status = B_READ_ONLY_DEVICE;
6237 
6238 err1:
6239 	put_vnode(vnode);
6240 err:
6241 	put_vnode(directory);
6242 
6243 	return status;
6244 }
6245 
6246 
6247 static status_t
6248 common_unlink(int fd, char* path, bool kernel)
6249 {
6250 	char filename[B_FILE_NAME_LENGTH];
6251 	struct vnode* vnode;
6252 	status_t status;
6253 
6254 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6255 		kernel));
6256 
6257 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6258 	if (status < 0)
6259 		return status;
6260 
6261 	if (HAS_FS_CALL(vnode, unlink))
6262 		status = FS_CALL(vnode, unlink, filename);
6263 	else
6264 		status = B_READ_ONLY_DEVICE;
6265 
6266 	put_vnode(vnode);
6267 
6268 	return status;
6269 }
6270 
6271 
6272 static status_t
6273 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6274 {
6275 	struct vnode* vnode;
6276 	status_t status;
6277 
6278 	// TODO: honor effectiveUserGroup argument
6279 
6280 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6281 	if (status != B_OK)
6282 		return status;
6283 
6284 	if (HAS_FS_CALL(vnode, access))
6285 		status = FS_CALL(vnode, access, mode);
6286 	else
6287 		status = B_OK;
6288 
6289 	put_vnode(vnode);
6290 
6291 	return status;
6292 }
6293 
6294 
6295 static status_t
6296 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6297 {
6298 	struct vnode* fromVnode;
6299 	struct vnode* toVnode;
6300 	char fromName[B_FILE_NAME_LENGTH];
6301 	char toName[B_FILE_NAME_LENGTH];
6302 	status_t status;
6303 
6304 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6305 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6306 
6307 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6308 	if (status != B_OK)
6309 		return status;
6310 
6311 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6312 	if (status != B_OK)
6313 		goto err1;
6314 
6315 	if (fromVnode->device != toVnode->device) {
6316 		status = B_CROSS_DEVICE_LINK;
6317 		goto err2;
6318 	}
6319 
6320 	if (fromName[0] == '\0' || toName[0] == '\0'
6321 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6322 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6323 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6324 		status = B_BAD_VALUE;
6325 		goto err2;
6326 	}
6327 
6328 	if (HAS_FS_CALL(fromVnode, rename))
6329 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6330 	else
6331 		status = B_READ_ONLY_DEVICE;
6332 
6333 err2:
6334 	put_vnode(toVnode);
6335 err1:
6336 	put_vnode(fromVnode);
6337 
6338 	return status;
6339 }
6340 
6341 
6342 static status_t
6343 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6344 {
6345 	struct vnode* vnode = descriptor->u.vnode;
6346 
6347 	FUNCTION(("common_read_stat: stat %p\n", stat));
6348 
6349 	// TODO: remove this once all file systems properly set them!
6350 	stat->st_crtim.tv_nsec = 0;
6351 	stat->st_ctim.tv_nsec = 0;
6352 	stat->st_mtim.tv_nsec = 0;
6353 	stat->st_atim.tv_nsec = 0;
6354 
6355 	status_t status = FS_CALL(vnode, read_stat, stat);
6356 
6357 	// fill in the st_dev and st_ino fields
6358 	if (status == B_OK) {
6359 		stat->st_dev = vnode->device;
6360 		stat->st_ino = vnode->id;
6361 		stat->st_rdev = -1;
6362 	}
6363 
6364 	return status;
6365 }
6366 
6367 
6368 static status_t
6369 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6370 	int statMask)
6371 {
6372 	struct vnode* vnode = descriptor->u.vnode;
6373 
6374 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6375 		vnode, stat, statMask));
6376 
6377 	if (!HAS_FS_CALL(vnode, write_stat))
6378 		return B_READ_ONLY_DEVICE;
6379 
6380 	return FS_CALL(vnode, write_stat, stat, statMask);
6381 }
6382 
6383 
6384 static status_t
6385 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6386 	struct stat* stat, bool kernel)
6387 {
6388 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6389 		stat));
6390 
6391 	struct vnode* vnode;
6392 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6393 		NULL, kernel);
6394 	if (status != B_OK)
6395 		return status;
6396 
6397 	status = FS_CALL(vnode, read_stat, stat);
6398 
6399 	// fill in the st_dev and st_ino fields
6400 	if (status == B_OK) {
6401 		stat->st_dev = vnode->device;
6402 		stat->st_ino = vnode->id;
6403 		stat->st_rdev = -1;
6404 	}
6405 
6406 	put_vnode(vnode);
6407 	return status;
6408 }
6409 
6410 
6411 static status_t
6412 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6413 	const struct stat* stat, int statMask, bool kernel)
6414 {
6415 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6416 		"kernel %d\n", fd, path, stat, statMask, kernel));
6417 
6418 	struct vnode* vnode;
6419 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6420 		NULL, kernel);
6421 	if (status != B_OK)
6422 		return status;
6423 
6424 	if (HAS_FS_CALL(vnode, write_stat))
6425 		status = FS_CALL(vnode, write_stat, stat, statMask);
6426 	else
6427 		status = B_READ_ONLY_DEVICE;
6428 
6429 	put_vnode(vnode);
6430 
6431 	return status;
6432 }
6433 
6434 
6435 static int
6436 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6437 {
6438 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6439 		kernel));
6440 
6441 	struct vnode* vnode;
6442 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6443 		NULL, kernel);
6444 	if (status != B_OK)
6445 		return status;
6446 
6447 	status = open_attr_dir_vnode(vnode, kernel);
6448 	if (status < 0)
6449 		put_vnode(vnode);
6450 
6451 	return status;
6452 }
6453 
6454 
6455 static status_t
6456 attr_dir_close(struct file_descriptor* descriptor)
6457 {
6458 	struct vnode* vnode = descriptor->u.vnode;
6459 
6460 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6461 
6462 	if (HAS_FS_CALL(vnode, close_attr_dir))
6463 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6464 
6465 	return B_OK;
6466 }
6467 
6468 
6469 static void
6470 attr_dir_free_fd(struct file_descriptor* descriptor)
6471 {
6472 	struct vnode* vnode = descriptor->u.vnode;
6473 
6474 	if (vnode != NULL) {
6475 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6476 		put_vnode(vnode);
6477 	}
6478 }
6479 
6480 
6481 static status_t
6482 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6483 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6484 {
6485 	struct vnode* vnode = descriptor->u.vnode;
6486 
6487 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6488 
6489 	if (HAS_FS_CALL(vnode, read_attr_dir))
6490 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6491 			bufferSize, _count);
6492 
6493 	return B_UNSUPPORTED;
6494 }
6495 
6496 
6497 static status_t
6498 attr_dir_rewind(struct file_descriptor* descriptor)
6499 {
6500 	struct vnode* vnode = descriptor->u.vnode;
6501 
6502 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6503 
6504 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6505 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6506 
6507 	return B_UNSUPPORTED;
6508 }
6509 
6510 
6511 static int
6512 attr_create(int fd, char* path, const char* name, uint32 type,
6513 	int openMode, bool kernel)
6514 {
6515 	if (name == NULL || *name == '\0')
6516 		return B_BAD_VALUE;
6517 
6518 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6519 	struct vnode* vnode;
6520 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6521 		kernel);
6522 	if (status != B_OK)
6523 		return status;
6524 
6525 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6526 		status = B_LINK_LIMIT;
6527 		goto err;
6528 	}
6529 
6530 	if (!HAS_FS_CALL(vnode, create_attr)) {
6531 		status = B_READ_ONLY_DEVICE;
6532 		goto err;
6533 	}
6534 
6535 	void* cookie;
6536 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6537 	if (status != B_OK)
6538 		goto err;
6539 
6540 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6541 	if (fd >= 0)
6542 		return fd;
6543 
6544 	status = fd;
6545 
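	// Getting an FD failed: roll back by closing and freeing the cookie and
	// removing the attribute we just created.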
6546 	FS_CALL(vnode, close_attr, cookie);
6547 	FS_CALL(vnode, free_attr_cookie, cookie);
6548 
6549 	FS_CALL(vnode, remove_attr, name);
6550 
6551 err:
6552 	put_vnode(vnode);
6553 
6554 	return status;
6555 }
6556 
6557 
6558 static int
6559 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6560 {
6561 	if (name == NULL || *name == '\0')
6562 		return B_BAD_VALUE;
6563 
6564 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6565 	struct vnode* vnode;
6566 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6567 		kernel);
6568 	if (status != B_OK)
6569 		return status;
6570 
6571 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6572 		status = B_LINK_LIMIT;
6573 		goto err;
6574 	}
6575 
6576 	if (!HAS_FS_CALL(vnode, open_attr)) {
6577 		status = B_UNSUPPORTED;
6578 		goto err;
6579 	}
6580 
6581 	void* cookie;
6582 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6583 	if (status != B_OK)
6584 		goto err;
6585 
6586 	// now we only need a file descriptor for this attribute and we're done
6587 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6588 	if (fd >= 0)
6589 		return fd;
6590 
6591 	status = fd;
6592 
6593 	FS_CALL(vnode, close_attr, cookie);
6594 	FS_CALL(vnode, free_attr_cookie, cookie);
6595 
6596 err:
6597 	put_vnode(vnode);
6598 
6599 	return status;
6600 }
6601 
6602 
6603 static status_t
6604 attr_close(struct file_descriptor* descriptor)
6605 {
6606 	struct vnode* vnode = descriptor->u.vnode;
6607 
6608 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6609 
6610 	if (HAS_FS_CALL(vnode, close_attr))
6611 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6612 
6613 	return B_OK;
6614 }
6615 
6616 
6617 static void
6618 attr_free_fd(struct file_descriptor* descriptor)
6619 {
6620 	struct vnode* vnode = descriptor->u.vnode;
6621 
6622 	if (vnode != NULL) {
6623 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6624 		put_vnode(vnode);
6625 	}
6626 }
6627 
6628 
6629 static status_t
6630 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6631 	size_t* length)
6632 {
6633 	struct vnode* vnode = descriptor->u.vnode;
6634 
6635 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6636 		pos, length, *length));
6637 
6638 	if (!HAS_FS_CALL(vnode, read_attr))
6639 		return B_UNSUPPORTED;
6640 
6641 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6642 }
6643 
6644 
6645 static status_t
6646 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6647 	size_t* length)
6648 {
6649 	struct vnode* vnode = descriptor->u.vnode;
6650 
6651 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6652 		length));
6653 
6654 	if (!HAS_FS_CALL(vnode, write_attr))
6655 		return B_UNSUPPORTED;
6656 
6657 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6658 }
6659 
6660 
6661 static off_t
6662 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6663 {
6664 	off_t offset;
6665 
6666 	switch (seekType) {
6667 		case SEEK_SET:
6668 			offset = 0;
6669 			break;
6670 		case SEEK_CUR:
6671 			offset = descriptor->pos;
6672 			break;
6673 		case SEEK_END:
6674 		{
6675 			struct vnode* vnode = descriptor->u.vnode;
			if (!HAS_FS_CALL(vnode, read_attr_stat))
6677 				return B_UNSUPPORTED;
6678 
6679 			struct stat stat;
6680 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6681 				&stat);
6682 			if (status != B_OK)
6683 				return status;
6684 
6685 			offset = stat.st_size;
6686 			break;
6687 		}
6688 		default:
6689 			return B_BAD_VALUE;
6690 	}
6691 
6692 	// assumes off_t is 64 bits wide
6693 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6694 		return B_BUFFER_OVERFLOW;
6695 
6696 	pos += offset;
6697 	if (pos < 0)
6698 		return B_BAD_VALUE;
6699 
6700 	return descriptor->pos = pos;
6701 }
6702 
6703 
6704 static status_t
6705 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6706 {
6707 	struct vnode* vnode = descriptor->u.vnode;
6708 
6709 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6710 
6711 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6712 		return B_UNSUPPORTED;
6713 
6714 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6715 }
6716 
6717 
6718 static status_t
6719 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6720 	int statMask)
6721 {
6722 	struct vnode* vnode = descriptor->u.vnode;
6723 
6724 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6725 
6726 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6727 		return B_READ_ONLY_DEVICE;
6728 
6729 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6730 }
6731 
6732 
6733 static status_t
6734 attr_remove(int fd, const char* name, bool kernel)
6735 {
6736 	struct file_descriptor* descriptor;
6737 	struct vnode* vnode;
6738 	status_t status;
6739 
6740 	if (name == NULL || *name == '\0')
6741 		return B_BAD_VALUE;
6742 
6743 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6744 		kernel));
6745 
6746 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6747 	if (descriptor == NULL)
6748 		return B_FILE_ERROR;
6749 
6750 	if (HAS_FS_CALL(vnode, remove_attr))
6751 		status = FS_CALL(vnode, remove_attr, name);
6752 	else
6753 		status = B_READ_ONLY_DEVICE;
6754 
6755 	put_fd(descriptor);
6756 
6757 	return status;
6758 }
6759 
6760 
6761 static status_t
6762 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6763 	bool kernel)
6764 {
6765 	struct file_descriptor* fromDescriptor;
6766 	struct file_descriptor* toDescriptor;
6767 	struct vnode* fromVnode;
6768 	struct vnode* toVnode;
6769 	status_t status;
6770 
6771 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6772 		|| *toName == '\0')
6773 		return B_BAD_VALUE;
6774 
6775 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6776 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6777 
6778 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6779 	if (fromDescriptor == NULL)
6780 		return B_FILE_ERROR;
6781 
6782 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6783 	if (toDescriptor == NULL) {
6784 		status = B_FILE_ERROR;
6785 		goto err;
6786 	}
6787 
6788 	// are the files on the same volume?
6789 	if (fromVnode->device != toVnode->device) {
6790 		status = B_CROSS_DEVICE_LINK;
6791 		goto err1;
6792 	}
6793 
6794 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6795 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6796 	} else
6797 		status = B_READ_ONLY_DEVICE;
6798 
6799 err1:
6800 	put_fd(toDescriptor);
6801 err:
6802 	put_fd(fromDescriptor);
6803 
6804 	return status;
6805 }
6806 
6807 
6808 static int
6809 index_dir_open(dev_t mountID, bool kernel)
6810 {
6811 	struct fs_mount* mount;
6812 	void* cookie;
6813 
6814 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6815 		kernel));
6816 
6817 	status_t status = get_mount(mountID, &mount);
6818 	if (status != B_OK)
6819 		return status;
6820 
6821 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6822 		status = B_UNSUPPORTED;
6823 		goto error;
6824 	}
6825 
6826 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6827 	if (status != B_OK)
6828 		goto error;
6829 
6830 	// get fd for the index directory
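	// (the descriptor references the mount, not a vnode - cf.
	// index_dir_free_fd(), which puts the mount reference again)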
6831 	int fd;
6832 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6833 	if (fd >= 0)
6834 		return fd;
6835 
6836 	// something went wrong
6837 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6838 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6839 
6840 	status = fd;
6841 
6842 error:
6843 	put_mount(mount);
6844 	return status;
6845 }
6846 
6847 
6848 static status_t
6849 index_dir_close(struct file_descriptor* descriptor)
6850 {
6851 	struct fs_mount* mount = descriptor->u.mount;
6852 
6853 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6854 
6855 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6856 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6857 
6858 	return B_OK;
6859 }
6860 
6861 
6862 static void
6863 index_dir_free_fd(struct file_descriptor* descriptor)
6864 {
6865 	struct fs_mount* mount = descriptor->u.mount;
6866 
6867 	if (mount != NULL) {
6868 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6869 		put_mount(mount);
6870 	}
6871 }
6872 
6873 
6874 static status_t
6875 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6876 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6877 {
6878 	struct fs_mount* mount = descriptor->u.mount;
6879 
6880 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6881 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6882 			bufferSize, _count);
6883 	}
6884 
6885 	return B_UNSUPPORTED;
6886 }
6887 
6888 
6889 static status_t
6890 index_dir_rewind(struct file_descriptor* descriptor)
6891 {
6892 	struct fs_mount* mount = descriptor->u.mount;
6893 
6894 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6895 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6896 
6897 	return B_UNSUPPORTED;
6898 }
6899 
6900 
6901 static status_t
6902 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6903 	bool kernel)
6904 {
6905 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6906 		mountID, name, kernel));
6907 
6908 	struct fs_mount* mount;
6909 	status_t status = get_mount(mountID, &mount);
6910 	if (status != B_OK)
6911 		return status;
6912 
6913 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6914 		status = B_READ_ONLY_DEVICE;
6915 		goto out;
6916 	}
6917 
6918 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6919 
6920 out:
6921 	put_mount(mount);
6922 	return status;
6923 }
6924 
6925 
6926 #if 0
6927 static status_t
6928 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6929 {
6930 	struct vnode* vnode = descriptor->u.vnode;
6931 
	// TODO: currently unused!
6933 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6934 	if (!HAS_FS_CALL(vnode, read_index_stat))
6935 		return B_UNSUPPORTED;
6936 
6937 	return B_UNSUPPORTED;
6938 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6939 }
6940 
6941 
6942 static void
6943 index_free_fd(struct file_descriptor* descriptor)
6944 {
6945 	struct vnode* vnode = descriptor->u.vnode;
6946 
6947 	if (vnode != NULL) {
6948 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6949 		put_vnode(vnode);
6950 	}
6951 }
6952 #endif
6953 
6954 
6955 static status_t
6956 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6957 	bool kernel)
6958 {
6959 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6960 		mountID, name, kernel));
6961 
6962 	struct fs_mount* mount;
6963 	status_t status = get_mount(mountID, &mount);
6964 	if (status != B_OK)
6965 		return status;
6966 
6967 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6968 		status = B_UNSUPPORTED;
6969 		goto out;
6970 	}
6971 
6972 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6973 
6974 out:
6975 	put_mount(mount);
6976 	return status;
6977 }
6978 
6979 
6980 static status_t
6981 index_remove(dev_t mountID, const char* name, bool kernel)
6982 {
6983 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6984 		mountID, name, kernel));
6985 
6986 	struct fs_mount* mount;
6987 	status_t status = get_mount(mountID, &mount);
6988 	if (status != B_OK)
6989 		return status;
6990 
6991 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6992 		status = B_READ_ONLY_DEVICE;
6993 		goto out;
6994 	}
6995 
6996 	status = FS_MOUNT_CALL(mount, remove_index, name);
6997 
6998 out:
6999 	put_mount(mount);
7000 	return status;
7001 }
7002 
7003 
7004 /*!	TODO: the query FS API is still the pretty much the same as in R5.
7005 		It would be nice if the FS would find some more kernel support
7006 		for them.
7007 		For example, query parsing should be moved into the kernel.
7008 */
7009 static int
7010 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7011 	int32 token, bool kernel)
7012 {
7013 	struct fs_mount* mount;
7014 	void* cookie;
7015 
7016 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7017 		device, query, kernel));
7018 
7019 	status_t status = get_mount(device, &mount);
7020 	if (status != B_OK)
7021 		return status;
7022 
7023 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7024 		status = B_UNSUPPORTED;
7025 		goto error;
7026 	}
7027 
7028 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7029 		&cookie);
7030 	if (status != B_OK)
7031 		goto error;
7032 
	// get fd for the query
7034 	int fd;
7035 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7036 	if (fd >= 0)
7037 		return fd;
7038 
7039 	status = fd;
7040 
7041 	// something went wrong
7042 	FS_MOUNT_CALL(mount, close_query, cookie);
7043 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7044 
7045 error:
7046 	put_mount(mount);
7047 	return status;
7048 }
7049 
7050 
7051 static status_t
7052 query_close(struct file_descriptor* descriptor)
7053 {
7054 	struct fs_mount* mount = descriptor->u.mount;
7055 
7056 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7057 
7058 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7059 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7060 
7061 	return B_OK;
7062 }
7063 
7064 
7065 static void
7066 query_free_fd(struct file_descriptor* descriptor)
7067 {
7068 	struct fs_mount* mount = descriptor->u.mount;
7069 
7070 	if (mount != NULL) {
7071 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7072 		put_mount(mount);
7073 	}
7074 }
7075 
7076 
7077 static status_t
7078 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7079 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7080 {
7081 	struct fs_mount* mount = descriptor->u.mount;
7082 
7083 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7084 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7085 			bufferSize, _count);
7086 	}
7087 
7088 	return B_UNSUPPORTED;
7089 }
7090 
7091 
7092 static status_t
7093 query_rewind(struct file_descriptor* descriptor)
7094 {
7095 	struct fs_mount* mount = descriptor->u.mount;
7096 
7097 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7098 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7099 
7100 	return B_UNSUPPORTED;
7101 }
7102 
7103 
7104 //	#pragma mark - General File System functions
7105 
7106 
7107 static dev_t
7108 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7109 	const char* args, bool kernel)
7110 {
7111 	struct ::fs_mount* mount;
7112 	status_t status = B_OK;
7113 	fs_volume* volume = NULL;
7114 	int32 layer = 0;
7115 	Vnode* coveredNode = NULL;
7116 
7117 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7118 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7119 
	// The path is always safe, we just have to make sure that fsName is
	// at least superficially valid - we can't make any assumptions about
	// args, though.
	// A NULL fsName is OK if a device was given and the FS is not virtual.
	// We'll get it from the DDM later.
7124 	if (fsName == NULL) {
7125 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7126 			return B_BAD_VALUE;
7127 	} else if (fsName[0] == '\0')
7128 		return B_BAD_VALUE;
7129 
7130 	RecursiveLocker mountOpLocker(sMountOpLock);
7131 
7132 	// Helper to delete a newly created file device on failure.
7133 	// Not exactly beautiful, but helps to keep the code below cleaner.
7134 	struct FileDeviceDeleter {
7135 		FileDeviceDeleter() : id(-1) {}
7136 		~FileDeviceDeleter()
7137 		{
7138 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7139 		}
7140 
7141 		partition_id id;
7142 	} fileDeviceDeleter;
7143 
7144 	// If the file system is not a "virtual" one, the device argument should
7145 	// point to a real file/device (if given at all).
7146 	// get the partition
7147 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7148 	KPartition* partition = NULL;
7149 	KPath normalizedDevice;
7150 	bool newlyCreatedFileDevice = false;
7151 
7152 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7153 		// normalize the device path
7154 		status = normalizedDevice.SetTo(device, true);
7155 		if (status != B_OK)
7156 			return status;
7157 
7158 		// get a corresponding partition from the DDM
7159 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7160 		if (partition == NULL) {
			// Partition not found: this either means the user supplied
			// an invalid path, or the path refers to an image file. We try
			// to let the DDM create a file device for the path.
7164 			partition_id deviceID = ddm->CreateFileDevice(
7165 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7166 			if (deviceID >= 0) {
7167 				partition = ddm->RegisterPartition(deviceID);
7168 				if (newlyCreatedFileDevice)
7169 					fileDeviceDeleter.id = deviceID;
7170 			}
7171 		}
7172 
7173 		if (!partition) {
7174 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7175 				normalizedDevice.Path()));
7176 			return B_ENTRY_NOT_FOUND;
7177 		}
7178 
7179 		device = normalizedDevice.Path();
7180 			// correct path to file device
7181 	}
7182 	PartitionRegistrar partitionRegistrar(partition, true);
7183 
	// Write lock the partition's device. For the time being, we keep the lock
	// until we're done mounting -- not nice, but it ensures that no-one is
	// interfering.
7187 	// TODO: Just mark the partition busy while mounting!
7188 	KDiskDevice* diskDevice = NULL;
7189 	if (partition) {
7190 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7191 		if (!diskDevice) {
7192 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7193 			return B_ERROR;
7194 		}
7195 	}
7196 
7197 	DeviceWriteLocker writeLocker(diskDevice, true);
7198 		// this takes over the write lock acquired before
7199 
7200 	if (partition != NULL) {
		// make sure that the partition is not busy
7202 		if (partition->IsBusy()) {
7203 			TRACE(("fs_mount(): Partition is busy.\n"));
7204 			return B_BUSY;
7205 		}
7206 
7207 		// if no FS name had been supplied, we get it from the partition
7208 		if (fsName == NULL) {
7209 			KDiskSystem* diskSystem = partition->DiskSystem();
7210 			if (!diskSystem) {
7211 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7212 					"recognize it.\n"));
7213 				return B_BAD_VALUE;
7214 			}
7215 
7216 			if (!diskSystem->IsFileSystem()) {
7217 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7218 					"partitioning system.\n"));
7219 				return B_BAD_VALUE;
7220 			}
7221 
7222 			// The disk system name will not change, and the KDiskSystem
7223 			// object will not go away while the disk device is locked (and
7224 			// the partition has a reference to it), so this is safe.
7225 			fsName = diskSystem->Name();
7226 		}
7227 	}
7228 
7229 	mount = new(std::nothrow) (struct ::fs_mount);
7230 	if (mount == NULL)
7231 		return B_NO_MEMORY;
7232 
7233 	mount->device_name = strdup(device);
7234 		// "device" can be NULL
7235 
7236 	status = mount->entry_cache.Init();
7237 	if (status != B_OK)
7238 		goto err1;
7239 
7240 	// initialize structure
7241 	mount->id = sNextMountID++;
7242 	mount->partition = NULL;
7243 	mount->root_vnode = NULL;
7244 	mount->covers_vnode = NULL;
7245 	mount->unmounting = false;
7246 	mount->owns_file_device = false;
7247 	mount->volume = NULL;
7248 
7249 	// build up the volume(s)
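	// When get_file_system_name_for_layer() yields more than one name, one
	// fs_volume is created per layer. The volumes are chained via their
	// super_volume/sub_volume links, and mount->volume ends up pointing at
	// the top-most layer - the one the VFS talks to.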
7250 	while (true) {
7251 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7252 		if (layerFSName == NULL) {
7253 			if (layer == 0) {
7254 				status = B_NO_MEMORY;
7255 				goto err1;
7256 			}
7257 
7258 			break;
7259 		}
7260 		MemoryDeleter layerFSNameDeleter(layerFSName);
7261 
7262 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7263 		if (volume == NULL) {
7264 			status = B_NO_MEMORY;
7265 			goto err1;
7266 		}
7267 
7268 		volume->id = mount->id;
7269 		volume->partition = partition != NULL ? partition->ID() : -1;
7270 		volume->layer = layer++;
7271 		volume->private_volume = NULL;
7272 		volume->ops = NULL;
7273 		volume->sub_volume = NULL;
7274 		volume->super_volume = NULL;
7275 		volume->file_system = NULL;
7276 		volume->file_system_name = NULL;
7277 
7278 		volume->file_system_name = get_file_system_name(layerFSName);
7279 		if (volume->file_system_name == NULL) {
7280 			status = B_NO_MEMORY;
7281 			free(volume);
7282 			goto err1;
7283 		}
7284 
7285 		volume->file_system = get_file_system(layerFSName);
7286 		if (volume->file_system == NULL) {
7287 			status = B_DEVICE_NOT_FOUND;
7288 			free(volume->file_system_name);
7289 			free(volume);
7290 			goto err1;
7291 		}
7292 
7293 		if (mount->volume == NULL)
7294 			mount->volume = volume;
7295 		else {
7296 			volume->super_volume = mount->volume;
7297 			mount->volume->sub_volume = volume;
7298 			mount->volume = volume;
7299 		}
7300 	}
7301 
7302 	// insert mount struct into list before we call FS's mount() function
7303 	// so that vnodes can be created for this mount
7304 	mutex_lock(&sMountMutex);
7305 	hash_insert(sMountsTable, mount);
7306 	mutex_unlock(&sMountMutex);
7307 
7308 	ino_t rootID;
7309 
7310 	if (!sRoot) {
7311 		// we haven't mounted anything yet
7312 		if (strcmp(path, "/") != 0) {
7313 			status = B_ERROR;
7314 			goto err2;
7315 		}
7316 
7317 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7318 			args, &rootID);
7319 		if (status != 0)
7320 			goto err2;
7321 	} else {
7322 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7323 		if (status != B_OK)
7324 			goto err2;
7325 
7326 		mount->covers_vnode = coveredNode;
7327 
7328 		// make sure covered_vnode is a directory
7329 		if (!S_ISDIR(coveredNode->Type())) {
7330 			status = B_NOT_A_DIRECTORY;
7331 			goto err3;
7332 		}
7333 
7334 		if (coveredNode->IsCovered()) {
7335 			// this is already a covered vnode
7336 			status = B_BUSY;
7337 			goto err3;
7338 		}
7339 
7340 		// mount it/them
7341 		fs_volume* volume = mount->volume;
7342 		while (volume) {
7343 			status = volume->file_system->mount(volume, device, flags, args,
7344 				&rootID);
7345 			if (status != B_OK) {
7346 				if (volume->sub_volume)
7347 					goto err4;
7348 				goto err3;
7349 			}
7350 
7351 			volume = volume->super_volume;
7352 		}
7353 
7354 		volume = mount->volume;
7355 		while (volume) {
7356 			if (volume->ops->all_layers_mounted != NULL)
7357 				volume->ops->all_layers_mounted(volume);
7358 			volume = volume->super_volume;
7359 		}
7360 	}
7361 
7362 	// the root node is supposed to be owned by the file system - it must
7363 	// exist at this point
7364 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7365 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7366 		panic("fs_mount: file system does not own its root node!\n");
7367 		status = B_ERROR;
7368 		goto err4;
7369 	}
7370 
7371 	// set up the links between the root vnode and the vnode it covers
7372 	rw_lock_write_lock(&sVnodeLock);
7373 	if (coveredNode != NULL) {
7374 		if (coveredNode->IsCovered()) {
7375 			// the vnode is covered now
7376 			status = B_BUSY;
7377 			rw_lock_write_unlock(&sVnodeLock);
7378 			goto err4;
7379 		}
7380 
7381 		mount->root_vnode->covers = coveredNode;
7382 		mount->root_vnode->SetCovering(true);
7383 
7384 		coveredNode->covered_by = mount->root_vnode;
7385 		coveredNode->SetCovered(true);
7386 	}
7387 	rw_lock_write_unlock(&sVnodeLock);
7388 
7389 	if (!sRoot) {
7390 		sRoot = mount->root_vnode;
7391 		mutex_lock(&sIOContextRootLock);
7392 		get_current_io_context(true)->root = sRoot;
7393 		mutex_unlock(&sIOContextRootLock);
7394 		inc_vnode_ref_count(sRoot);
7395 	}
7396 
7397 	// supply the partition (if any) with the mount cookie and mark it mounted
7398 	if (partition) {
7399 		partition->SetMountCookie(mount->volume->private_volume);
7400 		partition->SetVolumeID(mount->id);
7401 
7402 		// keep a partition reference as long as the partition is mounted
7403 		partitionRegistrar.Detach();
7404 		mount->partition = partition;
7405 		mount->owns_file_device = newlyCreatedFileDevice;
7406 		fileDeviceDeleter.id = -1;
7407 	}
7408 
7409 	notify_mount(mount->id,
7410 		coveredNode != NULL ? coveredNode->device : -1,
7411 		coveredNode ? coveredNode->id : -1);
7412 
7413 	return mount->id;
7414 
7415 err4:
7416 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7417 err3:
7418 	if (coveredNode != NULL)
7419 		put_vnode(coveredNode);
7420 err2:
7421 	mutex_lock(&sMountMutex);
7422 	hash_remove(sMountsTable, mount);
7423 	mutex_unlock(&sMountMutex);
7424 err1:
7425 	delete mount;
7426 
7427 	return status;
7428 }
7429 
7430 
7431 static status_t
7432 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7433 {
7434 	struct fs_mount* mount;
7435 	status_t err;
7436 
7437 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7438 		mountID, kernel));
7439 
7440 	struct vnode* pathVnode = NULL;
7441 	if (path != NULL) {
7442 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7443 		if (err != B_OK)
7444 			return B_ENTRY_NOT_FOUND;
7445 	}
7446 
7447 	RecursiveLocker mountOpLocker(sMountOpLock);
7448 
	// This lock is not strictly necessary, but is taken here in the KDEBUG
	// case to keep the ASSERT in find_mount() working.
7451 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7452 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7453 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7454 	if (mount == NULL) {
7455 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7456 			pathVnode);
7457 	}
7458 
7459 	if (path != NULL) {
7460 		put_vnode(pathVnode);
7461 
7462 		if (mount->root_vnode != pathVnode) {
7463 			// not mountpoint
7464 			return B_BAD_VALUE;
7465 		}
7466 	}
7467 
7468 	// if the volume is associated with a partition, lock the device of the
7469 	// partition as long as we are unmounting
7470 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7471 	KPartition* partition = mount->partition;
7472 	KDiskDevice* diskDevice = NULL;
7473 	if (partition != NULL) {
7474 		if (partition->Device() == NULL) {
7475 			dprintf("fs_unmount(): There is no device!\n");
7476 			return B_ERROR;
7477 		}
7478 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7479 		if (!diskDevice) {
7480 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7481 			return B_ERROR;
7482 		}
7483 	}
7484 	DeviceWriteLocker writeLocker(diskDevice, true);
7485 
	// make sure that the partition is not busy
7487 	if (partition != NULL) {
7488 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7489 			TRACE(("fs_unmount(): Partition is busy.\n"));
7490 			return B_BUSY;
7491 		}
7492 	}
7493 
	// grab the vnode lock for writing to keep anyone from creating
	// a vnode while we're figuring out if we can continue
7496 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7497 
7498 	bool disconnectedDescriptors = false;
7499 
7500 	while (true) {
7501 		bool busy = false;
7502 
		// cycle through the list of vnodes associated with this mount and
		// make sure none of them is busy or still has references
7505 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7506 		while (struct vnode* vnode = iterator.Next()) {
7507 			if (vnode->IsBusy()) {
7508 				busy = true;
7509 				break;
7510 			}
7511 
7512 			// check the vnode's ref count -- subtract additional references for
7513 			// covering
7514 			int32 refCount = vnode->ref_count;
7515 			if (vnode->covers != NULL)
7516 				refCount--;
7517 			if (vnode->covered_by != NULL)
7518 				refCount--;
7519 
7520 			if (refCount != 0) {
7521 				// there are still vnodes in use on this mount, so we cannot
7522 				// unmount yet
7523 				busy = true;
7524 				break;
7525 			}
7526 		}
7527 
7528 		if (!busy)
7529 			break;
7530 
7531 		if ((flags & B_FORCE_UNMOUNT) == 0)
7532 			return B_BUSY;
7533 
7534 		if (disconnectedDescriptors) {
7535 			// wait a bit until the last access is finished, and then try again
7536 			vnodesWriteLocker.Unlock();
7537 			snooze(100000);
7538 			// TODO: if there is some kind of bug that prevents the ref counts
7539 			// from getting back to zero, this will fall into an endless loop...
7540 			vnodesWriteLocker.Lock();
7541 			continue;
7542 		}
7543 
7544 		// the file system is still busy - but we're forced to unmount it,
7545 		// so let's disconnect all open file descriptors
7546 
7547 		mount->unmounting = true;
7548 			// prevent new vnodes from being created
7549 
7550 		vnodesWriteLocker.Unlock();
7551 
7552 		disconnect_mount_or_vnode_fds(mount, NULL);
7553 		disconnectedDescriptors = true;
7554 
7555 		vnodesWriteLocker.Lock();
7556 	}
7557 
	// We can safely continue. Mark all of the vnodes busy and put this mount
	// structure into unmounting state. Also undo the vnode covers/covered_by
	// links.
7561 	mount->unmounting = true;
7562 
7563 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7564 	while (struct vnode* vnode = iterator.Next()) {
7565 		// Remove all covers/covered_by links from other mounts' nodes to this
7566 		// vnode and adjust the node ref count accordingly. We will release the
7567 		// references to the external vnodes below.
7568 		if (Vnode* coveredNode = vnode->covers) {
7569 			if (Vnode* coveringNode = vnode->covered_by) {
7570 				// We have both covered and covering vnodes, so just remove us
7571 				// from the chain.
7572 				coveredNode->covered_by = coveringNode;
7573 				coveringNode->covers = coveredNode;
7574 				vnode->ref_count -= 2;
7575 
7576 				vnode->covered_by = NULL;
7577 				vnode->covers = NULL;
7578 				vnode->SetCovering(false);
7579 				vnode->SetCovered(false);
7580 			} else {
7581 				// We only have a covered vnode. Remove its link to us.
7582 				coveredNode->covered_by = NULL;
7583 				coveredNode->SetCovered(false);
7584 				vnode->ref_count--;
7585 
				// If the other node is an external vnode, we keep its link
				// around so we can put the reference later on. Otherwise
				// we get rid of it right now.
7589 				if (coveredNode->mount == mount) {
7590 					vnode->covers = NULL;
7591 					coveredNode->ref_count--;
7592 				}
7593 			}
7594 		} else if (Vnode* coveringNode = vnode->covered_by) {
7595 			// We only have a covering vnode. Remove its link to us.
7596 			coveringNode->covers = NULL;
7597 			coveringNode->SetCovering(false);
7598 			vnode->ref_count--;
7599 
			// If the other node is an external vnode, we keep its link
			// around so we can put the reference later on. Otherwise
			// we get rid of it right now.
7603 			if (coveringNode->mount == mount) {
7604 				vnode->covered_by = NULL;
7605 				coveringNode->ref_count--;
7606 			}
7607 		}
7608 
7609 		vnode->SetBusy(true);
7610 		vnode_to_be_freed(vnode);
7611 	}
7612 
7613 	vnodesWriteLocker.Unlock();
7614 
	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so
	// we don't have to do that ourselves.
7618 	while (struct vnode* vnode = mount->vnodes.Head()) {
7619 		// Put the references to external covered/covering vnodes we kept above.
7620 		if (Vnode* coveredNode = vnode->covers)
7621 			put_vnode(coveredNode);
7622 		if (Vnode* coveringNode = vnode->covered_by)
7623 			put_vnode(coveringNode);
7624 
7625 		free_vnode(vnode, false);
7626 	}
7627 
7628 	// remove the mount structure from the hash table
7629 	mutex_lock(&sMountMutex);
7630 	hash_remove(sMountsTable, mount);
7631 	mutex_unlock(&sMountMutex);
7632 
7633 	mountOpLocker.Unlock();
7634 
7635 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7636 	notify_unmount(mount->id);
7637 
7638 	// dereference the partition and mark it unmounted
7639 	if (partition) {
7640 		partition->SetVolumeID(-1);
7641 		partition->SetMountCookie(NULL);
7642 
7643 		if (mount->owns_file_device)
7644 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7645 		partition->Unregister();
7646 	}
7647 
7648 	delete mount;
7649 	return B_OK;
7650 }
7651 
7652 
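/*!	Writes back all modified pages of the given volume's file caches and then
	lets the underlying file system sync itself.
*/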
7653 static status_t
7654 fs_sync(dev_t device)
7655 {
7656 	struct fs_mount* mount;
7657 	status_t status = get_mount(device, &mount);
7658 	if (status != B_OK)
7659 		return status;
7660 
7661 	struct vnode marker;
7662 	memset(&marker, 0, sizeof(marker));
7663 	marker.SetBusy(true);
7664 	marker.SetRemoved(true);
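	// The marker is a dummy vnode that is temporarily inserted into the
	// mount's vnode list, so that we can remember our position across loop
	// iterations while dropping the locks in between.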
7665 
7666 	// First, synchronize all file caches
7667 
7668 	while (true) {
7669 		WriteLocker locker(sVnodeLock);
7670 			// Note: That's the easy way. Which is probably OK for sync(),
7671 			// since it's a relatively rare call and doesn't need to allow for
7672 			// a lot of concurrency. Using a read lock would be possible, but
7673 			// also more involved, since we had to lock the individual nodes
7674 			// and take care of the locking order, which we might not want to
7675 			// do while holding fs_mount::rlock.
7676 
7677 		// synchronize access to vnode list
7678 		recursive_lock_lock(&mount->rlock);
7679 
7680 		struct vnode* vnode;
7681 		if (!marker.IsRemoved()) {
7682 			vnode = mount->vnodes.GetNext(&marker);
7683 			mount->vnodes.Remove(&marker);
7684 			marker.SetRemoved(true);
7685 		} else
7686 			vnode = mount->vnodes.First();
7687 
7688 		while (vnode != NULL && (vnode->cache == NULL
7689 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7690 			// TODO: we could track writes (and writable mapped vnodes)
7691 			//	and have a simple flag that we could test for here
7692 			vnode = mount->vnodes.GetNext(vnode);
7693 		}
7694 
7695 		if (vnode != NULL) {
7696 			// insert marker vnode again
7697 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7698 			marker.SetRemoved(false);
7699 		}
7700 
7701 		recursive_lock_unlock(&mount->rlock);
7702 
7703 		if (vnode == NULL)
7704 			break;
7705 
7706 		vnode = lookup_vnode(mount->id, vnode->id);
7707 		if (vnode == NULL || vnode->IsBusy())
7708 			continue;
7709 
7710 		if (vnode->ref_count == 0) {
7711 			// this vnode has been unused before
7712 			vnode_used(vnode);
7713 		}
7714 		inc_vnode_ref_count(vnode);
7715 
7716 		locker.Unlock();
7717 
7718 		if (vnode->cache != NULL && !vnode->IsRemoved())
7719 			vnode->cache->WriteModified();
7720 
7721 		put_vnode(vnode);
7722 	}
7723 
7724 	// And then, let the file systems do their synchronizing work
7725 
7726 	if (HAS_FS_MOUNT_CALL(mount, sync))
7727 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7728 
7729 	put_mount(mount);
7730 	return status;
7731 }
7732 
7733 
7734 static status_t
7735 fs_read_info(dev_t device, struct fs_info* info)
7736 {
7737 	struct fs_mount* mount;
7738 	status_t status = get_mount(device, &mount);
7739 	if (status != B_OK)
7740 		return status;
7741 
7742 	memset(info, 0, sizeof(struct fs_info));
7743 
7744 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7745 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7746 
7747 	// fill in info the file system doesn't (have to) know about
7748 	if (status == B_OK) {
7749 		info->dev = mount->id;
7750 		info->root = mount->root_vnode->id;
7751 
7752 		fs_volume* volume = mount->volume;
7753 		while (volume->super_volume != NULL)
7754 			volume = volume->super_volume;
7755 
7756 		strlcpy(info->fsh_name, volume->file_system_name,
7757 			sizeof(info->fsh_name));
7758 		if (mount->device_name != NULL) {
7759 			strlcpy(info->device_name, mount->device_name,
7760 				sizeof(info->device_name));
7761 		}
7762 	}
7763 
7764 	// if the call is not supported by the file system, there are still
7765 	// the parts that we filled out ourselves
7766 
7767 	put_mount(mount);
7768 	return status;
7769 }
7770 
7771 
7772 static status_t
7773 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7774 {
7775 	struct fs_mount* mount;
7776 	status_t status = get_mount(device, &mount);
7777 	if (status != B_OK)
7778 		return status;
7779 
7780 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7781 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7782 	else
7783 		status = B_READ_ONLY_DEVICE;
7784 
7785 	put_mount(mount);
7786 	return status;
7787 }
7788 
7789 
7790 static dev_t
7791 fs_next_device(int32* _cookie)
7792 {
7793 	struct fs_mount* mount = NULL;
7794 	dev_t device = *_cookie;
7795 
7796 	mutex_lock(&sMountMutex);
7797 
	// Since device IDs are assigned sequentially, this algorithm works well
	// enough. It ensures that the device list returned is sorted, and that
	// no device is skipped when an already visited device has been
	// unmounted.
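
	// Typical iteration over all mounted volumes (illustrative sketch):
	//	int32 cookie = 0;
	//	dev_t device;
	//	while ((device = fs_next_device(&cookie)) >= 0)
	//		...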
7802 
7803 	while (device < sNextMountID) {
7804 		mount = find_mount(device++);
7805 		if (mount != NULL && mount->volume->private_volume != NULL)
7806 			break;
7807 	}
7808 
7809 	*_cookie = device;
7810 
7811 	if (mount != NULL)
7812 		device = mount->id;
7813 	else
7814 		device = B_BAD_VALUE;
7815 
7816 	mutex_unlock(&sMountMutex);
7817 
7818 	return device;
7819 }
7820 
7821 
7822 ssize_t
7823 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7824 	void *buffer, size_t readBytes)
7825 {
7826 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7827 	if (attrFD < 0)
7828 		return attrFD;
7829 
7830 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7831 
7832 	_kern_close(attrFD);
7833 
7834 	return bytesRead;
7835 }
7836 
7837 
7838 static status_t
7839 get_cwd(char* buffer, size_t size, bool kernel)
7840 {
7841 	// Get current working directory from io context
7842 	struct io_context* context = get_current_io_context(kernel);
7843 	status_t status;
7844 
7845 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7846 
7847 	mutex_lock(&context->io_mutex);
7848 
7849 	struct vnode* vnode = context->cwd;
7850 	if (vnode)
7851 		inc_vnode_ref_count(vnode);
7852 
7853 	mutex_unlock(&context->io_mutex);
7854 
7855 	if (vnode) {
7856 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7857 		put_vnode(vnode);
7858 	} else
7859 		status = B_ERROR;
7860 
7861 	return status;
7862 }
7863 
7864 
7865 static status_t
7866 set_cwd(int fd, char* path, bool kernel)
7867 {
7868 	struct io_context* context;
7869 	struct vnode* vnode = NULL;
7870 	struct vnode* oldDirectory;
7871 	status_t status;
7872 
7873 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7874 
7875 	// Get vnode for passed path, and bail if it failed
7876 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7877 	if (status < 0)
7878 		return status;
7879 
7880 	if (!S_ISDIR(vnode->Type())) {
7881 		// nope, can't cwd to here
7882 		status = B_NOT_A_DIRECTORY;
7883 		goto err;
7884 	}
7885 
7886 	// Get current io context and lock
7887 	context = get_current_io_context(kernel);
7888 	mutex_lock(&context->io_mutex);
7889 
7890 	// save the old current working directory first
7891 	oldDirectory = context->cwd;
7892 	context->cwd = vnode;
7893 
7894 	mutex_unlock(&context->io_mutex);
7895 
7896 	if (oldDirectory)
7897 		put_vnode(oldDirectory);
7898 
7899 	return B_NO_ERROR;
7900 
7901 err:
7902 	put_vnode(vnode);
7903 	return status;
7904 }
7905 
7906 
7907 //	#pragma mark - kernel mirrored syscalls
7908 
7909 
7910 dev_t
7911 _kern_mount(const char* path, const char* device, const char* fsName,
7912 	uint32 flags, const char* args, size_t argsLength)
7913 {
7914 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7915 	if (pathBuffer.InitCheck() != B_OK)
7916 		return B_NO_MEMORY;
7917 
7918 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7919 }
7920 
7921 
7922 status_t
7923 _kern_unmount(const char* path, uint32 flags)
7924 {
7925 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7926 	if (pathBuffer.InitCheck() != B_OK)
7927 		return B_NO_MEMORY;
7928 
7929 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7930 }
7931 
7932 
7933 status_t
7934 _kern_read_fs_info(dev_t device, struct fs_info* info)
7935 {
7936 	if (info == NULL)
7937 		return B_BAD_VALUE;
7938 
7939 	return fs_read_info(device, info);
7940 }
7941 
7942 
7943 status_t
7944 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7945 {
7946 	if (info == NULL)
7947 		return B_BAD_VALUE;
7948 
7949 	return fs_write_info(device, info, mask);
7950 }
7951 
7952 
7953 status_t
7954 _kern_sync(void)
7955 {
7956 	// Note: _kern_sync() is also called from _user_sync()
7957 	int32 cookie = 0;
7958 	dev_t device;
7959 	while ((device = next_dev(&cookie)) >= 0) {
7960 		status_t status = fs_sync(device);
7961 		if (status != B_OK && status != B_BAD_VALUE) {
7962 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
7963 				strerror(status));
7964 		}
7965 	}
7966 
7967 	return B_OK;
7968 }
7969 
7970 
7971 dev_t
7972 _kern_next_device(int32* _cookie)
7973 {
7974 	return fs_next_device(_cookie);
7975 }
7976 
7977 
7978 status_t
7979 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7980 	size_t infoSize)
7981 {
7982 	if (infoSize != sizeof(fd_info))
7983 		return B_BAD_VALUE;
7984 
7985 	// get the team
7986 	Team* team = Team::Get(teamID);
7987 	if (team == NULL)
7988 		return B_BAD_TEAM_ID;
7989 	BReference<Team> teamReference(team, true);
7990 
7991 	// now that we have a team reference, its I/O context won't go away
7992 	io_context* context = team->io_context;
7993 	MutexLocker contextLocker(context->io_mutex);
7994 
7995 	uint32 slot = *_cookie;
7996 
7997 	struct file_descriptor* descriptor;
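	// skip empty slots to find the next open descriptor at or after the
	// cookie position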
7998 	while (slot < context->table_size
7999 		&& (descriptor = context->fds[slot]) == NULL) {
8000 		slot++;
8001 	}
8002 
8003 	if (slot >= context->table_size)
8004 		return B_ENTRY_NOT_FOUND;
8005 
8006 	info->number = slot;
8007 	info->open_mode = descriptor->open_mode;
8008 
8009 	struct vnode* vnode = fd_vnode(descriptor);
8010 	if (vnode != NULL) {
8011 		info->device = vnode->device;
8012 		info->node = vnode->id;
8013 	} else if (descriptor->u.mount != NULL) {
8014 		info->device = descriptor->u.mount->id;
8015 		info->node = -1;
8016 	}
8017 
8018 	*_cookie = slot + 1;
8019 	return B_OK;
8020 }
8021 
8022 
8023 int
8024 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8025 	int perms)
8026 {
8027 	if ((openMode & O_CREAT) != 0) {
8028 		return file_create_entry_ref(device, inode, name, openMode, perms,
8029 			true);
8030 	}
8031 
8032 	return file_open_entry_ref(device, inode, name, openMode, true);
8033 }
8034 
8035 
8036 /*!	\brief Opens a node specified by a FD + path pair.
8037 
8038 	At least one of \a fd and \a path must be specified.
8039 	If only \a fd is given, the function opens the node identified by this
8040 	FD. If only a path is given, this path is opened. If both are given and
8041 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8042 	of the directory (!) identified by \a fd.
8043 
8044 	\param fd The FD. May be < 0.
8045 	\param path The absolute or relative path. May be \c NULL.
	\param openMode The open mode.
	\param perms The access permissions for the file to be created (relevant
		   only with \c O_CREAT in \a openMode).
8047 	\return A FD referring to the newly opened node, or an error code,
8048 			if an error occurs.
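
	A minimal usage sketch (\c dirFD is assumed to be an open directory FD):
	\code
	// absolute path -- dirFD is ignored
	int fd1 = _kern_open(dirFD, "/boot/home/file", O_RDONLY, 0);
	// relative path -- resolved relative to the directory dirFD refers to
	int fd2 = _kern_open(dirFD, "file", O_RDONLY, 0);
	\endcode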
8049 */
8050 int
8051 _kern_open(int fd, const char* path, int openMode, int perms)
8052 {
8053 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8054 	if (pathBuffer.InitCheck() != B_OK)
8055 		return B_NO_MEMORY;
8056 
8057 	if (openMode & O_CREAT)
8058 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8059 
8060 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8061 }
8062 
8063 
8064 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8065 
8066 	The supplied name may be \c NULL, in which case directory identified
8067 	by \a device and \a inode will be opened. Otherwise \a device and
8068 	\a inode identify the parent directory of the directory to be opened
8069 	and \a name its entry name.
8070 
8071 	\param device If \a name is specified the ID of the device the parent
8072 		   directory of the directory to be opened resides on, otherwise
8073 		   the device of the directory itself.
8074 	\param inode If \a name is specified the node ID of the parent
8075 		   directory of the directory to be opened, otherwise node ID of the
8076 		   directory itself.
8077 	\param name The entry name of the directory to be opened. If \c NULL,
8078 		   the \a device + \a inode pair identify the node to be opened.
8079 	\return The FD of the newly opened directory or an error code, if
8080 			something went wrong.
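
	For example, opening a subdirectory "config" of the directory identified
	by \a device and \a inode might look like this (illustrative sketch):
	\code
	int dirFD = _kern_open_dir_entry_ref(device, inode, "config");
	\endcode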
8081 */
8082 int
8083 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8084 {
8085 	return dir_open_entry_ref(device, inode, name, true);
8086 }
8087 
8088 
8089 /*!	\brief Opens a directory specified by a FD + path pair.
8090 
8091 	At least one of \a fd and \a path must be specified.
8092 	If only \a fd is given, the function opens the directory identified by this
8093 	FD. If only a path is given, this path is opened. If both are given and
8094 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8095 	of the directory (!) identified by \a fd.
8096 
8097 	\param fd The FD. May be < 0.
8098 	\param path The absolute or relative path. May be \c NULL.
8099 	\return A FD referring to the newly opened directory, or an error code,
8100 			if an error occurs.
8101 */
8102 int
8103 _kern_open_dir(int fd, const char* path)
8104 {
8105 	if (path == NULL)
		return dir_open(fd, NULL, true);
8107 
8108 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8109 	if (pathBuffer.InitCheck() != B_OK)
8110 		return B_NO_MEMORY;
8111 
8112 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8113 }
8114 
8115 
8116 status_t
8117 _kern_fcntl(int fd, int op, size_t argument)
8118 {
8119 	return common_fcntl(fd, op, argument, true);
8120 }
8121 
8122 
8123 status_t
8124 _kern_fsync(int fd)
8125 {
8126 	return common_sync(fd, true);
8127 }
8128 
8129 
8130 status_t
8131 _kern_lock_node(int fd)
8132 {
8133 	return common_lock_node(fd, true);
8134 }
8135 
8136 
8137 status_t
8138 _kern_unlock_node(int fd)
8139 {
8140 	return common_unlock_node(fd, true);
8141 }
8142 
8143 
8144 status_t
8145 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8146 	int perms)
8147 {
8148 	return dir_create_entry_ref(device, inode, name, perms, true);
8149 }
8150 
8151 
8152 /*!	\brief Creates a directory specified by a FD + path pair.
8153 
8154 	\a path must always be specified (it contains the name of the new directory
8155 	at least). If only a path is given, this path identifies the location at
8156 	which the directory shall be created. If both \a fd and \a path are given
8157 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8158 	of the directory (!) identified by \a fd.
8159 
8160 	\param fd The FD. May be < 0.
8161 	\param path The absolute or relative path. Must not be \c NULL.
8162 	\param perms The access permissions the new directory shall have.
8163 	\return \c B_OK, if the directory has been created successfully, another
8164 			error code otherwise.
8165 */
8166 status_t
8167 _kern_create_dir(int fd, const char* path, int perms)
8168 {
8169 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8170 	if (pathBuffer.InitCheck() != B_OK)
8171 		return B_NO_MEMORY;
8172 
8173 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8174 }
8175 
8176 
8177 status_t
8178 _kern_remove_dir(int fd, const char* path)
8179 {
8180 	if (path) {
8181 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8182 		if (pathBuffer.InitCheck() != B_OK)
8183 			return B_NO_MEMORY;
8184 
8185 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8186 	}
8187 
8188 	return dir_remove(fd, NULL, true);
8189 }
8190 
8191 
8192 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8193 
8194 	At least one of \a fd and \a path must be specified.
8195 	If only \a fd is given, the function the symlink to be read is the node
8196 	identified by this FD. If only a path is given, this path identifies the
8197 	symlink to be read. If both are given and the path is absolute, \a fd is
8198 	ignored; a relative path is reckoned off of the directory (!) identified
8199 	by \a fd.
8200 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8201 	will still be updated to reflect the required buffer size.
8202 
8203 	\param fd The FD. May be < 0.
8204 	\param path The absolute or relative path. May be \c NULL.
8205 	\param buffer The buffer into which the contents of the symlink shall be
8206 		   written.
8207 	\param _bufferSize A pointer to the size of the supplied buffer.
8208 	\return The length of the link on success or an appropriate error code
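
	A typical retry pattern (illustrative sketch, allocation and error
	handling elided):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t error = _kern_read_link(fd, NULL, buffer, &size);
	if (error == B_BUFFER_OVERFLOW) {
		// size now holds the required buffer size; retry with a buffer
		// at least that large
	}
	\endcode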
8209 */
8210 status_t
8211 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8212 {
8213 	if (path) {
8214 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8215 		if (pathBuffer.InitCheck() != B_OK)
8216 			return B_NO_MEMORY;
8217 
8218 		return common_read_link(fd, pathBuffer.LockBuffer(),
8219 			buffer, _bufferSize, true);
8220 	}
8221 
8222 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8223 }
8224 
8225 
8226 /*!	\brief Creates a symlink specified by a FD + path pair.
8227 
8228 	\a path must always be specified (it contains the name of the new symlink
8229 	at least). If only a path is given, this path identifies the location at
8230 	which the symlink shall be created. If both \a fd and \a path are given and
8231 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8232 	of the directory (!) identified by \a fd.
8233 
8234 	\param fd The FD. May be < 0.
	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the new symlink shall point to.
	\param mode The access permissions the new symlink shall have.
8237 	\return \c B_OK, if the symlink has been created successfully, another
8238 			error code otherwise.
8239 */
8240 status_t
8241 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8242 {
8243 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8244 	if (pathBuffer.InitCheck() != B_OK)
8245 		return B_NO_MEMORY;
8246 
8247 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8248 		toPath, mode, true);
8249 }
8250 
8251 
8252 status_t
8253 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8254 	bool traverseLeafLink)
8255 {
8256 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8257 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8258 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8259 		return B_NO_MEMORY;
8260 
8261 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8262 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8263 }
8264 
8265 
8266 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8267 
8268 	\a path must always be specified (it contains at least the name of the entry
8269 	to be deleted). If only a path is given, this path identifies the entry
8270 	directly. If both \a fd and \a path are given and the path is absolute,
8271 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8272 	identified by \a fd.
8273 
8274 	\param fd The FD. May be < 0.
8275 	\param path The absolute or relative path. Must not be \c NULL.
8276 	\return \c B_OK, if the entry has been removed successfully, another
8277 			error code otherwise.
8278 */
8279 status_t
8280 _kern_unlink(int fd, const char* path)
8281 {
8282 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8283 	if (pathBuffer.InitCheck() != B_OK)
8284 		return B_NO_MEMORY;
8285 
8286 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8287 }
8288 
8289 
8290 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8291 		   by another FD + path pair.
8292 
8293 	\a oldPath and \a newPath must always be specified (they contain at least
8294 	the name of the entry). If only a path is given, this path identifies the
8295 	entry directly. If both a FD and a path are given and the path is absolute,
8296 	the FD is ignored; a relative path is reckoned off of the directory (!)
8297 	identified by the respective FD.
8298 
8299 	\param oldFD The FD of the old location. May be < 0.
8300 	\param oldPath The absolute or relative path of the old location. Must not
8301 		   be \c NULL.
8302 	\param newFD The FD of the new location. May be < 0.
8303 	\param newPath The absolute or relative path of the new location. Must not
8304 		   be \c NULL.
8305 	\return \c B_OK, if the entry has been moved successfully, another
8306 			error code otherwise.
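
	For example (illustrative sketch; \c oldDirFD and \c newDirFD are assumed
	to be open directory FDs):
	\code
	status_t error = _kern_rename(oldDirFD, "old_name", newDirFD, "new_name");
	\endcode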
8307 */
8308 status_t
8309 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8310 {
8311 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8312 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8313 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8314 		return B_NO_MEMORY;
8315 
8316 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8317 		newFD, newPathBuffer.LockBuffer(), true);
8318 }
8319 
8320 
8321 status_t
8322 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8323 {
8324 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8325 	if (pathBuffer.InitCheck() != B_OK)
8326 		return B_NO_MEMORY;
8327 
8328 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8329 		true);
8330 }
8331 
8332 
8333 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8334 
8335 	If only \a fd is given, the stat operation associated with the type
8336 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8337 	given, this path identifies the entry for whose node to retrieve the
8338 	stat data. If both \a fd and \a path are given and the path is absolute,
8339 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8340 	identified by \a fd and specifies the entry whose stat data shall be
8341 	retrieved.
8342 
8343 	\param fd The FD. May be < 0.
8344 	\param path The absolute or relative path. Must not be \c NULL.
8345 	\param traverseLeafLink If \a path is given, \c true specifies that the
8346 		   function shall not stick to symlinks, but traverse them.
8347 	\param stat The buffer the stat data shall be written into.
8348 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data have been read successfully, another
8350 			error code otherwise.
8351 */
8352 status_t
8353 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8354 	struct stat* stat, size_t statSize)
8355 {
8356 	struct stat completeStat;
8357 	struct stat* originalStat = NULL;
8358 	status_t status;
8359 
8360 	if (statSize > sizeof(struct stat))
8361 		return B_BAD_VALUE;
8362 
	// This supports callers that use a smaller (older) version of struct
	// stat: read into a complete structure, then copy back only the
	// requested prefix.
8364 	if (statSize < sizeof(struct stat)) {
8365 		originalStat = stat;
8366 		stat = &completeStat;
8367 	}
8368 
8369 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8370 
8371 	if (status == B_OK && originalStat != NULL)
8372 		memcpy(originalStat, stat, statSize);
8373 
8374 	return status;
8375 }
8376 
8377 
8378 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8379 
8380 	If only \a fd is given, the stat operation associated with the type
8381 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8382 	given, this path identifies the entry for whose node to write the
8383 	stat data. If both \a fd and \a path are given and the path is absolute,
8384 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8385 	identified by \a fd and specifies the entry whose stat data shall be
8386 	written.
8387 
8388 	\param fd The FD. May be < 0.
8389 	\param path The absolute or relative path. Must not be \c NULL.
8390 	\param traverseLeafLink If \a path is given, \c true specifies that the
8391 		   function shall not stick to symlinks, but traverse them.
8392 	\param stat The buffer containing the stat data to be written.
8393 	\param statSize The size of the supplied stat buffer.
8394 	\param statMask A mask specifying which parts of the stat data shall be
8395 		   written.
	\return \c B_OK, if the stat data have been written successfully,
8397 			another error code otherwise.
8398 */
8399 status_t
8400 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8401 	const struct stat* stat, size_t statSize, int statMask)
8402 {
8403 	struct stat completeStat;
8404 
8405 	if (statSize > sizeof(struct stat))
8406 		return B_BAD_VALUE;
8407 
	// This supports callers that use a smaller (older) version of struct
	// stat: zero the missing fields and work with a complete structure.
8409 	if (statSize < sizeof(struct stat)) {
8410 		memset((uint8*)&completeStat + statSize, 0,
8411 			sizeof(struct stat) - statSize);
8412 		memcpy(&completeStat, stat, statSize);
8413 		stat = &completeStat;
8414 	}
8415 
8416 	status_t status;
8417 
8418 	if (path) {
8419 		// path given: write the stat of the node referred to by (fd, path)
8420 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8421 		if (pathBuffer.InitCheck() != B_OK)
8422 			return B_NO_MEMORY;
8423 
8424 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8425 			traverseLeafLink, stat, statMask, true);
8426 	} else {
8427 		// no path given: get the FD and use the FD operation
8428 		struct file_descriptor* descriptor
8429 			= get_fd(get_current_io_context(true), fd);
8430 		if (descriptor == NULL)
8431 			return B_FILE_ERROR;
8432 
8433 		if (descriptor->ops->fd_write_stat)
8434 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8435 		else
8436 			status = B_UNSUPPORTED;
8437 
8438 		put_fd(descriptor);
8439 	}
8440 
8441 	return status;
8442 }
8443 
8444 
8445 int
8446 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8447 {
8448 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8449 	if (pathBuffer.InitCheck() != B_OK)
8450 		return B_NO_MEMORY;
8451 
8452 	if (path != NULL)
8453 		pathBuffer.SetTo(path);
8454 
8455 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8456 		traverseLeafLink, true);
8457 }
8458 
8459 
8460 int
8461 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8462 	int openMode)
8463 {
8464 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8465 	if (pathBuffer.InitCheck() != B_OK)
8466 		return B_NO_MEMORY;
8467 
8468 	if ((openMode & O_CREAT) != 0) {
8469 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8470 			true);
8471 	}
8472 
8473 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8474 }
8475 
8476 
8477 status_t
8478 _kern_remove_attr(int fd, const char* name)
8479 {
8480 	return attr_remove(fd, name, true);
8481 }
8482 
8483 
8484 status_t
8485 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8486 	const char* toName)
8487 {
8488 	return attr_rename(fromFile, fromName, toFile, toName, true);
8489 }
8490 
8491 
8492 int
8493 _kern_open_index_dir(dev_t device)
8494 {
8495 	return index_dir_open(device, true);
8496 }
8497 
8498 
8499 status_t
8500 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8501 {
8502 	return index_create(device, name, type, flags, true);
8503 }
8504 
8505 
8506 status_t
8507 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8508 {
8509 	return index_name_read_stat(device, name, stat, true);
8510 }
8511 
8512 
8513 status_t
8514 _kern_remove_index(dev_t device, const char* name)
8515 {
8516 	return index_remove(device, name, true);
8517 }
8518 
8519 
8520 status_t
8521 _kern_getcwd(char* buffer, size_t size)
8522 {
8523 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8524 
8525 	// Call vfs to get current working directory
8526 	return get_cwd(buffer, size, true);
8527 }
8528 
8529 
8530 status_t
8531 _kern_setcwd(int fd, const char* path)
8532 {
8533 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8534 	if (pathBuffer.InitCheck() != B_OK)
8535 		return B_NO_MEMORY;
8536 
8537 	if (path != NULL)
8538 		pathBuffer.SetTo(path);
8539 
8540 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8541 }
8542 
8543 
8544 //	#pragma mark - userland syscalls
8545 
8546 
8547 dev_t
8548 _user_mount(const char* userPath, const char* userDevice,
8549 	const char* userFileSystem, uint32 flags, const char* userArgs,
8550 	size_t argsLength)
8551 {
8552 	char fileSystem[B_FILE_NAME_LENGTH];
8553 	KPath path, device;
8554 	char* args = NULL;
8555 	status_t status;
8556 
8557 	if (!IS_USER_ADDRESS(userPath)
8558 		|| !IS_USER_ADDRESS(userFileSystem)
8559 		|| !IS_USER_ADDRESS(userDevice))
8560 		return B_BAD_ADDRESS;
8561 
8562 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8563 		return B_NO_MEMORY;
8564 
8565 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8566 		return B_BAD_ADDRESS;
8567 
8568 	if (userFileSystem != NULL
8569 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8570 		return B_BAD_ADDRESS;
8571 
8572 	if (userDevice != NULL
8573 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8574 			< B_OK)
8575 		return B_BAD_ADDRESS;
8576 
8577 	if (userArgs != NULL && argsLength > 0) {
8578 		// this is a safety restriction
8579 		if (argsLength >= 65536)
8580 			return B_NAME_TOO_LONG;
8581 
8582 		args = (char*)malloc(argsLength + 1);
8583 		if (args == NULL)
8584 			return B_NO_MEMORY;
8585 
8586 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8587 			free(args);
8588 			return B_BAD_ADDRESS;
8589 		}
8590 	}
8591 	path.UnlockBuffer();
8592 	device.UnlockBuffer();
8593 
8594 	status = fs_mount(path.LockBuffer(),
8595 		userDevice != NULL ? device.Path() : NULL,
8596 		userFileSystem ? fileSystem : NULL, flags, args, false);
8597 
8598 	free(args);
8599 	return status;
8600 }
8601 
8602 
8603 status_t
8604 _user_unmount(const char* userPath, uint32 flags)
8605 {
8606 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8607 	if (pathBuffer.InitCheck() != B_OK)
8608 		return B_NO_MEMORY;
8609 
8610 	char* path = pathBuffer.LockBuffer();
8611 
8612 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8613 		return B_BAD_ADDRESS;
8614 
8615 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8616 }
8617 
8618 
8619 status_t
8620 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8621 {
8622 	struct fs_info info;
8623 	status_t status;
8624 
8625 	if (userInfo == NULL)
8626 		return B_BAD_VALUE;
8627 
8628 	if (!IS_USER_ADDRESS(userInfo))
8629 		return B_BAD_ADDRESS;
8630 
8631 	status = fs_read_info(device, &info);
8632 	if (status != B_OK)
8633 		return status;
8634 
8635 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8636 		return B_BAD_ADDRESS;
8637 
8638 	return B_OK;
8639 }
8640 
8641 
8642 status_t
8643 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8644 {
8645 	struct fs_info info;
8646 
8647 	if (userInfo == NULL)
8648 		return B_BAD_VALUE;
8649 
8650 	if (!IS_USER_ADDRESS(userInfo)
8651 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8652 		return B_BAD_ADDRESS;
8653 
8654 	return fs_write_info(device, &info, mask);
8655 }
8656 
8657 
8658 dev_t
8659 _user_next_device(int32* _userCookie)
8660 {
8661 	int32 cookie;
8662 	dev_t device;
8663 
8664 	if (!IS_USER_ADDRESS(_userCookie)
8665 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8666 		return B_BAD_ADDRESS;
8667 
8668 	device = fs_next_device(&cookie);
8669 
8670 	if (device >= B_OK) {
8671 		// update user cookie
8672 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8673 			return B_BAD_ADDRESS;
8674 	}
8675 
8676 	return device;
8677 }
8678 
8679 
8680 status_t
8681 _user_sync(void)
8682 {
8683 	return _kern_sync();
8684 }
8685 
8686 
8687 status_t
8688 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8689 	size_t infoSize)
8690 {
8691 	struct fd_info info;
8692 	uint32 cookie;
8693 
8694 	// only root can do this (or should root's group be enough?)
8695 	if (geteuid() != 0)
8696 		return B_NOT_ALLOWED;
8697 
8698 	if (infoSize != sizeof(fd_info))
8699 		return B_BAD_VALUE;
8700 
8701 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8702 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8703 		return B_BAD_ADDRESS;
8704 
8705 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8706 	if (status != B_OK)
8707 		return status;
8708 
8709 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8710 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8711 		return B_BAD_ADDRESS;
8712 
8713 	return status;
8714 }
8715 
8716 
8717 status_t
8718 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8719 	char* userPath, size_t pathLength)
8720 {
8721 	if (!IS_USER_ADDRESS(userPath))
8722 		return B_BAD_ADDRESS;
8723 
8724 	KPath path(B_PATH_NAME_LENGTH + 1);
8725 	if (path.InitCheck() != B_OK)
8726 		return B_NO_MEMORY;
8727 
8728 	// copy the leaf name onto the stack
8729 	char stackLeaf[B_FILE_NAME_LENGTH];
8730 	if (leaf) {
8731 		if (!IS_USER_ADDRESS(leaf))
8732 			return B_BAD_ADDRESS;
8733 
8734 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8735 		if (length < 0)
8736 			return length;
8737 		if (length >= B_FILE_NAME_LENGTH)
8738 			return B_NAME_TOO_LONG;
8739 
8740 		leaf = stackLeaf;
8741 	}
8742 
8743 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8744 		false, path.LockBuffer(), path.BufferSize());
8745 	if (status != B_OK)
8746 		return status;
8747 
8748 	path.UnlockBuffer();
8749 
8750 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8751 	if (length < 0)
8752 		return length;
8753 	if (length >= (int)pathLength)
8754 		return B_BUFFER_OVERFLOW;
8755 
8756 	return B_OK;
8757 }
8758 
8759 
8760 status_t
8761 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8762 {
8763 	if (userPath == NULL || buffer == NULL)
8764 		return B_BAD_VALUE;
8765 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8766 		return B_BAD_ADDRESS;
8767 
8768 	// copy path from userland
8769 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8770 	if (pathBuffer.InitCheck() != B_OK)
8771 		return B_NO_MEMORY;
8772 	char* path = pathBuffer.LockBuffer();
8773 
8774 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8775 		return B_BAD_ADDRESS;
8776 
8777 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8778 		false);
8779 	if (error != B_OK)
8780 		return error;
8781 
8782 	// copy back to userland
8783 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8784 	if (len < 0)
8785 		return len;
8786 	if (len >= B_PATH_NAME_LENGTH)
8787 		return B_BUFFER_OVERFLOW;
8788 
8789 	return B_OK;
8790 }
8791 
8792 
8793 int
8794 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8795 	int openMode, int perms)
8796 {
8797 	char name[B_FILE_NAME_LENGTH];
8798 
8799 	if (userName == NULL || device < 0 || inode < 0)
8800 		return B_BAD_VALUE;
8801 	if (!IS_USER_ADDRESS(userName)
8802 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8803 		return B_BAD_ADDRESS;
8804 
8805 	if ((openMode & O_CREAT) != 0) {
8806 		return file_create_entry_ref(device, inode, name, openMode, perms,
			false);
8808 	}
8809 
8810 	return file_open_entry_ref(device, inode, name, openMode, false);
8811 }
8812 
8813 
8814 int
8815 _user_open(int fd, const char* userPath, int openMode, int perms)
8816 {
8817 	KPath path(B_PATH_NAME_LENGTH + 1);
8818 	if (path.InitCheck() != B_OK)
8819 		return B_NO_MEMORY;
8820 
8821 	char* buffer = path.LockBuffer();
8822 
8823 	if (!IS_USER_ADDRESS(userPath)
8824 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8825 		return B_BAD_ADDRESS;
8826 
8827 	if ((openMode & O_CREAT) != 0)
8828 		return file_create(fd, buffer, openMode, perms, false);
8829 
8830 	return file_open(fd, buffer, openMode, false);
8831 }
8832 
8833 
8834 int
8835 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8836 {
8837 	if (userName != NULL) {
8838 		char name[B_FILE_NAME_LENGTH];
8839 
8840 		if (!IS_USER_ADDRESS(userName)
8841 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8842 			return B_BAD_ADDRESS;
8843 
8844 		return dir_open_entry_ref(device, inode, name, false);
8845 	}
8846 	return dir_open_entry_ref(device, inode, NULL, false);
8847 }
8848 
8849 
8850 int
8851 _user_open_dir(int fd, const char* userPath)
8852 {
8853 	if (userPath == NULL)
8854 		return dir_open(fd, NULL, false);
8855 
8856 	KPath path(B_PATH_NAME_LENGTH + 1);
8857 	if (path.InitCheck() != B_OK)
8858 		return B_NO_MEMORY;
8859 
8860 	char* buffer = path.LockBuffer();
8861 
8862 	if (!IS_USER_ADDRESS(userPath)
8863 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8864 		return B_BAD_ADDRESS;
8865 
8866 	return dir_open(fd, buffer, false);
8867 }
8868 
8869 
8870 /*!	\brief Opens a directory's parent directory and returns the entry name
8871 		   of the former.
8872 
	Aside from also returning the directory's entry name, this method is
8874 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8875 	equivalent, if \a userName is \c NULL.
8876 
8877 	If a name buffer is supplied and the name does not fit the buffer, the
8878 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8879 
8880 	\param fd A FD referring to a directory.
8881 	\param userName Buffer the directory's entry name shall be written into.
8882 		   May be \c NULL.
8883 	\param nameLength Size of the name buffer.
8884 	\return The file descriptor of the opened parent directory, if everything
8885 			went fine, an error code otherwise.
8886 */
8887 int
8888 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8889 {
8890 	bool kernel = false;
8891 
8892 	if (userName && !IS_USER_ADDRESS(userName))
8893 		return B_BAD_ADDRESS;
8894 
8895 	// open the parent dir
8896 	int parentFD = dir_open(fd, (char*)"..", kernel);
8897 	if (parentFD < 0)
8898 		return parentFD;
8899 	FDCloser fdCloser(parentFD, kernel);
8900 
8901 	if (userName) {
8902 		// get the vnodes
8903 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8904 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8905 		VNodePutter parentVNodePutter(parentVNode);
8906 		VNodePutter dirVNodePutter(dirVNode);
8907 		if (!parentVNode || !dirVNode)
8908 			return B_FILE_ERROR;
8909 
8910 		// get the vnode name
8911 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8912 		struct dirent* buffer = (struct dirent*)_buffer;
8913 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8914 			sizeof(_buffer), get_current_io_context(false));
8915 		if (status != B_OK)
8916 			return status;
8917 
8918 		// copy the name to the userland buffer
8919 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8920 		if (len < 0)
8921 			return len;
8922 		if (len >= (int)nameLength)
8923 			return B_BUFFER_OVERFLOW;
8924 	}
8925 
8926 	return fdCloser.Detach();
8927 }
8928 
8929 
8930 status_t
8931 _user_fcntl(int fd, int op, size_t argument)
8932 {
8933 	status_t status = common_fcntl(fd, op, argument, false);
8934 	if (op == F_SETLKW)
8935 		syscall_restart_handle_post(status);
8936 
8937 	return status;
8938 }
8939 
8940 
8941 status_t
8942 _user_fsync(int fd)
8943 {
8944 	return common_sync(fd, false);
8945 }
8946 
8947 
8948 status_t
8949 _user_flock(int fd, int operation)
8950 {
8951 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8952 
8953 	// Check if the operation is valid
8954 	switch (operation & ~LOCK_NB) {
8955 		case LOCK_UN:
8956 		case LOCK_SH:
8957 		case LOCK_EX:
8958 			break;
8959 
8960 		default:
8961 			return B_BAD_VALUE;
8962 	}
8963 
8964 	struct file_descriptor* descriptor;
8965 	struct vnode* vnode;
8966 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8967 	if (descriptor == NULL)
8968 		return B_FILE_ERROR;
8969 
8970 	if (descriptor->type != FDTYPE_FILE) {
8971 		put_fd(descriptor);
8972 		return B_BAD_VALUE;
8973 	}
8974 
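	// Translate the flock() operation into an advisory lock covering the
	// whole file.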
8975 	struct flock flock;
8976 	flock.l_start = 0;
8977 	flock.l_len = OFF_MAX;
8978 	flock.l_whence = 0;
8979 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8980 
8981 	status_t status;
8982 	if ((operation & LOCK_UN) != 0)
8983 		status = release_advisory_lock(vnode, &flock);
8984 	else {
8985 		status = acquire_advisory_lock(vnode,
8986 			thread_get_current_thread()->team->session_id, &flock,
8987 			(operation & LOCK_NB) == 0);
8988 	}
8989 
8990 	syscall_restart_handle_post(status);
8991 
8992 	put_fd(descriptor);
8993 	return status;
8994 }
8995 
8996 
8997 status_t
8998 _user_lock_node(int fd)
8999 {
9000 	return common_lock_node(fd, false);
9001 }
9002 
9003 
9004 status_t
9005 _user_unlock_node(int fd)
9006 {
9007 	return common_unlock_node(fd, false);
9008 }
9009 
9010 
9011 status_t
9012 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9013 	int perms)
9014 {
9015 	char name[B_FILE_NAME_LENGTH];
9016 	status_t status;
9017 
9018 	if (!IS_USER_ADDRESS(userName))
9019 		return B_BAD_ADDRESS;
9020 
9021 	status = user_strlcpy(name, userName, sizeof(name));
9022 	if (status < 0)
9023 		return status;
9024 
9025 	return dir_create_entry_ref(device, inode, name, perms, false);
9026 }
9027 
9028 
9029 status_t
9030 _user_create_dir(int fd, const char* userPath, int perms)
9031 {
9032 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9033 	if (pathBuffer.InitCheck() != B_OK)
9034 		return B_NO_MEMORY;
9035 
9036 	char* path = pathBuffer.LockBuffer();
9037 
9038 	if (!IS_USER_ADDRESS(userPath)
9039 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9040 		return B_BAD_ADDRESS;
9041 
9042 	return dir_create(fd, path, perms, false);
9043 }
9044 
9045 
9046 status_t
9047 _user_remove_dir(int fd, const char* userPath)
9048 {
9049 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9050 	if (pathBuffer.InitCheck() != B_OK)
9051 		return B_NO_MEMORY;
9052 
9053 	char* path = pathBuffer.LockBuffer();
9054 
9055 	if (userPath != NULL) {
9056 		if (!IS_USER_ADDRESS(userPath)
9057 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9058 			return B_BAD_ADDRESS;
9059 	}
9060 
9061 	return dir_remove(fd, userPath ? path : NULL, false);
9062 }
9063 
9064 
9065 status_t
9066 _user_read_link(int fd, const char* userPath, char* userBuffer,
9067 	size_t* userBufferSize)
9068 {
9069 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9070 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9071 		return B_NO_MEMORY;
9072 
9073 	size_t bufferSize;
9074 
9075 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9076 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9077 		return B_BAD_ADDRESS;
9078 
9079 	char* path = pathBuffer.LockBuffer();
9080 	char* buffer = linkBuffer.LockBuffer();
9081 
9082 	if (userPath) {
9083 		if (!IS_USER_ADDRESS(userPath)
9084 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9085 			return B_BAD_ADDRESS;
9086 
9087 		if (bufferSize > B_PATH_NAME_LENGTH)
9088 			bufferSize = B_PATH_NAME_LENGTH;
9089 	}
9090 
9091 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9092 		&bufferSize, false);
9093 
9094 	// we also update the bufferSize in case of errors
9095 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9096 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9097 		return B_BAD_ADDRESS;
9098 
9099 	if (status != B_OK)
9100 		return status;
9101 
9102 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9103 		return B_BAD_ADDRESS;
9104 
9105 	return B_OK;
9106 }
9107 
9108 
9109 status_t
9110 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9111 	int mode)
9112 {
9113 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9114 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9115 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9116 		return B_NO_MEMORY;
9117 
9118 	char* path = pathBuffer.LockBuffer();
9119 	char* toPath = toPathBuffer.LockBuffer();
9120 
9121 	if (!IS_USER_ADDRESS(userPath)
9122 		|| !IS_USER_ADDRESS(userToPath)
9123 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9124 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9125 		return B_BAD_ADDRESS;
9126 
9127 	return common_create_symlink(fd, path, toPath, mode, false);
9128 }
9129 
9130 
9131 status_t
9132 _user_create_link(int pathFD, const char* userPath, int toFD,
9133 	const char* userToPath, bool traverseLeafLink)
9134 {
9135 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9136 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9137 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9138 		return B_NO_MEMORY;
9139 
9140 	char* path = pathBuffer.LockBuffer();
9141 	char* toPath = toPathBuffer.LockBuffer();
9142 
9143 	if (!IS_USER_ADDRESS(userPath)
9144 		|| !IS_USER_ADDRESS(userToPath)
9145 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9146 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9147 		return B_BAD_ADDRESS;
9148 
9149 	status_t status = check_path(toPath);
9150 	if (status != B_OK)
9151 		return status;
9152 
9153 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9154 		false);
9155 }
9156 
9157 
9158 status_t
9159 _user_unlink(int fd, const char* userPath)
9160 {
9161 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9162 	if (pathBuffer.InitCheck() != B_OK)
9163 		return B_NO_MEMORY;
9164 
9165 	char* path = pathBuffer.LockBuffer();
9166 
9167 	if (!IS_USER_ADDRESS(userPath)
9168 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9169 		return B_BAD_ADDRESS;
9170 
9171 	return common_unlink(fd, path, false);
9172 }
9173 
9174 
9175 status_t
9176 _user_rename(int oldFD, const char* userOldPath, int newFD,
9177 	const char* userNewPath)
9178 {
9179 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9180 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9181 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9182 		return B_NO_MEMORY;
9183 
9184 	char* oldPath = oldPathBuffer.LockBuffer();
9185 	char* newPath = newPathBuffer.LockBuffer();
9186 
9187 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9188 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9189 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9190 		return B_BAD_ADDRESS;
9191 
9192 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9193 }
9194 
9195 
9196 status_t
9197 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9198 {
9199 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9200 	if (pathBuffer.InitCheck() != B_OK)
9201 		return B_NO_MEMORY;
9202 
9203 	char* path = pathBuffer.LockBuffer();
9204 
9205 	if (!IS_USER_ADDRESS(userPath)
9206 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9207 		return B_BAD_ADDRESS;
9208 	}
9209 
9210 	// split into directory vnode and filename path
9211 	char filename[B_FILE_NAME_LENGTH];
9212 	struct vnode* dir;
9213 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9214 	if (status != B_OK)
9215 		return status;
9216 
9217 	VNodePutter _(dir);
9218 
9219 	// the underlying FS needs to support creating FIFOs
9220 	if (!HAS_FS_CALL(dir, create_special_node))
9221 		return B_UNSUPPORTED;
9222 
9223 	// create the entry	-- the FIFO sub node is set up automatically
9224 	fs_vnode superVnode;
9225 	ino_t nodeID;
9226 	status = FS_CALL(dir, create_special_node, filename, NULL,
9227 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9228 
9229 	// create_special_node() acquired a reference for us that we don't need.
9230 	if (status == B_OK)
9231 		put_vnode(dir->mount->volume, nodeID);
9232 
9233 	return status;
9234 }
9235 
9236 
9237 status_t
9238 _user_create_pipe(int* userFDs)
9239 {
9240 	// rootfs should support creating FIFOs, but let's be sure
9241 	if (!HAS_FS_CALL(sRoot, create_special_node))
9242 		return B_UNSUPPORTED;
9243 
9244 	// create the node	-- the FIFO sub node is set up automatically
9245 	fs_vnode superVnode;
9246 	ino_t nodeID;
9247 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9248 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9249 	if (status != B_OK)
9250 		return status;
9251 
9252 	// We've got one reference to the node and need another one.
9253 	struct vnode* vnode;
9254 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9255 	if (status != B_OK) {
9256 		// that should not happen
9257 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9258 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9259 		return status;
9260 	}
9261 
	// Everything looks good so far. Open one FD for reading and one for
	// writing.
9264 	int fds[2];
9265 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9266 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9267 
9268 	FDCloser closer0(fds[0], false);
9269 	FDCloser closer1(fds[1], false);
9270 
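	// If either open failed, adopt its error code (fds[0] taking precedence).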
9271 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9272 
9273 	// copy FDs to userland
9274 	if (status == B_OK) {
9275 		if (!IS_USER_ADDRESS(userFDs)
9276 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9277 			status = B_BAD_ADDRESS;
9278 		}
9279 	}
9280 
9281 	// keep FDs, if everything went fine
9282 	if (status == B_OK) {
9283 		closer0.Detach();
9284 		closer1.Detach();
9285 	}
9286 
9287 	return status;
9288 }
9289 
9290 
9291 status_t
9292 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9293 {
9294 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9295 	if (pathBuffer.InitCheck() != B_OK)
9296 		return B_NO_MEMORY;
9297 
9298 	char* path = pathBuffer.LockBuffer();
9299 
9300 	if (!IS_USER_ADDRESS(userPath)
9301 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9302 		return B_BAD_ADDRESS;
9303 
9304 	return common_access(fd, path, mode, effectiveUserGroup, false);
9305 }
9306 
9307 
9308 status_t
9309 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9310 	struct stat* userStat, size_t statSize)
9311 {
9312 	struct stat stat;
9313 	status_t status;
9314 
9315 	if (statSize > sizeof(struct stat))
9316 		return B_BAD_VALUE;
9317 
9318 	if (!IS_USER_ADDRESS(userStat))
9319 		return B_BAD_ADDRESS;
9320 
9321 	if (userPath) {
9322 		// path given: get the stat of the node referred to by (fd, path)
9323 		if (!IS_USER_ADDRESS(userPath))
9324 			return B_BAD_ADDRESS;
9325 
9326 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9327 		if (pathBuffer.InitCheck() != B_OK)
9328 			return B_NO_MEMORY;
9329 
9330 		char* path = pathBuffer.LockBuffer();
9331 
9332 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9333 		if (length < B_OK)
9334 			return length;
9335 		if (length >= B_PATH_NAME_LENGTH)
9336 			return B_NAME_TOO_LONG;
9337 
9338 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9339 	} else {
9340 		// no path given: get the FD and use the FD operation
9341 		struct file_descriptor* descriptor
9342 			= get_fd(get_current_io_context(false), fd);
9343 		if (descriptor == NULL)
9344 			return B_FILE_ERROR;
9345 
9346 		if (descriptor->ops->fd_read_stat)
9347 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9348 		else
9349 			status = B_UNSUPPORTED;
9350 
9351 		put_fd(descriptor);
9352 	}
9353 
9354 	if (status != B_OK)
9355 		return status;
9356 
9357 	return user_memcpy(userStat, &stat, statSize);
9358 }
9359 
9360 
9361 status_t
9362 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9363 	const struct stat* userStat, size_t statSize, int statMask)
9364 {
9365 	if (statSize > sizeof(struct stat))
9366 		return B_BAD_VALUE;
9367 
9368 	struct stat stat;
9369 
9370 	if (!IS_USER_ADDRESS(userStat)
9371 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9372 		return B_BAD_ADDRESS;
9373 
9374 	// clear additional stat fields
9375 	if (statSize < sizeof(struct stat))
9376 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9377 
9378 	status_t status;
9379 
9380 	if (userPath) {
9381 		// path given: write the stat of the node referred to by (fd, path)
9382 		if (!IS_USER_ADDRESS(userPath))
9383 			return B_BAD_ADDRESS;
9384 
9385 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9386 		if (pathBuffer.InitCheck() != B_OK)
9387 			return B_NO_MEMORY;
9388 
9389 		char* path = pathBuffer.LockBuffer();
9390 
9391 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9392 		if (length < B_OK)
9393 			return length;
9394 		if (length >= B_PATH_NAME_LENGTH)
9395 			return B_NAME_TOO_LONG;
9396 
9397 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9398 			statMask, false);
9399 	} else {
9400 		// no path given: get the FD and use the FD operation
9401 		struct file_descriptor* descriptor
9402 			= get_fd(get_current_io_context(false), fd);
9403 		if (descriptor == NULL)
9404 			return B_FILE_ERROR;
9405 
9406 		if (descriptor->ops->fd_write_stat) {
9407 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9408 				statMask);
9409 		} else
9410 			status = B_UNSUPPORTED;
9411 
9412 		put_fd(descriptor);
9413 	}
9414 
9415 	return status;
9416 }
9417 
9418 
9419 int
9420 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9421 {
9422 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9423 	if (pathBuffer.InitCheck() != B_OK)
9424 		return B_NO_MEMORY;
9425 
9426 	char* path = pathBuffer.LockBuffer();
9427 
9428 	if (userPath != NULL) {
9429 		if (!IS_USER_ADDRESS(userPath)
9430 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9431 			return B_BAD_ADDRESS;
9432 	}
9433 
9434 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9435 }
9436 
9437 
9438 ssize_t
9439 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9440 	size_t readBytes)
9441 {
9442 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9443 	if (attr < 0)
9444 		return attr;
9445 
9446 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9447 	_user_close(attr);
9448 
9449 	return bytes;
9450 }
9451 
9452 
9453 ssize_t
9454 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9455 	const void* buffer, size_t writeBytes)
9456 {
	// Try to support the BeOS-typical truncation as well as the position
	// argument: truncate only when writing from the very beginning.
9459 	int attr = attr_create(fd, NULL, attribute, type,
9460 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9461 	if (attr < 0)
9462 		return attr;
9463 
9464 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9465 	_user_close(attr);
9466 
9467 	return bytes;
9468 }
9469 
9470 
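/*!	Backs fs_stat_attr(): the attribute is opened read-only, its stat is
	read, and only the type and size end up in the returned attr_info.
	Userland sketch:

		attr_info info;
		if (fs_stat_attr(fd, "BEOS:TYPE", &info) == 0)
			printf("type %" B_PRIu32 ", %" B_PRIdOFF " bytes\n",
				info.type, info.size);
*/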
9471 status_t
9472 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9473 {
9474 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9475 	if (attr < 0)
9476 		return attr;
9477 
9478 	struct file_descriptor* descriptor
9479 		= get_fd(get_current_io_context(false), attr);
9480 	if (descriptor == NULL) {
9481 		_user_close(attr);
9482 		return B_FILE_ERROR;
9483 	}
9484 
9485 	struct stat stat;
9486 	status_t status;
9487 	if (descriptor->ops->fd_read_stat)
9488 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9489 	else
9490 		status = B_UNSUPPORTED;
9491 
9492 	put_fd(descriptor);
9493 	_user_close(attr);
9494 
9495 	if (status == B_OK) {
9496 		attr_info info;
9497 		info.type = stat.st_type;
9498 		info.size = stat.st_size;
9499 
9500 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9501 			return B_BAD_ADDRESS;
9502 	}
9503 
9504 	return status;
9505 }
9506 
9507 
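/*!	Opens, or with O_CREAT creates, the named attribute of the node
	referred to by (\a fd, \a userPath) and returns a file descriptor for
	it. Note that \a type only takes effect when the attribute is created.
*/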
9508 int
9509 _user_open_attr(int fd, const char* userPath, const char* userName,
9510 	uint32 type, int openMode)
9511 {
9512 	char name[B_FILE_NAME_LENGTH];
9513 
9514 	if (!IS_USER_ADDRESS(userName)
9515 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9516 		return B_BAD_ADDRESS;
9517 
9518 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9519 	if (pathBuffer.InitCheck() != B_OK)
9520 		return B_NO_MEMORY;
9521 
9522 	char* path = pathBuffer.LockBuffer();
9523 
9524 	if (userPath != NULL) {
9525 		if (!IS_USER_ADDRESS(userPath)
9526 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9527 			return B_BAD_ADDRESS;
9528 	}
9529 
9530 	if ((openMode & O_CREAT) != 0) {
9531 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9532 			false);
9533 	}
9534 
9535 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9536 }
9537 
9538 
9539 status_t
9540 _user_remove_attr(int fd, const char* userName)
9541 {
9542 	char name[B_FILE_NAME_LENGTH];
9543 
9544 	if (!IS_USER_ADDRESS(userName)
9545 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9546 		return B_BAD_ADDRESS;
9547 
9548 	return attr_remove(fd, name, false);
9549 }
9550 
9551 
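/*!	Renames an attribute, possibly moving it to another file. The two
	names go through heap-allocated KPath buffers, presumably to avoid
	putting two B_FILE_NAME_LENGTH sized arrays on the kernel stack.
*/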
9552 status_t
9553 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9554 	const char* userToName)
9555 {
9556 	if (!IS_USER_ADDRESS(userFromName)
9557 		|| !IS_USER_ADDRESS(userToName))
9558 		return B_BAD_ADDRESS;
9559 
9560 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9561 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9562 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9563 		return B_NO_MEMORY;
9564 
9565 	char* fromName = fromNameBuffer.LockBuffer();
9566 	char* toName = toNameBuffer.LockBuffer();
9567 
9568 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9569 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9570 		return B_BAD_ADDRESS;
9571 
9572 	return attr_rename(fromFile, fromName, toFile, toName, false);
9573 }
9574 
9575 
9576 int
9577 _user_open_index_dir(dev_t device)
9578 {
9579 	return index_dir_open(device, false);
9580 }
9581 
9582 
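/*!	Creates an attribute index on \a device, which makes attributes of
	that name queryable. Userland sketch via fs_index.h (hypothetical
	index name):

		fs_create_index(dev_for_path("/boot"), "MAIL:subject",
			B_STRING_TYPE, 0);
*/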
9583 status_t
9584 _user_create_index(dev_t device, const char* userName, uint32 type,
9585 	uint32 flags)
9586 {
9587 	char name[B_FILE_NAME_LENGTH];
9588 
9589 	if (!IS_USER_ADDRESS(userName)
9590 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9591 		return B_BAD_ADDRESS;
9592 
9593 	return index_create(device, name, type, flags, false);
9594 }
9595 
9596 
9597 status_t
9598 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9599 {
9600 	char name[B_FILE_NAME_LENGTH];
9601 	struct stat stat;
9602 	status_t status;
9603 
9604 	if (!IS_USER_ADDRESS(userName)
9605 		|| !IS_USER_ADDRESS(userStat)
9606 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9607 		return B_BAD_ADDRESS;
9608 
9609 	status = index_name_read_stat(device, name, &stat, false);
9610 	if (status == B_OK) {
9611 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9612 			return B_BAD_ADDRESS;
9613 	}
9614 
9615 	return status;
9616 }
9617 
9618 
9619 status_t
9620 _user_remove_index(dev_t device, const char* userName)
9621 {
9622 	char name[B_FILE_NAME_LENGTH];
9623 
9624 	if (!IS_USER_ADDRESS(userName)
9625 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9626 		return B_BAD_ADDRESS;
9627 
9628 	return index_remove(device, name, false);
9629 }
9630 
9631 
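/*!	Returns the current working directory of the calling team. The buffer
	size is clamped to kMaxPathLength rather than PATH_MAX, so a
	sufficiently large caller buffer can receive paths longer than
	PATH_MAX.
*/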
9632 status_t
9633 _user_getcwd(char* userBuffer, size_t size)
9634 {
9635 	if (size == 0)
9636 		return B_BAD_VALUE;
9637 	if (!IS_USER_ADDRESS(userBuffer))
9638 		return B_BAD_ADDRESS;
9639 
9640 	if (size > kMaxPathLength)
9641 		size = kMaxPathLength;
9642 
9643 	KPath pathBuffer(size);
9644 	if (pathBuffer.InitCheck() != B_OK)
9645 		return B_NO_MEMORY;
9646 
9647 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9648 
9649 	char* path = pathBuffer.LockBuffer();
9650 
9651 	status_t status = get_cwd(path, size, false);
9652 	if (status != B_OK)
9653 		return status;
9654 
9655 	// Copy back the result
9656 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9657 		return B_BAD_ADDRESS;
9658 
9659 	return status;
9660 }
9661 
9662 
9663 status_t
9664 _user_setcwd(int fd, const char* userPath)
9665 {
9666 	TRACE(("user_setcwd: path = %p\n", userPath));
9667 
9668 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9669 	if (pathBuffer.InitCheck() != B_OK)
9670 		return B_NO_MEMORY;
9671 
9672 	char* path = pathBuffer.LockBuffer();
9673 
9674 	if (userPath != NULL) {
9675 		if (!IS_USER_ADDRESS(userPath)
9676 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9677 			return B_BAD_ADDRESS;
9678 	}
9679 
9680 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9681 }
9682 
9683 
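/*!	Implements chroot(): resolves \a userPath to a vnode and installs it
	as the root of the calling team's I/O context. The swap happens under
	sIOContextRootLock; the reference obtained from path_to_vnode() is
	handed over to the context, and the old root's reference is released.
*/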
9684 status_t
9685 _user_change_root(const char* userPath)
9686 {
9687 	// only root is allowed to chroot()
9688 	if (geteuid() != 0)
9689 		return B_NOT_ALLOWED;
9690 
9691 	// alloc path buffer
9692 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9693 	if (pathBuffer.InitCheck() != B_OK)
9694 		return B_NO_MEMORY;
9695 
9696 	// copy userland path to kernel
9697 	char* path = pathBuffer.LockBuffer();
9698 	if (userPath != NULL) {
9699 		if (!IS_USER_ADDRESS(userPath)
9700 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9701 			return B_BAD_ADDRESS;
9702 	}
9703 
9704 	// get the vnode
9705 	struct vnode* vnode;
9706 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9707 	if (status != B_OK)
9708 		return status;
9709 
9710 	// set the new root
9711 	struct io_context* context = get_current_io_context(false);
9712 	mutex_lock(&sIOContextRootLock);
9713 	struct vnode* oldRoot = context->root;
9714 	context->root = vnode;
9715 	mutex_unlock(&sIOContextRootLock);
9716 
9717 	put_vnode(oldRoot);
9718 
9719 	return B_OK;
9720 }
9721 
9722 
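/*!	Opens a query on \a device and returns a file descriptor from which
	the matching entries can be read. For live queries, \a port and
	\a token identify where update messages are delivered. Userland
	sketch via fs_query.h:

		DIR* query = fs_open_query(dev_for_path("/boot"),
			"name==\"*.cpp\"", 0);
*/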
9723 int
9724 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9725 	uint32 flags, port_id port, int32 token)
9726 {
9727 	char* query;
9728 
9729 	if (device < 0 || userQuery == NULL || queryLength == 0)
9730 		return B_BAD_VALUE;
9731 
9732 	// this is a safety restriction bounding the kernel allocation below
9733 	if (queryLength >= 65536)
9734 		return B_NAME_TOO_LONG;
9735 
9736 	query = (char*)malloc(queryLength + 1);
9737 	if (query == NULL)
9738 		return B_NO_MEMORY;
9739 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9740 		free(query);
9741 		return B_BAD_ADDRESS;
9742 	}
9743 
9744 	int fd = query_open(device, query, flags, port, token, false);
9745 
9746 	free(query);
9747 	return fd;
9748 }
9749 
9750 
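// vfs_request_io.cpp is compiled as part of this translation unit on
// purpose, presumably so that it can use the file-local helpers and locks
// defined above.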
9751 #include "vfs_request_io.cpp"
9752