xref: /haiku/src/system/kernel/fs/vfs.cpp (revision f73f5d4c42a01ece688cbb57b5d332cc0f68b2c6)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2011, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
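
// A minimal usage sketch (not from the original file) of the dispatch macros
// above; example_read_stat is a hypothetical wrapper. A caller first checks
// whether the file system implements the hook, then forwards the call:
#if 0
static status_t
example_read_stat(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;

	// expands to vnode->ops->read_stat(vnode->mount->volume, vnode, stat)
	return FS_CALL(vnode, read_stat, stat);
}
#endif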
108 
109 
110 const static size_t kMaxPathLength = 65536;
111 	// The absolute maximum path length (for getcwd() - this does not depend
112 	// on PATH_MAX)
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount is mounted and it is ensured that it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount), (read) access
128 	to those fields is always safe, even without additional locking. Moreover,
129 	while mounted the mount holds a reference to the root_vnode->covers vnode,
130 	thus making the access path vnode->mount->root_vnode->covers->mount->...
131 	safe if a reference to vnode is held (note that for the root mount
132 	root_vnode->covers is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;	// immutable
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
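
// A sketch (not from the original file) of the lock-free access path the
// comment above describes: with a reference to a vnode held, the chain
// vnode->mount->root_vnode->covers may be followed safely.
#if 0
static dev_t
example_covered_device(struct vnode* vnode)
{
	// immutable while the mount exists; NULL for the root mount
	struct vnode* covers = vnode->mount->root_vnode->covers;
	return covers != NULL ? covers->device : -1;
}
#endif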
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is, locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified.
223 
224 	The thread trying to lock the lock must not hold sVnodeLock or
225 	sMountMutex.
226 */
227 static recursive_lock sMountOpLock;
228 
229 /*!	\brief Guards sVnodeTable.
230 
231 	The holder is allowed read/write access to sVnodeTable and to
232 	any unbusy vnode in that table, except for the immutable fields (device,
233 	id, private_node, mount), to which only read-only access is allowed.
234 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
235 	well as the busy, removed, and unused flags, and the vnode's type, may also
236 	be written when holding a read lock to sVnodeLock *and* having the vnode
237 	locked. Write access to covered_by and covers requires write locking
238 	sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not hold this lock when calling create_sem(), as this might call
242 	vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
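
// A sketch (not from the original file) of the locking protocol described
// above for writing one of a vnode's mutable flags: read-lock sVnodeLock,
// then lock the vnode itself (cf. dec_vnode_ref_count() below).
#if 0
static void
example_mark_removed(struct vnode* vnode)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	vnode->SetRemoved(true);
		// both locks are released when the lockers go out of scope
}
#endif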
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
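
// Typical VNodePutter usage, shown as a sketch (not from the original file;
// example_fsync_vnode is hypothetical): the reference obtained from
// get_vnode() is released automatically on every exit path.
#if 0
static status_t
example_fsync_vnode(dev_t device, ino_t nodeID)
{
	struct vnode* vnode;
	status_t status = get_vnode(device, nodeID, &vnode, true, false);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);

	if (!HAS_FS_CALL(vnode, fsync))
		return B_UNSUPPORTED;
			// the putter's destructor calls put_vnode() here, too

	return FS_CALL_NO_PARAMS(vnode, fsync);
}
#endif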
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
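
// Typical FDCloser usage, shown as a sketch (not from the original file;
// example_open_vnode is hypothetical): a freshly opened descriptor is closed
// on every error path and Detach()ed on success.
#if 0
static int
example_open_vnode(struct vnode* vnode, int openMode, bool kernel)
{
	int fd = open_vnode(vnode, openMode, kernel);
	if (fd < 0)
		return fd;

	FDCloser fdCloser(fd, kernel);

	// ... further setup that may simply `return errorCode;` on failure ...

	return fdCloser.Detach();
		// success -- leave the descriptor open for the caller
}
#endif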
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags,
549 		generic_size_t bytesRequested, status_t status,
550 		generic_size_t bytesTransferred)
551 		:
552 		fVnode(vnode),
553 		fMountID(vnode->mount->id),
554 		fNodeID(vnode->id),
555 		fCookie(cookie),
556 		fPos(pos),
557 		fCount(count),
558 		fFlags(flags),
559 		fBytesRequested(bytesRequested),
560 		fStatus(status),
561 		fBytesTransferred(bytesTransferred)
562 	{
563 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
564 			sizeof(generic_io_vec) * count, false);
565 	}
566 
567 	void AddDump(TraceOutput& out, const char* mode)
568 	{
569 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
570 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
571 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
572 			(uint64)fBytesRequested);
573 
574 		if (fVecs != NULL) {
575 			for (uint32 i = 0; i < fCount; i++) {
576 				if (i > 0)
577 					out.Print(", ");
578 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
579 					(uint64)fVecs[i].length);
580 			}
581 		}
582 
583 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
584 			"transferred: %" B_PRIu64, fFlags, fStatus,
585 			(uint64)fBytesTransferred);
586 	}
587 
588 protected:
589 	struct vnode*	fVnode;
590 	dev_t			fMountID;
591 	ino_t			fNodeID;
592 	void*			fCookie;
593 	off_t			fPos;
594 	generic_io_vec*	fVecs;
595 	uint32			fCount;
596 	uint32			fFlags;
597 	generic_size_t	fBytesRequested;
598 	status_t		fStatus;
599 	generic_size_t	fBytesTransferred;
600 };
601 
602 
603 class ReadPages : public PagesIOTraceEntry {
604 public:
605 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
606 		const generic_io_vec* vecs, uint32 count, uint32 flags,
607 		generic_size_t bytesRequested, status_t status,
608 		generic_size_t bytesTransferred)
609 		:
610 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
611 			bytesRequested, status, bytesTransferred)
612 	{
613 		Initialized();
614 	}
615 
616 	virtual void AddDump(TraceOutput& out)
617 	{
618 		PagesIOTraceEntry::AddDump(out, "read");
619 	}
620 };
621 
622 
623 class WritePages : public PagesIOTraceEntry {
624 public:
625 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
626 		const generic_io_vec* vecs, uint32 count, uint32 flags,
627 		generic_size_t bytesRequested, status_t status,
628 		generic_size_t bytesTransferred)
629 		:
630 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
631 			bytesRequested, status, bytesTransferred)
632 	{
633 		Initialized();
634 	}
635 
636 	virtual void AddDump(TraceOutput& out)
637 	{
638 		PagesIOTraceEntry::AddDump(out, "write");
639 	}
640 };
641 
642 }	// namespace VFSPagesIOTracing
643 
644 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
645 #else
646 #	define TPIO(x) ;
647 #endif	// VFS_PAGES_IO_TRACING
648 
649 
650 static int
651 mount_compare(void* _m, const void* _key)
652 {
653 	struct fs_mount* mount = (fs_mount*)_m;
654 	const dev_t* id = (dev_t*)_key;
655 
656 	if (mount->id == *id)
657 		return 0;
658 
659 	return -1;
660 }
661 
662 
663 static uint32
664 mount_hash(void* _m, const void* _key, uint32 range)
665 {
666 	struct fs_mount* mount = (fs_mount*)_m;
667 	const dev_t* id = (dev_t*)_key;
668 
669 	if (mount)
670 		return mount->id % range;
671 
672 	return (uint32)*id % range;
673 }
674 
675 
676 /*! Finds the mounted device (the fs_mount structure) with the given ID.
677 	Note, you must hold the sMountMutex lock when you call this function.
678 */
679 static struct fs_mount*
680 find_mount(dev_t id)
681 {
682 	ASSERT_LOCKED_MUTEX(&sMountMutex);
683 
684 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
685 }
686 
687 
688 static status_t
689 get_mount(dev_t id, struct fs_mount** _mount)
690 {
691 	struct fs_mount* mount;
692 
693 	ReadLocker nodeLocker(sVnodeLock);
694 	MutexLocker mountLocker(sMountMutex);
695 
696 	mount = find_mount(id);
697 	if (mount == NULL)
698 		return B_BAD_VALUE;
699 
700 	struct vnode* rootNode = mount->root_vnode;
701 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
702 		// might have been called during a mount/unmount operation
703 		return B_BUSY;
704 	}
705 
706 	inc_vnode_ref_count(mount->root_vnode);
707 	*_mount = mount;
708 	return B_OK;
709 }
710 
711 
712 static void
713 put_mount(struct fs_mount* mount)
714 {
715 	if (mount)
716 		put_vnode(mount->root_vnode);
717 }
718 
719 
720 /*!	Tries to open the specified file system module.
721 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
722 	Returns a pointer to file system module interface, or NULL if it
723 	could not open the module.
724 */
725 static file_system_module_info*
726 get_file_system(const char* fsName)
727 {
728 	char name[B_FILE_NAME_LENGTH];
729 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
730 		// construct module name if we didn't get one
731 		// (we currently support only one API)
732 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
733 		fsName = NULL;
734 	}
735 
736 	file_system_module_info* info;
737 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
738 		return NULL;
739 
740 	return info;
741 }
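
// An illustration (not from the original file) of the two name forms the
// function above accepts:
#if 0
	file_system_module_info* info = get_file_system("bfs");
		// short form -- resolved to the module "file_systems/bfs/v1"
	file_system_module_info* same = get_file_system("file_systems/bfs/v1");
		// full module name -- used as-is
#endif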
742 
743 
744 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
745 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
746 	The name is allocated for you, and you have to free() it when you're
747 	done with it.
748 	Returns NULL if the required memory is not available.
749 */
750 static char*
751 get_file_system_name(const char* fsName)
752 {
753 	const size_t length = strlen("file_systems/");
754 
755 	if (strncmp(fsName, "file_systems/", length)) {
756 		// the name already seems to be the module's file name
757 		return strdup(fsName);
758 	}
759 
760 	fsName += length;
761 	const char* end = strchr(fsName, '/');
762 	if (end == NULL) {
763 		// this doesn't seem to be a valid name, but well...
764 		return strdup(fsName);
765 	}
766 
767 	// cut off the trailing /v1
768 
769 	char* name = (char*)malloc(end + 1 - fsName);
770 	if (name == NULL)
771 		return NULL;
772 
773 	strlcpy(name, fsName, end + 1 - fsName);
774 	return name;
775 }
776 
777 
778 /*!	Accepts a list of file system names separated by a colon, one for each
779 	layer and returns the file system name for the specified layer.
780 	The name is allocated for you, and you have to free() it when you're
781 	done with it.
782 	Returns NULL if the required memory is not available or if there is no
783 	name for the specified layer.
784 */
785 static char*
786 get_file_system_name_for_layer(const char* fsNames, int32 layer)
787 {
788 	while (layer >= 0) {
789 		const char* end = strchr(fsNames, ':');
790 		if (end == NULL) {
791 			if (layer == 0)
792 				return strdup(fsNames);
793 			return NULL;
794 		}
795 
796 		if (layer == 0) {
797 			size_t length = end - fsNames + 1;
798 			char* result = (char*)malloc(length);
799 			if (result != NULL)
				strlcpy(result, fsNames, length);
800 			return result;
801 		}
802 
803 		fsNames = end + 1;
804 		layer--;
805 	}
806 
807 	return NULL;
808 }
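
// An illustration (not from the original file; the layer stack is made up)
// of how the function above splits colon-separated layered names:
#if 0
	const char* names = "write_overlay:attribute_overlay:bfs";
	char* layer0 = get_file_system_name_for_layer(names, 0);
		// -> "write_overlay"
	char* layer2 = get_file_system_name_for_layer(names, 2);
		// -> "bfs"
	char* layer3 = get_file_system_name_for_layer(names, 3);
		// -> NULL; the non-NULL results must be free()d
#endif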
809 
810 
811 static int
812 vnode_compare(void* _vnode, const void* _key)
813 {
814 	struct vnode* vnode = (struct vnode*)_vnode;
815 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
816 
817 	if (vnode->device == key->device && vnode->id == key->vnode)
818 		return 0;
819 
820 	return -1;
821 }
822 
823 
824 static uint32
825 vnode_hash(void* _vnode, const void* _key, uint32 range)
826 {
827 	struct vnode* vnode = (struct vnode*)_vnode;
828 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
829 
830 #define VHASH(mountid, vnodeid) \
831 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
832 
833 	if (vnode != NULL)
834 		return VHASH(vnode->device, vnode->id) % range;
835 
836 	return VHASH(key->device, key->vnode) % range;
837 
838 #undef VHASH
839 }
840 
841 
842 static void
843 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
844 {
845 	RecursiveLocker _(mount->rlock);
846 	mount->vnodes.Add(vnode);
847 }
848 
849 
850 static void
851 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
852 {
853 	RecursiveLocker _(mount->rlock);
854 	mount->vnodes.Remove(vnode);
855 }
856 
857 
858 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
859 
860 	The caller must hold the sVnodeLock (read lock at least).
861 
862 	\param mountID the mount ID.
863 	\param vnodeID the node ID.
864 
865 	\return The vnode structure, if it was found in the hash table, \c NULL
866 			otherwise.
867 */
868 static struct vnode*
869 lookup_vnode(dev_t mountID, ino_t vnodeID)
870 {
871 	struct vnode_hash_key key;
872 
873 	key.device = mountID;
874 	key.vnode = vnodeID;
875 
876 	return (vnode*)hash_lookup(sVnodeTable, &key);
877 }
878 
879 
880 /*!	Creates a new vnode with the given mount and node ID.
881 	If the node already exists, it is returned instead and no new node is
882 	created. In either case -- but not if an error occurs -- the function write
883 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
884 	error the lock is not held on return.
885 
886 	\param mountID The mount ID.
887 	\param vnodeID The vnode ID.
888 	\param _vnode Will be set to the new vnode on success.
889 	\param _nodeCreated Will be set to \c true when the returned vnode has
890 		been newly created, \c false when it already existed. Will not be
891 		changed on error.
892 	\return \c B_OK, when the vnode was successfully created and inserted or
893 		a node with the given ID was found, \c B_NO_MEMORY or
894 		\c B_ENTRY_NOT_FOUND on error.
895 */
896 static status_t
897 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
898 	bool& _nodeCreated)
899 {
900 	FUNCTION(("create_new_vnode_and_lock()\n"));
901 
902 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
903 	if (vnode == NULL)
904 		return B_NO_MEMORY;
905 
906 	// initialize basic values
907 	memset(vnode, 0, sizeof(struct vnode));
908 	vnode->device = mountID;
909 	vnode->id = vnodeID;
910 	vnode->ref_count = 1;
911 	vnode->SetBusy(true);
912 
913 	// look up the node -- it might have been added by someone else in the
914 	// meantime
915 	rw_lock_write_lock(&sVnodeLock);
916 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
917 	if (existingVnode != NULL) {
918 		free(vnode);
919 		_vnode = existingVnode;
920 		_nodeCreated = false;
921 		return B_OK;
922 	}
923 
924 	// get the mount structure
925 	mutex_lock(&sMountMutex);
926 	vnode->mount = find_mount(mountID);
927 	if (!vnode->mount || vnode->mount->unmounting) {
928 		mutex_unlock(&sMountMutex);
929 		rw_lock_write_unlock(&sVnodeLock);
930 		free(vnode);
931 		return B_ENTRY_NOT_FOUND;
932 	}
933 
934 	// add the vnode to the mount's node list and the hash table
935 	hash_insert(sVnodeTable, vnode);
936 	add_vnode_to_mount_list(vnode, vnode->mount);
937 
938 	mutex_unlock(&sMountMutex);
939 
940 	_vnode = vnode;
941 	_nodeCreated = true;
942 
943 	// keep the vnode lock locked
944 	return B_OK;
945 }
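
// A sketch (not from the original file) of the calling contract documented
// above: on B_OK the caller holds the write lock on sVnodeLock -- whether
// the node was created or already existed -- and must release it.
#if 0
	struct vnode* vnode;
	bool nodeCreated;
	status_t status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
		nodeCreated);
	if (status == B_OK) {
		// ... initialize the new node, or reuse the existing one ...
		rw_lock_write_unlock(&sVnodeLock);
	}
#endif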
946 
947 
948 /*!	Frees the vnode and all resources it has acquired, and removes
949 	it from the vnode hash as well as from its mount structure.
950 	Will also make sure that any cache modifications are written back.
951 */
952 static void
953 free_vnode(struct vnode* vnode, bool reenter)
954 {
955 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
956 		vnode);
957 
958 	// write back any changes in this vnode's cache -- but only
959 	// if the vnode won't be deleted, in which case the changes
960 	// will be discarded
961 
962 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
963 		FS_CALL_NO_PARAMS(vnode, fsync);
964 
965 	// Note: If this vnode has a cache attached, there will still be two
966 	// references to that cache at this point. The last one belongs to the vnode
967 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
968 	// cache. Each but the last reference to a cache also includes a reference
969 	// to the vnode. The file cache, however, released its reference (cf.
970 	// file_cache_create()), so that this vnode's ref count has the chance to
971 	// ever drop to 0. Deleting the file cache now, will cause the next to last
972 	// cache reference to be released, which will also release a (no longer
973 	// existing) vnode reference. To avoid problems, we set the vnode's ref
974 	// count, so that it will neither become negative nor 0.
975 	vnode->ref_count = 2;
976 
977 	if (!vnode->IsUnpublished()) {
978 		if (vnode->IsRemoved())
979 			FS_CALL(vnode, remove_vnode, reenter);
980 		else
981 			FS_CALL(vnode, put_vnode, reenter);
982 	}
983 
984 	// If the vnode has a VMCache attached, make sure that it won't try to get
985 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
986 	// long as the vnode is busy and in the hash, that won't happen, but as
987 	// soon as we've removed it from the hash, it could reload the vnode -- with
988 	// a new cache attached!
989 	if (vnode->cache != NULL)
990 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
991 
992 	// The file system has removed the resources of the vnode now, so we can
993 	// make it available again (by removing the busy vnode from the hash).
994 	rw_lock_write_lock(&sVnodeLock);
995 	hash_remove(sVnodeTable, vnode);
996 	rw_lock_write_unlock(&sVnodeLock);
997 
998 	// if we have a VMCache attached, remove it
999 	if (vnode->cache)
1000 		vnode->cache->ReleaseRef();
1001 
1002 	vnode->cache = NULL;
1003 
1004 	remove_vnode_from_mount_list(vnode, vnode->mount);
1005 
1006 	free(vnode);
1007 }
1008 
1009 
1010 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1011 	if the counter dropped to 0.
1012 
1013 	The caller must, of course, own a reference to the vnode to call this
1014 	function.
1015 	The caller must not hold the sVnodeLock or the sMountMutex.
1016 
1017 	\param vnode the vnode.
1018 	\param alwaysFree don't move this vnode into the unused list, but really
1019 		   delete it if possible.
1020 	\param reenter \c true, if this function is called (indirectly) from within
1021 		   a file system. This will be passed to file system hooks only.
1022 	\return \c B_OK, if everything went fine, an error code otherwise.
1023 */
1024 static status_t
1025 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1026 {
1027 	ReadLocker locker(sVnodeLock);
1028 	AutoLocker<Vnode> nodeLocker(vnode);
1029 
1030 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1031 
1032 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1033 
1034 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1035 		vnode->ref_count));
1036 
1037 	if (oldRefCount != 1)
1038 		return B_OK;
1039 
1040 	if (vnode->IsBusy())
1041 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1042 
1043 	bool freeNode = false;
1044 	bool freeUnusedNodes = false;
1045 
1046 	// Just insert the vnode into an unused list if we don't need
1047 	// to delete it
1048 	if (vnode->IsRemoved() || alwaysFree) {
1049 		vnode_to_be_freed(vnode);
1050 		vnode->SetBusy(true);
1051 		freeNode = true;
1052 	} else
1053 		freeUnusedNodes = vnode_unused(vnode);
1054 
1055 	nodeLocker.Unlock();
1056 	locker.Unlock();
1057 
1058 	if (freeNode)
1059 		free_vnode(vnode, reenter);
1060 	else if (freeUnusedNodes)
1061 		free_unused_vnodes();
1062 
1063 	return B_OK;
1064 }
1065 
1066 
1067 /*!	\brief Increments the reference counter of the given vnode.
1068 
1069 	The caller must make sure that the node isn't deleted while this function
1070 	is called. This can be done either:
1071 	- by ensuring that a reference to the node exists and remains in existence,
1072 	  or
1073 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1074 	  or by holding sVnodeLock write locked.
1075 
1076 	In the second case the caller is responsible for dealing with the ref count
1077 	0 -> 1 transition. That is 1. this function must not be invoked when the
1078 	node is busy in the first place and 2. vnode_used() must be called for the
1079 	node.
1080 
1081 	\param vnode the vnode.
1082 */
1083 static void
1084 inc_vnode_ref_count(struct vnode* vnode)
1085 {
1086 	atomic_add(&vnode->ref_count, 1);
1087 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1088 		vnode->ref_count));
1089 }
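
// A sketch (not from the original file) of the second case described above:
// resurrecting an unused vnode (ref count 0 -> 1) while holding sVnodeLock
// read-locked and the vnode locked, as get_vnode() below does.
#if 0
	if (vnode->ref_count == 0) {
		// the vnode has been unused before -- take it off the unused list
		vnode_used(vnode);
	}
	inc_vnode_ref_count(vnode);
#endif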
1090 
1091 
1092 static bool
1093 is_special_node_type(int type)
1094 {
1095 	// at the moment only FIFOs are supported
1096 	return S_ISFIFO(type);
1097 }
1098 
1099 
1100 static status_t
1101 create_special_sub_node(struct vnode* vnode, uint32 flags)
1102 {
1103 	if (S_ISFIFO(vnode->Type()))
1104 		return create_fifo_vnode(vnode->mount->volume, vnode);
1105 
1106 	return B_BAD_VALUE;
1107 }
1108 
1109 
1110 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1111 
1112 	If the node is not yet in memory, it will be loaded.
1113 
1114 	The caller must not hold the sVnodeLock or the sMountMutex.
1115 
1116 	\param mountID the mount ID.
1117 	\param vnodeID the node ID.
1118 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1119 		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function may wait (and retry) while the
		   vnode is busy, \c false to fail immediately with \c B_BUSY.
1120 	\param reenter \c true, if this function is called (indirectly) from within
1121 		   a file system.
1122 	\return \c B_OK, if everything went fine, an error code otherwise.
1123 */
1124 static status_t
1125 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1126 	int reenter)
1127 {
1128 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1129 		mountID, vnodeID, _vnode));
1130 
1131 	rw_lock_read_lock(&sVnodeLock);
1132 
1133 	int32 tries = 2000;
1134 		// try for 10 secs
1135 restart:
1136 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1137 	AutoLocker<Vnode> nodeLocker(vnode);
1138 
1139 	if (vnode && vnode->IsBusy()) {
1140 		nodeLocker.Unlock();
1141 		rw_lock_read_unlock(&sVnodeLock);
1142 		if (!canWait || --tries < 0) {
1143 			// vnode doesn't seem to become unbusy
1144 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is not becoming unbusy!\n",
1145 				mountID, vnodeID);
1146 			return B_BUSY;
1147 		}
1148 		snooze(5000); // 5 ms
1149 		rw_lock_read_lock(&sVnodeLock);
1150 		goto restart;
1151 	}
1152 
1153 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1154 
1155 	status_t status;
1156 
1157 	if (vnode) {
1158 		if (vnode->ref_count == 0) {
1159 			// this vnode has been unused before
1160 			vnode_used(vnode);
1161 		}
1162 		inc_vnode_ref_count(vnode);
1163 
1164 		nodeLocker.Unlock();
1165 		rw_lock_read_unlock(&sVnodeLock);
1166 	} else {
1167 		// we need to create a new vnode and read it in
1168 		rw_lock_read_unlock(&sVnodeLock);
1169 			// unlock -- create_new_vnode_and_lock() write-locks on success
1170 		bool nodeCreated;
1171 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1172 			nodeCreated);
1173 		if (status != B_OK)
1174 			return status;
1175 
1176 		if (!nodeCreated) {
1177 			rw_lock_read_lock(&sVnodeLock);
1178 			rw_lock_write_unlock(&sVnodeLock);
1179 			goto restart;
1180 		}
1181 
1182 		rw_lock_write_unlock(&sVnodeLock);
1183 
1184 		int type;
1185 		uint32 flags;
1186 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1187 			&flags, reenter);
1188 		if (status == B_OK && vnode->private_node == NULL)
1189 			status = B_BAD_VALUE;
1190 
1191 		bool gotNode = status == B_OK;
1192 		bool publishSpecialSubNode = false;
1193 		if (gotNode) {
1194 			vnode->SetType(type);
1195 			publishSpecialSubNode = is_special_node_type(type)
1196 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1197 		}
1198 
1199 		if (gotNode && publishSpecialSubNode)
1200 			status = create_special_sub_node(vnode, flags);
1201 
1202 		if (status != B_OK) {
1203 			if (gotNode)
1204 				FS_CALL(vnode, put_vnode, reenter);
1205 
1206 			rw_lock_write_lock(&sVnodeLock);
1207 			hash_remove(sVnodeTable, vnode);
1208 			remove_vnode_from_mount_list(vnode, vnode->mount);
1209 			rw_lock_write_unlock(&sVnodeLock);
1210 
1211 			free(vnode);
1212 			return status;
1213 		}
1214 
1215 		rw_lock_read_lock(&sVnodeLock);
1216 		vnode->Lock();
1217 
1218 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1219 		vnode->SetBusy(false);
1220 
1221 		vnode->Unlock();
1222 		rw_lock_read_unlock(&sVnodeLock);
1223 	}
1224 
1225 	TRACE(("get_vnode: returning %p\n", vnode));
1226 
1227 	*_vnode = vnode;
1228 	return B_OK;
1229 }
1230 
1231 
1232 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1233 	if the counter dropped to 0.
1234 
1235 	The caller must, of course, own a reference to the vnode to call this
1236 	function.
1237 	The caller must not hold the sVnodeLock or the sMountMutex.
1238 
1239 	\param vnode the vnode.
1240 */
1241 static inline void
1242 put_vnode(struct vnode* vnode)
1243 {
1244 	dec_vnode_ref_count(vnode, false, false);
1245 }
1246 
1247 
1248 static void
1249 free_unused_vnodes(int32 level)
1250 {
1251 	unused_vnodes_check_started();
1252 
1253 	if (level == B_NO_LOW_RESOURCE) {
1254 		unused_vnodes_check_done();
1255 		return;
1256 	}
1257 
1258 	flush_hot_vnodes();
1259 
1260 	// determine how many nodes to free
1261 	uint32 count = 1;
1262 	{
1263 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1264 
1265 		switch (level) {
1266 			case B_LOW_RESOURCE_NOTE:
1267 				count = sUnusedVnodes / 100;
1268 				break;
1269 			case B_LOW_RESOURCE_WARNING:
1270 				count = sUnusedVnodes / 10;
1271 				break;
1272 			case B_LOW_RESOURCE_CRITICAL:
1273 				count = sUnusedVnodes;
1274 				break;
1275 		}
1276 
1277 		if (count > sUnusedVnodes)
1278 			count = sUnusedVnodes;
1279 	}
1280 
1281 	// Write back the modified pages of some unused vnodes and free them.
1282 
1283 	for (uint32 i = 0; i < count; i++) {
1284 		ReadLocker vnodesReadLocker(sVnodeLock);
1285 
1286 		// get the first node
1287 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1288 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1289 			&sUnusedVnodeList);
1290 		unusedVnodesLocker.Unlock();
1291 
1292 		if (vnode == NULL)
1293 			break;
1294 
1295 		// lock the node
1296 		AutoLocker<Vnode> nodeLocker(vnode);
1297 
1298 		// Check whether the node is still unused -- since we only append to the
1299 		// tail of the unused queue, the vnode should still be at its head.
1300 		// Alternatively we could check its ref count for 0 and its busy flag,
1301 		// but if the node is no longer at the head of the queue, it means it
1302 		// has been touched in the meantime, i.e. it is no longer the least
1303 		// recently used unused vnode and we rather don't free it.
1304 		unusedVnodesLocker.Lock();
1305 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1306 			continue;
1307 		unusedVnodesLocker.Unlock();
1308 
1309 		ASSERT(!vnode->IsBusy());
1310 
1311 		// grab a reference
1312 		inc_vnode_ref_count(vnode);
1313 		vnode_used(vnode);
1314 
1315 		// write back changes and free the node
1316 		nodeLocker.Unlock();
1317 		vnodesReadLocker.Unlock();
1318 
1319 		if (vnode->cache != NULL)
1320 			vnode->cache->WriteModified();
1321 
1322 		dec_vnode_ref_count(vnode, true, false);
1323 			// this should free the vnode when it's still unused
1324 	}
1325 
1326 	unused_vnodes_check_done();
1327 }
1328 
1329 
1330 /*!	Gets the vnode the given vnode is covering.
1331 
1332 	The caller must have \c sVnodeLock read-locked at least.
1333 
1334 	The function returns a reference to the retrieved vnode (if any), which
1335 	the caller is responsible for releasing.
1336 
1337 	\param vnode The vnode whose covered node shall be returned.
1338 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1339 		vnode.
1340 */
1341 static inline Vnode*
1342 get_covered_vnode_locked(Vnode* vnode)
1343 {
1344 	if (Vnode* coveredNode = vnode->covers) {
1345 		while (coveredNode->covers != NULL)
1346 			coveredNode = coveredNode->covers;
1347 
1348 		inc_vnode_ref_count(coveredNode);
1349 		return coveredNode;
1350 	}
1351 
1352 	return NULL;
1353 }
1354 
1355 
1356 /*!	Gets the vnode the given vnode is covering.
1357 
1358 	The caller must not hold \c sVnodeLock. Note that this implies a race
1359 	condition, since the situation can change at any time.
1360 
1361 	The function returns a reference to the retrieved vnode (if any), which
1362 	the caller is responsible for releasing.
1363 
1364 	\param vnode The vnode whose covered node shall be returned.
1365 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1366 		vnode.
1367 */
1368 static inline Vnode*
1369 get_covered_vnode(Vnode* vnode)
1370 {
1371 	if (!vnode->IsCovering())
1372 		return NULL;
1373 
1374 	ReadLocker vnodeReadLocker(sVnodeLock);
1375 	return get_covered_vnode_locked(vnode);
1376 }
1377 
1378 
1379 /*!	Gets the vnode the given vnode is covered by.
1380 
1381 	The caller must have \c sVnodeLock read-locked at least.
1382 
1383 	The function returns a reference to the retrieved vnode (if any), which
1384 	the caller is responsible for releasing.
1385 
1386 	\param vnode The vnode whose covering node shall be returned.
1387 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1388 		any vnode.
1389 */
1390 static Vnode*
1391 get_covering_vnode_locked(Vnode* vnode)
1392 {
1393 	if (Vnode* coveringNode = vnode->covered_by) {
1394 		while (coveringNode->covered_by != NULL)
1395 			coveringNode = coveringNode->covered_by;
1396 
1397 		inc_vnode_ref_count(coveringNode);
1398 		return coveringNode;
1399 	}
1400 
1401 	return NULL;
1402 }
1403 
1404 
1405 /*!	Gets the vnode the given vnode is covered by.
1406 
1407 	The caller must not hold \c sVnodeLock. Note that this implies a race
1408 	condition, since the situation can change at any time.
1409 
1410 	The function returns a reference to the retrieved vnode (if any), which
1411 	the caller is responsible for releasing.
1412 
1413 	\param vnode The vnode whose covering node shall be returned.
1414 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1415 		any vnode.
1416 */
1417 static inline Vnode*
1418 get_covering_vnode(Vnode* vnode)
1419 {
1420 	if (!vnode->IsCovered())
1421 		return NULL;
1422 
1423 	ReadLocker vnodeReadLocker(sVnodeLock);
1424 	return get_covering_vnode_locked(vnode);
1425 }
1426 
1427 
1428 static void
1429 free_unused_vnodes()
1430 {
1431 	free_unused_vnodes(
1432 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1433 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1434 }
1435 
1436 
1437 static void
1438 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1439 {
1440 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1441 
1442 	free_unused_vnodes(level);
1443 }
1444 
1445 
1446 static inline void
1447 put_advisory_locking(struct advisory_locking* locking)
1448 {
1449 	release_sem(locking->lock);
1450 }
1451 
1452 
1453 /*!	Returns the advisory_locking object of the \a vnode in case it
1454 	has one, and locks it.
1455 	You have to call put_advisory_locking() when you're done with
1456 	it.
1457 	Note, you must not have the vnode mutex locked when calling
1458 	this function.
1459 */
1460 static struct advisory_locking*
1461 get_advisory_locking(struct vnode* vnode)
1462 {
1463 	rw_lock_read_lock(&sVnodeLock);
1464 	vnode->Lock();
1465 
1466 	struct advisory_locking* locking = vnode->advisory_locking;
1467 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1468 
1469 	vnode->Unlock();
1470 	rw_lock_read_unlock(&sVnodeLock);
1471 
1472 	if (lock >= 0)
1473 		lock = acquire_sem(lock);
1474 	if (lock < 0) {
1475 		// This means the locking has been deleted in the meantime
1476 		// or had never existed in the first place - otherwise, we
1477 		// would get the lock at some point.
1478 		return NULL;
1479 	}
1480 
1481 	return locking;
1482 }
1483 
1484 
1485 /*!	Creates a locked advisory_locking object, and attaches it to the
1486 	given \a vnode.
1487 	Returns B_OK in case of success - even if the vnode got such an
1488 	object from someone else in the meantime, you'll still get this
1489 	one locked.
1490 */
1491 static status_t
1492 create_advisory_locking(struct vnode* vnode)
1493 {
1494 	if (vnode == NULL)
1495 		return B_FILE_ERROR;
1496 
1497 	ObjectDeleter<advisory_locking> lockingDeleter;
1498 	struct advisory_locking* locking = NULL;
1499 
1500 	while (get_advisory_locking(vnode) == NULL) {
1501 		// no locking object set on the vnode yet, create one
1502 		if (locking == NULL) {
1503 			locking = new(std::nothrow) advisory_locking;
1504 			if (locking == NULL)
1505 				return B_NO_MEMORY;
1506 			lockingDeleter.SetTo(locking);
1507 
1508 			locking->wait_sem = create_sem(0, "advisory lock");
1509 			if (locking->wait_sem < 0)
1510 				return locking->wait_sem;
1511 
1512 			locking->lock = create_sem(0, "advisory locking");
1513 			if (locking->lock < 0)
1514 				return locking->lock;
1515 		}
1516 
1517 		// set our newly created locking object
1518 		ReadLocker _(sVnodeLock);
1519 		AutoLocker<Vnode> nodeLocker(vnode);
1520 		if (vnode->advisory_locking == NULL) {
1521 			vnode->advisory_locking = locking;
1522 			lockingDeleter.Detach();
1523 			return B_OK;
1524 		}
1525 	}
1526 
1527 	// The vnode already had a locking object. That's just as well.
1528 
1529 	return B_OK;
1530 }
1531 
1532 
1533 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1534 	with the advisory_lock \a lock.
1535 */
1536 static bool
1537 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1538 {
1539 	if (flock == NULL)
1540 		return true;
1541 
1542 	return lock->start <= flock->l_start - 1 + flock->l_len
1543 		&& lock->end >= flock->l_start;
1544 }
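
// A worked example (not from the original file) for the test above: a flock
// with l_start == 100 and l_len == 10 covers the inclusive byte range
// [100, 109], so an advisory_lock intersects it iff
// lock->start <= 109 && lock->end >= 100:
//   lock [90, 99]   -> 99 >= 100 is false       -> no overlap
//   lock [95, 104]  -> 95 <= 109 && 104 >= 100  -> overlap
//   lock [110, 120] -> 110 <= 109 is false      -> no overlap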
1545 
1546 
1547 /*!	Tests whether acquiring a lock would block.
1548 */
1549 static status_t
1550 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1551 {
1552 	// save the requested lock type -- it is overwritten below
	const int type = flock->l_type;
	flock->l_type = F_UNLCK;
1553 
1554 	struct advisory_locking* locking = get_advisory_locking(vnode);
1555 	if (locking == NULL)
1556 		return B_OK;
1557 
1558 	team_id team = team_get_current_team_id();
1559 
1560 	LockList::Iterator iterator = locking->locks.GetIterator();
1561 	while (iterator.HasNext()) {
1562 		struct advisory_lock* lock = iterator.Next();
1563 
1564 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1565 			// locks do overlap
1566 			if (type != F_RDLCK || !lock->shared) {
1567 				// collision
1568 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1569 				flock->l_whence = SEEK_SET;
1570 				flock->l_start = lock->start;
1571 				flock->l_len = lock->end - lock->start + 1;
1572 				flock->l_pid = lock->team;
1573 				break;
1574 			}
1575 		}
1576 	}
1577 
1578 	put_advisory_locking(locking);
1579 	return B_OK;
1580 }
1581 
1582 
1583 /*!	Removes the specified lock, or all locks of the calling team
1584 	if \a flock is NULL.
1585 */
1586 static status_t
1587 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1588 {
1589 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1590 
1591 	struct advisory_locking* locking = get_advisory_locking(vnode);
1592 	if (locking == NULL)
1593 		return B_OK;
1594 
1595 	// TODO: use the thread ID instead??
1596 	team_id team = team_get_current_team_id();
1597 	pid_t session = thread_get_current_thread()->team->session_id;
1598 
1599 	// find matching lock entries
1600 
1601 	LockList::Iterator iterator = locking->locks.GetIterator();
1602 	while (iterator.HasNext()) {
1603 		struct advisory_lock* lock = iterator.Next();
1604 		bool removeLock = false;
1605 
1606 		if (lock->session == session)
1607 			removeLock = true;
1608 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1609 			bool endsBeyond = false;
1610 			bool startsBefore = false;
1611 			if (flock != NULL) {
1612 				startsBefore = lock->start < flock->l_start;
1613 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1614 			}
1615 
1616 			if (!startsBefore && !endsBeyond) {
1617 				// lock is completely contained in flock
1618 				removeLock = true;
1619 			} else if (startsBefore && !endsBeyond) {
1620 				// cut the end of the lock
1621 				lock->end = flock->l_start - 1;
1622 			} else if (!startsBefore && endsBeyond) {
1623 				// cut the start of the lock
1624 				lock->start = flock->l_start + flock->l_len;
1625 			} else {
1626 				// divide the lock into two locks
1627 				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
1628 				if (secondLock == NULL) {
1629 					// TODO: we should probably revert the locks we already
1630 					// changed... (ie. allocate upfront)
1631 					put_advisory_locking(locking);
1632 					return B_NO_MEMORY;
1633 				}
1634 
1635 				secondLock->team = lock->team;
1636 				secondLock->session = lock->session;
1637 				// values must already be normalized when getting here;
1638 				// copy the original end before truncating the first lock
1639 				secondLock->start = flock->l_start + flock->l_len;
1640 				secondLock->end = lock->end;
1641 				secondLock->shared = lock->shared;
1642 
1643 				lock->end = flock->l_start - 1;
1644 				locking->locks.Add(secondLock);
1645 			}
1646 		}
1647 
1648 		if (removeLock) {
1649 			// this lock is no longer used
1650 			iterator.Remove();
1651 			free(lock);
1652 		}
1653 	}
1654 
1655 	bool removeLocking = locking->locks.IsEmpty();
1656 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1657 
1658 	put_advisory_locking(locking);
1659 
1660 	if (removeLocking) {
1661 		// We can remove the whole advisory locking structure; it's no
1662 		// longer used
1663 		locking = get_advisory_locking(vnode);
1664 		if (locking != NULL) {
1665 			ReadLocker locker(sVnodeLock);
1666 			AutoLocker<Vnode> nodeLocker(vnode);
1667 
1668 			// the locking could have been changed in the mean time
1669 			if (locking->locks.IsEmpty()) {
1670 				vnode->advisory_locking = NULL;
1671 				nodeLocker.Unlock();
1672 				locker.Unlock();
1673 
1674 				// we've detached the locking from the vnode, so we can
1675 				// safely delete it
1676 				delete locking;
1677 			} else {
1678 				// the locking is in use again
1679 				nodeLocker.Unlock();
1680 				locker.Unlock();
1681 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1682 			}
1683 		}
1684 	}
1685 
1686 	return B_OK;
1687 }
1688 
1689 
1690 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1691 	will wait for the lock to become available, if there are any collisions
1692 	(it fails with B_WOULD_BLOCK or B_PERMISSION_DENIED in this case, if
	\a wait is \c false).
1693 
1694 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1695 	BSD flock() semantics are used, that is, all children can unlock the file
1696 	in question (we even allow parents to remove the lock, though, which
1697 	seems to be in line with what the BSDs are doing).
1698 */
1699 static status_t
1700 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1701 	bool wait)
1702 {
1703 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1704 		vnode, flock, wait ? "yes" : "no"));
1705 
1706 	bool shared = flock->l_type == F_RDLCK;
1707 	status_t status = B_OK;
1708 
1709 	// TODO: do deadlock detection!
1710 
1711 	struct advisory_locking* locking;
1712 
1713 	while (true) {
1714 		// if this vnode has an advisory_locking structure attached,
1715 		// lock that one and search for any colliding file lock
1716 		status = create_advisory_locking(vnode);
1717 		if (status != B_OK)
1718 			return status;
1719 
1720 		locking = vnode->advisory_locking;
1721 		team_id team = team_get_current_team_id();
1722 		sem_id waitForLock = -1;
1723 
1724 		// test for collisions
1725 		LockList::Iterator iterator = locking->locks.GetIterator();
1726 		while (iterator.HasNext()) {
1727 			struct advisory_lock* lock = iterator.Next();
1728 
1729 			// TODO: locks from the same team might be joinable!
1730 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1731 				// locks do overlap
1732 				if (!shared || !lock->shared) {
1733 					// we need to wait
1734 					waitForLock = locking->wait_sem;
1735 					break;
1736 				}
1737 			}
1738 		}
1739 
1740 		if (waitForLock < 0)
1741 			break;
1742 
1743 		// We need to wait -- or fail now, if we've been asked not to.
1744 
1745 		if (!wait) {
1746 			put_advisory_locking(locking);
1747 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1748 		}
1749 
1750 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1751 			B_CAN_INTERRUPT, 0);
1752 		if (status != B_OK && status != B_BAD_SEM_ID)
1753 			return status;
1754 
1755 		// We have been notified, but we need to re-lock the locking object. So
1756 		// go another round...
1757 	}
1758 
1759 	// install new lock
1760 
1761 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1762 		sizeof(struct advisory_lock));
1763 	if (lock == NULL) {
1764 		put_advisory_locking(locking);
1765 		return B_NO_MEMORY;
1766 	}
1767 
1768 	lock->team = team_get_current_team_id();
1769 	lock->session = session;
1770 	// values must already be normalized when getting here
1771 	lock->start = flock->l_start;
1772 	lock->end = flock->l_start - 1 + flock->l_len;
1773 	lock->shared = shared;
1774 
1775 	locking->locks.Add(lock);
1776 	put_advisory_locking(locking);
1777 
1778 	return status;
1779 }
1780 
1781 
1782 /*!	Normalizes the \a flock structure to make it easier to compare the
1783 	structure with others. The l_start and l_len fields are set to absolute
1784 	values according to the l_whence field.
1785 */
1786 static status_t
1787 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1788 {
1789 	switch (flock->l_whence) {
1790 		case SEEK_SET:
1791 			break;
1792 		case SEEK_CUR:
1793 			flock->l_start += descriptor->pos;
1794 			break;
1795 		case SEEK_END:
1796 		{
1797 			struct vnode* vnode = descriptor->u.vnode;
1798 			struct stat stat;
1799 			status_t status;
1800 
1801 			if (!HAS_FS_CALL(vnode, read_stat))
1802 				return B_UNSUPPORTED;
1803 
1804 			status = FS_CALL(vnode, read_stat, &stat);
1805 			if (status != B_OK)
1806 				return status;
1807 
1808 			flock->l_start += stat.st_size;
1809 			break;
1810 		}
1811 		default:
1812 			return B_BAD_VALUE;
1813 	}
1814 
1815 	if (flock->l_start < 0)
1816 		flock->l_start = 0;
1817 	if (flock->l_len == 0)
1818 		flock->l_len = OFF_MAX;
1819 
1820 	// don't let the offset and length overflow
1821 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1822 		flock->l_len = OFF_MAX - flock->l_start;
1823 
1824 	if (flock->l_len < 0) {
1825 		// a negative length reverses the region
1826 		flock->l_start += flock->l_len;
1827 		flock->l_len = -flock->l_len;
1828 	}
1829 
1830 	return B_OK;
1831 }
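
// A worked example (not from the original file) of the normalization above,
// assuming descriptor->pos == 200:
//   input:  l_whence == SEEK_CUR, l_start == -50, l_len == -100
//   SEEK_CUR:  l_start = 200 + -50 = 150
//   l_len < 0: the region is reversed -- l_start = 150 + -100 = 50,
//              l_len = 100
//   result: the lock covers the bytes [50, 149]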
1832 
1833 
1834 static void
1835 replace_vnode_if_disconnected(struct fs_mount* mount,
1836 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1837 	struct vnode* fallBack, bool lockRootLock)
1838 {
1839 	struct vnode* givenVnode = vnode;
1840 	bool vnodeReplaced = false;
1841 
1842 	ReadLocker vnodeReadLocker(sVnodeLock);
1843 
1844 	if (lockRootLock)
1845 		mutex_lock(&sIOContextRootLock);
1846 
1847 	while (vnode != NULL && vnode->mount == mount
1848 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1849 		if (vnode->covers != NULL) {
1850 			// redirect the vnode to the covered vnode
1851 			vnode = vnode->covers;
1852 		} else
1853 			vnode = fallBack;
1854 
1855 		vnodeReplaced = true;
1856 	}
1857 
1858 	// If we've replaced the node, grab a reference for the new one.
1859 	if (vnodeReplaced && vnode != NULL)
1860 		inc_vnode_ref_count(vnode);
1861 
1862 	if (lockRootLock)
1863 		mutex_unlock(&sIOContextRootLock);
1864 
1865 	vnodeReadLocker.Unlock();
1866 
1867 	if (vnodeReplaced)
1868 		put_vnode(givenVnode);
1869 }
1870 
1871 
1872 /*!	Disconnects all file descriptors that are associated with the
1873 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1874 	\a mount object.
1875 
	Note that after you've called this function, there might still be ongoing
	accesses - they won't be interrupted if they were already in progress
	before the call. However, any subsequent access will fail.
1879 
1880 	This is not a cheap function and should be used with care and rarely.
1881 	TODO: there is currently no means to stop a blocking read/write!
1882 */
1883 static void
1884 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1885 	struct vnode* vnodeToDisconnect)
1886 {
1887 	// iterate over all teams and peek into their file descriptors
1888 	TeamListIterator teamIterator;
1889 	while (Team* team = teamIterator.Next()) {
1890 		BReference<Team> teamReference(team, true);
1891 
1892 		// lock the I/O context
1893 		io_context* context = team->io_context;
1894 		MutexLocker contextLocker(context->io_mutex);
1895 
1896 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1897 			sRoot, true);
1898 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1899 			sRoot, false);
1900 
1901 		for (uint32 i = 0; i < context->table_size; i++) {
1902 			if (struct file_descriptor* descriptor = context->fds[i]) {
1903 				inc_fd_ref_count(descriptor);
1904 
1905 				// if this descriptor points at this mount, we
1906 				// need to disconnect it to be able to unmount
1907 				struct vnode* vnode = fd_vnode(descriptor);
1908 				if (vnodeToDisconnect != NULL) {
1909 					if (vnode == vnodeToDisconnect)
1910 						disconnect_fd(descriptor);
1911 				} else if ((vnode != NULL && vnode->mount == mount)
1912 					|| (vnode == NULL && descriptor->u.mount == mount))
1913 					disconnect_fd(descriptor);
1914 
1915 				put_fd(descriptor);
1916 			}
1917 		}
1918 	}
1919 }
1920 
1921 
1922 /*!	\brief Gets the root node of the current IO context.
1923 	If \a kernel is \c true, the kernel IO context will be used.
1924 	The caller obtains a reference to the returned node.
1925 */
1926 struct vnode*
1927 get_root_vnode(bool kernel)
1928 {
1929 	if (!kernel) {
		// Get the root vnode from the IO context
1931 		struct io_context* context = get_current_io_context(kernel);
1932 
1933 		mutex_lock(&sIOContextRootLock);
1934 
1935 		struct vnode* root = context->root;
1936 		if (root != NULL)
1937 			inc_vnode_ref_count(root);
1938 
1939 		mutex_unlock(&sIOContextRootLock);
1940 
1941 		if (root != NULL)
1942 			return root;
1943 
1944 		// That should never happen.
1945 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1946 			"have a root\n", team_get_current_team_id());
1947 	}
1948 
1949 	inc_vnode_ref_count(sRoot);
1950 	return sRoot;
1951 }
1952 
1953 
1954 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1955 
1956 	Given an arbitrary vnode (identified by mount and node ID), the function
1957 	checks, whether the vnode is covered by another vnode. If it is, the
1958 	function returns the mount and node ID of the covering vnode. Otherwise
1959 	it simply returns the supplied mount and node ID.
1960 
1961 	In case of error (e.g. the supplied node could not be found) the variables
1962 	for storing the resolved mount and node ID remain untouched and an error
1963 	code is returned.
1964 
1965 	\param mountID The mount ID of the vnode in question.
1966 	\param nodeID The node ID of the vnode in question.
1967 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1968 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1969 	\return
1970 	- \c B_OK, if everything went fine,
1971 	- another error code, if something went wrong.
1972 */
1973 status_t
1974 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1975 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1976 {
1977 	// get the node
1978 	struct vnode* node;
1979 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1980 	if (error != B_OK)
1981 		return error;
1982 
1983 	// resolve the node
1984 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1985 		put_vnode(node);
1986 		node = coveringNode;
1987 	}
1988 
1989 	// set the return values
1990 	*resolvedMountID = node->device;
1991 	*resolvedNodeID = node->id;
1992 
1993 	put_vnode(node);
1994 
1995 	return B_OK;
1996 }
1997 
1998 
1999 /*!	\brief Gets the directory path and leaf name for a given path.
2000 
2001 	The supplied \a path is transformed to refer to the directory part of
2002 	the entry identified by the original path, and into the buffer \a filename
2003 	the leaf name of the original entry is written.
2004 	Neither the returned path nor the leaf name can be expected to be
2005 	canonical.
2006 
2007 	\param path The path to be analyzed. Must be able to store at least one
2008 		   additional character.
2009 	\param filename The buffer into which the leaf name will be written.
2010 		   Must be of size B_FILE_NAME_LENGTH at least.
2011 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2012 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2013 		   if the given path name is empty.
2014 */
2015 static status_t
2016 get_dir_path_and_leaf(char* path, char* filename)
2017 {
2018 	if (*path == '\0')
2019 		return B_ENTRY_NOT_FOUND;
2020 
2021 	char* last = strrchr(path, '/');
		// '/' is not allowed in file names!
2023 
2024 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2025 
2026 	if (last == NULL) {
		// this path is a single segment with no '/' in it
		// e.g. "foo"
2029 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2030 			return B_NAME_TOO_LONG;
2031 
2032 		strcpy(path, ".");
2033 	} else {
2034 		last++;
2035 		if (last[0] == '\0') {
2036 			// special case: the path ends in one or more '/' - remove them
2037 			while (*--last == '/' && last != path);
2038 			last[1] = '\0';
2039 
2040 			if (last == path && last[0] == '/') {
2041 				// This path points to the root of the file system
2042 				strcpy(filename, ".");
2043 				return B_OK;
2044 			}
2045 			for (; last != path && *(last - 1) != '/'; last--);
2046 				// rewind to the start of the leaf before the '/'
2047 		}
2048 
2049 		// normal leaf: replace the leaf portion of the path with a '.'
2050 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2051 			return B_NAME_TOO_LONG;
2052 
2053 		last[0] = '.';
2054 		last[1] = '\0';
2055 	}
2056 	return B_OK;
2057 }
2058 
2059 
2060 static status_t
2061 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2062 	bool traverse, bool kernel, struct vnode** _vnode)
2063 {
2064 	char clonedName[B_FILE_NAME_LENGTH + 1];
2065 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2066 		return B_NAME_TOO_LONG;
2067 
2068 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2069 	struct vnode* directory;
2070 
2071 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2072 	if (status < 0)
2073 		return status;
2074 
2075 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2076 		_vnode, NULL);
2077 }
2078 
2079 
2080 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2081 	and returns the respective vnode.
2082 	On success a reference to the vnode is acquired for the caller.
2083 */
2084 static status_t
2085 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2086 {
2087 	ino_t id;
2088 
2089 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2090 		return get_vnode(dir->device, id, _vnode, true, false);
2091 
2092 	status_t status = FS_CALL(dir, lookup, name, &id);
2093 	if (status != B_OK)
2094 		return status;
2095 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// have a reference and just need to look the node up.
2098 	rw_lock_read_lock(&sVnodeLock);
2099 	*_vnode = lookup_vnode(dir->device, id);
2100 	rw_lock_read_unlock(&sVnodeLock);
2101 
2102 	if (*_vnode == NULL) {
2103 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2104 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2105 		return B_ENTRY_NOT_FOUND;
2106 	}
2107 
2108 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2109 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2110 //		(*_vnode)->mount->id, (*_vnode)->id);
2111 
2112 	return B_OK;
2113 }
2114 
2115 
2116 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2117 	\a path must not be NULL.
2118 	If it returns successfully, \a path contains the name of the last path
2119 	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this reduces the ref_count of the starting \a vnode, whether
	it is successful or not!
2123 */
2124 static status_t
2125 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2126 	int count, struct io_context* ioContext, struct vnode** _vnode,
2127 	ino_t* _parentID)
2128 {
2129 	status_t status = B_OK;
2130 	ino_t lastParentID = vnode->id;
2131 
2132 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2133 
2134 	if (path == NULL) {
2135 		put_vnode(vnode);
2136 		return B_BAD_VALUE;
2137 	}
2138 
2139 	if (*path == '\0') {
2140 		put_vnode(vnode);
2141 		return B_ENTRY_NOT_FOUND;
2142 	}
2143 
2144 	while (true) {
2145 		struct vnode* nextVnode;
2146 		char* nextPath;
2147 
2148 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2149 			path));
2150 
2151 		// done?
2152 		if (path[0] == '\0')
2153 			break;
2154 
2155 		// walk to find the next path component ("path" will point to a single
2156 		// path component), and filter out multiple slashes
2157 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2158 				nextPath++);
2159 
2160 		if (*nextPath == '/') {
2161 			*nextPath = '\0';
2162 			do
2163 				nextPath++;
2164 			while (*nextPath == '/');
2165 		}
2166 
		// If the '..' is at a covering vnode, move to the covered vnode, so
		// that we pass the '..' path to the underlying file system.
		// Also prevent escaping the root of the IO context.
2170 		if (strcmp("..", path) == 0) {
2171 			if (vnode == ioContext->root) {
2172 				// Attempted prison break! Keep it contained.
2173 				path = nextPath;
2174 				continue;
2175 			}
2176 
2177 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2178 				nextVnode = coveredVnode;
2179 				put_vnode(vnode);
2180 				vnode = nextVnode;
2181 			}
2182 		}
2183 
2184 		// check if vnode is really a directory
2185 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2186 			status = B_NOT_A_DIRECTORY;
2187 
2188 		// Check if we have the right to search the current directory vnode.
2189 		// If a file system doesn't have the access() function, we assume that
2190 		// searching a directory is always allowed
2191 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2192 			status = FS_CALL(vnode, access, X_OK);
2193 
2194 		// Tell the filesystem to get the vnode of this path component (if we
2195 		// got the permission from the call above)
2196 		if (status == B_OK)
2197 			status = lookup_dir_entry(vnode, path, &nextVnode);
2198 
2199 		if (status != B_OK) {
2200 			put_vnode(vnode);
2201 			return status;
2202 		}
2203 
2204 		// If the new node is a symbolic link, resolve it (if we've been told
2205 		// to do it)
2206 		if (S_ISLNK(nextVnode->Type())
2207 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2208 			size_t bufferSize;
2209 			char* buffer;
2210 
2211 			TRACE(("traverse link\n"));
2212 
2213 			// it's not exactly nice style using goto in this way, but hey,
2214 			// it works :-/
2215 			if (count + 1 > B_MAX_SYMLINKS) {
2216 				status = B_LINK_LIMIT;
2217 				goto resolve_link_error;
2218 			}
2219 
2220 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2221 			if (buffer == NULL) {
2222 				status = B_NO_MEMORY;
2223 				goto resolve_link_error;
2224 			}
2225 
2226 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2227 				bufferSize--;
2228 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2229 				// null-terminate
2230 				if (status >= 0)
2231 					buffer[bufferSize] = '\0';
2232 			} else
2233 				status = B_BAD_VALUE;
2234 
2235 			if (status != B_OK) {
2236 				free(buffer);
2237 
2238 		resolve_link_error:
2239 				put_vnode(vnode);
2240 				put_vnode(nextVnode);
2241 
2242 				return status;
2243 			}
2244 			put_vnode(nextVnode);
2245 
2246 			// Check if we start from the root directory or the current
2247 			// directory ("vnode" still points to that one).
2248 			// Cut off all leading slashes if it's the root directory
2249 			path = buffer;
2250 			bool absoluteSymlink = false;
2251 			if (path[0] == '/') {
2252 				// we don't need the old directory anymore
2253 				put_vnode(vnode);
2254 
2255 				while (*++path == '/')
2256 					;
2257 
2258 				mutex_lock(&sIOContextRootLock);
2259 				vnode = ioContext->root;
2260 				inc_vnode_ref_count(vnode);
2261 				mutex_unlock(&sIOContextRootLock);
2262 
2263 				absoluteSymlink = true;
2264 			}
2265 
2266 			inc_vnode_ref_count(vnode);
2267 				// balance the next recursion - we will decrement the
2268 				// ref_count of the vnode, no matter if we succeeded or not
2269 
2270 			if (absoluteSymlink && *path == '\0') {
2271 				// symlink was just "/"
2272 				nextVnode = vnode;
2273 			} else {
2274 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2275 					ioContext, &nextVnode, &lastParentID);
2276 			}
2277 
2278 			free(buffer);
2279 
2280 			if (status != B_OK) {
2281 				put_vnode(vnode);
2282 				return status;
2283 			}
2284 		} else
2285 			lastParentID = vnode->id;
2286 
2287 		// decrease the ref count on the old dir we just looked up into
2288 		put_vnode(vnode);
2289 
2290 		path = nextPath;
2291 		vnode = nextVnode;
2292 
		// if the node we reached is covered by another vnode, move to
		// the covering node
2294 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2295 			put_vnode(vnode);
2296 			vnode = coveringNode;
2297 		}
2298 	}
2299 
2300 	*_vnode = vnode;
2301 	if (_parentID)
2302 		*_parentID = lastParentID;
2303 
2304 	return B_OK;
2305 }
2306 
2307 
2308 static status_t
2309 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2310 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2311 {
2312 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2313 		get_current_io_context(kernel), _vnode, _parentID);
2314 }
2315 
2316 
2317 static status_t
2318 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2319 	ino_t* _parentID, bool kernel)
2320 {
2321 	struct vnode* start = NULL;
2322 
2323 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2324 
2325 	if (!path)
2326 		return B_BAD_VALUE;
2327 
2328 	if (*path == '\0')
2329 		return B_ENTRY_NOT_FOUND;
2330 
2331 	// figure out if we need to start at root or at cwd
2332 	if (*path == '/') {
2333 		if (sRoot == NULL) {
2334 			// we're a bit early, aren't we?
2335 			return B_ERROR;
2336 		}
2337 
2338 		while (*++path == '/')
2339 			;
2340 		start = get_root_vnode(kernel);
2341 
2342 		if (*path == '\0') {
2343 			*_vnode = start;
2344 			return B_OK;
2345 		}
2346 
2347 	} else {
2348 		struct io_context* context = get_current_io_context(kernel);
2349 
2350 		mutex_lock(&context->io_mutex);
2351 		start = context->cwd;
2352 		if (start != NULL)
2353 			inc_vnode_ref_count(start);
2354 		mutex_unlock(&context->io_mutex);
2355 
2356 		if (start == NULL)
2357 			return B_ERROR;
2358 	}
2359 
2360 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2361 		_parentID);
2362 }
2363 
2364 
/*! Returns the vnode for the next to last segment of the path, and returns
	the last portion in \a filename.
2367 	The path buffer must be able to store at least one additional character.
2368 */
2369 static status_t
2370 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2371 	bool kernel)
2372 {
2373 	status_t status = get_dir_path_and_leaf(path, filename);
2374 	if (status != B_OK)
2375 		return status;
2376 
2377 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2378 }
2379 
2380 
2381 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2382 		   to by a FD + path pair.
2383 
2384 	\a path must be given in either case. \a fd might be omitted, in which
2385 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2389 
2390 	The caller has the responsibility to call put_vnode() on the returned
2391 	directory vnode.
2392 
2393 	\param fd The FD. May be < 0.
2394 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2395 	       is modified by this function. It must have at least room for a
2396 	       string one character longer than the path it contains.
2397 	\param _vnode A pointer to a variable the directory vnode shall be written
2398 		   into.
2399 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2400 		   the leaf name of the specified entry will be written.
2401 	\param kernel \c true, if invoked from inside the kernel, \c false if
2402 		   invoked from userland.
2403 	\return \c B_OK, if everything went fine, another error code otherwise.
2404 */
2405 static status_t
2406 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2407 	char* filename, bool kernel)
2408 {
2409 	if (!path)
2410 		return B_BAD_VALUE;
2411 	if (*path == '\0')
2412 		return B_ENTRY_NOT_FOUND;
2413 	if (fd < 0)
2414 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2415 
2416 	status_t status = get_dir_path_and_leaf(path, filename);
2417 	if (status != B_OK)
2418 		return status;
2419 
2420 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2421 }
2422 
2423 
2424 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2425 		   to by a vnode + path pair.
2426 
2427 	\a path must be given in either case. \a vnode might be omitted, in which
2428 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2432 
2433 	The caller has the responsibility to call put_vnode() on the returned
2434 	directory vnode.
2435 
2436 	\param vnode The vnode. May be \c NULL.
2437 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2438 	       is modified by this function. It must have at least room for a
2439 	       string one character longer than the path it contains.
2440 	\param _vnode A pointer to a variable the directory vnode shall be written
2441 		   into.
2442 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2443 		   the leaf name of the specified entry will be written.
2444 	\param kernel \c true, if invoked from inside the kernel, \c false if
2445 		   invoked from userland.
2446 	\return \c B_OK, if everything went fine, another error code otherwise.
2447 */
2448 static status_t
2449 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2450 	struct vnode** _vnode, char* filename, bool kernel)
2451 {
2452 	if (!path)
2453 		return B_BAD_VALUE;
2454 	if (*path == '\0')
2455 		return B_ENTRY_NOT_FOUND;
2456 	if (vnode == NULL || path[0] == '/')
2457 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2458 
2459 	status_t status = get_dir_path_and_leaf(path, filename);
2460 	if (status != B_OK)
2461 		return status;
2462 
2463 	inc_vnode_ref_count(vnode);
2464 		// vnode_path_to_vnode() always decrements the ref count
2465 
2466 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2467 }
2468 
2469 
2470 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2471 */
2472 static status_t
2473 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2474 	size_t bufferSize, struct io_context* ioContext)
2475 {
2476 	if (bufferSize < sizeof(struct dirent))
2477 		return B_BAD_VALUE;
2478 
	// See if the vnode is covering another vnode and move to the covered
	// vnode so we get the underlying file system
2481 	VNodePutter vnodePutter;
2482 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2483 		vnode = coveredVnode;
2484 		vnodePutter.SetTo(vnode);
2485 	}
2486 
2487 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2488 		// The FS supports getting the name of a vnode.
2489 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2490 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2491 			return B_OK;
2492 	}
2493 
2494 	// The FS doesn't support getting the name of a vnode. So we search the
2495 	// parent directory for the vnode, if the caller let us.
2496 
2497 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2498 		return B_UNSUPPORTED;
2499 
2500 	void* cookie;
2501 
2502 	status_t status = FS_CALL(parent, open_dir, &cookie);
2503 	if (status >= B_OK) {
2504 		while (true) {
2505 			uint32 num = 1;
2506 			// We use the FS hook directly instead of dir_read(), since we don't
2507 			// want the entries to be fixed. We have already resolved vnode to
2508 			// the covered node.
2509 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2510 				&num);
2511 			if (status != B_OK)
2512 				break;
2513 			if (num == 0) {
2514 				status = B_ENTRY_NOT_FOUND;
2515 				break;
2516 			}
2517 
2518 			if (vnode->id == buffer->d_ino) {
2519 				// found correct entry!
2520 				break;
2521 			}
2522 		}
2523 
		// the cookie was created via the parent's open_dir() hook, so it
		// must be closed and freed via the parent, too
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2526 	}
2527 	return status;
2528 }
2529 
2530 
2531 static status_t
2532 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2533 	size_t nameSize, bool kernel)
2534 {
2535 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2536 	struct dirent* dirent = (struct dirent*)buffer;
2537 
2538 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2539 		get_current_io_context(kernel));
2540 	if (status != B_OK)
2541 		return status;
2542 
2543 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2544 		return B_BUFFER_OVERFLOW;
2545 
2546 	return B_OK;
2547 }
2548 
2549 
2550 /*!	Gets the full path to a given directory vnode.
	It uses the get_vnode_name() FS hook to get the name of a vnode; if a
	file system doesn't support this hook, it will fall back to iterating
	through the parent directory to get the name of the child.
2554 
2555 	To protect against circular loops, it supports a maximum tree depth
2556 	of 256 levels.
2557 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to guarantee a consistent result, as
	paths aren't stable anyway: the path to a file can change at any time.

	It might be a good idea, though, to check in the calling function whether
	the returned path exists (it's not done here for efficiency reasons).
2564 */
2565 static status_t
2566 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2567 	bool kernel)
2568 {
2569 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2570 
2571 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2572 		return B_BAD_VALUE;
2573 
2574 	if (!S_ISDIR(vnode->Type()))
2575 		return B_NOT_A_DIRECTORY;
2576 
2577 	char* path = buffer;
2578 	int32 insert = bufferSize;
2579 	int32 maxLevel = 256;
2580 	int32 length;
2581 	status_t status;
2582 	struct io_context* ioContext = get_current_io_context(kernel);
2583 
2584 	// we don't use get_vnode() here because this call is more
2585 	// efficient and does all we need from get_vnode()
2586 	inc_vnode_ref_count(vnode);
2587 
2588 	if (vnode != ioContext->root) {
		// we haven't hit the IO context root yet, so resolve the vnode
		// to its covered vnode
2591 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2592 			put_vnode(vnode);
2593 			vnode = coveredVnode;
2594 		}
2595 	}
2596 
2597 	path[--insert] = '\0';
2598 		// the path is filled right to left
2599 
2600 	while (true) {
2601 		// the name buffer is also used for fs_read_dir()
2602 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2603 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2604 		struct vnode* parentVnode;
2605 
2606 		// lookup the parent vnode
2607 		if (vnode == ioContext->root) {
2608 			// we hit the IO context root
2609 			parentVnode = vnode;
2610 			inc_vnode_ref_count(vnode);
2611 		} else {
2612 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2613 			if (status != B_OK)
2614 				goto out;
2615 		}
2616 
2617 		// get the node's name
2618 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2619 			sizeof(nameBuffer), ioContext);
2620 
2621 		if (vnode != ioContext->root) {
			// we haven't hit the IO context root yet, so resolve the parent
			// vnode to its covered vnode
2624 			if (Vnode* coveredVnode = get_covered_vnode(parentVnode)) {
2625 				put_vnode(parentVnode);
2626 				parentVnode = coveredVnode;
2627 			}
2628 		}
2629 
2630 		bool hitRoot = (parentVnode == vnode);
2631 
2632 		// release the current vnode, we only need its parent from now on
2633 		put_vnode(vnode);
2634 		vnode = parentVnode;
2635 
2636 		if (status != B_OK)
2637 			goto out;
2638 
2639 		if (hitRoot) {
2640 			// we have reached "/", which means we have constructed the full
2641 			// path
2642 			break;
2643 		}
2644 
2645 		// TODO: add an explicit check for loops in about 10 levels to do
2646 		// real loop detection
2647 
		// don't go deeper than 'maxLevel' to prevent circular loops
2649 		if (maxLevel-- < 0) {
2650 			status = B_LINK_LIMIT;
2651 			goto out;
2652 		}
2653 
2654 		// add the name in front of the current path
2655 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2656 		length = strlen(name);
2657 		insert -= length;
2658 		if (insert <= 0) {
2659 			status = B_RESULT_NOT_REPRESENTABLE;
2660 			goto out;
2661 		}
2662 		memcpy(path + insert, name, length);
2663 		path[--insert] = '/';
2664 	}
2665 
2666 	// the root dir will result in an empty path: fix it
2667 	if (path[insert] == '\0')
2668 		path[--insert] = '/';
2669 
2670 	TRACE(("  path is: %s\n", path + insert));
2671 
2672 	// move the path to the start of the buffer
2673 	length = bufferSize - insert;
2674 	memmove(buffer, path + insert, length);
2675 
2676 out:
2677 	put_vnode(vnode);
2678 	return status;
2679 }
2680 
2681 
2682 /*!	Checks the length of every path component, and adds a '.'
2683 	if the path ends in a slash.
2684 	The given path buffer must be able to store at least one
2685 	additional character.
2686 */
2687 static status_t
2688 check_path(char* to)
2689 {
2690 	int32 length = 0;
2691 
2692 	// check length of every path component
2693 
2694 	while (*to) {
2695 		char* begin;
2696 		if (*to == '/')
2697 			to++, length++;
2698 
2699 		begin = to;
2700 		while (*to != '/' && *to)
2701 			to++, length++;
2702 
2703 		if (to - begin > B_FILE_NAME_LENGTH)
2704 			return B_NAME_TOO_LONG;
2705 	}
2706 
2707 	if (length == 0)
2708 		return B_ENTRY_NOT_FOUND;
2709 
2710 	// complete path if there is a slash at the end
2711 
2712 	if (*(to - 1) == '/') {
2713 		if (length > B_PATH_NAME_LENGTH - 2)
2714 			return B_NAME_TOO_LONG;
2715 
2716 		to[0] = '.';
2717 		to[1] = '\0';
2718 	}
2719 
2720 	return B_OK;
2721 }
2722 
2723 
2724 static struct file_descriptor*
2725 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2726 {
2727 	struct file_descriptor* descriptor
2728 		= get_fd(get_current_io_context(kernel), fd);
2729 	if (descriptor == NULL)
2730 		return NULL;
2731 
2732 	struct vnode* vnode = fd_vnode(descriptor);
2733 	if (vnode == NULL) {
2734 		put_fd(descriptor);
2735 		return NULL;
2736 	}
2737 
2738 	// ToDo: when we can close a file descriptor at any point, investigate
2739 	//	if this is still valid to do (accessing the vnode without ref_count
2740 	//	or locking)
2741 	*_vnode = vnode;
2742 	return descriptor;
2743 }
2744 
2745 
2746 static struct vnode*
2747 get_vnode_from_fd(int fd, bool kernel)
2748 {
2749 	struct file_descriptor* descriptor;
2750 	struct vnode* vnode;
2751 
2752 	descriptor = get_fd(get_current_io_context(kernel), fd);
2753 	if (descriptor == NULL)
2754 		return NULL;
2755 
2756 	vnode = fd_vnode(descriptor);
2757 	if (vnode != NULL)
2758 		inc_vnode_ref_count(vnode);
2759 
2760 	put_fd(descriptor);
2761 	return vnode;
2762 }
2763 
2764 
2765 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2766 	only the path will be considered. In this case, the \a path must not be
2767 	NULL.
2768 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2769 	and should be NULL for files.
2770 */
2771 static status_t
2772 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2773 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2774 {
2775 	if (fd < 0 && !path)
2776 		return B_BAD_VALUE;
2777 
2778 	if (path != NULL && *path == '\0')
2779 		return B_ENTRY_NOT_FOUND;
2780 
2781 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2782 		// no FD or absolute path
2783 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2784 	}
2785 
2786 	// FD only, or FD + relative path
2787 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2788 	if (!vnode)
2789 		return B_FILE_ERROR;
2790 
2791 	if (path != NULL) {
2792 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2793 			_vnode, _parentID);
2794 	}
2795 
2796 	// there is no relative path to take into account
2797 
2798 	*_vnode = vnode;
2799 	if (_parentID)
2800 		*_parentID = -1;
2801 
2802 	return B_OK;
2803 }
2804 
2805 
2806 static int
2807 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2808 	void* cookie, int openMode, bool kernel)
2809 {
2810 	struct file_descriptor* descriptor;
2811 	int fd;
2812 
	// If the vnode is locked, we don't allow creating a new file or
	// directory file_descriptor for it
2815 	if (vnode && vnode->mandatory_locked_by != NULL
2816 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2817 		return B_BUSY;
2818 
2819 	descriptor = alloc_fd();
2820 	if (!descriptor)
2821 		return B_NO_MEMORY;
2822 
2823 	if (vnode)
2824 		descriptor->u.vnode = vnode;
2825 	else
2826 		descriptor->u.mount = mount;
2827 	descriptor->cookie = cookie;
2828 
2829 	switch (type) {
2830 		// vnode types
2831 		case FDTYPE_FILE:
2832 			descriptor->ops = &sFileOps;
2833 			break;
2834 		case FDTYPE_DIR:
2835 			descriptor->ops = &sDirectoryOps;
2836 			break;
2837 		case FDTYPE_ATTR:
2838 			descriptor->ops = &sAttributeOps;
2839 			break;
2840 		case FDTYPE_ATTR_DIR:
2841 			descriptor->ops = &sAttributeDirectoryOps;
2842 			break;
2843 
2844 		// mount types
2845 		case FDTYPE_INDEX_DIR:
2846 			descriptor->ops = &sIndexDirectoryOps;
2847 			break;
2848 		case FDTYPE_QUERY:
2849 			descriptor->ops = &sQueryOps;
2850 			break;
2851 
2852 		default:
2853 			panic("get_new_fd() called with unknown type %d\n", type);
2854 			break;
2855 	}
2856 	descriptor->type = type;
2857 	descriptor->open_mode = openMode;
2858 
2859 	io_context* context = get_current_io_context(kernel);
2860 	fd = new_fd(context, descriptor);
2861 	if (fd < 0) {
2862 		free(descriptor);
2863 		return B_NO_MORE_FDS;
2864 	}
2865 
2866 	mutex_lock(&context->io_mutex);
2867 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2868 	mutex_unlock(&context->io_mutex);
2869 
2870 	return fd;
2871 }
2872 
2873 
2874 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2875 	vfs_normalize_path(). See there for more documentation.
2876 */
2877 static status_t
2878 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2879 {
2880 	VNodePutter dirPutter;
2881 	struct vnode* dir = NULL;
2882 	status_t error;
2883 
2884 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2885 		// get dir vnode + leaf name
2886 		struct vnode* nextDir;
2887 		char leaf[B_FILE_NAME_LENGTH];
2888 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2889 		if (error != B_OK)
2890 			return error;
2891 
2892 		dir = nextDir;
2893 		strcpy(path, leaf);
2894 		dirPutter.SetTo(dir);
2895 
2896 		// get file vnode, if we shall resolve links
2897 		bool fileExists = false;
2898 		struct vnode* fileVnode;
2899 		VNodePutter fileVnodePutter;
2900 		if (traverseLink) {
2901 			inc_vnode_ref_count(dir);
2902 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2903 					NULL) == B_OK) {
2904 				fileVnodePutter.SetTo(fileVnode);
2905 				fileExists = true;
2906 			}
2907 		}
2908 
2909 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2910 			// we're done -- construct the path
2911 			bool hasLeaf = true;
2912 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2913 				// special cases "." and ".." -- get the dir, forget the leaf
2914 				inc_vnode_ref_count(dir);
2915 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2916 					&nextDir, NULL);
2917 				if (error != B_OK)
2918 					return error;
2919 				dir = nextDir;
2920 				dirPutter.SetTo(dir);
2921 				hasLeaf = false;
2922 			}
2923 
2924 			// get the directory path
2925 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2926 			if (error != B_OK)
2927 				return error;
2928 
2929 			// append the leaf name
2930 			if (hasLeaf) {
2931 				// insert a directory separator if this is not the file system
2932 				// root
2933 				if ((strcmp(path, "/") != 0
2934 					&& strlcat(path, "/", pathSize) >= pathSize)
2935 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2936 					return B_NAME_TOO_LONG;
2937 				}
2938 			}
2939 
2940 			return B_OK;
2941 		}
2942 
2943 		// read link
2944 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2945 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2946 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2947 			if (error != B_OK)
2948 				return error;
2949 			path[bufferSize] = '\0';
2950 		} else
2951 			return B_BAD_VALUE;
2952 	}
2953 
2954 	return B_LINK_LIMIT;
2955 }
2956 
2957 
2958 #ifdef ADD_DEBUGGER_COMMANDS
2959 
2960 
2961 static void
2962 _dump_advisory_locking(advisory_locking* locking)
2963 {
2964 	if (locking == NULL)
2965 		return;
2966 
2967 	kprintf("   lock:        %" B_PRId32, locking->lock);
2968 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2969 
2970 	int32 index = 0;
2971 	LockList::Iterator iterator = locking->locks.GetIterator();
2972 	while (iterator.HasNext()) {
2973 		struct advisory_lock* lock = iterator.Next();
2974 
2975 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2976 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2977 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2978 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2979 	}
2980 }
2981 
2982 
2983 static void
2984 _dump_mount(struct fs_mount* mount)
2985 {
2986 	kprintf("MOUNT: %p\n", mount);
2987 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
2988 	kprintf(" device_name:   %s\n", mount->device_name);
2989 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2990 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2991 	kprintf(" partition:     %p\n", mount->partition);
2992 	kprintf(" lock:          %p\n", &mount->rlock);
2993 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2994 		mount->owns_file_device ? " owns_file_device" : "");
2995 
2996 	fs_volume* volume = mount->volume;
2997 	while (volume != NULL) {
2998 		kprintf(" volume %p:\n", volume);
2999 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3000 		kprintf("  private_volume:   %p\n", volume->private_volume);
3001 		kprintf("  ops:              %p\n", volume->ops);
3002 		kprintf("  file_system:      %p\n", volume->file_system);
3003 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3004 		volume = volume->super_volume;
3005 	}
3006 
3007 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3008 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3009 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3010 	set_debug_variable("_partition", (addr_t)mount->partition);
3011 }
3012 
3013 
3014 static bool
3015 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3016 	const char* name)
3017 {
3018 	bool insertSlash = buffer[bufferSize] != '\0';
3019 	size_t nameLength = strlen(name);
3020 
3021 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3022 		return false;
3023 
3024 	if (insertSlash)
3025 		buffer[--bufferSize] = '/';
3026 
3027 	bufferSize -= nameLength;
3028 	memcpy(buffer + bufferSize, name, nameLength);
3029 
3030 	return true;
3031 }
3032 
3033 
3034 static bool
3035 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3036 	ino_t nodeID)
3037 {
3038 	if (bufferSize == 0)
3039 		return false;
3040 
3041 	bool insertSlash = buffer[bufferSize] != '\0';
3042 	if (insertSlash)
3043 		buffer[--bufferSize] = '/';
3044 
3045 	size_t size = snprintf(buffer, bufferSize,
3046 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3047 	if (size > bufferSize) {
3048 		if (insertSlash)
3049 			bufferSize++;
3050 		return false;
3051 	}
3052 
3053 	if (size < bufferSize)
3054 		memmove(buffer + bufferSize - size, buffer, size);
3055 
3056 	bufferSize -= size;
3057 	return true;
3058 }
3059 
3060 
3061 static char*
3062 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3063 	bool& _truncated)
3064 {
3065 	// null-terminate the path
3066 	buffer[--bufferSize] = '\0';
3067 
3068 	while (true) {
3069 		while (vnode->covers != NULL)
3070 			vnode = vnode->covers;
3071 
3072 		if (vnode == sRoot) {
3073 			_truncated = bufferSize == 0;
3074 			if (!_truncated)
3075 				buffer[--bufferSize] = '/';
3076 			return buffer + bufferSize;
3077 		}
3078 
3079 		// resolve the name
3080 		ino_t dirID;
3081 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3082 			vnode->id, dirID);
3083 		if (name == NULL) {
3084 			// Failed to resolve the name -- prepend "<dev,node>/".
3085 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3086 				vnode->mount->id, vnode->id);
3087 			return buffer + bufferSize;
3088 		}
3089 
3090 		// prepend the name
3091 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3092 			_truncated = true;
3093 			return buffer + bufferSize;
3094 		}
3095 
3096 		// resolve the directory node
3097 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3098 		if (nextVnode == NULL) {
3099 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3100 				vnode->mount->id, dirID);
3101 			return buffer + bufferSize;
3102 		}
3103 
3104 		vnode = nextVnode;
3105 	}
3106 }
3107 
3108 
3109 static void
3110 _dump_vnode(struct vnode* vnode, bool printPath)
3111 {
3112 	kprintf("VNODE: %p\n", vnode);
3113 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3114 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3115 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3116 	kprintf(" private_node:  %p\n", vnode->private_node);
3117 	kprintf(" mount:         %p\n", vnode->mount);
3118 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3119 	kprintf(" covers:        %p\n", vnode->covers);
3120 	kprintf(" cache:         %p\n", vnode->cache);
3121 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3122 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3123 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3124 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3125 
3126 	_dump_advisory_locking(vnode->advisory_locking);
3127 
3128 	if (printPath) {
3129 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3130 		if (buffer != NULL) {
3131 			bool truncated;
3132 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3133 				B_PATH_NAME_LENGTH, truncated);
3134 			if (path != NULL) {
3135 				kprintf(" path:          ");
3136 				if (truncated)
3137 					kputs("<truncated>/");
3138 				kputs(path);
3139 				kputs("\n");
3140 			} else
3141 				kprintf("Failed to resolve vnode path.\n");
3142 
3143 			debug_free(buffer);
3144 		} else
3145 			kprintf("Failed to allocate memory for constructing the path.\n");
3146 	}
3147 
3148 	set_debug_variable("_node", (addr_t)vnode->private_node);
3149 	set_debug_variable("_mount", (addr_t)vnode->mount);
3150 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3151 	set_debug_variable("_covers", (addr_t)vnode->covers);
3152 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3153 }
3154 
3155 
3156 static int
3157 dump_mount(int argc, char** argv)
3158 {
3159 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3160 		kprintf("usage: %s [id|address]\n", argv[0]);
3161 		return 0;
3162 	}
3163 
3164 	ulong val = parse_expression(argv[1]);
3165 	uint32 id = val;
3166 
3167 	struct fs_mount* mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3168 	if (mount == NULL) {
3169 		if (IS_USER_ADDRESS(id)) {
3170 			kprintf("fs_mount not found\n");
3171 			return 0;
3172 		}
3173 		mount = (fs_mount*)val;
3174 	}
3175 
3176 	_dump_mount(mount);
3177 	return 0;
3178 }
3179 
3180 
3181 static int
3182 dump_mounts(int argc, char** argv)
3183 {
3184 	if (argc != 1) {
3185 		kprintf("usage: %s\n", argv[0]);
3186 		return 0;
3187 	}
3188 
3189 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3190 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3191 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3192 
3193 	struct hash_iterator iterator;
3194 	struct fs_mount* mount;
3195 
3196 	hash_open(sMountsTable, &iterator);
3197 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3198 			!= NULL) {
3199 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3200 			mount->root_vnode->covers, mount->volume->private_volume,
3201 			mount->volume->file_system_name);
3202 
3203 		fs_volume* volume = mount->volume;
3204 		while (volume->super_volume != NULL) {
3205 			volume = volume->super_volume;
3206 			kprintf("                                     %p %s\n",
3207 				volume->private_volume, volume->file_system_name);
3208 		}
3209 	}
3210 
3211 	hash_close(sMountsTable, &iterator, false);
3212 	return 0;
3213 }
3214 
3215 
3216 static int
3217 dump_vnode(int argc, char** argv)
3218 {
3219 	bool printPath = false;
3220 	int argi = 1;
3221 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3222 		printPath = true;
3223 		argi++;
3224 	}
3225 
3226 	if (argi >= argc || argi + 2 < argc) {
3227 		print_debugger_command_usage(argv[0]);
3228 		return 0;
3229 	}
3230 
3231 	struct vnode* vnode = NULL;
3232 
3233 	if (argi + 1 == argc) {
3234 		vnode = (struct vnode*)parse_expression(argv[argi]);
3235 		if (IS_USER_ADDRESS(vnode)) {
3236 			kprintf("invalid vnode address\n");
3237 			return 0;
3238 		}
3239 		_dump_vnode(vnode, printPath);
3240 		return 0;
3241 	}
3242 
3243 	struct hash_iterator iterator;
3244 	dev_t device = parse_expression(argv[argi]);
3245 	ino_t id = parse_expression(argv[argi + 1]);
3246 
3247 	hash_open(sVnodeTable, &iterator);
3248 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3249 		if (vnode->id != id || vnode->device != device)
3250 			continue;
3251 
3252 		_dump_vnode(vnode, printPath);
3253 	}
3254 
3255 	hash_close(sVnodeTable, &iterator, false);
3256 	return 0;
3257 }
3258 
3259 
3260 static int
3261 dump_vnodes(int argc, char** argv)
3262 {
3263 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3264 		kprintf("usage: %s [device]\n", argv[0]);
3265 		return 0;
3266 	}
3267 
3268 	// restrict dumped nodes to a certain device if requested
3269 	dev_t device = parse_expression(argv[1]);
3270 
3271 	struct hash_iterator iterator;
3272 	struct vnode* vnode;
3273 
3274 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3275 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3276 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3277 
3278 	hash_open(sVnodeTable, &iterator);
3279 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3280 		if (vnode->device != device)
3281 			continue;
3282 
3283 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3284 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3285 			vnode->private_node, vnode->advisory_locking,
3286 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3287 			vnode->IsUnpublished() ? "u" : "-");
3288 	}
3289 
3290 	hash_close(sVnodeTable, &iterator, false);
3291 	return 0;
3292 }
3293 
3294 
3295 static int
3296 dump_vnode_caches(int argc, char** argv)
3297 {
3298 	struct hash_iterator iterator;
3299 	struct vnode* vnode;
3300 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3302 		kprintf("usage: %s [device]\n", argv[0]);
3303 		return 0;
3304 	}
3305 
3306 	// restrict dumped nodes to a certain device if requested
3307 	dev_t device = -1;
3308 	if (argc > 1)
3309 		device = parse_expression(argv[1]);
3310 
3311 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3312 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3313 
3314 	hash_open(sVnodeTable, &iterator);
3315 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3316 		if (vnode->cache == NULL)
3317 			continue;
3318 		if (device != -1 && vnode->device != device)
3319 			continue;
3320 
3321 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3322 			vnode, vnode->device, vnode->id, vnode->cache,
3323 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3324 			vnode->cache->page_count);
3325 	}
3326 
3327 	hash_close(sVnodeTable, &iterator, false);
3328 	return 0;
3329 }
3330 
3331 
3332 int
3333 dump_io_context(int argc, char** argv)
3334 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3336 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3337 		return 0;
3338 	}
3339 
3340 	struct io_context* context = NULL;
3341 
3342 	if (argc > 1) {
3343 		ulong num = parse_expression(argv[1]);
3344 		if (IS_KERNEL_ADDRESS(num))
3345 			context = (struct io_context*)num;
3346 		else {
3347 			Team* team = team_get_team_struct_locked(num);
3348 			if (team == NULL) {
3349 				kprintf("could not find team with ID %lu\n", num);
3350 				return 0;
3351 			}
3352 			context = (struct io_context*)team->io_context;
3353 		}
3354 	} else
3355 		context = get_current_io_context(true);
3356 
3357 	kprintf("I/O CONTEXT: %p\n", context);
3358 	kprintf(" root vnode:\t%p\n", context->root);
3359 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3360 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3361 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3362 
3363 	if (context->num_used_fds) {
3364 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3365 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3366 	}
3367 
3368 	for (uint32 i = 0; i < context->table_size; i++) {
3369 		struct file_descriptor* fd = context->fds[i];
3370 		if (fd == NULL)
3371 			continue;
3372 
3373 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3374 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3375 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3376 			fd->pos, fd->cookie,
3377 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3378 				? "mount" : "vnode",
3379 			fd->u.vnode);
3380 	}
3381 
3382 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3383 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3384 
3385 	set_debug_variable("_cwd", (addr_t)context->cwd);
3386 
3387 	return 0;
3388 }
3389 
3390 
3391 int
3392 dump_vnode_usage(int argc, char** argv)
3393 {
3394 	if (argc != 1) {
3395 		kprintf("usage: %s\n", argv[0]);
3396 		return 0;
3397 	}
3398 
3399 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3400 		sUnusedVnodes, kMaxUnusedVnodes);
3401 
3402 	struct hash_iterator iterator;
3403 	hash_open(sVnodeTable, &iterator);
3404 
3405 	uint32 count = 0;
3406 	struct vnode* vnode;
3407 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3408 		count++;
3409 	}
3410 
3411 	hash_close(sVnodeTable, &iterator, false);
3412 
3413 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3414 		count - sUnusedVnodes);
3415 	return 0;
3416 }
3417 
3418 #endif	// ADD_DEBUGGER_COMMANDS
3419 
3420 /*!	Clears an iovec array of physical pages.
3421 	Returns in \a _bytes the number of bytes successfully cleared.
3422 */
3423 static status_t
3424 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3425 {
3426 	size_t bytes = *_bytes;
3427 	size_t index = 0;
3428 
3429 	while (bytes > 0) {
3430 		size_t length = min_c(vecs[index].iov_len, bytes);
3431 
3432 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3433 			length);
3434 		if (status != B_OK) {
3435 			*_bytes -= bytes;
3436 			return status;
3437 		}
3438 
		bytes -= length;
		index++;
	}
3441 
3442 	return B_OK;
3443 }
3444 
3445 
3446 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3447 	and calls the file system hooks to read/write the request to disk.
3448 */
3449 static status_t
3450 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3451 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3452 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3453 	bool doWrite)
3454 {
3455 	if (fileVecCount == 0) {
3456 		// There are no file vecs at this offset, so we're obviously trying
3457 		// to access the file outside of its bounds
3458 		return B_BAD_VALUE;
3459 	}
3460 
3461 	size_t numBytes = *_numBytes;
3462 	uint32 fileVecIndex;
3463 	size_t vecOffset = *_vecOffset;
3464 	uint32 vecIndex = *_vecIndex;
3465 	status_t status;
3466 	size_t size;
3467 
3468 	if (!doWrite && vecOffset == 0) {
3469 		// now directly read the data from the device
3470 		// the first file_io_vec can be read directly
3471 
3472 		if (fileVecs[0].length < (off_t)numBytes)
3473 			size = fileVecs[0].length;
3474 		else
3475 			size = numBytes;
3476 
3477 		if (fileVecs[0].offset >= 0) {
3478 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3479 				&vecs[vecIndex], vecCount - vecIndex, &size);
3480 		} else {
3481 			// sparse read
3482 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3483 		}
3484 		if (status != B_OK)
3485 			return status;
3486 
3487 		// TODO: this is a work-around for buggy device drivers!
3488 		//	When our own drivers honour the length, we can:
3489 		//	a) also use this direct I/O for writes (otherwise, it would
3490 		//	   overwrite precious data)
3491 		//	b) panic if the term below is true (at least for writes)
3492 		if ((off_t)size > fileVecs[0].length) {
3493 			//dprintf("warning: device driver %p doesn't respect total length "
3494 			//	"in read_pages() call!\n", ref->device);
3495 			size = fileVecs[0].length;
3496 		}
3497 
3498 		ASSERT((off_t)size <= fileVecs[0].length);
3499 
3500 		// If the file portion was contiguous, we're already done now
3501 		if (size == numBytes)
3502 			return B_OK;
3503 
3504 		// if we reached the end of the file, we can return as well
3505 		if ((off_t)size != fileVecs[0].length) {
3506 			*_numBytes = size;
3507 			return B_OK;
3508 		}
3509 
3510 		fileVecIndex = 1;
3511 
3512 		// first, find out where we have to continue in our iovecs
3513 		for (; vecIndex < vecCount; vecIndex++) {
3514 			if (size < vecs[vecIndex].iov_len)
3515 				break;
3516 
3517 			size -= vecs[vecIndex].iov_len;
3518 		}
3519 
3520 		vecOffset = size;
3521 	} else {
3522 		fileVecIndex = 0;
3523 		size = 0;
3524 	}
3525 
3526 	// Too bad, let's process the rest of the file_io_vecs
3527 
3528 	size_t totalSize = size;
3529 	size_t bytesLeft = numBytes - size;
3530 
3531 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3532 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3533 		off_t fileOffset = fileVec.offset;
3534 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3535 
3536 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3537 			fileLeft));
3538 
3539 		// process the complete fileVec
3540 		while (fileLeft > 0) {
3541 			iovec tempVecs[MAX_TEMP_IO_VECS];
3542 			uint32 tempCount = 0;
3543 
3544 			// size tracks how much of what is left of the current fileVec
3545 			// (fileLeft) has been assigned to tempVecs
3546 			size = 0;
3547 
3548 			// assign what is left of the current fileVec to the tempVecs
3549 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3550 					&& tempCount < MAX_TEMP_IO_VECS;) {
3551 				// try to satisfy one iovec per iteration (or as much as
3552 				// possible)
3553 
3554 				// bytes left of the current iovec
3555 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3556 				if (vecLeft == 0) {
3557 					vecOffset = 0;
3558 					vecIndex++;
3559 					continue;
3560 				}
3561 
3562 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3563 					vecIndex, vecOffset, size));
3564 
3565 				// actually available bytes
3566 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3567 
3568 				tempVecs[tempCount].iov_base
3569 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3570 				tempVecs[tempCount].iov_len = tempVecSize;
3571 				tempCount++;
3572 
3573 				size += tempVecSize;
3574 				vecOffset += tempVecSize;
3575 			}
3576 
3577 			size_t bytes = size;
3578 
3579 			if (fileOffset == -1) {
3580 				if (doWrite) {
3581 					panic("sparse write attempt: vnode %p", vnode);
3582 					status = B_IO_ERROR;
3583 				} else {
3584 					// sparse read
3585 					status = zero_pages(tempVecs, tempCount, &bytes);
3586 				}
3587 			} else if (doWrite) {
3588 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3589 					tempVecs, tempCount, &bytes);
3590 			} else {
3591 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3592 					tempVecs, tempCount, &bytes);
3593 			}
3594 			if (status != B_OK)
3595 				return status;
3596 
3597 			totalSize += bytes;
3598 			bytesLeft -= size;
3599 			if (fileOffset >= 0)
3600 				fileOffset += size;
3601 			fileLeft -= size;
3602 			//dprintf("-> file left = %Lu\n", fileLeft);
3603 
3604 			if (size != bytes || vecIndex >= vecCount) {
3605 				// there are no more bytes or iovecs, let's bail out
3606 				*_numBytes = totalSize;
3607 				return B_OK;
3608 			}
3609 		}
3610 	}
3611 
3612 	*_vecIndex = vecIndex;
3613 	*_vecOffset = vecOffset;
3614 	*_numBytes = totalSize;
3615 	return B_OK;
3616 }
3617 
3618 
3619 //	#pragma mark - public API for file systems
3620 
3621 
3622 extern "C" status_t
3623 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3624 	fs_vnode_ops* ops)
3625 {
3626 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3627 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3628 
3629 	if (privateNode == NULL)
3630 		return B_BAD_VALUE;
3631 
3632 	// create the node
3633 	bool nodeCreated;
3634 	struct vnode* vnode;
3635 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3636 		nodeCreated);
3637 	if (status != B_OK)
3638 		return status;
3639 
3640 	WriteLocker nodeLocker(sVnodeLock, true);
3641 		// create_new_vnode_and_lock() has locked for us
3642 
3643 	// file system integrity check:
3644 	// test if the vnode already exists and bail out if this is the case!
3645 	if (!nodeCreated) {
3646 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3647 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3648 			vnode->private_node);
3649 		return B_ERROR;
3650 	}
3651 
3652 	vnode->private_node = privateNode;
3653 	vnode->ops = ops;
3654 	vnode->SetUnpublished(true);
3655 
3656 	TRACE(("returns: %s\n", strerror(status)));
3657 
3658 	return status;
3659 }
3660 
3661 
3662 extern "C" status_t
3663 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3664 	fs_vnode_ops* ops, int type, uint32 flags)
3665 {
3666 	FUNCTION(("publish_vnode()\n"));
3667 
3668 	WriteLocker locker(sVnodeLock);
3669 
3670 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3671 
3672 	bool nodeCreated = false;
3673 	if (vnode == NULL) {
3674 		if (privateNode == NULL)
3675 			return B_BAD_VALUE;
3676 
3677 		// create the node
3678 		locker.Unlock();
3679 			// create_new_vnode_and_lock() will re-lock for us on success
3680 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3681 			nodeCreated);
3682 		if (status != B_OK)
3683 			return status;
3684 
3685 		locker.SetTo(sVnodeLock, true);
3686 	}
3687 
3688 	if (nodeCreated) {
3689 		vnode->private_node = privateNode;
3690 		vnode->ops = ops;
3691 		vnode->SetUnpublished(true);
3692 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3693 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3694 		// already known, but not published
3695 	} else
3696 		return B_BAD_VALUE;
3697 
3698 	bool publishSpecialSubNode = false;
3699 
3700 	vnode->SetType(type);
3701 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3702 	publishSpecialSubNode = is_special_node_type(type)
3703 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3704 
3705 	status_t status = B_OK;
3706 
3707 	// create sub vnodes, if necessary
3708 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3709 		locker.Unlock();
3710 
3711 		fs_volume* subVolume = volume;
3712 		if (volume->sub_volume != NULL) {
3713 			while (status == B_OK && subVolume->sub_volume != NULL) {
3714 				subVolume = subVolume->sub_volume;
3715 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3716 					vnode);
3717 			}
3718 		}
3719 
3720 		if (status == B_OK && publishSpecialSubNode)
3721 			status = create_special_sub_node(vnode, flags);
3722 
3723 		if (status != B_OK) {
3724 			// error -- clean up the created sub vnodes
3725 			while (subVolume->super_volume != volume) {
3726 				subVolume = subVolume->super_volume;
3727 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3728 			}
3729 		}
3730 
3731 		if (status == B_OK) {
3732 			ReadLocker vnodesReadLocker(sVnodeLock);
3733 			AutoLocker<Vnode> nodeLocker(vnode);
3734 			vnode->SetBusy(false);
3735 			vnode->SetUnpublished(false);
3736 		} else {
3737 			locker.Lock();
3738 			hash_remove(sVnodeTable, vnode);
3739 			remove_vnode_from_mount_list(vnode, vnode->mount);
3740 			free(vnode);
3741 		}
3742 	} else {
3743 		// we still hold the write lock -- mark the node unbusy and published
3744 		vnode->SetBusy(false);
3745 		vnode->SetUnpublished(false);
3746 	}
3747 
3748 	TRACE(("returns: %s\n", strerror(status)));
3749 
3750 	return status;
3751 }
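

// A minimal sketch (not compiled; cf. the #if 0 convention used elsewhere in
// this file) of how a file system typically combines new_vnode() and
// publish_vnode() when creating a node: register the private node first,
// then make it visible. my_inode and gMyVnodeOps are hypothetical names, not
// part of this file.
#if 0
static status_t
my_fs_create_and_publish(fs_volume* volume, ino_t id, my_inode* inode)
{
	status_t status = new_vnode(volume, id, inode, &gMyVnodeOps);
	if (status != B_OK)
		return status;

	// the vnode is registered but still unpublished; publish it now
	return publish_vnode(volume, id, inode, &gMyVnodeOps, S_IFREG, 0);
}
#endif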
3752 
3753 
3754 extern "C" status_t
3755 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3756 {
3757 	struct vnode* vnode;
3758 
3759 	if (volume == NULL)
3760 		return B_BAD_VALUE;
3761 
3762 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3763 	if (status != B_OK)
3764 		return status;
3765 
3766 	// If this is a layered FS, we need to get the node cookie for the requested
3767 	// layer.
3768 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3769 		fs_vnode resolvedNode;
3770 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3771 			&resolvedNode);
3772 		if (status != B_OK) {
3773 			panic("get_vnode(): Failed to get super node for vnode %p, "
3774 				"volume: %p", vnode, volume);
3775 			put_vnode(vnode);
3776 			return status;
3777 		}
3778 
3779 		if (_privateNode != NULL)
3780 			*_privateNode = resolvedNode.private_node;
3781 	} else if (_privateNode != NULL)
3782 		*_privateNode = vnode->private_node;
3783 
3784 	return B_OK;
3785 }
3786 
3787 
3788 extern "C" status_t
3789 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3790 {
3791 	struct vnode* vnode;
3792 
3793 	rw_lock_read_lock(&sVnodeLock);
3794 	vnode = lookup_vnode(volume->id, vnodeID);
3795 	rw_lock_read_unlock(&sVnodeLock);
3796 
3797 	if (vnode == NULL)
3798 		return B_BAD_VALUE;
3799 
3800 	inc_vnode_ref_count(vnode);
3801 	return B_OK;
3802 }
3803 
3804 
3805 extern "C" status_t
3806 put_vnode(fs_volume* volume, ino_t vnodeID)
3807 {
3808 	struct vnode* vnode;
3809 
3810 	rw_lock_read_lock(&sVnodeLock);
3811 	vnode = lookup_vnode(volume->id, vnodeID);
3812 	rw_lock_read_unlock(&sVnodeLock);
3813 
3814 	if (vnode == NULL)
3815 		return B_BAD_VALUE;
3816 
3817 	dec_vnode_ref_count(vnode, false, true);
3818 	return B_OK;
3819 }
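

// A hedged usage sketch: every successful get_vnode() must be balanced by a
// put_vnode() on the same (volume, vnodeID) pair, or the node can never be
// discarded. my_inspect_node() and its use of the private node are
// illustrative only.
#if 0
static status_t
my_inspect_node(fs_volume* volume, ino_t id)
{
	void* privateNode;
	status_t status = get_vnode(volume, id, &privateNode);
	if (status != B_OK)
		return status;

	// ... work with privateNode ...

	put_vnode(volume, id);
		// release the reference acquired by get_vnode()
	return B_OK;
}
#endif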
3820 
3821 
3822 extern "C" status_t
3823 remove_vnode(fs_volume* volume, ino_t vnodeID)
3824 {
3825 	ReadLocker locker(sVnodeLock);
3826 
3827 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3828 	if (vnode == NULL)
3829 		return B_ENTRY_NOT_FOUND;
3830 
3831 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3832 		// this vnode is in use
3833 		return B_BUSY;
3834 	}
3835 
3836 	vnode->Lock();
3837 
3838 	vnode->SetRemoved(true);
3839 	bool removeUnpublished = false;
3840 
3841 	if (vnode->IsUnpublished()) {
3842 		// prepare the vnode for deletion
3843 		removeUnpublished = true;
3844 		vnode->SetBusy(true);
3845 	}
3846 
3847 	vnode->Unlock();
3848 	locker.Unlock();
3849 
3850 	if (removeUnpublished) {
3851 		// If the vnode hasn't been published yet, we delete it here
3852 		atomic_add(&vnode->ref_count, -1);
3853 		free_vnode(vnode, true);
3854 	}
3855 
3856 	return B_OK;
3857 }
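

// A sketch of the usual pattern around remove_vnode(): an FS unlink hook
// removes the directory entry, then marks the node for deletion; the actual
// deletion is deferred until the last reference is released. The hook
// signature is simplified and my_remove_entry() is a hypothetical helper.
#if 0
static status_t
my_fs_unlink(fs_volume* volume, void* directory, const char* name)
{
	ino_t id;
	status_t status = my_remove_entry(directory, name, &id);
	if (status != B_OK)
		return status;

	// mark the node removed; it is freed once its last reference is gone
	return remove_vnode(volume, id);
}
#endif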
3858 
3859 
3860 extern "C" status_t
3861 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3862 {
3863 	struct vnode* vnode;
3864 
3865 	rw_lock_read_lock(&sVnodeLock);
3866 
3867 	vnode = lookup_vnode(volume->id, vnodeID);
3868 	if (vnode) {
3869 		AutoLocker<Vnode> nodeLocker(vnode);
3870 		vnode->SetRemoved(false);
3871 	}
3872 
3873 	rw_lock_read_unlock(&sVnodeLock);
3874 	return B_OK;
3875 }
3876 
3877 
3878 extern "C" status_t
3879 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3880 {
3881 	ReadLocker _(sVnodeLock);
3882 
3883 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3884 		if (_removed != NULL)
3885 			*_removed = vnode->IsRemoved();
3886 		return B_OK;
3887 	}
3888 
3889 	return B_BAD_VALUE;
3890 }
3891 
3892 
3893 extern "C" fs_volume*
3894 volume_for_vnode(fs_vnode* _vnode)
3895 {
3896 	if (_vnode == NULL)
3897 		return NULL;
3898 
3899 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3900 	return vnode->mount->volume;
3901 }
3902 
3903 
3904 #if 0
3905 extern "C" status_t
3906 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3907 	size_t* _numBytes)
3908 {
3909 	struct file_descriptor* descriptor;
3910 	struct vnode* vnode;
3911 
3912 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3913 	if (descriptor == NULL)
3914 		return B_FILE_ERROR;
3915 
3916 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3917 		count, 0, _numBytes);
3918 
3919 	put_fd(descriptor);
3920 	return status;
3921 }
3922 
3923 
3924 extern "C" status_t
3925 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3926 	size_t* _numBytes)
3927 {
3928 	struct file_descriptor* descriptor;
3929 	struct vnode* vnode;
3930 
3931 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3932 	if (descriptor == NULL)
3933 		return B_FILE_ERROR;
3934 
3935 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3936 		count, 0, _numBytes);
3937 
3938 	put_fd(descriptor);
3939 	return status;
3940 }
3941 #endif
3942 
3943 
3944 extern "C" status_t
3945 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3946 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3947 	size_t* _bytes)
3948 {
3949 	struct file_descriptor* descriptor;
3950 	struct vnode* vnode;
3951 
3952 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3953 	if (descriptor == NULL)
3954 		return B_FILE_ERROR;
3955 
3956 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3957 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3958 		false);
3959 
3960 	put_fd(descriptor);
3961 	return status;
3962 }
3963 
3964 
3965 extern "C" status_t
3966 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3967 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3968 	size_t* _bytes)
3969 {
3970 	struct file_descriptor* descriptor;
3971 	struct vnode* vnode;
3972 
3973 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3974 	if (descriptor == NULL)
3975 		return B_FILE_ERROR;
3976 
3977 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3978 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3979 		true);
3980 
3981 	put_fd(descriptor);
3982 	return status;
3983 }
3984 
3985 
3986 extern "C" status_t
3987 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3988 {
3989 	// lookup mount -- the caller is required to make sure that the mount
3990 	// won't go away
3991 	MutexLocker locker(sMountMutex);
3992 	struct fs_mount* mount = find_mount(mountID);
3993 	if (mount == NULL)
3994 		return B_BAD_VALUE;
3995 	locker.Unlock();
3996 
3997 	return mount->entry_cache.Add(dirID, name, nodeID);
3998 }
3999 
4000 
4001 extern "C" status_t
4002 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4003 {
4004 	// lookup mount -- the caller is required to make sure that the mount
4005 	// won't go away
4006 	MutexLocker locker(sMountMutex);
4007 	struct fs_mount* mount = find_mount(mountID);
4008 	if (mount == NULL)
4009 		return B_BAD_VALUE;
4010 	locker.Unlock();
4011 
4012 	return mount->entry_cache.Remove(dirID, name);
4013 }
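

// A short sketch of keeping the entry cache coherent from a file system:
// add entries on successful lookup/create, remove them on unlink/rename.
// dirID, name, and nodeID stand for the respective FS-side values.
#if 0
	// after resolving "name" in directory dirID to node nodeID:
	entry_cache_add(volume->id, dirID, name, nodeID);

	// and once the entry disappears again:
	entry_cache_remove(volume->id, dirID, name);
#endif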
4014 
4015 
4016 //	#pragma mark - private VFS API
4017 //	Functions the VFS exports for other parts of the kernel
4018 
4019 
4020 /*! Acquires another reference to the vnode that has to be released
4021 	by calling vfs_put_vnode().
4022 */
4023 void
4024 vfs_acquire_vnode(struct vnode* vnode)
4025 {
4026 	inc_vnode_ref_count(vnode);
4027 }
4028 
4029 
4030 /*! This is currently called from file_cache_create() only.
4031 	It's probably a temporary solution as long as devfs requires that
4032 	fs_read_pages()/fs_write_pages() are called with the standard
4033 	open cookie and not with a device cookie.
4034 	If that's done differently, remove this call; it has no other
4035 	purpose.
4036 */
4037 extern "C" status_t
4038 vfs_get_cookie_from_fd(int fd, void** _cookie)
4039 {
4040 	struct file_descriptor* descriptor;
4041 
4042 	descriptor = get_fd(get_current_io_context(true), fd);
4043 	if (descriptor == NULL)
4044 		return B_FILE_ERROR;
4045 
4046 	*_cookie = descriptor->cookie;
4047 	return B_OK;
4048 }
4049 
4050 
4051 extern "C" status_t
4052 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4053 {
4054 	*vnode = get_vnode_from_fd(fd, kernel);
4055 
4056 	if (*vnode == NULL)
4057 		return B_FILE_ERROR;
4058 
4059 	return B_NO_ERROR;
4060 }
4061 
4062 
4063 extern "C" status_t
4064 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4065 {
4066 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4067 		path, kernel));
4068 
4069 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4070 	if (pathBuffer.InitCheck() != B_OK)
4071 		return B_NO_MEMORY;
4072 
4073 	char* buffer = pathBuffer.LockBuffer();
4074 	strlcpy(buffer, path, pathBuffer.BufferSize());
4075 
4076 	struct vnode* vnode;
4077 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4078 	if (status != B_OK)
4079 		return status;
4080 
4081 	*_vnode = vnode;
4082 	return B_OK;
4083 }
4084 
4085 
4086 extern "C" status_t
4087 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4088 {
4089 	struct vnode* vnode;
4090 
4091 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4092 	if (status != B_OK)
4093 		return status;
4094 
4095 	*_vnode = vnode;
4096 	return B_OK;
4097 }
4098 
4099 
4100 extern "C" status_t
4101 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4102 	const char* name, struct vnode** _vnode)
4103 {
4104 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4105 }
4106 
4107 
4108 extern "C" void
4109 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4110 {
4111 	*_mountID = vnode->device;
4112 	*_vnodeID = vnode->id;
4113 }
4114 
4115 
4116 /*!
4117 	Helper function abstracting the process of "converting" a given
4118 	vnode-pointer to a fs_vnode-pointer.
4119 	Currently only used in bindfs.
4120 */
4121 extern "C" fs_vnode*
4122 vfs_fsnode_for_vnode(struct vnode* vnode)
4123 {
4124 	return vnode;
4125 }
4126 
4127 
4128 /*!
4129 	Calls fs_open() on the given vnode and returns a new
4130 	file descriptor for it
4131 */
4132 int
4133 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4134 {
4135 	return open_vnode(vnode, openMode, kernel);
4136 }
4137 
4138 
4139 /*!	Looks up a vnode with the given mount and vnode ID.
4140 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4141 	to the node.
4142 	It's currently only used by file_cache_create().
4143 */
4144 extern "C" status_t
4145 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4146 {
4147 	rw_lock_read_lock(&sVnodeLock);
4148 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4149 	rw_lock_read_unlock(&sVnodeLock);
4150 
4151 	if (vnode == NULL)
4152 		return B_ERROR;
4153 
4154 	*_vnode = vnode;
4155 	return B_OK;
4156 }
4157 
4158 
4159 extern "C" status_t
4160 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4161 	bool traverseLeafLink, bool kernel, void** _node)
4162 {
4163 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4164 		volume, path, kernel));
4165 
4166 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4167 	if (pathBuffer.InitCheck() != B_OK)
4168 		return B_NO_MEMORY;
4169 
4170 	fs_mount* mount;
4171 	status_t status = get_mount(volume->id, &mount);
4172 	if (status != B_OK)
4173 		return status;
4174 
4175 	char* buffer = pathBuffer.LockBuffer();
4176 	strlcpy(buffer, path, pathBuffer.BufferSize());
4177 
4178 	struct vnode* vnode = mount->root_vnode;
4179 
4180 	if (buffer[0] == '/')
4181 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4182 	else {
4183 		inc_vnode_ref_count(vnode);
4184 			// vnode_path_to_vnode() releases a reference to the starting vnode
4185 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4186 			kernel, &vnode, NULL);
4187 	}
4188 
4189 	put_mount(mount);
4190 
4191 	if (status != B_OK)
4192 		return status;
4193 
4194 	if (vnode->device != volume->id) {
4195 		// wrong mount ID - must not gain access on foreign file system nodes
4196 		put_vnode(vnode);
4197 		return B_BAD_VALUE;
4198 	}
4199 
4200 	// Use get_vnode() to resolve the cookie for the right layer.
4201 	status = get_vnode(volume, vnode->id, _node);
4202 	put_vnode(vnode);
4203 
4204 	return status;
4205 }
4206 
4207 
4208 status_t
4209 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4210 	struct stat* stat, bool kernel)
4211 {
4212 	status_t status;
4213 
4214 	if (path) {
4215 		// path given: get the stat of the node referred to by (fd, path)
4216 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4217 		if (pathBuffer.InitCheck() != B_OK)
4218 			return B_NO_MEMORY;
4219 
4220 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4221 			traverseLeafLink, stat, kernel);
4222 	} else {
4223 		// no path given: get the FD and use the FD operation
4224 		struct file_descriptor* descriptor
4225 			= get_fd(get_current_io_context(kernel), fd);
4226 		if (descriptor == NULL)
4227 			return B_FILE_ERROR;
4228 
4229 		if (descriptor->ops->fd_read_stat)
4230 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4231 		else
4232 			status = B_UNSUPPORTED;
4233 
4234 		put_fd(descriptor);
4235 	}
4236 
4237 	return status;
4238 }
4239 
4240 
4241 /*!	Finds the full path to the file that contains the module \a moduleName,
4242 	puts it into \a pathBuffer, and returns B_OK for success.
4243 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4244 	\c B_ENTRY_NOT_FOUND if no file could be found.
4245 	\a pathBuffer is clobbered in any case and must not be relied on if this
4246 	function returns unsuccessfully.
4247 	\a basePath and \a pathBuffer must not point to the same space.
4248 */
4249 status_t
4250 vfs_get_module_path(const char* basePath, const char* moduleName,
4251 	char* pathBuffer, size_t bufferSize)
4252 {
4253 	struct vnode* dir;
4254 	struct vnode* file;
4255 	status_t status;
4256 	size_t length;
4257 	char* path;
4258 
4259 	if (bufferSize == 0
4260 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4261 		return B_BUFFER_OVERFLOW;
4262 
4263 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4264 	if (status != B_OK)
4265 		return status;
4266 
4267 	// the path buffer has been clobbered by the above call
4268 	length = strlcpy(pathBuffer, basePath, bufferSize);
4269 	if (pathBuffer[length - 1] != '/')
4270 		pathBuffer[length++] = '/';
4271 
4272 	path = pathBuffer + length;
4273 	bufferSize -= length;
4274 
4275 	while (moduleName) {
4276 		char* nextPath = strchr(moduleName, '/');
4277 		if (nextPath == NULL)
4278 			length = strlen(moduleName);
4279 		else {
4280 			length = nextPath - moduleName;
4281 			nextPath++;
4282 		}
4283 
4284 		if (length + 1 >= bufferSize) {
4285 			status = B_BUFFER_OVERFLOW;
4286 			goto err;
4287 		}
4288 
4289 		memcpy(path, moduleName, length);
4290 		path[length] = '\0';
4291 		moduleName = nextPath;
4292 
4293 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4294 		if (status != B_OK) {
4295 			// vnode_path_to_vnode() has already released the reference to dir
4296 			return status;
4297 		}
4298 
4299 		if (S_ISDIR(file->Type())) {
4300 			// go to the next directory
4301 			path[length] = '/';
4302 			path[length + 1] = '\0';
4303 			path += length + 1;
4304 			bufferSize -= length + 1;
4305 
4306 			dir = file;
4307 		} else if (S_ISREG(file->Type())) {
4308 			// it's a file so it should be what we've searched for
4309 			put_vnode(file);
4310 
4311 			return B_OK;
4312 		} else {
4313 			TRACE(("vfs_get_module_path(): something is strange here: "
4314 				"0x%08" B_PRIx32 "...\n", file->Type()));
4315 			status = B_ERROR;
4316 			dir = file;
4317 			goto err;
4318 		}
4319 	}
4320 
4321 	// if we got here, the moduleName just pointed to a directory, not to
4322 	// a real module - what should we do in this case?
4323 	status = B_ENTRY_NOT_FOUND;
4324 
4325 err:
4326 	put_vnode(dir);
4327 	return status;
4328 }
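

// A usage sketch for vfs_get_module_path(); the base path and module name
// are illustrative only.
#if 0
	char path[B_PATH_NAME_LENGTH];
	status_t status = vfs_get_module_path("/boot/system/add-ons/kernel",
		"bus_managers/pci/v1", path, sizeof(path));
		// on success, path names the file containing the module
#endif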
4329 
4330 
4331 /*!	\brief Normalizes a given path.
4332 
4333 	The path must refer to an existing or non-existing entry in an existing
4334 	directory, that is chopping off the leaf component the remaining path must
4335 	refer to an existing directory.
4336 
4337 	The returned path will be canonical in that it will be absolute, will
4338 	not contain any "." or ".." components or duplicate occurrences of
4339 	'/', and none of the directory components will be symbolic links.
4340 
4341 	Any two paths referring to the same entry will result in the same
4342 	normalized path (well, that is pretty much the definition of
4343 	"normalized", isn't it :-).
4344 
4345 	\param path The path to be normalized.
4346 	\param buffer The buffer into which the normalized path will be written.
4347 		   May be the same one as \a path.
4348 	\param bufferSize The size of \a buffer.
4349 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4350 	\param kernel \c true, if the IO context of the kernel shall be used,
4351 		   otherwise that of the team this thread belongs to. Only relevant,
4352 		   if the path is relative (to get the CWD).
4353 	\return \c B_OK if everything went fine, another error code otherwise.
4354 */
4355 status_t
4356 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4357 	bool traverseLink, bool kernel)
4358 {
4359 	if (!path || !buffer || bufferSize < 1)
4360 		return B_BAD_VALUE;
4361 
4362 	if (path != buffer) {
4363 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4364 			return B_BUFFER_OVERFLOW;
4365 	}
4366 
4367 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4368 }
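

// A minimal sketch for vfs_normalize_path(); the input path is illustrative.
// Since \a buffer may alias \a path, normalizing in place is fine:
#if 0
	char path[B_PATH_NAME_LENGTH];
	strlcpy(path, "/boot/home/../home/./Desktop", sizeof(path));

	status_t status = vfs_normalize_path(path, path, sizeof(path), true, true);
		// on success, path reads "/boot/home/Desktop" (assuming no symlinks
		// along the way)
#endif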
4369 
4370 
4371 /*!	\brief Creates a special node in the file system.
4372 
4373 	The caller gets a reference to the newly created node (which is passed
4374 	back through \a _createdVnode) and is responsible for releasing it.
4375 
4376 	\param path The path where to create the entry for the node. Can be \c NULL,
4377 		in which case the node is created without an entry in the root FS -- it
4378 		will automatically be deleted when the last reference has been released.
4379 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4380 		the target file system will just create the node with its standard
4381 		operations. Depending on the type of the node a subnode might be created
4382 		automatically, though.
4383 	\param mode The type and permissions for the node to be created.
4384 	\param flags Flags to be passed to the creating FS.
4385 	\param kernel \c true, if called in the kernel context (relevant only if
4386 		\a path is not \c NULL and not absolute).
4387 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4388 		file system creating the node, with the private data pointer and
4389 		operations for the super node. Can be \c NULL.
4390 	\param _createdVnode Pointer to pre-allocated storage where to store the
4391 		pointer to the newly created node.
4392 	\return \c B_OK, if everything went fine, another error code otherwise.
4393 */
4394 status_t
4395 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4396 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4397 	struct vnode** _createdVnode)
4398 {
4399 	struct vnode* dirNode;
4400 	char _leaf[B_FILE_NAME_LENGTH];
4401 	char* leaf = NULL;
4402 
4403 	if (path) {
4404 		// We've got a path. Get the dir vnode and the leaf name.
4405 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4406 		if (tmpPathBuffer.InitCheck() != B_OK)
4407 			return B_NO_MEMORY;
4408 
4409 		char* tmpPath = tmpPathBuffer.LockBuffer();
4410 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4411 			return B_NAME_TOO_LONG;
4412 
4413 		// get the dir vnode and the leaf name
4414 		leaf = _leaf;
4415 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4416 		if (error != B_OK)
4417 			return error;
4418 	} else {
4419 		// No path. Create the node in the root FS.
4420 		dirNode = sRoot;
4421 		inc_vnode_ref_count(dirNode);
4422 	}
4423 
4424 	VNodePutter _(dirNode);
4425 
4426 	// check support for creating special nodes
4427 	if (!HAS_FS_CALL(dirNode, create_special_node))
4428 		return B_UNSUPPORTED;
4429 
4430 	// create the node
4431 	fs_vnode superVnode;
4432 	ino_t nodeID;
4433 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4434 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4435 	if (status != B_OK)
4436 		return status;
4437 
4438 	// lookup the node
4439 	rw_lock_read_lock(&sVnodeLock);
4440 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4441 	rw_lock_read_unlock(&sVnodeLock);
4442 
4443 	if (*_createdVnode == NULL) {
4444 		panic("vfs_create_special_node(): lookup of node failed");
4445 		return B_ERROR;
4446 	}
4447 
4448 	return B_OK;
4449 }
4450 
4451 
4452 extern "C" void
4453 vfs_put_vnode(struct vnode* vnode)
4454 {
4455 	put_vnode(vnode);
4456 }
4457 
4458 
4459 extern "C" status_t
4460 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4461 {
4462 	// Get current working directory from io context
4463 	struct io_context* context = get_current_io_context(false);
4464 	status_t status = B_OK;
4465 
4466 	mutex_lock(&context->io_mutex);
4467 
4468 	if (context->cwd != NULL) {
4469 		*_mountID = context->cwd->device;
4470 		*_vnodeID = context->cwd->id;
4471 	} else
4472 		status = B_ERROR;
4473 
4474 	mutex_unlock(&context->io_mutex);
4475 	return status;
4476 }
4477 
4478 
4479 status_t
4480 vfs_unmount(dev_t mountID, uint32 flags)
4481 {
4482 	return fs_unmount(NULL, mountID, flags, true);
4483 }
4484 
4485 
4486 extern "C" status_t
4487 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4488 {
4489 	struct vnode* vnode;
4490 
4491 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4492 	if (status != B_OK)
4493 		return status;
4494 
4495 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4496 	put_vnode(vnode);
4497 	return B_OK;
4498 }
4499 
4500 
4501 extern "C" void
4502 vfs_free_unused_vnodes(int32 level)
4503 {
4504 	vnode_low_resource_handler(NULL,
4505 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4506 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4507 		level);
4508 }
4509 
4510 
4511 extern "C" bool
4512 vfs_can_page(struct vnode* vnode, void* cookie)
4513 {
4514 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4515 
4516 	if (HAS_FS_CALL(vnode, can_page))
4517 		return FS_CALL(vnode, can_page, cookie);
4518 	return false;
4519 }
4520 
4521 
4522 extern "C" status_t
4523 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4524 	const generic_io_vec* vecs, size_t count, uint32 flags,
4525 	generic_size_t* _numBytes)
4526 {
4527 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4528 		vecs, pos));
4529 
4530 #if VFS_PAGES_IO_TRACING
4531 	generic_size_t bytesRequested = *_numBytes;
4532 #endif
4533 
4534 	IORequest request;
4535 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4536 	if (status == B_OK) {
4537 		status = vfs_vnode_io(vnode, cookie, &request);
4538 		if (status == B_OK)
4539 			status = request.Wait();
4540 		*_numBytes = request.TransferredBytes();
4541 	}
4542 
4543 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4544 		status, *_numBytes));
4545 
4546 	return status;
4547 }
4548 
4549 
4550 extern "C" status_t
4551 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4552 	const generic_io_vec* vecs, size_t count, uint32 flags,
4553 	generic_size_t* _numBytes)
4554 {
4555 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4556 		vecs, pos));
4557 
4558 #if VFS_PAGES_IO_TRACING
4559 	generic_size_t bytesRequested = *_numBytes;
4560 #endif
4561 
4562 	IORequest request;
4563 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4564 	if (status == B_OK) {
4565 		status = vfs_vnode_io(vnode, cookie, &request);
4566 		if (status == B_OK)
4567 			status = request.Wait();
4568 		*_numBytes = request.TransferredBytes();
4569 	}
4570 
4571 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4572 		status, *_numBytes));
4573 
4574 	return status;
4575 }
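

// A hedged sketch of a vfs_read_pages() call: one generic_io_vec describing
// the target range, reading a page from offset 0. vnode, cookie, and buffer
// are assumed to come from elsewhere (e.g. vfs_get_vnode() and an FS open
// cookie).
#if 0
	generic_io_vec vec;
	vec.base = (generic_addr_t)buffer;
	vec.length = B_PAGE_SIZE;

	generic_size_t bytes = vec.length;
	status_t status = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0, &bytes);
		// on success, bytes holds the number of bytes actually transferred
#endif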
4576 
4577 
4578 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will
4579 	be created if \a allocate is \c true.
4580 	On success, this also grabs a reference to the cache
4581 	it returns.
4582 */
4583 extern "C" status_t
4584 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4585 {
4586 	if (vnode->cache != NULL) {
4587 		vnode->cache->AcquireRef();
4588 		*_cache = vnode->cache;
4589 		return B_OK;
4590 	}
4591 
4592 	rw_lock_read_lock(&sVnodeLock);
4593 	vnode->Lock();
4594 
4595 	status_t status = B_OK;
4596 
4597 	// The cache could have been created in the meantime
4598 	if (vnode->cache == NULL) {
4599 		if (allocate) {
4600 			// TODO: actually the vnode needs to be busy already here, or
4601 			//	else this won't work...
4602 			bool wasBusy = vnode->IsBusy();
4603 			vnode->SetBusy(true);
4604 
4605 			vnode->Unlock();
4606 			rw_lock_read_unlock(&sVnodeLock);
4607 
4608 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4609 
4610 			rw_lock_read_lock(&sVnodeLock);
4611 			vnode->Lock();
4612 			vnode->SetBusy(wasBusy);
4613 		} else
4614 			status = B_BAD_VALUE;
4615 	}
4616 
4617 	vnode->Unlock();
4618 	rw_lock_read_unlock(&sVnodeLock);
4619 
4620 	if (status == B_OK) {
4621 		vnode->cache->AcquireRef();
4622 		*_cache = vnode->cache;
4623 	}
4624 
4625 	return status;
4626 }
4627 
4628 
4629 status_t
4630 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4631 	file_io_vec* vecs, size_t* _count)
4632 {
4633 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4634 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4635 
4636 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4637 }
4638 
4639 
4640 status_t
4641 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4642 {
4643 	status_t status = FS_CALL(vnode, read_stat, stat);
4644 
4645 	// fill in the st_dev and st_ino fields
4646 	if (status == B_OK) {
4647 		stat->st_dev = vnode->device;
4648 		stat->st_ino = vnode->id;
4649 		stat->st_rdev = -1;
4650 	}
4651 
4652 	return status;
4653 }
4654 
4655 
4656 status_t
4657 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4658 {
4659 	struct vnode* vnode;
4660 	status_t status = get_vnode(device, inode, &vnode, true, false);
4661 	if (status != B_OK)
4662 		return status;
4663 
4664 	status = FS_CALL(vnode, read_stat, stat);
4665 
4666 	// fill in the st_dev and st_ino fields
4667 	if (status == B_OK) {
4668 		stat->st_dev = vnode->device;
4669 		stat->st_ino = vnode->id;
4670 		stat->st_rdev = -1;
4671 	}
4672 
4673 	put_vnode(vnode);
4674 	return status;
4675 }
4676 
4677 
4678 status_t
4679 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4680 {
4681 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4682 }
4683 
4684 
4685 status_t
4686 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4687 	bool kernel, char* path, size_t pathLength)
4688 {
4689 	struct vnode* vnode;
4690 	status_t status;
4691 
4692 	// filter invalid leaf names
4693 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4694 		return B_BAD_VALUE;
4695 
4696 	// get the vnode matching the dir's node_ref
4697 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4698 		// special cases "." and "..": we can directly get the vnode of the
4699 		// referenced directory
4700 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4701 		leaf = NULL;
4702 	} else
4703 		status = get_vnode(device, inode, &vnode, true, false);
4704 	if (status != B_OK)
4705 		return status;
4706 
4707 	// get the directory path
4708 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4709 	put_vnode(vnode);
4710 		// we don't need the vnode anymore
4711 	if (status != B_OK)
4712 		return status;
4713 
4714 	// append the leaf name
4715 	if (leaf) {
4716 		// insert a directory separator if this is not the file system root
4717 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4718 				>= pathLength)
4719 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4720 			return B_NAME_TOO_LONG;
4721 		}
4722 	}
4723 
4724 	return B_OK;
4725 }
4726 
4727 
4728 /*!	If the given descriptor locked its vnode, that lock will be released. */
4729 void
4730 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4731 {
4732 	struct vnode* vnode = fd_vnode(descriptor);
4733 
4734 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4735 		vnode->mandatory_locked_by = NULL;
4736 }
4737 
4738 
4739 /*!	Closes all file descriptors of the specified I/O context that
4740 	have the O_CLOEXEC flag set.
4741 */
4742 void
4743 vfs_exec_io_context(io_context* context)
4744 {
4745 	uint32 i;
4746 
4747 	for (i = 0; i < context->table_size; i++) {
4748 		mutex_lock(&context->io_mutex);
4749 
4750 		struct file_descriptor* descriptor = context->fds[i];
4751 		bool remove = false;
4752 
4753 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4754 			context->fds[i] = NULL;
4755 			context->num_used_fds--;
4756 
4757 			remove = true;
4758 		}
4759 
4760 		mutex_unlock(&context->io_mutex);
4761 
4762 		if (remove) {
4763 			close_fd(descriptor);
4764 			put_fd(descriptor);
4765 		}
4766 	}
4767 }
4768 
4769 
4770 /*! Sets up a new io_context structure, and inherits the properties
4771 	of the parent io_context if one is given.
4772 */
4773 io_context*
4774 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4775 {
4776 	io_context* context = (io_context*)malloc(sizeof(io_context));
4777 	if (context == NULL)
4778 		return NULL;
4779 
4780 	TIOC(NewIOContext(context, parentContext));
4781 
4782 	memset(context, 0, sizeof(io_context));
4783 	context->ref_count = 1;
4784 
4785 	MutexLocker parentLocker;
4786 
4787 	size_t tableSize;
4788 	if (parentContext) {
4789 		parentLocker.SetTo(parentContext->io_mutex, false);
4790 		tableSize = parentContext->table_size;
4791 	} else
4792 		tableSize = DEFAULT_FD_TABLE_SIZE;
4793 
4794 	// allocate space for FDs and their close-on-exec flag
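	// (one allocation holding, in order: the FD table, the select_info
	// table, and a bitmap with one close-on-exec bit per FD -- hence the
	// (tableSize + 7) / 8 bytes)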
4795 	context->fds = (file_descriptor**)malloc(
4796 		sizeof(struct file_descriptor*) * tableSize
4797 		+ sizeof(struct select_sync*) * tableSize
4798 		+ (tableSize + 7) / 8);
4799 	if (context->fds == NULL) {
4800 		free(context);
4801 		return NULL;
4802 	}
4803 
4804 	context->select_infos = (select_info**)(context->fds + tableSize);
4805 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4806 
4807 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4808 		+ sizeof(struct select_sync*) * tableSize
4809 		+ (tableSize + 7) / 8);
4810 
4811 	mutex_init(&context->io_mutex, "I/O context");
4812 
4813 	// Copy all parent file descriptors
4814 
4815 	if (parentContext) {
4816 		size_t i;
4817 
4818 		mutex_lock(&sIOContextRootLock);
4819 		context->root = parentContext->root;
4820 		if (context->root)
4821 			inc_vnode_ref_count(context->root);
4822 		mutex_unlock(&sIOContextRootLock);
4823 
4824 		context->cwd = parentContext->cwd;
4825 		if (context->cwd)
4826 			inc_vnode_ref_count(context->cwd);
4827 
4828 		for (i = 0; i < tableSize; i++) {
4829 			struct file_descriptor* descriptor = parentContext->fds[i];
4830 
4831 			if (descriptor != NULL) {
4832 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4833 				if (closeOnExec && purgeCloseOnExec)
4834 					continue;
4835 
4836 				TFD(InheritFD(context, i, descriptor, parentContext));
4837 
4838 				context->fds[i] = descriptor;
4839 				context->num_used_fds++;
4840 				atomic_add(&descriptor->ref_count, 1);
4841 				atomic_add(&descriptor->open_count, 1);
4842 
4843 				if (closeOnExec)
4844 					fd_set_close_on_exec(context, i, true);
4845 			}
4846 		}
4847 
4848 		parentLocker.Unlock();
4849 	} else {
4850 		context->root = sRoot;
4851 		context->cwd = sRoot;
4852 
4853 		if (context->root)
4854 			inc_vnode_ref_count(context->root);
4855 
4856 		if (context->cwd)
4857 			inc_vnode_ref_count(context->cwd);
4858 	}
4859 
4860 	context->table_size = tableSize;
4861 
4862 	list_init(&context->node_monitors);
4863 	context->max_monitors = DEFAULT_NODE_MONITORS;
4864 
4865 	return context;
4866 }
4867 
4868 
4869 static status_t
4870 vfs_free_io_context(io_context* context)
4871 {
4872 	uint32 i;
4873 
4874 	TIOC(FreeIOContext(context));
4875 
4876 	if (context->root)
4877 		put_vnode(context->root);
4878 
4879 	if (context->cwd)
4880 		put_vnode(context->cwd);
4881 
4882 	mutex_lock(&context->io_mutex);
4883 
4884 	for (i = 0; i < context->table_size; i++) {
4885 		if (struct file_descriptor* descriptor = context->fds[i]) {
4886 			close_fd(descriptor);
4887 			put_fd(descriptor);
4888 		}
4889 	}
4890 
4891 	mutex_destroy(&context->io_mutex);
4892 
4893 	remove_node_monitors(context);
4894 	free(context->fds);
4895 	free(context);
4896 
4897 	return B_OK;
4898 }
4899 
4900 
4901 void
4902 vfs_get_io_context(io_context* context)
4903 {
4904 	atomic_add(&context->ref_count, 1);
4905 }
4906 
4907 
4908 void
4909 vfs_put_io_context(io_context* context)
4910 {
4911 	if (atomic_add(&context->ref_count, -1) == 1)
4912 		vfs_free_io_context(context);
4913 }
4914 
4915 
4916 static status_t
4917 vfs_resize_fd_table(struct io_context* context, const int newSize)
4918 {
4919 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4920 		return B_BAD_VALUE;
4921 
4922 	TIOC(ResizeIOContext(context, newSize));
4923 
4924 	MutexLocker _(context->io_mutex);
4925 
4926 	int oldSize = context->table_size;
4927 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4928 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4929 
4930 	// If the tables shrink, make sure none of the fds being dropped are in use.
4931 	if (newSize < oldSize) {
4932 		for (int i = oldSize; i-- > newSize;) {
4933 			if (context->fds[i])
4934 				return B_BUSY;
4935 		}
4936 	}
4937 
4938 	// store pointers to the old tables
4939 	file_descriptor** oldFDs = context->fds;
4940 	select_info** oldSelectInfos = context->select_infos;
4941 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4942 
4943 	// allocate new tables
4944 	file_descriptor** newFDs = (file_descriptor**)malloc(
4945 		sizeof(struct file_descriptor*) * newSize
4946 		+ sizeof(struct select_sync*) * newSize
4947 		+ newCloseOnExitBitmapSize);
4948 	if (newFDs == NULL)
4949 		return B_NO_MEMORY;
4950 
4951 	context->fds = newFDs;
4952 	context->select_infos = (select_info**)(context->fds + newSize);
4953 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4954 	context->table_size = newSize;
4955 
4956 	// copy entries from old tables
4957 	int toCopy = min_c(oldSize, newSize);
4958 
4959 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4960 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4961 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4962 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4963 
4964 	// clear additional entries, if the tables grow
4965 	if (newSize > oldSize) {
4966 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4967 		memset(context->select_infos + oldSize, 0,
4968 			sizeof(void*) * (newSize - oldSize));
4969 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4970 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4971 	}
4972 
4973 	free(oldFDs);
4974 
4975 	return B_OK;
4976 }
4977 
4978 
4979 static status_t
4980 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4981 {
4982 	status_t status = B_OK;
4983 
4984 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4985 		return B_BAD_VALUE;
4986 
4987 	mutex_lock(&context->io_mutex);
4988 
4989 	if ((size_t)newSize < context->num_monitors) {
4990 		status = B_BUSY;
4991 		goto out;
4992 	}
4993 	context->max_monitors = newSize;
4994 
4995 out:
4996 	mutex_unlock(&context->io_mutex);
4997 	return status;
4998 }
4999 
5000 
5001 status_t
5002 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5003 	ino_t* _mountPointNodeID)
5004 {
5005 	ReadLocker nodeLocker(sVnodeLock);
5006 	MutexLocker mountLocker(sMountMutex);
5007 
5008 	struct fs_mount* mount = find_mount(mountID);
5009 	if (mount == NULL)
5010 		return B_BAD_VALUE;
5011 
5012 	Vnode* mountPoint = mount->covers_vnode;
5013 
5014 	*_mountPointMountID = mountPoint->device;
5015 	*_mountPointNodeID = mountPoint->id;
5016 
5017 	return B_OK;
5018 }
5019 
5020 
5021 status_t
5022 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5023 	ino_t coveredNodeID)
5024 {
5025 	// get the vnodes
5026 	Vnode* vnode;
5027 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5028 	if (error != B_OK)
5029 		return B_BAD_VALUE;
5030 	VNodePutter vnodePutter(vnode);
5031 
5032 	Vnode* coveredVnode;
5033 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5034 		false);
5035 	if (error != B_OK)
5036 		return B_BAD_VALUE;
5037 	VNodePutter coveredVnodePutter(coveredVnode);
5038 
5039 	// establish the covered/covering links
5040 	WriteLocker locker(sVnodeLock);
5041 
5042 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5043 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5044 		return B_BUSY;
5045 	}
5046 
5047 	vnode->covers = coveredVnode;
5048 	vnode->SetCovering(true);
5049 
5050 	coveredVnode->covered_by = vnode;
5051 	coveredVnode->SetCovered(true);
5052 
5053 	// the vnodes do now reference each other
5054 	inc_vnode_ref_count(vnode);
5055 	inc_vnode_ref_count(coveredVnode);
5056 
5057 	return B_OK;
5058 }
5059 
5060 
5061 int
5062 vfs_getrlimit(int resource, struct rlimit* rlp)
5063 {
5064 	if (!rlp)
5065 		return B_BAD_ADDRESS;
5066 
5067 	switch (resource) {
5068 		case RLIMIT_NOFILE:
5069 		{
5070 			struct io_context* context = get_current_io_context(false);
5071 			MutexLocker _(context->io_mutex);
5072 
5073 			rlp->rlim_cur = context->table_size;
5074 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5075 			return 0;
5076 		}
5077 
5078 		case RLIMIT_NOVMON:
5079 		{
5080 			struct io_context* context = get_current_io_context(false);
5081 			MutexLocker _(context->io_mutex);
5082 
5083 			rlp->rlim_cur = context->max_monitors;
5084 			rlp->rlim_max = MAX_NODE_MONITORS;
5085 			return 0;
5086 		}
5087 
5088 		default:
5089 			return B_BAD_VALUE;
5090 	}
5091 }
5092 
5093 
5094 int
5095 vfs_setrlimit(int resource, const struct rlimit* rlp)
5096 {
5097 	if (!rlp)
5098 		return B_BAD_ADDRESS;
5099 
5100 	switch (resource) {
5101 		case RLIMIT_NOFILE:
5102 			/* TODO: check getuid() */
5103 			if (rlp->rlim_max != RLIM_SAVED_MAX
5104 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5105 				return B_NOT_ALLOWED;
5106 
5107 			return vfs_resize_fd_table(get_current_io_context(false),
5108 				rlp->rlim_cur);
5109 
5110 		case RLIMIT_NOVMON:
5111 			/* TODO: check getuid() */
5112 			if (rlp->rlim_max != RLIM_SAVED_MAX
5113 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5114 				return B_NOT_ALLOWED;
5115 
5116 			return vfs_resize_monitor_table(get_current_io_context(false),
5117 				rlp->rlim_cur);
5118 
5119 		default:
5120 			return B_BAD_VALUE;
5121 	}
5122 }
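

// A hedged userland-side sketch: the standard setrlimit() call below ends up
// in vfs_setrlimit() above and resizes this team's FD table. Note that
// rlim_max must be RLIM_SAVED_MAX or MAX_FD_TABLE_SIZE to pass the check.
#if 0
	struct rlimit rl;
	rl.rlim_cur = 1024;				// desired FD table size
	rl.rlim_max = RLIM_SAVED_MAX;
	if (setrlimit(RLIMIT_NOFILE, &rl) < 0) {
		// resizing failed, e.g. because an FD beyond the new limit is open
	}
#endif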
5123 
5124 
5125 status_t
5126 vfs_init(kernel_args* args)
5127 {
5128 	vnode::StaticInit();
5129 
5130 	struct vnode dummyVnode;
5131 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5132 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5133 	if (sVnodeTable == NULL)
5134 		panic("vfs_init: error creating vnode hash table\n");
5135 
5136 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5137 
5138 	struct fs_mount dummyMount;
5139 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5140 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5141 	if (sMountsTable == NULL)
5142 		panic("vfs_init: error creating mounts hash table\n");
5143 
5144 	node_monitor_init();
5145 
5146 	sRoot = NULL;
5147 
5148 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5149 
5150 	if (block_cache_init() != B_OK)
5151 		return B_ERROR;
5152 
5153 #ifdef ADD_DEBUGGER_COMMANDS
5154 	// add some debugger commands
5155 	add_debugger_command_etc("vnode", &dump_vnode,
5156 		"Print info about the specified vnode",
5157 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5158 		"Prints information about the vnode specified by address <vnode> or\n"
5159 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5160 		"constructed and printed. It might not be possible to construct a\n"
5161 		"complete path, though.\n",
5162 		0);
5163 	add_debugger_command("vnodes", &dump_vnodes,
5164 		"list all vnodes (from the specified device)");
5165 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5166 		"list all vnode caches");
5167 	add_debugger_command("mount", &dump_mount,
5168 		"info about the specified fs_mount");
5169 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5170 	add_debugger_command("io_context", &dump_io_context,
5171 		"info about the I/O context");
5172 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5173 		"info about vnode usage");
5174 #endif
5175 
5176 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5177 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5178 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5179 		0);
5180 
5181 	file_map_init();
5182 
5183 	return file_cache_init();
5184 }
5185 
5186 
5187 //	#pragma mark - fd_ops implementations
5188 
5189 
5190 /*!
5191 	Calls fs_open() on the given vnode and returns a new
5192 	file descriptor for it
5193 */
5194 static int
5195 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5196 {
5197 	void* cookie;
5198 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5199 	if (status != B_OK)
5200 		return status;
5201 
5202 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5203 	if (fd < 0) {
5204 		FS_CALL(vnode, close, cookie);
5205 		FS_CALL(vnode, free_cookie, cookie);
5206 	}
5207 	return fd;
5208 }
5209 
5210 
5211 /*!
5212 	Looks up or creates the entry \a name in \a directory, opens the
5213 	resulting node, and returns a new file descriptor for it
5214 */
5215 static int
5216 create_vnode(struct vnode* directory, const char* name, int openMode,
5217 	int perms, bool kernel)
5218 {
5219 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5220 	status_t status = B_ERROR;
5221 	struct vnode* vnode;
5222 	void* cookie;
5223 	ino_t newID;
5224 
5225 	// This is somewhat tricky: If the entry already exists, the FS responsible
5226 	// for the directory might not necessarily also be the one responsible for
5227 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5228 	// we can actually never call the create() hook without O_EXCL. Instead we
5229 	// try to look the entry up first. If it already exists, we just open the
5230 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5231 	// introduces a race condition, since someone else might have created the
5232 	// entry in the meantime. We hope the respective FS returns the correct
5233 	// error code in that case, and retry (up to 3 times).
5234 
5235 	for (int i = 0; i < 3 && status != B_OK; i++) {
5236 		// look the node up
5237 		status = lookup_dir_entry(directory, name, &vnode);
5238 		if (status == B_OK) {
5239 			VNodePutter putter(vnode);
5240 
5241 			if ((openMode & O_EXCL) != 0)
5242 				return B_FILE_EXISTS;
5243 
5244 			// If the node is a symlink, we have to follow it, unless
5245 			// O_NOTRAVERSE is set.
5246 			if (S_ISLNK(vnode->Type()) && traverse) {
5247 				putter.Put();
5248 				char clonedName[B_FILE_NAME_LENGTH + 1];
5249 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5250 						>= B_FILE_NAME_LENGTH) {
5251 					return B_NAME_TOO_LONG;
5252 				}
5253 
5254 				inc_vnode_ref_count(directory);
5255 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5256 					kernel, &vnode, NULL);
5257 				if (status != B_OK)
5258 					return status;
5259 
5260 				putter.SetTo(vnode);
5261 			}
5262 
5263 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5264 				return B_LINK_LIMIT;
5265 
5266 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5267 			// on success keep the vnode reference for the FD
5268 			if (fd >= 0)
5269 				putter.Detach();
5270 
5271 			return fd;
5272 		}
5273 
5274 		// it doesn't exist yet -- try to create it
5275 
5276 		if (!HAS_FS_CALL(directory, create))
5277 			return B_READ_ONLY_DEVICE;
5278 
5279 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5280 			&cookie, &newID);
5281 		if (status != B_OK
5282 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5283 			return status;
5284 		}
5285 	}
5286 
5287 	if (status != B_OK)
5288 		return status;
5289 
5290 	// the node has been created successfully
5291 
5292 	rw_lock_read_lock(&sVnodeLock);
5293 	vnode = lookup_vnode(directory->device, newID);
5294 	rw_lock_read_unlock(&sVnodeLock);
5295 
5296 	if (vnode == NULL) {
5297 		panic("vfs: fs_create() returned success but there is no vnode, "
5298 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5299 		return B_BAD_VALUE;
5300 	}
5301 
5302 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5303 	if (fd >= 0)
5304 		return fd;
5305 
5306 	status = fd;
5307 
5308 	// something went wrong, clean up
5309 
5310 	FS_CALL(vnode, close, cookie);
5311 	FS_CALL(vnode, free_cookie, cookie);
5312 	put_vnode(vnode);
5313 
5314 	FS_CALL(directory, unlink, name);
5315 
5316 	return status;
5317 }
5318 
5319 
5320 /*! Calls fs open_dir() on the given vnode and returns a new
5321 	file descriptor for it
5322 */
5323 static int
5324 open_dir_vnode(struct vnode* vnode, bool kernel)
5325 {
5326 	void* cookie;
5327 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5328 	if (status != B_OK)
5329 		return status;
5330 
5331 	// directory is opened, create a fd
5332 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5333 	if (status >= 0)
5334 		return status;
5335 
5336 	FS_CALL(vnode, close_dir, cookie);
5337 	FS_CALL(vnode, free_dir_cookie, cookie);
5338 
5339 	return status;
5340 }
5341 
5342 
5343 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5344 	file descriptor for it.
5345 	Used by attr_dir_open(), and attr_dir_open_fd().
5346 */
5347 static int
5348 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5349 {
5350 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5351 		return B_UNSUPPORTED;
5352 
5353 	void* cookie;
5354 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5355 	if (status != B_OK)
5356 		return status;
5357 
5358 	// directory is opened, create a fd
5359 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5360 		kernel);
5361 	if (status >= 0)
5362 		return status;
5363 
5364 	FS_CALL(vnode, close_attr_dir, cookie);
5365 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5366 
5367 	return status;
5368 }
5369 
5370 
5371 static int
5372 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5373 	int openMode, int perms, bool kernel)
5374 {
5375 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5376 		"kernel %d\n", name, openMode, perms, kernel));
5377 
5378 	// get directory to put the new file in
5379 	struct vnode* directory;
5380 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5381 	if (status != B_OK)
5382 		return status;
5383 
5384 	status = create_vnode(directory, name, openMode, perms, kernel);
5385 	put_vnode(directory);
5386 
5387 	return status;
5388 }
5389 
5390 
5391 static int
5392 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5393 {
5394 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5395 		openMode, perms, kernel));
5396 
5397 	// get directory to put the new file in
5398 	char name[B_FILE_NAME_LENGTH];
5399 	struct vnode* directory;
5400 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5401 		kernel);
5402 	if (status < 0)
5403 		return status;
5404 
5405 	status = create_vnode(directory, name, openMode, perms, kernel);
5406 
5407 	put_vnode(directory);
5408 	return status;
5409 }
5410 
5411 
5412 static int
5413 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5414 	int openMode, bool kernel)
5415 {
5416 	if (name == NULL || *name == '\0')
5417 		return B_BAD_VALUE;
5418 
5419 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5420 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5421 
5422 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5423 
5424 	// get the vnode matching the entry_ref
5425 	struct vnode* vnode;
5426 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5427 		kernel, &vnode);
5428 	if (status != B_OK)
5429 		return status;
5430 
5431 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5432 		put_vnode(vnode);
5433 		return B_LINK_LIMIT;
5434 	}
5435 
5436 	int newFD = open_vnode(vnode, openMode, kernel);
5437 	if (newFD >= 0) {
5438 		// The vnode reference has been transferred to the FD
5439 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5440 			directoryID, vnode->id, name);
5441 	} else
5442 		put_vnode(vnode);
5443 
5444 	return newFD;
5445 }
5446 
5447 
5448 static int
5449 file_open(int fd, char* path, int openMode, bool kernel)
5450 {
5451 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5452 
5453 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5454 		fd, path, openMode, kernel));
5455 
5456 	// get the vnode matching the vnode + path combination
5457 	struct vnode* vnode;
5458 	ino_t parentID;
5459 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5460 		&parentID, kernel);
5461 	if (status != B_OK)
5462 		return status;
5463 
5464 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5465 		put_vnode(vnode);
5466 		return B_LINK_LIMIT;
5467 	}
5468 
5469 	// open the vnode
5470 	int newFD = open_vnode(vnode, openMode, kernel);
5471 	if (newFD >= 0) {
5472 		// The vnode reference has been transferred to the FD
5473 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5474 			vnode->device, parentID, vnode->id, NULL);
5475 	} else
5476 		put_vnode(vnode);
5477 
5478 	return newFD;
5479 }
5480 
5481 
5482 static status_t
5483 file_close(struct file_descriptor* descriptor)
5484 {
5485 	struct vnode* vnode = descriptor->u.vnode;
5486 	status_t status = B_OK;
5487 
5488 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5489 
5490 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5491 		vnode->id);
5492 	if (HAS_FS_CALL(vnode, close)) {
5493 		status = FS_CALL(vnode, close, descriptor->cookie);
5494 	}
5495 
5496 	if (status == B_OK) {
5497 		// remove all outstanding locks for this team
5498 		if (HAS_FS_CALL(vnode, release_lock))
5499 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5500 		else
5501 			status = release_advisory_lock(vnode, NULL);
5502 	}
5503 	return status;
5504 }
5505 
5506 
5507 static void
5508 file_free_fd(struct file_descriptor* descriptor)
5509 {
5510 	struct vnode* vnode = descriptor->u.vnode;
5511 
5512 	if (vnode != NULL) {
5513 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5514 		put_vnode(vnode);
5515 	}
5516 }
5517 
5518 
5519 static status_t
5520 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5521 	size_t* length)
5522 {
5523 	struct vnode* vnode = descriptor->u.vnode;
5524 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5525 		pos, length, *length));
5526 
5527 	if (S_ISDIR(vnode->Type()))
5528 		return B_IS_A_DIRECTORY;
5529 
5530 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5531 }
5532 
5533 
5534 static status_t
5535 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5536 	size_t* length)
5537 {
5538 	struct vnode* vnode = descriptor->u.vnode;
5539 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5540 		length));
5541 
5542 	if (S_ISDIR(vnode->Type()))
5543 		return B_IS_A_DIRECTORY;
5544 	if (!HAS_FS_CALL(vnode, write))
5545 		return B_READ_ONLY_DEVICE;
5546 
5547 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5548 }
5549 
5550 
5551 static off_t
5552 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5553 {
5554 	struct vnode* vnode = descriptor->u.vnode;
5555 	off_t offset;
5556 
5557 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5558 		seekType));
5559 
5560 	// some kinds of files are not seekable
5561 	switch (vnode->Type() & S_IFMT) {
5562 		case S_IFIFO:
5563 		case S_IFSOCK:
5564 			return ESPIPE;
5565 
5566 		// The Open Group Base Specs don't name any file types besides pipes,
5567 		// FIFOs, and sockets as non-seekable, so we allow seeking all others.
5568 		case S_IFREG:
5569 		case S_IFBLK:
5570 		case S_IFDIR:
5571 		case S_IFLNK:
5572 		case S_IFCHR:
5573 			break;
5574 	}
5575 
5576 	switch (seekType) {
5577 		case SEEK_SET:
5578 			offset = 0;
5579 			break;
5580 		case SEEK_CUR:
5581 			offset = descriptor->pos;
5582 			break;
5583 		case SEEK_END:
5584 		{
5585 			// stat() the node
5586 			if (!HAS_FS_CALL(vnode, read_stat))
5587 				return B_UNSUPPORTED;
5588 
5589 			struct stat stat;
5590 			status_t status = FS_CALL(vnode, read_stat, &stat);
5591 			if (status != B_OK)
5592 				return status;
5593 
5594 			offset = stat.st_size;
5595 			break;
5596 		}
5597 		default:
5598 			return B_BAD_VALUE;
5599 	}
5600 
5601 	// assumes off_t is 64 bits wide
5602 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5603 		return B_BUFFER_OVERFLOW;
5604 
5605 	pos += offset;
5606 	if (pos < 0)
5607 		return B_BAD_VALUE;
5608 
5609 	return descriptor->pos = pos;
5610 }
5611 
5612 
5613 static status_t
5614 file_select(struct file_descriptor* descriptor, uint8 event,
5615 	struct selectsync* sync)
5616 {
5617 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5618 
5619 	struct vnode* vnode = descriptor->u.vnode;
5620 
5621 	// If the FS has no select() hook, notify select() now.
5622 	if (!HAS_FS_CALL(vnode, select))
5623 		return notify_select_event(sync, event);
5624 
5625 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5626 }
5627 
5628 
5629 static status_t
5630 file_deselect(struct file_descriptor* descriptor, uint8 event,
5631 	struct selectsync* sync)
5632 {
5633 	struct vnode* vnode = descriptor->u.vnode;
5634 
5635 	if (!HAS_FS_CALL(vnode, deselect))
5636 		return B_OK;
5637 
5638 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5639 }
5640 
5641 
5642 static status_t
5643 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5644 	bool kernel)
5645 {
5646 	struct vnode* vnode;
5647 	status_t status;
5648 
5649 	if (name == NULL || *name == '\0')
5650 		return B_BAD_VALUE;
5651 
5652 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5653 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5654 
5655 	status = get_vnode(mountID, parentID, &vnode, true, false);
5656 	if (status != B_OK)
5657 		return status;
5658 
5659 	if (HAS_FS_CALL(vnode, create_dir))
5660 		status = FS_CALL(vnode, create_dir, name, perms);
5661 	else
5662 		status = B_READ_ONLY_DEVICE;
5663 
5664 	put_vnode(vnode);
5665 	return status;
5666 }
5667 
5668 
5669 static status_t
5670 dir_create(int fd, char* path, int perms, bool kernel)
5671 {
5672 	char filename[B_FILE_NAME_LENGTH];
5673 	struct vnode* vnode;
5674 	status_t status;
5675 
5676 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5677 		kernel));
5678 
5679 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5680 	if (status < 0)
5681 		return status;
5682 
5683 	if (HAS_FS_CALL(vnode, create_dir)) {
5684 		status = FS_CALL(vnode, create_dir, filename, perms);
5685 	} else
5686 		status = B_READ_ONLY_DEVICE;
5687 
5688 	put_vnode(vnode);
5689 	return status;
5690 }
5691 
5692 
5693 static int
5694 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5695 {
5696 	FUNCTION(("dir_open_entry_ref()\n"));
5697 
5698 	if (name && name[0] == '\0')
5699 		return B_BAD_VALUE;
5700 
5701 	// get the vnode matching the entry_ref/node_ref
5702 	struct vnode* vnode;
5703 	status_t status;
5704 	if (name) {
5705 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5706 			&vnode);
5707 	} else
5708 		status = get_vnode(mountID, parentID, &vnode, true, false);
5709 	if (status != B_OK)
5710 		return status;
5711 
5712 	int newFD = open_dir_vnode(vnode, kernel);
5713 	if (newFD >= 0) {
5714 		// The vnode reference has been transferred to the FD
5715 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5716 			vnode->id, name);
5717 	} else
5718 		put_vnode(vnode);
5719 
5720 	return newFD;
5721 }
5722 
5723 
5724 static int
5725 dir_open(int fd, char* path, bool kernel)
5726 {
5727 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5728 		kernel));
5729 
5730 	// get the vnode matching the vnode + path combination
5731 	struct vnode* vnode = NULL;
5732 	ino_t parentID;
5733 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5734 		kernel);
5735 	if (status != B_OK)
5736 		return status;
5737 
5738 	// open the dir
5739 	int newFD = open_dir_vnode(vnode, kernel);
5740 	if (newFD >= 0) {
5741 		// The vnode reference has been transferred to the FD
5742 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5743 			parentID, vnode->id, NULL);
5744 	} else
5745 		put_vnode(vnode);
5746 
5747 	return newFD;
5748 }
5749 
5750 
5751 static status_t
5752 dir_close(struct file_descriptor* descriptor)
5753 {
5754 	struct vnode* vnode = descriptor->u.vnode;
5755 
5756 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5757 
5758 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5759 		vnode->id);
5760 	if (HAS_FS_CALL(vnode, close_dir))
5761 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5762 
5763 	return B_OK;
5764 }
5765 
5766 
5767 static void
5768 dir_free_fd(struct file_descriptor* descriptor)
5769 {
5770 	struct vnode* vnode = descriptor->u.vnode;
5771 
5772 	if (vnode != NULL) {
5773 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5774 		put_vnode(vnode);
5775 	}
5776 }
5777 
5778 
5779 static status_t
5780 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5781 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5782 {
5783 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5784 		bufferSize, _count);
5785 }
5786 
5787 
5788 static status_t
5789 fix_dirent(struct vnode* parent, struct dirent* entry,
5790 	struct io_context* ioContext)
5791 {
5792 	// set d_pdev and d_pino
5793 	entry->d_pdev = parent->device;
5794 	entry->d_pino = parent->id;
5795 
5796 	// If this is the ".." entry and the directory is covering another vnode,
5797 	// we need to replace d_dev and d_ino with the actual values.
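	// This matters for mount points: the ".." of a mount's root directory
	// has to name the parent of the covered directory, not a node inside
	// the mounted volume itself.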
5798 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5799 		// Make sure the IO context root is not bypassed.
5800 		if (parent == ioContext->root) {
5801 			entry->d_dev = parent->device;
5802 			entry->d_ino = parent->id;
5803 		} else {
5804 			inc_vnode_ref_count(parent);
5805 				// vnode_path_to_vnode() puts the node
5806 
5807 			// ".." is guaranteed not to be clobbered by this call
5808 			struct vnode* vnode;
5809 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5810 				ioContext, &vnode, NULL);
5811 
5812 			if (status == B_OK) {
5813 				entry->d_dev = vnode->device;
5814 				entry->d_ino = vnode->id;
5815 				put_vnode(vnode);
5816 			}
5817 		}
5818 	} else {
5819 		// resolve covered vnodes
5820 		ReadLocker _(&sVnodeLock);
5821 
5822 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5823 		if (vnode != NULL && vnode->covered_by != NULL) {
5824 			do {
5825 				vnode = vnode->covered_by;
5826 			} while (vnode->covered_by != NULL);
5827 
5828 			entry->d_dev = vnode->device;
5829 			entry->d_ino = vnode->id;
5830 		}
5831 	}
5832 
5833 	return B_OK;
5834 }
5835 
5836 
5837 static status_t
5838 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5839 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5840 {
5841 	if (!HAS_FS_CALL(vnode, read_dir))
5842 		return B_UNSUPPORTED;
5843 
5844 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5845 		_count);
5846 	if (error != B_OK)
5847 		return error;
5848 
5849 	// we need to adjust the read dirents
5850 	uint32 count = *_count;
5851 	for (uint32 i = 0; i < count; i++) {
5852 		error = fix_dirent(vnode, buffer, ioContext);
5853 		if (error != B_OK)
5854 			return error;
5855 
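		// The entries are returned densely packed; each one's d_reclen field
		// holds its total size, so adding it to the current pointer yields
		// the start of the next entry.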
5856 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5857 	}
5858 
5859 	return error;
5860 }
5861 
5862 
5863 static status_t
5864 dir_rewind(struct file_descriptor* descriptor)
5865 {
5866 	struct vnode* vnode = descriptor->u.vnode;
5867 
5868 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5869 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5870 	}
5871 
5872 	return B_UNSUPPORTED;
5873 }
5874 
5875 
5876 static status_t
5877 dir_remove(int fd, char* path, bool kernel)
5878 {
5879 	char name[B_FILE_NAME_LENGTH];
5880 	struct vnode* directory;
5881 	status_t status;
5882 
5883 	if (path != NULL) {
5884 		// we need to make sure our path name doesn't end in "/", ".",
5885 		// or ".."
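		// e.g. "a/b//" and "a/b/." are both reduced to "a/b", while anything
		// with a ".." leaf is rejected outright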
5886 		char* lastSlash;
5887 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5888 			char* leaf = lastSlash + 1;
5889 			if (!strcmp(leaf, ".."))
5890 				return B_NOT_ALLOWED;
5891 
5892 			// omit multiple slashes
5893 			while (lastSlash > path && lastSlash[-1] == '/')
5894 				lastSlash--;
5895 
5896 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
5897 				break;
5900 			// "name/" -> "name", or "name/." -> "name"
5901 			lastSlash[0] = '\0';
5902 		}
5903 
5904 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5905 			return B_NOT_ALLOWED;
5906 	}
5907 
5908 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5909 	if (status != B_OK)
5910 		return status;
5911 
5912 	if (HAS_FS_CALL(directory, remove_dir))
5913 		status = FS_CALL(directory, remove_dir, name);
5914 	else
5915 		status = B_READ_ONLY_DEVICE;
5916 
5917 	put_vnode(directory);
5918 	return status;
5919 }
5920 
5921 
5922 static status_t
5923 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5924 	size_t length)
5925 {
5926 	struct vnode* vnode = descriptor->u.vnode;
5927 
5928 	if (HAS_FS_CALL(vnode, ioctl))
5929 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5930 
5931 	return B_DEV_INVALID_IOCTL;
5932 }
5933 
5934 
5935 static status_t
5936 common_fcntl(int fd, int op, size_t argument, bool kernel)
5937 {
5938 	struct flock flock;
5939 
5940 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5941 		fd, op, argument, kernel ? "kernel" : "user"));
5942 
5943 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5944 		fd);
5945 	if (descriptor == NULL)
5946 		return B_FILE_ERROR;
5947 
5948 	struct vnode* vnode = fd_vnode(descriptor);
5949 
5950 	status_t status = B_OK;
5951 
5952 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5953 		if (descriptor->type != FDTYPE_FILE)
5954 			status = B_BAD_VALUE;
5955 		else if (user_memcpy(&flock, (struct flock*)argument,
5956 				sizeof(struct flock)) != B_OK)
5957 			status = B_BAD_ADDRESS;
5958 
5959 		if (status != B_OK) {
5960 			put_fd(descriptor);
5961 			return status;
5962 		}
5963 	}
5964 
5965 	switch (op) {
5966 		case F_SETFD:
5967 		{
5968 			struct io_context* context = get_current_io_context(kernel);
5969 			// Set file descriptor flags
5970 
5971 			// FD_CLOEXEC is the only flag available at this time
5972 			mutex_lock(&context->io_mutex);
5973 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5974 			mutex_unlock(&context->io_mutex);
5975 
5976 			status = B_OK;
5977 			break;
5978 		}
5979 
5980 		case F_GETFD:
5981 		{
5982 			struct io_context* context = get_current_io_context(kernel);
5983 
5984 			// Get file descriptor flags
5985 			mutex_lock(&context->io_mutex);
5986 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5987 			mutex_unlock(&context->io_mutex);
5988 			break;
5989 		}
5990 
5991 		case F_SETFL:
5992 			// Set file descriptor open mode
5993 
5994 			// we only accept changes to O_APPEND and O_NONBLOCK
5995 			argument &= O_APPEND | O_NONBLOCK;
5996 			if (descriptor->ops->fd_set_flags != NULL) {
5997 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5998 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5999 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6000 					(int)argument);
6001 			} else
6002 				status = B_UNSUPPORTED;
6003 
6004 			if (status == B_OK) {
6005 				// update this descriptor's open_mode field
6006 				descriptor->open_mode = (descriptor->open_mode
6007 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6008 			}
6009 
6010 			break;
6011 
6012 		case F_GETFL:
6013 			// Get file descriptor open mode
6014 			status = descriptor->open_mode;
6015 			break;
6016 
6017 		case F_DUPFD:
6018 		{
6019 			struct io_context* context = get_current_io_context(kernel);
6020 
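			// F_DUPFD: new_fd_etc() is expected to hand out the lowest free
			// slot that is >= argument; per POSIX the duplicate starts with
			// FD_CLOEXEC cleared, and it needs its own reference on the
			// shared descriptor.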
6021 			status = new_fd_etc(context, descriptor, (int)argument);
6022 			if (status >= 0) {
6023 				mutex_lock(&context->io_mutex);
6024 				fd_set_close_on_exec(context, status, false);
6025 				mutex_unlock(&context->io_mutex);
6026 
6027 				atomic_add(&descriptor->ref_count, 1);
6028 			}
6029 			break;
6030 		}
6031 
6032 		case F_GETLK:
6033 			if (vnode != NULL) {
6034 				struct flock normalizedLock;
6035 
6036 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6037 				status = normalize_flock(descriptor, &normalizedLock);
6038 				if (status != B_OK)
6039 					break;
6040 
6041 				if (HAS_FS_CALL(vnode, test_lock)) {
6042 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6043 						&normalizedLock);
6044 				} else
6045 					status = test_advisory_lock(vnode, &normalizedLock);
6046 				if (status == B_OK) {
6047 					if (normalizedLock.l_type == F_UNLCK) {
6048 						// no conflicting lock found, copy back the same struct
6049 						// we were given except change type to F_UNLCK
6050 						flock.l_type = F_UNLCK;
6051 						status = user_memcpy((struct flock*)argument, &flock,
6052 							sizeof(struct flock));
6053 					} else {
6054 						// a conflicting lock was found, copy back its range and
6055 						// type
6056 						if (normalizedLock.l_len == OFF_MAX)
6057 							normalizedLock.l_len = 0;
6058 
6059 						status = user_memcpy((struct flock*)argument,
6060 							&normalizedLock, sizeof(struct flock));
6061 					}
6062 				}
6063 			} else
6064 				status = B_BAD_VALUE;
6065 			break;
6066 
6067 		case F_SETLK:
6068 		case F_SETLKW:
6069 			status = normalize_flock(descriptor, &flock);
6070 			if (status != B_OK)
6071 				break;
6072 
6073 			if (vnode == NULL) {
6074 				status = B_BAD_VALUE;
6075 			} else if (flock.l_type == F_UNLCK) {
6076 				if (HAS_FS_CALL(vnode, release_lock)) {
6077 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6078 						&flock);
6079 				} else
6080 					status = release_advisory_lock(vnode, &flock);
6081 			} else {
6082 				// the open mode must match the lock type
6083 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6084 						&& flock.l_type == F_WRLCK)
6085 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6086 						&& flock.l_type == F_RDLCK))
6087 					status = B_FILE_ERROR;
6088 				else {
6089 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6090 						status = FS_CALL(vnode, acquire_lock,
6091 							descriptor->cookie, &flock, op == F_SETLKW);
6092 					} else {
6093 						status = acquire_advisory_lock(vnode, -1,
6094 							&flock, op == F_SETLKW);
6095 					}
6096 				}
6097 			}
6098 			break;
6099 
6100 		// ToDo: add support for more ops?
6101 
6102 		default:
6103 			status = B_BAD_VALUE;
6104 	}
6105 
6106 	put_fd(descriptor);
6107 	return status;
6108 }
6109 
6110 
6111 static status_t
6112 common_sync(int fd, bool kernel)
6113 {
6114 	struct file_descriptor* descriptor;
6115 	struct vnode* vnode;
6116 	status_t status;
6117 
6118 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6119 
6120 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6121 	if (descriptor == NULL)
6122 		return B_FILE_ERROR;
6123 
6124 	if (HAS_FS_CALL(vnode, fsync))
6125 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6126 	else
6127 		status = B_UNSUPPORTED;
6128 
6129 	put_fd(descriptor);
6130 	return status;
6131 }
6132 
6133 
6134 static status_t
6135 common_lock_node(int fd, bool kernel)
6136 {
6137 	struct file_descriptor* descriptor;
6138 	struct vnode* vnode;
6139 
6140 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6141 	if (descriptor == NULL)
6142 		return B_FILE_ERROR;
6143 
6144 	status_t status = B_OK;
6145 
6146 	// We need to set the lock atomically - someone else might set one
6147 	// at the same time
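	// atomic_pointer_test_and_set(pointer, set, test) stores "set" only if
	// the current value equals "test" and always returns the previous value;
	// a non-NULL result therefore means another descriptor already holds
	// the mandatory lock.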
6148 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6149 			(file_descriptor*)NULL) != NULL)
6150 		status = B_BUSY;
6151 
6152 	put_fd(descriptor);
6153 	return status;
6154 }
6155 
6156 
6157 static status_t
6158 common_unlock_node(int fd, bool kernel)
6159 {
6160 	struct file_descriptor* descriptor;
6161 	struct vnode* vnode;
6162 
6163 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6164 	if (descriptor == NULL)
6165 		return B_FILE_ERROR;
6166 
6167 	status_t status = B_OK;
6168 
6169 	// We need to clear the lock atomically - someone else might set one
6170 	// at the same time
6171 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6172 			(file_descriptor*)NULL, descriptor) != descriptor)
6173 		status = B_BAD_VALUE;
6174 
6175 	put_fd(descriptor);
6176 	return status;
6177 }
6178 
6179 
6180 static status_t
6181 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6182 	bool kernel)
6183 {
6184 	struct vnode* vnode;
6185 	status_t status;
6186 
6187 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6188 	if (status != B_OK)
6189 		return status;
6190 
6191 	if (HAS_FS_CALL(vnode, read_symlink)) {
6192 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6193 	} else
6194 		status = B_BAD_VALUE;
6195 
6196 	put_vnode(vnode);
6197 	return status;
6198 }
6199 
6200 
6201 static status_t
6202 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6203 	bool kernel)
6204 {
6205 	// path validity checks have to be in the calling function!
6206 	char name[B_FILE_NAME_LENGTH];
6207 	struct vnode* vnode;
6208 	status_t status;
6209 
6210 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6211 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6212 
6213 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6214 	if (status != B_OK)
6215 		return status;
6216 
6217 	if (HAS_FS_CALL(vnode, create_symlink))
6218 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6219 	else {
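		// no create_symlink hook: if the FS supports writing at all, report
		// the operation as unsupported; otherwise assume a read-only volume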
6220 		status = HAS_FS_CALL(vnode, write)
6221 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6222 	}
6223 
6224 	put_vnode(vnode);
6225 
6226 	return status;
6227 }
6228 
6229 
6230 static status_t
6231 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6232 	bool traverseLeafLink, bool kernel)
6233 {
6234 	// path validity checks have to be in the calling function!
6235 
6236 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6237 		toPath, kernel));
6238 
6239 	char name[B_FILE_NAME_LENGTH];
6240 	struct vnode* directory;
6241 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6242 		kernel);
6243 	if (status != B_OK)
6244 		return status;
6245 
6246 	struct vnode* vnode;
6247 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6248 		kernel);
6249 	if (status != B_OK)
6250 		goto err;
6251 
6252 	if (directory->mount != vnode->mount) {
6253 		status = B_CROSS_DEVICE_LINK;
6254 		goto err1;
6255 	}
6256 
6257 	if (HAS_FS_CALL(directory, link))
6258 		status = FS_CALL(directory, link, name, vnode);
6259 	else
6260 		status = B_READ_ONLY_DEVICE;
6261 
6262 err1:
6263 	put_vnode(vnode);
6264 err:
6265 	put_vnode(directory);
6266 
6267 	return status;
6268 }
6269 
6270 
6271 static status_t
6272 common_unlink(int fd, char* path, bool kernel)
6273 {
6274 	char filename[B_FILE_NAME_LENGTH];
6275 	struct vnode* vnode;
6276 	status_t status;
6277 
6278 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6279 		kernel));
6280 
6281 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6282 	if (status < 0)
6283 		return status;
6284 
6285 	if (HAS_FS_CALL(vnode, unlink))
6286 		status = FS_CALL(vnode, unlink, filename);
6287 	else
6288 		status = B_READ_ONLY_DEVICE;
6289 
6290 	put_vnode(vnode);
6291 
6292 	return status;
6293 }
6294 
6295 
6296 static status_t
6297 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6298 {
6299 	struct vnode* vnode;
6300 	status_t status;
6301 
6302 	// TODO: honor effectiveUserGroup argument
6303 
6304 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6305 	if (status != B_OK)
6306 		return status;
6307 
6308 	if (HAS_FS_CALL(vnode, access))
6309 		status = FS_CALL(vnode, access, mode);
6310 	else
6311 		status = B_OK;
6312 
6313 	put_vnode(vnode);
6314 
6315 	return status;
6316 }
6317 
6318 
6319 static status_t
6320 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6321 {
6322 	struct vnode* fromVnode;
6323 	struct vnode* toVnode;
6324 	char fromName[B_FILE_NAME_LENGTH];
6325 	char toName[B_FILE_NAME_LENGTH];
6326 	status_t status;
6327 
6328 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6329 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6330 
6331 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6332 	if (status != B_OK)
6333 		return status;
6334 
6335 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6336 	if (status != B_OK)
6337 		goto err1;
6338 
6339 	if (fromVnode->device != toVnode->device) {
6340 		status = B_CROSS_DEVICE_LINK;
6341 		goto err2;
6342 	}
6343 
6344 	if (fromName[0] == '\0' || toName[0] == '\0'
6345 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6346 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6347 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6348 		status = B_BAD_VALUE;
6349 		goto err2;
6350 	}
6351 
6352 	if (HAS_FS_CALL(fromVnode, rename))
6353 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6354 	else
6355 		status = B_READ_ONLY_DEVICE;
6356 
6357 err2:
6358 	put_vnode(toVnode);
6359 err1:
6360 	put_vnode(fromVnode);
6361 
6362 	return status;
6363 }
6364 
6365 
6366 static status_t
6367 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6368 {
6369 	struct vnode* vnode = descriptor->u.vnode;
6370 
6371 	FUNCTION(("common_read_stat: stat %p\n", stat));
6372 
6373 	// TODO: remove this once all file systems properly set them!
6374 	stat->st_crtim.tv_nsec = 0;
6375 	stat->st_ctim.tv_nsec = 0;
6376 	stat->st_mtim.tv_nsec = 0;
6377 	stat->st_atim.tv_nsec = 0;
6378 
6379 	status_t status = FS_CALL(vnode, read_stat, stat);
6380 
6381 	// fill in the st_dev and st_ino fields
6382 	if (status == B_OK) {
6383 		stat->st_dev = vnode->device;
6384 		stat->st_ino = vnode->id;
6385 		stat->st_rdev = -1;
6386 	}
6387 
6388 	return status;
6389 }
6390 
6391 
6392 static status_t
6393 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6394 	int statMask)
6395 {
6396 	struct vnode* vnode = descriptor->u.vnode;
6397 
6398 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6399 		vnode, stat, statMask));
6400 
6401 	if (!HAS_FS_CALL(vnode, write_stat))
6402 		return B_READ_ONLY_DEVICE;
6403 
6404 	return FS_CALL(vnode, write_stat, stat, statMask);
6405 }
6406 
6407 
6408 static status_t
6409 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6410 	struct stat* stat, bool kernel)
6411 {
6412 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6413 		stat));
6414 
6415 	struct vnode* vnode;
6416 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6417 		NULL, kernel);
6418 	if (status != B_OK)
6419 		return status;
6420 
6421 	status = FS_CALL(vnode, read_stat, stat);
6422 
6423 	// fill in the st_dev and st_ino fields
6424 	if (status == B_OK) {
6425 		stat->st_dev = vnode->device;
6426 		stat->st_ino = vnode->id;
6427 		stat->st_rdev = -1;
6428 	}
6429 
6430 	put_vnode(vnode);
6431 	return status;
6432 }
6433 
6434 
6435 static status_t
6436 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6437 	const struct stat* stat, int statMask, bool kernel)
6438 {
6439 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6440 		"kernel %d\n", fd, path, stat, statMask, kernel));
6441 
6442 	struct vnode* vnode;
6443 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6444 		NULL, kernel);
6445 	if (status != B_OK)
6446 		return status;
6447 
6448 	if (HAS_FS_CALL(vnode, write_stat))
6449 		status = FS_CALL(vnode, write_stat, stat, statMask);
6450 	else
6451 		status = B_READ_ONLY_DEVICE;
6452 
6453 	put_vnode(vnode);
6454 
6455 	return status;
6456 }
6457 
6458 
6459 static int
6460 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6461 {
6462 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6463 		kernel));
6464 
6465 	struct vnode* vnode;
6466 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6467 		NULL, kernel);
6468 	if (status != B_OK)
6469 		return status;
6470 
6471 	status = open_attr_dir_vnode(vnode, kernel);
6472 	if (status < 0)
6473 		put_vnode(vnode);
6474 
6475 	return status;
6476 }
6477 
6478 
6479 static status_t
6480 attr_dir_close(struct file_descriptor* descriptor)
6481 {
6482 	struct vnode* vnode = descriptor->u.vnode;
6483 
6484 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6485 
6486 	if (HAS_FS_CALL(vnode, close_attr_dir))
6487 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6488 
6489 	return B_OK;
6490 }
6491 
6492 
6493 static void
6494 attr_dir_free_fd(struct file_descriptor* descriptor)
6495 {
6496 	struct vnode* vnode = descriptor->u.vnode;
6497 
6498 	if (vnode != NULL) {
6499 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6500 		put_vnode(vnode);
6501 	}
6502 }
6503 
6504 
6505 static status_t
6506 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6507 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6508 {
6509 	struct vnode* vnode = descriptor->u.vnode;
6510 
6511 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6512 
6513 	if (HAS_FS_CALL(vnode, read_attr_dir))
6514 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6515 			bufferSize, _count);
6516 
6517 	return B_UNSUPPORTED;
6518 }
6519 
6520 
6521 static status_t
6522 attr_dir_rewind(struct file_descriptor* descriptor)
6523 {
6524 	struct vnode* vnode = descriptor->u.vnode;
6525 
6526 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6527 
6528 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6529 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6530 
6531 	return B_UNSUPPORTED;
6532 }
6533 
6534 
6535 static int
6536 attr_create(int fd, char* path, const char* name, uint32 type,
6537 	int openMode, bool kernel)
6538 {
6539 	if (name == NULL || *name == '\0')
6540 		return B_BAD_VALUE;
6541 
6542 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
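	// Either flag means "do not follow a leaf symlink"; with O_NOFOLLOW we
	// additionally fail with B_LINK_LIMIT below if the leaf turns out to be
	// a symlink.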
6543 	struct vnode* vnode;
6544 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6545 		kernel);
6546 	if (status != B_OK)
6547 		return status;
6548 
6549 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6550 		status = B_LINK_LIMIT;
6551 		goto err;
6552 	}
6553 
6554 	if (!HAS_FS_CALL(vnode, create_attr)) {
6555 		status = B_READ_ONLY_DEVICE;
6556 		goto err;
6557 	}
6558 
6559 	void* cookie;
6560 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6561 	if (status != B_OK)
6562 		goto err;
6563 
6564 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6565 	if (fd >= 0)
6566 		return fd;
6567 
6568 	status = fd;
6569 
6570 	FS_CALL(vnode, close_attr, cookie);
6571 	FS_CALL(vnode, free_attr_cookie, cookie);
6572 
6573 	FS_CALL(vnode, remove_attr, name);
6574 
6575 err:
6576 	put_vnode(vnode);
6577 
6578 	return status;
6579 }
6580 
6581 
6582 static int
6583 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6584 {
6585 	if (name == NULL || *name == '\0')
6586 		return B_BAD_VALUE;
6587 
6588 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6589 	struct vnode* vnode;
6590 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6591 		kernel);
6592 	if (status != B_OK)
6593 		return status;
6594 
6595 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6596 		status = B_LINK_LIMIT;
6597 		goto err;
6598 	}
6599 
6600 	if (!HAS_FS_CALL(vnode, open_attr)) {
6601 		status = B_UNSUPPORTED;
6602 		goto err;
6603 	}
6604 
6605 	void* cookie;
6606 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6607 	if (status != B_OK)
6608 		goto err;
6609 
6610 	// now we only need a file descriptor for this attribute and we're done
6611 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6612 	if (fd >= 0)
6613 		return fd;
6614 
6615 	status = fd;
6616 
6617 	FS_CALL(vnode, close_attr, cookie);
6618 	FS_CALL(vnode, free_attr_cookie, cookie);
6619 
6620 err:
6621 	put_vnode(vnode);
6622 
6623 	return status;
6624 }
6625 
6626 
6627 static status_t
6628 attr_close(struct file_descriptor* descriptor)
6629 {
6630 	struct vnode* vnode = descriptor->u.vnode;
6631 
6632 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6633 
6634 	if (HAS_FS_CALL(vnode, close_attr))
6635 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6636 
6637 	return B_OK;
6638 }
6639 
6640 
6641 static void
6642 attr_free_fd(struct file_descriptor* descriptor)
6643 {
6644 	struct vnode* vnode = descriptor->u.vnode;
6645 
6646 	if (vnode != NULL) {
6647 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6648 		put_vnode(vnode);
6649 	}
6650 }
6651 
6652 
6653 static status_t
6654 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6655 	size_t* length)
6656 {
6657 	struct vnode* vnode = descriptor->u.vnode;
6658 
6659 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6660 		pos, length, *length));
6661 
6662 	if (!HAS_FS_CALL(vnode, read_attr))
6663 		return B_UNSUPPORTED;
6664 
6665 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6666 }
6667 
6668 
6669 static status_t
6670 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6671 	size_t* length)
6672 {
6673 	struct vnode* vnode = descriptor->u.vnode;
6674 
6675 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6676 		length));
6677 
6678 	if (!HAS_FS_CALL(vnode, write_attr))
6679 		return B_UNSUPPORTED;
6680 
6681 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6682 }
6683 
6684 
6685 static off_t
6686 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6687 {
6688 	off_t offset;
6689 
6690 	switch (seekType) {
6691 		case SEEK_SET:
6692 			offset = 0;
6693 			break;
6694 		case SEEK_CUR:
6695 			offset = descriptor->pos;
6696 			break;
6697 		case SEEK_END:
6698 		{
6699 			struct vnode* vnode = descriptor->u.vnode;
6700 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6701 				return B_UNSUPPORTED;
6702 
6703 			struct stat stat;
6704 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6705 				&stat);
6706 			if (status != B_OK)
6707 				return status;
6708 
6709 			offset = stat.st_size;
6710 			break;
6711 		}
6712 		default:
6713 			return B_BAD_VALUE;
6714 	}
6715 
6716 	// assumes off_t is 64 bits wide
6717 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6718 		return B_BUFFER_OVERFLOW;
6719 
6720 	pos += offset;
6721 	if (pos < 0)
6722 		return B_BAD_VALUE;
6723 
6724 	return descriptor->pos = pos;
6725 }
6726 
6727 
6728 static status_t
6729 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6730 {
6731 	struct vnode* vnode = descriptor->u.vnode;
6732 
6733 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6734 
6735 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6736 		return B_UNSUPPORTED;
6737 
6738 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6739 }
6740 
6741 
6742 static status_t
6743 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6744 	int statMask)
6745 {
6746 	struct vnode* vnode = descriptor->u.vnode;
6747 
6748 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6749 
6750 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6751 		return B_READ_ONLY_DEVICE;
6752 
6753 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6754 }
6755 
6756 
6757 static status_t
6758 attr_remove(int fd, const char* name, bool kernel)
6759 {
6760 	struct file_descriptor* descriptor;
6761 	struct vnode* vnode;
6762 	status_t status;
6763 
6764 	if (name == NULL || *name == '\0')
6765 		return B_BAD_VALUE;
6766 
6767 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6768 		kernel));
6769 
6770 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6771 	if (descriptor == NULL)
6772 		return B_FILE_ERROR;
6773 
6774 	if (HAS_FS_CALL(vnode, remove_attr))
6775 		status = FS_CALL(vnode, remove_attr, name);
6776 	else
6777 		status = B_READ_ONLY_DEVICE;
6778 
6779 	put_fd(descriptor);
6780 
6781 	return status;
6782 }
6783 
6784 
6785 static status_t
6786 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6787 	bool kernel)
6788 {
6789 	struct file_descriptor* fromDescriptor;
6790 	struct file_descriptor* toDescriptor;
6791 	struct vnode* fromVnode;
6792 	struct vnode* toVnode;
6793 	status_t status;
6794 
6795 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6796 		|| *toName == '\0')
6797 		return B_BAD_VALUE;
6798 
6799 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6800 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6801 
6802 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6803 	if (fromDescriptor == NULL)
6804 		return B_FILE_ERROR;
6805 
6806 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6807 	if (toDescriptor == NULL) {
6808 		status = B_FILE_ERROR;
6809 		goto err;
6810 	}
6811 
6812 	// are the files on the same volume?
6813 	if (fromVnode->device != toVnode->device) {
6814 		status = B_CROSS_DEVICE_LINK;
6815 		goto err1;
6816 	}
6817 
6818 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6819 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6820 	} else
6821 		status = B_READ_ONLY_DEVICE;
6822 
6823 err1:
6824 	put_fd(toDescriptor);
6825 err:
6826 	put_fd(fromDescriptor);
6827 
6828 	return status;
6829 }
6830 
6831 
6832 static int
6833 index_dir_open(dev_t mountID, bool kernel)
6834 {
6835 	struct fs_mount* mount;
6836 	void* cookie;
6837 
6838 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6839 		kernel));
6840 
6841 	status_t status = get_mount(mountID, &mount);
6842 	if (status != B_OK)
6843 		return status;
6844 
6845 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6846 		status = B_UNSUPPORTED;
6847 		goto error;
6848 	}
6849 
6850 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6851 	if (status != B_OK)
6852 		goto error;
6853 
6854 	// get fd for the index directory
6855 	int fd;
6856 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6857 	if (fd >= 0)
6858 		return fd;
6859 
6860 	// something went wrong
6861 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6862 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6863 
6864 	status = fd;
6865 
6866 error:
6867 	put_mount(mount);
6868 	return status;
6869 }
6870 
6871 
6872 static status_t
6873 index_dir_close(struct file_descriptor* descriptor)
6874 {
6875 	struct fs_mount* mount = descriptor->u.mount;
6876 
6877 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6878 
6879 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6880 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6881 
6882 	return B_OK;
6883 }
6884 
6885 
6886 static void
6887 index_dir_free_fd(struct file_descriptor* descriptor)
6888 {
6889 	struct fs_mount* mount = descriptor->u.mount;
6890 
6891 	if (mount != NULL) {
6892 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6893 		put_mount(mount);
6894 	}
6895 }
6896 
6897 
6898 static status_t
6899 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6900 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6901 {
6902 	struct fs_mount* mount = descriptor->u.mount;
6903 
6904 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6905 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6906 			bufferSize, _count);
6907 	}
6908 
6909 	return B_UNSUPPORTED;
6910 }
6911 
6912 
6913 static status_t
6914 index_dir_rewind(struct file_descriptor* descriptor)
6915 {
6916 	struct fs_mount* mount = descriptor->u.mount;
6917 
6918 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6919 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6920 
6921 	return B_UNSUPPORTED;
6922 }
6923 
6924 
6925 static status_t
6926 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6927 	bool kernel)
6928 {
6929 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6930 		mountID, name, kernel));
6931 
6932 	struct fs_mount* mount;
6933 	status_t status = get_mount(mountID, &mount);
6934 	if (status != B_OK)
6935 		return status;
6936 
6937 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6938 		status = B_READ_ONLY_DEVICE;
6939 		goto out;
6940 	}
6941 
6942 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6943 
6944 out:
6945 	put_mount(mount);
6946 	return status;
6947 }
6948 
6949 
6950 #if 0
6951 static status_t
6952 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6953 {
6954 	struct vnode* vnode = descriptor->u.vnode;
6955 
6956 	// ToDo: currently unused!
6957 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6958 	if (!HAS_FS_CALL(vnode, read_index_stat))
6959 		return B_UNSUPPORTED;
6960 
6961 	return B_UNSUPPORTED;
6962 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6963 }
6964 
6965 
6966 static void
6967 index_free_fd(struct file_descriptor* descriptor)
6968 {
6969 	struct vnode* vnode = descriptor->u.vnode;
6970 
6971 	if (vnode != NULL) {
6972 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6973 		put_vnode(vnode);
6974 	}
6975 }
6976 #endif
6977 
6978 
6979 static status_t
6980 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6981 	bool kernel)
6982 {
6983 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6984 		mountID, name, kernel));
6985 
6986 	struct fs_mount* mount;
6987 	status_t status = get_mount(mountID, &mount);
6988 	if (status != B_OK)
6989 		return status;
6990 
6991 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6992 		status = B_UNSUPPORTED;
6993 		goto out;
6994 	}
6995 
6996 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6997 
6998 out:
6999 	put_mount(mount);
7000 	return status;
7001 }
7002 
7003 
7004 static status_t
7005 index_remove(dev_t mountID, const char* name, bool kernel)
7006 {
7007 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7008 		mountID, name, kernel));
7009 
7010 	struct fs_mount* mount;
7011 	status_t status = get_mount(mountID, &mount);
7012 	if (status != B_OK)
7013 		return status;
7014 
7015 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7016 		status = B_READ_ONLY_DEVICE;
7017 		goto out;
7018 	}
7019 
7020 	status = FS_MOUNT_CALL(mount, remove_index, name);
7021 
7022 out:
7023 	put_mount(mount);
7024 	return status;
7025 }
7026 
7027 
7028 /*!	TODO: the query FS API is still pretty much the same as in R5.
7029 		It would be nice if file systems got some more kernel support
7030 		for queries.
7031 		For example, query parsing should be moved into the kernel.
7032 */
7033 static int
7034 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7035 	int32 token, bool kernel)
7036 {
7037 	struct fs_mount* mount;
7038 	void* cookie;
7039 
7040 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7041 		device, query, kernel));
7042 
7043 	status_t status = get_mount(device, &mount);
7044 	if (status != B_OK)
7045 		return status;
7046 
7047 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7048 		status = B_UNSUPPORTED;
7049 		goto error;
7050 	}
7051 
7052 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7053 		&cookie);
7054 	if (status != B_OK)
7055 		goto error;
7056 
7057 	// get fd for the query
7058 	int fd;
7059 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7060 	if (fd >= 0)
7061 		return fd;
7062 
7063 	status = fd;
7064 
7065 	// something went wrong
7066 	FS_MOUNT_CALL(mount, close_query, cookie);
7067 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7068 
7069 error:
7070 	put_mount(mount);
7071 	return status;
7072 }
7073 
7074 
7075 static status_t
7076 query_close(struct file_descriptor* descriptor)
7077 {
7078 	struct fs_mount* mount = descriptor->u.mount;
7079 
7080 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7081 
7082 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7083 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7084 
7085 	return B_OK;
7086 }
7087 
7088 
7089 static void
7090 query_free_fd(struct file_descriptor* descriptor)
7091 {
7092 	struct fs_mount* mount = descriptor->u.mount;
7093 
7094 	if (mount != NULL) {
7095 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7096 		put_mount(mount);
7097 	}
7098 }
7099 
7100 
7101 static status_t
7102 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7103 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7104 {
7105 	struct fs_mount* mount = descriptor->u.mount;
7106 
7107 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7108 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7109 			bufferSize, _count);
7110 	}
7111 
7112 	return B_UNSUPPORTED;
7113 }
7114 
7115 
7116 static status_t
7117 query_rewind(struct file_descriptor* descriptor)
7118 {
7119 	struct fs_mount* mount = descriptor->u.mount;
7120 
7121 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7122 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7123 
7124 	return B_UNSUPPORTED;
7125 }
7126 
7127 
7128 //	#pragma mark - General File System functions
7129 
7130 
7131 static dev_t
7132 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7133 	const char* args, bool kernel)
7134 {
7135 	struct ::fs_mount* mount;
7136 	status_t status = B_OK;
7137 	fs_volume* volume = NULL;
7138 	int32 layer = 0;
7139 	Vnode* coveredNode = NULL;
7140 
7141 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7142 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7143 
7144 	// The path is always safe; we just have to make sure that fsName is
7145 	// at least superficially valid - we can't assume anything about args.
7146 	// A NULL fsName is OK if a device was given and the FS is not virtual.
7147 	// We'll get it from the DDM later.
7148 	if (fsName == NULL) {
7149 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7150 			return B_BAD_VALUE;
7151 	} else if (fsName[0] == '\0')
7152 		return B_BAD_VALUE;
7153 
7154 	RecursiveLocker mountOpLocker(sMountOpLock);
7155 
7156 	// Helper to delete a newly created file device on failure.
7157 	// Not exactly beautiful, but helps to keep the code below cleaner.
7158 	struct FileDeviceDeleter {
7159 		FileDeviceDeleter() : id(-1) {}
7160 		~FileDeviceDeleter()
7161 		{
7162 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7163 		}
7164 
7165 		partition_id id;
7166 	} fileDeviceDeleter;
7167 
7168 	// If the file system is not a "virtual" one, the device argument should
7169 	// point to a real file/device (if given at all).
7170 	// get the partition
7171 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7172 	KPartition* partition = NULL;
7173 	KPath normalizedDevice;
7174 	bool newlyCreatedFileDevice = false;
7175 
7176 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7177 		// normalize the device path
7178 		status = normalizedDevice.SetTo(device, true);
7179 		if (status != B_OK)
7180 			return status;
7181 
7182 		// get a corresponding partition from the DDM
7183 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7184 		if (partition == NULL) {
7185 			// Partition not found: This either means the user supplied
7186 			// an invalid path, or that the path refers to an image file.
7187 			// We try to let the DDM create a file device for the path.
7188 			partition_id deviceID = ddm->CreateFileDevice(
7189 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7190 			if (deviceID >= 0) {
7191 				partition = ddm->RegisterPartition(deviceID);
7192 				if (newlyCreatedFileDevice)
7193 					fileDeviceDeleter.id = deviceID;
7194 			}
7195 		}
7196 
7197 		if (!partition) {
7198 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7199 				normalizedDevice.Path()));
7200 			return B_ENTRY_NOT_FOUND;
7201 		}
7202 
7203 		device = normalizedDevice.Path();
7204 			// correct path to file device
7205 	}
7206 	PartitionRegistrar partitionRegistrar(partition, true);
7207 
7208 	// Write lock the partition's device. For the time being, we keep the lock
7209 	// until we're done mounting -- not nice, but it ensures that no one is
7210 	// interfering.
7211 	// TODO: Just mark the partition busy while mounting!
7212 	KDiskDevice* diskDevice = NULL;
7213 	if (partition) {
7214 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7215 		if (!diskDevice) {
7216 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7217 			return B_ERROR;
7218 		}
7219 	}
7220 
7221 	DeviceWriteLocker writeLocker(diskDevice, true);
7222 		// this takes over the write lock acquired before
7223 
7224 	if (partition != NULL) {
7225 		// make sure that the partition is not busy
7226 		if (partition->IsBusy()) {
7227 			TRACE(("fs_mount(): Partition is busy.\n"));
7228 			return B_BUSY;
7229 		}
7230 
7231 		// if no FS name had been supplied, we get it from the partition
7232 		if (fsName == NULL) {
7233 			KDiskSystem* diskSystem = partition->DiskSystem();
7234 			if (!diskSystem) {
7235 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7236 					"recognize it.\n"));
7237 				return B_BAD_VALUE;
7238 			}
7239 
7240 			if (!diskSystem->IsFileSystem()) {
7241 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7242 					"partitioning system.\n"));
7243 				return B_BAD_VALUE;
7244 			}
7245 
7246 			// The disk system name will not change, and the KDiskSystem
7247 			// object will not go away while the disk device is locked (and
7248 			// the partition has a reference to it), so this is safe.
7249 			fsName = diskSystem->Name();
7250 		}
7251 	}
7252 
7253 	mount = new(std::nothrow) (struct ::fs_mount);
7254 	if (mount == NULL)
7255 		return B_NO_MEMORY;
7256 
7257 	mount->device_name = strdup(device);
7258 		// "device" can be NULL
7259 
7260 	status = mount->entry_cache.Init();
7261 	if (status != B_OK)
7262 		goto err1;
7263 
7264 	// initialize structure
7265 	mount->id = sNextMountID++;
7266 	mount->partition = NULL;
7267 	mount->root_vnode = NULL;
7268 	mount->covers_vnode = NULL;
7269 	mount->unmounting = false;
7270 	mount->owns_file_device = false;
7271 	mount->volume = NULL;
7272 
7273 	// build up the volume(s)
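	// For layered file systems, each loop iteration creates one fs_volume:
	// the newest layer becomes mount->volume, and its super_volume pointer
	// leads back through the previously created layers down to layer 0. A
	// plain (single layer) file system simply exits the loop after the
	// first iteration.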
7274 	while (true) {
7275 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7276 		if (layerFSName == NULL) {
7277 			if (layer == 0) {
7278 				status = B_NO_MEMORY;
7279 				goto err1;
7280 			}
7281 
7282 			break;
7283 		}
7284 		MemoryDeleter layerFSNameDeleter(layerFSName);
7285 
7286 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7287 		if (volume == NULL) {
7288 			status = B_NO_MEMORY;
7289 			goto err1;
7290 		}
7291 
7292 		volume->id = mount->id;
7293 		volume->partition = partition != NULL ? partition->ID() : -1;
7294 		volume->layer = layer++;
7295 		volume->private_volume = NULL;
7296 		volume->ops = NULL;
7297 		volume->sub_volume = NULL;
7298 		volume->super_volume = NULL;
7299 		volume->file_system = NULL;
7300 		volume->file_system_name = NULL;
7301 
7302 		volume->file_system_name = get_file_system_name(layerFSName);
7303 		if (volume->file_system_name == NULL) {
7304 			status = B_NO_MEMORY;
7305 			free(volume);
7306 			goto err1;
7307 		}
7308 
7309 		volume->file_system = get_file_system(layerFSName);
7310 		if (volume->file_system == NULL) {
7311 			status = B_DEVICE_NOT_FOUND;
7312 			free(volume->file_system_name);
7313 			free(volume);
7314 			goto err1;
7315 		}
7316 
7317 		if (mount->volume == NULL)
7318 			mount->volume = volume;
7319 		else {
7320 			volume->super_volume = mount->volume;
7321 			mount->volume->sub_volume = volume;
7322 			mount->volume = volume;
7323 		}
7324 	}
7325 
7326 	// insert mount struct into list before we call FS's mount() function
7327 	// so that vnodes can be created for this mount
7328 	mutex_lock(&sMountMutex);
7329 	hash_insert(sMountsTable, mount);
7330 	mutex_unlock(&sMountMutex);
7331 
7332 	ino_t rootID;
7333 
7334 	if (!sRoot) {
7335 		// we haven't mounted anything yet
7336 		if (strcmp(path, "/") != 0) {
7337 			status = B_ERROR;
7338 			goto err2;
7339 		}
7340 
7341 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7342 			args, &rootID);
7343 		if (status != 0)
7344 			goto err2;
7345 	} else {
7346 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7347 		if (status != B_OK)
7348 			goto err2;
7349 
7350 		mount->covers_vnode = coveredNode;
7351 
7352 		// make sure covered_vnode is a directory
7353 		// make sure coveredNode is a directory
7354 			status = B_NOT_A_DIRECTORY;
7355 			goto err3;
7356 		}
7357 
7358 		if (coveredNode->IsCovered()) {
7359 			// this is already a covered vnode
7360 			status = B_BUSY;
7361 			goto err3;
7362 		}
7363 
7364 		// mount it/them
7365 		fs_volume* volume = mount->volume;
7366 		while (volume) {
7367 			status = volume->file_system->mount(volume, device, flags, args,
7368 				&rootID);
7369 			if (status != B_OK) {
7370 				if (volume->sub_volume)
7371 					goto err4;
7372 				goto err3;
7373 			}
7374 
7375 			volume = volume->super_volume;
7376 		}
7377 
7378 		volume = mount->volume;
7379 		while (volume) {
7380 			if (volume->ops->all_layers_mounted != NULL)
7381 				volume->ops->all_layers_mounted(volume);
7382 			volume = volume->super_volume;
7383 		}
7384 	}
7385 
7386 	// the root node is supposed to be owned by the file system - it must
7387 	// exist at this point
7388 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7389 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7390 		panic("fs_mount: file system does not own its root node!\n");
7391 		status = B_ERROR;
7392 		goto err4;
7393 	}
7394 
7395 	// set up the links between the root vnode and the vnode it covers
7396 	rw_lock_write_lock(&sVnodeLock);
7397 	if (coveredNode != NULL) {
7398 		if (coveredNode->IsCovered()) {
7399 			// the vnode is covered now
7400 			status = B_BUSY;
7401 			rw_lock_write_unlock(&sVnodeLock);
7402 			goto err4;
7403 		}
7404 
7405 		mount->root_vnode->covers = coveredNode;
7406 		mount->root_vnode->SetCovering(true);
7407 
7408 		coveredNode->covered_by = mount->root_vnode;
7409 		coveredNode->SetCovered(true);
7410 	}
7411 	rw_lock_write_unlock(&sVnodeLock);
7412 
7413 	if (!sRoot) {
7414 		sRoot = mount->root_vnode;
7415 		mutex_lock(&sIOContextRootLock);
7416 		get_current_io_context(true)->root = sRoot;
7417 		mutex_unlock(&sIOContextRootLock);
7418 		inc_vnode_ref_count(sRoot);
7419 	}
7420 
7421 	// supply the partition (if any) with the mount cookie and mark it mounted
7422 	if (partition) {
7423 		partition->SetMountCookie(mount->volume->private_volume);
7424 		partition->SetVolumeID(mount->id);
7425 
7426 		// keep a partition reference as long as the partition is mounted
7427 		partitionRegistrar.Detach();
7428 		mount->partition = partition;
7429 		mount->owns_file_device = newlyCreatedFileDevice;
7430 		fileDeviceDeleter.id = -1;
7431 	}
7432 
7433 	notify_mount(mount->id,
7434 		coveredNode != NULL ? coveredNode->device : -1,
7435 		coveredNode != NULL ? coveredNode->id : -1);
7436 
7437 	return mount->id;
7438 
7439 err4:
7440 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7441 err3:
7442 	if (coveredNode != NULL)
7443 		put_vnode(coveredNode);
7444 err2:
7445 	mutex_lock(&sMountMutex);
7446 	hash_remove(sMountsTable, mount);
7447 	mutex_unlock(&sMountMutex);
7448 err1:
7449 	delete mount;
7450 
7451 	return status;
7452 }
7453 
7454 
7455 static status_t
7456 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7457 {
7458 	struct fs_mount* mount;
7459 	status_t err;
7460 
7461 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7462 		mountID, kernel));
7463 
7464 	struct vnode* pathVnode = NULL;
7465 	if (path != NULL) {
7466 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7467 		if (err != B_OK)
7468 			return B_ENTRY_NOT_FOUND;
7469 	}
7470 
7471 	RecursiveLocker mountOpLocker(sMountOpLock);
7472 
7473 	// this lock is not strictly necessary, but here in case of KDEBUG
7474 	// This lock is not strictly necessary, but it is taken here in the
7475 	// KDEBUG case to keep the ASSERT in find_mount() working.
7476 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7477 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7478 	if (mount == NULL) {
7479 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7480 			pathVnode);
7481 	}
7482 
7483 	if (path != NULL) {
7484 		put_vnode(pathVnode);
7485 
7486 		if (mount->root_vnode != pathVnode) {
7487 			// not mountpoint
7488 			return B_BAD_VALUE;
7489 		}
7490 	}
7491 
7492 	// if the volume is associated with a partition, lock the device of the
7493 	// partition as long as we are unmounting
7494 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7495 	KPartition* partition = mount->partition;
7496 	KDiskDevice* diskDevice = NULL;
7497 	if (partition != NULL) {
7498 		if (partition->Device() == NULL) {
7499 			dprintf("fs_unmount(): There is no device!\n");
7500 			return B_ERROR;
7501 		}
7502 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7503 		if (!diskDevice) {
7504 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7505 			return B_ERROR;
7506 		}
7507 	}
7508 	DeviceWriteLocker writeLocker(diskDevice, true);
7509 
7510 	// make sure that the partition is not busy
7511 	if (partition != NULL) {
7512 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7513 			TRACE(("fs_unmount(): Partition is busy.\n"));
7514 			return B_BUSY;
7515 		}
7516 	}
7517 
7518 	// grab the vnode master mutex to keep someone from creating
7519 	// a vnode while we're figuring out if we can continue
7520 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7521 
7522 	bool disconnectedDescriptors = false;
7523 
7524 	while (true) {
7525 		bool busy = false;
7526 
7527 		// cycle through the list of vnodes associated with this mount and
7528 		// make sure none of them is busy or still referenced
7529 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7530 		while (struct vnode* vnode = iterator.Next()) {
7531 			if (vnode->IsBusy()) {
7532 				busy = true;
7533 				break;
7534 			}
7535 
7536 			// check the vnode's ref count -- subtract additional references for
7537 			// covering
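			// Each covers/covered_by link holds one reference on the node
			// (see fs_mount()), so a vnode that merely serves as a mount
			// point still counts as unused here.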
7538 			int32 refCount = vnode->ref_count;
7539 			if (vnode->covers != NULL)
7540 				refCount--;
7541 			if (vnode->covered_by != NULL)
7542 				refCount--;
7543 
7544 			if (refCount != 0) {
7545 				// there are still vnodes in use on this mount, so we cannot
7546 				// unmount yet
7547 				busy = true;
7548 				break;
7549 			}
7550 		}
7551 
7552 		if (!busy)
7553 			break;
7554 
7555 		if ((flags & B_FORCE_UNMOUNT) == 0)
7556 			return B_BUSY;
7557 
7558 		if (disconnectedDescriptors) {
7559 			// wait a bit until the last access is finished, and then try again
7560 			vnodesWriteLocker.Unlock();
7561 			snooze(100000);
7562 			// TODO: if there is some kind of bug that prevents the ref counts
7563 			// from getting back to zero, this will fall into an endless loop...
7564 			vnodesWriteLocker.Lock();
7565 			continue;
7566 		}
7567 
7568 		// the file system is still busy - but we're forced to unmount it,
7569 		// so let's disconnect all open file descriptors
7570 
7571 		mount->unmounting = true;
7572 			// prevent new vnodes from being created
7573 
7574 		vnodesWriteLocker.Unlock();
7575 
7576 		disconnect_mount_or_vnode_fds(mount, NULL);
7577 		disconnectedDescriptors = true;
7578 
7579 		vnodesWriteLocker.Lock();
7580 	}
7581 
7582 	// We can safely continue. Mark all of the vnodes busy, and put this
7583 	// mount structure into unmounting state. Also undo the vnode
7584 	// covers/covered_by links.
7585 	mount->unmounting = true;
7586 
7587 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7588 	while (struct vnode* vnode = iterator.Next()) {
7589 		// Remove all covers/covered_by links from other mounts' nodes to this
7590 		// vnode and adjust the node ref count accordingly. We will release the
7591 		// references to the external vnodes below.
7592 		if (Vnode* coveredNode = vnode->covers) {
7593 			if (Vnode* coveringNode = vnode->covered_by) {
7594 				// We have both covered and covering vnodes, so just remove us
7595 				// from the chain.
7596 				coveredNode->covered_by = coveringNode;
7597 				coveringNode->covers = coveredNode;
7598 				vnode->ref_count -= 2;
7599 
7600 				vnode->covered_by = NULL;
7601 				vnode->covers = NULL;
7602 				vnode->SetCovering(false);
7603 				vnode->SetCovered(false);
7604 			} else {
7605 				// We only have a covered vnode. Remove its link to us.
7606 				coveredNode->covered_by = NULL;
7607 				coveredNode->SetCovered(false);
7608 				vnode->ref_count--;
7609 
7610 				// If the other node is an external vnode, we keep its
7611 				// link around so we can put the reference later on. Otherwise
7612 				// we get rid of it right now.
7613 				if (coveredNode->mount == mount) {
7614 					vnode->covers = NULL;
7615 					coveredNode->ref_count--;
7616 				}
7617 			}
7618 		} else if (Vnode* coveringNode = vnode->covered_by) {
7619 			// We only have a covering vnode. Remove its link to us.
7620 			coveringNode->covers = NULL;
7621 			coveringNode->SetCovering(false);
7622 			vnode->ref_count--;
7623 
7624 			// If the other node is an external vnode, we keep its
7625 			// link around so we can put the reference later on. Otherwise
7626 			// we get rid of it right now.
7627 			if (coveringNode->mount == mount) {
7628 				vnode->covered_by = NULL;
7629 				coveringNode->ref_count--;
7630 			}
7631 		}
7632 
7633 		vnode->SetBusy(true);
7634 		vnode_to_be_freed(vnode);
7635 	}
7636 
7637 	vnodesWriteLocker.Unlock();
7638 
7639 	// Free all vnodes associated with this mount.
7640 	// They will be removed from the mount list by free_vnode(), so
7641 	// we don't have to do this.
7642 	while (struct vnode* vnode = mount->vnodes.Head()) {
7643 		// Put the references to external covered/covering vnodes we kept above.
7644 		if (Vnode* coveredNode = vnode->covers)
7645 			put_vnode(coveredNode);
7646 		if (Vnode* coveringNode = vnode->covered_by)
7647 			put_vnode(coveringNode);
7648 
7649 		free_vnode(vnode, false);
7650 	}
7651 
7652 	// remove the mount structure from the hash table
7653 	mutex_lock(&sMountMutex);
7654 	hash_remove(sMountsTable, mount);
7655 	mutex_unlock(&sMountMutex);
7656 
7657 	mountOpLocker.Unlock();
7658 
7659 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7660 	notify_unmount(mount->id);
7661 
7662 	// dereference the partition and mark it unmounted
7663 	if (partition) {
7664 		partition->SetVolumeID(-1);
7665 		partition->SetMountCookie(NULL);
7666 
7667 		if (mount->owns_file_device)
7668 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7669 		partition->Unregister();
7670 	}
7671 
7672 	delete mount;
7673 	return B_OK;
7674 }
7675 
7676 
7677 static status_t
7678 fs_sync(dev_t device)
7679 {
7680 	struct fs_mount* mount;
7681 	status_t status = get_mount(device, &mount);
7682 	if (status != B_OK)
7683 		return status;
7684 
7685 	struct vnode marker;
7686 	memset(&marker, 0, sizeof(marker));
7687 	marker.SetBusy(true);
7688 	marker.SetRemoved(true);
7689 
7690 	// First, synchronize all file caches
7691 
7692 	while (true) {
7693 		WriteLocker locker(sVnodeLock);
7694 			// Note: That's the easy way, which is probably OK for sync(),
7695 			// since it's a relatively rare call and doesn't need to allow for
7696 			// a lot of concurrency. Using a read lock would be possible, but
7697 			// also more involved, since we would have to lock the individual
7698 			// nodes and take care of the locking order, which we might not
7699 			// want to do while holding fs_mount::rlock.
7700 
7701 		// synchronize access to vnode list
7702 		recursive_lock_lock(&mount->rlock);
7703 
7704 		struct vnode* vnode;
7705 		if (!marker.IsRemoved()) {
7706 			vnode = mount->vnodes.GetNext(&marker);
7707 			mount->vnodes.Remove(&marker);
7708 			marker.SetRemoved(true);
7709 		} else
7710 			vnode = mount->vnodes.First();
7711 
7712 		while (vnode != NULL && (vnode->cache == NULL
7713 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7714 			// TODO: we could track writes (and writable mapped vnodes)
7715 			//	and have a simple flag that we could test for here
7716 			vnode = mount->vnodes.GetNext(vnode);
7717 		}
7718 
7719 		if (vnode != NULL) {
7720 			// insert marker vnode again
7721 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7722 			marker.SetRemoved(false);
7723 		}
7724 
7725 		recursive_lock_unlock(&mount->rlock);
7726 
7727 		if (vnode == NULL)
7728 			break;
7729 
7730 		vnode = lookup_vnode(mount->id, vnode->id);
7731 		if (vnode == NULL || vnode->IsBusy())
7732 			continue;
7733 
7734 		if (vnode->ref_count == 0) {
7735 			// this vnode has been unused before
7736 			vnode_used(vnode);
7737 		}
7738 		inc_vnode_ref_count(vnode);
7739 
7740 		locker.Unlock();
7741 
7742 		if (vnode->cache != NULL && !vnode->IsRemoved())
7743 			vnode->cache->WriteModified();
7744 
7745 		put_vnode(vnode);
7746 	}
7747 
7748 	// And then, let the file systems do their synchronizing work
7749 
7750 	if (HAS_FS_MOUNT_CALL(mount, sync))
7751 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7752 
7753 	put_mount(mount);
7754 	return status;
7755 }
7756 
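// A minimal sketch of the marker technique used by fs_sync() above: a dummy
// list element is parked behind the node about to be processed, the lock is
// dropped for the expensive work, and iteration resumes from the marker
// afterwards. Names and the per-node work are illustrative only.
#if 0
static void
process_all_nodes(VnodeList& list, recursive_lock& lock)
{
	struct vnode marker;
	memset(&marker, 0, sizeof(marker));
	marker.SetBusy(true);
		// busy markers are skipped by everyone else
	marker.SetRemoved(true);
		// doubles as "marker is currently not in the list"

	while (true) {
		recursive_lock_lock(&lock);

		struct vnode* vnode;
		if (!marker.IsRemoved()) {
			// resume after the marker and take it out of the list
			vnode = list.GetNext(&marker);
			list.Remove(&marker);
			marker.SetRemoved(true);
		} else
			vnode = list.First();

		if (vnode != NULL) {
			// park the marker behind the node we are about to process
			list.Insert(list.GetNext(vnode), &marker);
			marker.SetRemoved(false);
		}

		recursive_lock_unlock(&lock);

		if (vnode == NULL)
			break;

		// ... expensive per-node work, done without holding the lock ...
	}
}
#endif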
7757 
7758 static status_t
7759 fs_read_info(dev_t device, struct fs_info* info)
7760 {
7761 	struct fs_mount* mount;
7762 	status_t status = get_mount(device, &mount);
7763 	if (status != B_OK)
7764 		return status;
7765 
7766 	memset(info, 0, sizeof(struct fs_info));
7767 
7768 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7769 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7770 
7771 	// fill in info the file system doesn't (have to) know about
7772 	if (status == B_OK) {
7773 		info->dev = mount->id;
7774 		info->root = mount->root_vnode->id;
7775 
7776 		fs_volume* volume = mount->volume;
7777 		while (volume->super_volume != NULL)
7778 			volume = volume->super_volume;
7779 
7780 		strlcpy(info->fsh_name, volume->file_system_name,
7781 			sizeof(info->fsh_name));
7782 		if (mount->device_name != NULL) {
7783 			strlcpy(info->device_name, mount->device_name,
7784 				sizeof(info->device_name));
7785 		}
7786 	}
7787 
7788 	// if the call is not supported by the file system, there are still
7789 	// the parts that we filled out ourselves
7790 
7791 	put_mount(mount);
7792 	return status;
7793 }
7794 
7795 
7796 static status_t
7797 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7798 {
7799 	struct fs_mount* mount;
7800 	status_t status = get_mount(device, &mount);
7801 	if (status != B_OK)
7802 		return status;
7803 
7804 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7805 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7806 	else
7807 		status = B_READ_ONLY_DEVICE;
7808 
7809 	put_mount(mount);
7810 	return status;
7811 }
7812 
7813 
7814 static dev_t
7815 fs_next_device(int32* _cookie)
7816 {
7817 	struct fs_mount* mount = NULL;
7818 	dev_t device = *_cookie;
7819 
7820 	mutex_lock(&sMountMutex);
7821 
7822 	// Since device IDs are assigned sequentially, this algorithm
7823 	// works well enough. It makes sure that the device list
7824 	// returned is sorted, and that no device is skipped when an
7825 	// already visited device has been unmounted.
7826 
7827 	while (device < sNextMountID) {
7828 		mount = find_mount(device++);
7829 		if (mount != NULL && mount->volume->private_volume != NULL)
7830 			break;
7831 	}
7832 
7833 	*_cookie = device;
7834 
7835 	if (mount != NULL)
7836 		device = mount->id;
7837 	else
7838 		device = B_BAD_VALUE;
7839 
7840 	mutex_unlock(&sMountMutex);
7841 
7842 	return device;
7843 }
7844 
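// A minimal sketch of how the cookie protocol above is consumed. Since the
// cookie is the next device ID to try rather than a list index, volumes
// unmounted mid-iteration are simply skipped without disturbing the rest of
// the enumeration.
#if 0
static void
enumerate_mounted_volumes()
{
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0) {
		// "device" is a currently mounted volume ID; "cookie" already
		// points past it for the next call
		dprintf("mounted volume: %" B_PRIdDEV "\n", device);
	}
	// fs_next_device() returns B_BAD_VALUE once the ID space is exhausted
}
#endif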
7845 
7846 ssize_t
7847 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7848 	void *buffer, size_t readBytes)
7849 {
7850 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7851 	if (attrFD < 0)
7852 		return attrFD;
7853 
7854 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7855 
7856 	_kern_close(attrFD);
7857 
7858 	return bytesRead;
7859 }
7860 
7861 
7862 static status_t
7863 get_cwd(char* buffer, size_t size, bool kernel)
7864 {
7865 	// Get current working directory from io context
7866 	struct io_context* context = get_current_io_context(kernel);
7867 	status_t status;
7868 
7869 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7870 
7871 	mutex_lock(&context->io_mutex);
7872 
7873 	struct vnode* vnode = context->cwd;
7874 	if (vnode)
7875 		inc_vnode_ref_count(vnode);
7876 
7877 	mutex_unlock(&context->io_mutex);
7878 
7879 	if (vnode) {
7880 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7881 		put_vnode(vnode);
7882 	} else
7883 		status = B_ERROR;
7884 
7885 	return status;
7886 }
7887 
7888 
7889 static status_t
7890 set_cwd(int fd, char* path, bool kernel)
7891 {
7892 	struct io_context* context;
7893 	struct vnode* vnode = NULL;
7894 	struct vnode* oldDirectory;
7895 	status_t status;
7896 
7897 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7898 
7899 	// Get vnode for passed path, and bail if it failed
7900 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7901 	if (status < 0)
7902 		return status;
7903 
7904 	if (!S_ISDIR(vnode->Type())) {
7905 		// nope, can't cwd to here
7906 		status = B_NOT_A_DIRECTORY;
7907 		goto err;
7908 	}
7909 
7910 	// Get current io context and lock
7911 	context = get_current_io_context(kernel);
7912 	mutex_lock(&context->io_mutex);
7913 
7914 	// save the old current working directory first
7915 	oldDirectory = context->cwd;
7916 	context->cwd = vnode;
7917 
7918 	mutex_unlock(&context->io_mutex);
7919 
7920 	if (oldDirectory)
7921 		put_vnode(oldDirectory);
7922 
7923 	return B_NO_ERROR;
7924 
7925 err:
7926 	put_vnode(vnode);
7927 	return status;
7928 }
7929 
7930 
7931 //	#pragma mark - kernel mirrored syscalls
7932 
7933 
7934 dev_t
7935 _kern_mount(const char* path, const char* device, const char* fsName,
7936 	uint32 flags, const char* args, size_t argsLength)
7937 {
7938 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7939 	if (pathBuffer.InitCheck() != B_OK)
7940 		return B_NO_MEMORY;
7941 
7942 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7943 }
7944 
7945 
7946 status_t
7947 _kern_unmount(const char* path, uint32 flags)
7948 {
7949 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7950 	if (pathBuffer.InitCheck() != B_OK)
7951 		return B_NO_MEMORY;
7952 
7953 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7954 }
7955 
7956 
7957 status_t
7958 _kern_read_fs_info(dev_t device, struct fs_info* info)
7959 {
7960 	if (info == NULL)
7961 		return B_BAD_VALUE;
7962 
7963 	return fs_read_info(device, info);
7964 }
7965 
7966 
7967 status_t
7968 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7969 {
7970 	if (info == NULL)
7971 		return B_BAD_VALUE;
7972 
7973 	return fs_write_info(device, info, mask);
7974 }
7975 
7976 
7977 status_t
7978 _kern_sync(void)
7979 {
7980 	// Note: _kern_sync() is also called from _user_sync()
7981 	int32 cookie = 0;
7982 	dev_t device;
7983 	while ((device = next_dev(&cookie)) >= 0) {
7984 		status_t status = fs_sync(device);
7985 		if (status != B_OK && status != B_BAD_VALUE) {
7986 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
7987 				strerror(status));
7988 		}
7989 	}
7990 
7991 	return B_OK;
7992 }
7993 
7994 
7995 dev_t
7996 _kern_next_device(int32* _cookie)
7997 {
7998 	return fs_next_device(_cookie);
7999 }
8000 
8001 
8002 status_t
8003 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8004 	size_t infoSize)
8005 {
8006 	if (infoSize != sizeof(fd_info))
8007 		return B_BAD_VALUE;
8008 
8009 	// get the team
8010 	Team* team = Team::Get(teamID);
8011 	if (team == NULL)
8012 		return B_BAD_TEAM_ID;
8013 	BReference<Team> teamReference(team, true);
8014 
8015 	// now that we have a team reference, its I/O context won't go away
8016 	io_context* context = team->io_context;
8017 	MutexLocker contextLocker(context->io_mutex);
8018 
8019 	uint32 slot = *_cookie;
8020 
8021 	struct file_descriptor* descriptor;
8022 	while (slot < context->table_size
8023 		&& (descriptor = context->fds[slot]) == NULL) {
8024 		slot++;
8025 	}
8026 
8027 	if (slot >= context->table_size)
8028 		return B_ENTRY_NOT_FOUND;
8029 
8030 	info->number = slot;
8031 	info->open_mode = descriptor->open_mode;
8032 
8033 	struct vnode* vnode = fd_vnode(descriptor);
8034 	if (vnode != NULL) {
8035 		info->device = vnode->device;
8036 		info->node = vnode->id;
8037 	} else if (descriptor->u.mount != NULL) {
8038 		info->device = descriptor->u.mount->id;
8039 		info->node = -1;
8040 	}
8041 
8042 	*_cookie = slot + 1;
8043 	return B_OK;
8044 }
8045 
8046 
8047 int
8048 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8049 	int perms)
8050 {
8051 	if ((openMode & O_CREAT) != 0) {
8052 		return file_create_entry_ref(device, inode, name, openMode, perms,
8053 			true);
8054 	}
8055 
8056 	return file_open_entry_ref(device, inode, name, openMode, true);
8057 }
8058 
8059 
8060 /*!	\brief Opens a node specified by a FD + path pair.
8061 
8062 	At least one of \a fd and \a path must be specified.
8063 	If only \a fd is given, the function opens the node identified by this
8064 	FD. If only a path is given, this path is opened. If both are given and
8065 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8066 	of the directory (!) identified by \a fd.
8067 
8068 	\param fd The FD. May be < 0.
8069 	\param path The absolute or relative path. May be \c NULL.
8070 	\param openMode The open mode.
8071 	\return A FD referring to the newly opened node, or an error code,
8072 			if an error occurs.
8073 */
8074 int
8075 _kern_open(int fd, const char* path, int openMode, int perms)
8076 {
8077 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8078 	if (pathBuffer.InitCheck() != B_OK)
8079 		return B_NO_MEMORY;
8080 
8081 	if (openMode & O_CREAT)
8082 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8083 
8084 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8085 }
8086 
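// A minimal sketch of the three addressing modes described above; the paths
// are illustrative only, and error handling/_kern_close() are omitted.
#if 0
static void
open_modes_example()
{
	// 1. absolute path -- the FD is ignored
	int fd1 = _kern_open(-1, "/boot/home/notes.txt", O_RDONLY, 0);

	// 2. relative path -- resolved against the directory FD
	int dirFD = _kern_open_dir(-1, "/boot/home");
	int fd2 = _kern_open(dirFD, "notes.txt", O_RDONLY, 0);

	// 3. FD only -- reopens the node the FD refers to
	int fd3 = _kern_open(fd2, NULL, O_RDWR, 0);
}
#endif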
8087 
8088 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8089 
8090 	The supplied name may be \c NULL, in which case directory identified
8091 	by \a device and \a inode will be opened. Otherwise \a device and
8092 	\a inode identify the parent directory of the directory to be opened
8093 	and \a name its entry name.
8094 
8095 	\param device If \a name is specified the ID of the device the parent
8096 		   directory of the directory to be opened resides on, otherwise
8097 		   the device of the directory itself.
8098 	\param inode If \a name is specified the node ID of the parent
8099 		   directory of the directory to be opened, otherwise node ID of the
8100 		   directory itself.
8101 	\param name The entry name of the directory to be opened. If \c NULL,
8102 		   the \a device + \a inode pair identify the node to be opened.
8103 	\return The FD of the newly opened directory or an error code, if
8104 			something went wrong.
8105 */
8106 int
8107 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8108 {
8109 	return dir_open_entry_ref(device, inode, name, true);
8110 }
8111 
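// A minimal sketch of the two addressing modes described above; the device
// and node IDs are assumed to come from an earlier stat or entry ref, and
// "config" is an illustrative entry name.
#if 0
static void
open_dir_entry_ref_example(dev_t device, ino_t parent, ino_t dir)
{
	// name given: (device, parent) is the parent directory, "config" the
	// entry to be opened within it
	int fd1 = _kern_open_dir_entry_ref(device, parent, "config");

	// name NULL: (device, dir) identifies the directory itself
	int fd2 = _kern_open_dir_entry_ref(device, dir, NULL);

	// (error handling and _kern_close() omitted)
}
#endif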
8112 
8113 /*!	\brief Opens a directory specified by a FD + path pair.
8114 
8115 	At least one of \a fd and \a path must be specified.
8116 	If only \a fd is given, the function opens the directory identified by this
8117 	FD. If only a path is given, this path is opened. If both are given and
8118 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8119 	of the directory (!) identified by \a fd.
8120 
8121 	\param fd The FD. May be < 0.
8122 	\param path The absolute or relative path. May be \c NULL.
8123 	\return A FD referring to the newly opened directory, or an error code,
8124 			if an error occurs.
8125 */
8126 int
8127 _kern_open_dir(int fd, const char* path)
8128 {
8129 	if (path == NULL)
8130 		return dir_open(fd, NULL, true);
8131 
8132 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8133 	if (pathBuffer.InitCheck() != B_OK)
8134 		return B_NO_MEMORY;
8135 
8136 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8137 }
8138 
8139 
8140 status_t
8141 _kern_fcntl(int fd, int op, size_t argument)
8142 {
8143 	return common_fcntl(fd, op, argument, true);
8144 }
8145 
8146 
8147 status_t
8148 _kern_fsync(int fd)
8149 {
8150 	return common_sync(fd, true);
8151 }
8152 
8153 
8154 status_t
8155 _kern_lock_node(int fd)
8156 {
8157 	return common_lock_node(fd, true);
8158 }
8159 
8160 
8161 status_t
8162 _kern_unlock_node(int fd)
8163 {
8164 	return common_unlock_node(fd, true);
8165 }
8166 
8167 
8168 status_t
8169 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8170 	int perms)
8171 {
8172 	return dir_create_entry_ref(device, inode, name, perms, true);
8173 }
8174 
8175 
8176 /*!	\brief Creates a directory specified by a FD + path pair.
8177 
8178 	\a path must always be specified (it contains the name of the new directory
8179 	at least). If only a path is given, this path identifies the location at
8180 	which the directory shall be created. If both \a fd and \a path are given
8181 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8182 	of the directory (!) identified by \a fd.
8183 
8184 	\param fd The FD. May be < 0.
8185 	\param path The absolute or relative path. Must not be \c NULL.
8186 	\param perms The access permissions the new directory shall have.
8187 	\return \c B_OK, if the directory has been created successfully, another
8188 			error code otherwise.
8189 */
8190 status_t
8191 _kern_create_dir(int fd, const char* path, int perms)
8192 {
8193 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8194 	if (pathBuffer.InitCheck() != B_OK)
8195 		return B_NO_MEMORY;
8196 
8197 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8198 }
8199 
8200 
8201 status_t
8202 _kern_remove_dir(int fd, const char* path)
8203 {
8204 	if (path) {
8205 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8206 		if (pathBuffer.InitCheck() != B_OK)
8207 			return B_NO_MEMORY;
8208 
8209 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8210 	}
8211 
8212 	return dir_remove(fd, NULL, true);
8213 }
8214 
8215 
8216 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8217 
8218 	At least one of \a fd and \a path must be specified.
8219 	If only \a fd is given, the symlink to be read is the node identified
8220 	by this FD. If only a path is given, this path identifies the
8221 	symlink to be read. If both are given and the path is absolute, \a fd is
8222 	ignored; a relative path is reckoned off of the directory (!) identified
8223 	by \a fd.
8224 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8225 	will still be updated to reflect the required buffer size.
8226 
8227 	\param fd The FD. May be < 0.
8228 	\param path The absolute or relative path. May be \c NULL.
8229 	\param buffer The buffer into which the contents of the symlink shall be
8230 		   written.
8231 	\param _bufferSize A pointer to the size of the supplied buffer.
8232 	\return The length of the link on success or an appropriate error code
8233 */
8234 status_t
8235 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8236 {
8237 	if (path) {
8238 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8239 		if (pathBuffer.InitCheck() != B_OK)
8240 			return B_NO_MEMORY;
8241 
8242 		return common_read_link(fd, pathBuffer.LockBuffer(),
8243 			buffer, _bufferSize, true);
8244 	}
8245 
8246 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8247 }
8248 
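// A minimal sketch of the retry pattern the in/out \a _bufferSize parameter
// enables: on B_BUFFER_OVERFLOW the required size is already known.
#if 0
static status_t
read_link_contents_example(const char* path)
{
	char small[32];
	size_t size = sizeof(small);
	status_t status = _kern_read_link(-1, path, small, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// "size" now holds the required buffer size
		char* buffer = (char*)malloc(size);
		if (buffer == NULL)
			return B_NO_MEMORY;
		status = _kern_read_link(-1, path, buffer, &size);
		// ... use "buffer" ...
		free(buffer);
	}
	return status;
}
#endif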
8249 
8250 /*!	\brief Creates a symlink specified by a FD + path pair.
8251 
8252 	\a path must always be specified (it contains the name of the new symlink
8253 	at least). If only a path is given, this path identifies the location at
8254 	which the symlink shall be created. If both \a fd and \a path are given and
8255 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8256 	of the directory (!) identified by \a fd.
8257 
8258 	\param fd The FD. May be < 0.
8259 	\param path The absolute or relative path of the new symlink. Must not
		   be \c NULL.
	\param toPath The path the symlink shall point to.
8260 	\param mode The access permissions the new symlink shall have.
8261 	\return \c B_OK, if the symlink has been created successfully, another
8262 			error code otherwise.
8263 */
8264 status_t
8265 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8266 {
8267 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8268 	if (pathBuffer.InitCheck() != B_OK)
8269 		return B_NO_MEMORY;
8270 
8271 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8272 		toPath, mode, true);
8273 }
8274 
8275 
8276 status_t
8277 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8278 	bool traverseLeafLink)
8279 {
8280 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8281 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8282 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8283 		return B_NO_MEMORY;
8284 
8285 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8286 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8287 }
8288 
8289 
8290 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8291 
8292 	\a path must always be specified (it contains at least the name of the entry
8293 	to be deleted). If only a path is given, this path identifies the entry
8294 	directly. If both \a fd and \a path are given and the path is absolute,
8295 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8296 	identified by \a fd.
8297 
8298 	\param fd The FD. May be < 0.
8299 	\param path The absolute or relative path. Must not be \c NULL.
8300 	\return \c B_OK, if the entry has been removed successfully, another
8301 			error code otherwise.
8302 */
8303 status_t
8304 _kern_unlink(int fd, const char* path)
8305 {
8306 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8307 	if (pathBuffer.InitCheck() != B_OK)
8308 		return B_NO_MEMORY;
8309 
8310 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8311 }
8312 
8313 
8314 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8315 		   by another FD + path pair.
8316 
8317 	\a oldPath and \a newPath must always be specified (they contain at least
8318 	the name of the entry). If only a path is given, this path identifies the
8319 	entry directly. If both a FD and a path are given and the path is absolute,
8320 	the FD is ignored; a relative path is reckoned off of the directory (!)
8321 	identified by the respective FD.
8322 
8323 	\param oldFD The FD of the old location. May be < 0.
8324 	\param oldPath The absolute or relative path of the old location. Must not
8325 		   be \c NULL.
8326 	\param newFD The FD of the new location. May be < 0.
8327 	\param newPath The absolute or relative path of the new location. Must not
8328 		   be \c NULL.
8329 	\return \c B_OK, if the entry has been moved successfully, another
8330 			error code otherwise.
8331 */
8332 status_t
8333 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8334 {
8335 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8336 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8337 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8338 		return B_NO_MEMORY;
8339 
8340 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8341 		newFD, newPathBuffer.LockBuffer(), true);
8342 }
8343 
8344 
8345 status_t
8346 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8347 {
8348 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8349 	if (pathBuffer.InitCheck() != B_OK)
8350 		return B_NO_MEMORY;
8351 
8352 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8353 		true);
8354 }
8355 
8356 
8357 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8358 
8359 	If only \a fd is given, the stat operation associated with the type
8360 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8361 	given, this path identifies the entry for whose node to retrieve the
8362 	stat data. If both \a fd and \a path are given and the path is absolute,
8363 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8364 	identified by \a fd and specifies the entry whose stat data shall be
8365 	retrieved.
8366 
8367 	\param fd The FD. May be < 0.
8368 	\param path The absolute or relative path. Must not be \c NULL.
8369 	\param traverseLeafLink If \a path is given, \c true specifies that the
8370 		   function shall not stick to symlinks, but traverse them.
8371 	\param stat The buffer the stat data shall be written into.
8372 	\param statSize The size of the supplied stat buffer.
8373 	\return \c B_OK, if the stat data have been read successfully, another
8374 			error code otherwise.
8375 */
8376 status_t
8377 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8378 	struct stat* stat, size_t statSize)
8379 {
8380 	struct stat completeStat;
8381 	struct stat* originalStat = NULL;
8382 	status_t status;
8383 
8384 	if (statSize > sizeof(struct stat))
8385 		return B_BAD_VALUE;
8386 
8387 	// this supports different stat extensions
8388 	if (statSize < sizeof(struct stat)) {
8389 		originalStat = stat;
8390 		stat = &completeStat;
8391 	}
8392 
8393 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8394 
8395 	if (status == B_OK && originalStat != NULL)
8396 		memcpy(originalStat, stat, statSize);
8397 
8398 	return status;
8399 }
8400 
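// A minimal sketch of what the statSize mechanism above is for: a binary
// compiled against an older, shorter struct stat still gets a consistent
// prefix copied back. The old layout below is hypothetical.
#if 0
struct old_stat {
	dev_t		st_dev;
	ino_t		st_ino;
	mode_t		st_mode;
	nlink_t		st_nlink;
};

static status_t
read_old_stat_example(const char* path, struct old_stat* oldStat)
{
	// the kernel fills a complete struct stat internally and copies back
	// only the first sizeof(struct old_stat) bytes
	return _kern_read_stat(-1, path, true, (struct stat*)oldStat,
		sizeof(struct old_stat));
}
#endif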
8401 
8402 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8403 
8404 	If only \a fd is given, the stat operation associated with the type
8405 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8406 	given, this path identifies the entry for whose node to write the
8407 	stat data. If both \a fd and \a path are given and the path is absolute,
8408 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8409 	identified by \a fd and specifies the entry whose stat data shall be
8410 	written.
8411 
8412 	\param fd The FD. May be < 0.
8413 	\param path The absolute or relative path. Must not be \c NULL.
8414 	\param traverseLeafLink If \a path is given, \c true specifies that the
8415 		   function shall not stick to symlinks, but traverse them.
8416 	\param stat The buffer containing the stat data to be written.
8417 	\param statSize The size of the supplied stat buffer.
8418 	\param statMask A mask specifying which parts of the stat data shall be
8419 		   written.
8420 	\return \c B_OK, if the stat data have been written successfully,
8421 			another error code otherwise.
8422 */
8423 status_t
8424 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8425 	const struct stat* stat, size_t statSize, int statMask)
8426 {
8427 	struct stat completeStat;
8428 
8429 	if (statSize > sizeof(struct stat))
8430 		return B_BAD_VALUE;
8431 
8432 	// this supports different stat extensions
8433 	if (statSize < sizeof(struct stat)) {
8434 		memset((uint8*)&completeStat + statSize, 0,
8435 			sizeof(struct stat) - statSize);
8436 		memcpy(&completeStat, stat, statSize);
8437 		stat = &completeStat;
8438 	}
8439 
8440 	status_t status;
8441 
8442 	if (path) {
8443 		// path given: write the stat of the node referred to by (fd, path)
8444 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8445 		if (pathBuffer.InitCheck() != B_OK)
8446 			return B_NO_MEMORY;
8447 
8448 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8449 			traverseLeafLink, stat, statMask, true);
8450 	} else {
8451 		// no path given: get the FD and use the FD operation
8452 		struct file_descriptor* descriptor
8453 			= get_fd(get_current_io_context(true), fd);
8454 		if (descriptor == NULL)
8455 			return B_FILE_ERROR;
8456 
8457 		if (descriptor->ops->fd_write_stat)
8458 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8459 		else
8460 			status = B_UNSUPPORTED;
8461 
8462 		put_fd(descriptor);
8463 	}
8464 
8465 	return status;
8466 }
8467 
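// A minimal sketch of statMask usage: only the fields selected by the mask
// are applied, so a single attribute can be updated without reading the
// others first. B_STAT_MODIFICATION_TIME is the mask bit from
// <NodeMonitor.h>.
#if 0
static status_t
touch_example(const char* path, time_t now)
{
	struct stat stat;
	stat.st_mtime = now;
		// only the masked field needs to be initialized
	return _kern_write_stat(-1, path, true, &stat, sizeof(stat),
		B_STAT_MODIFICATION_TIME);
}
#endif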
8468 
8469 int
8470 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8471 {
8472 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8473 	if (pathBuffer.InitCheck() != B_OK)
8474 		return B_NO_MEMORY;
8475 
8476 	if (path != NULL)
8477 		pathBuffer.SetTo(path);
8478 
8479 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8480 		traverseLeafLink, true);
8481 }
8482 
8483 
8484 int
8485 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8486 	int openMode)
8487 {
8488 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8489 	if (pathBuffer.InitCheck() != B_OK)
8490 		return B_NO_MEMORY;
8491 
8492 	if ((openMode & O_CREAT) != 0) {
8493 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8494 			true);
8495 	}
8496 
8497 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8498 }
8499 
8500 
8501 status_t
8502 _kern_remove_attr(int fd, const char* name)
8503 {
8504 	return attr_remove(fd, name, true);
8505 }
8506 
8507 
8508 status_t
8509 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8510 	const char* toName)
8511 {
8512 	return attr_rename(fromFile, fromName, toFile, toName, true);
8513 }
8514 
8515 
8516 int
8517 _kern_open_index_dir(dev_t device)
8518 {
8519 	return index_dir_open(device, true);
8520 }
8521 
8522 
8523 status_t
8524 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8525 {
8526 	return index_create(device, name, type, flags, true);
8527 }
8528 
8529 
8530 status_t
8531 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8532 {
8533 	return index_name_read_stat(device, name, stat, true);
8534 }
8535 
8536 
8537 status_t
8538 _kern_remove_index(dev_t device, const char* name)
8539 {
8540 	return index_remove(device, name, true);
8541 }
8542 
8543 
8544 status_t
8545 _kern_getcwd(char* buffer, size_t size)
8546 {
8547 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8548 
8549 	// Call vfs to get current working directory
8550 	return get_cwd(buffer, size, true);
8551 }
8552 
8553 
8554 status_t
8555 _kern_setcwd(int fd, const char* path)
8556 {
8557 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8558 	if (pathBuffer.InitCheck() != B_OK)
8559 		return B_NO_MEMORY;
8560 
8561 	if (path != NULL)
8562 		pathBuffer.SetTo(path);
8563 
8564 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8565 }
8566 
8567 
8568 //	#pragma mark - userland syscalls
8569 
8570 
8571 dev_t
8572 _user_mount(const char* userPath, const char* userDevice,
8573 	const char* userFileSystem, uint32 flags, const char* userArgs,
8574 	size_t argsLength)
8575 {
8576 	char fileSystem[B_FILE_NAME_LENGTH];
8577 	KPath path, device;
8578 	char* args = NULL;
8579 	status_t status;
8580 
8581 	if (!IS_USER_ADDRESS(userPath)
8582 		|| !IS_USER_ADDRESS(userFileSystem)
8583 		|| !IS_USER_ADDRESS(userDevice))
8584 		return B_BAD_ADDRESS;
8585 
8586 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8587 		return B_NO_MEMORY;
8588 
8589 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8590 		return B_BAD_ADDRESS;
8591 
8592 	if (userFileSystem != NULL
8593 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8594 		return B_BAD_ADDRESS;
8595 
8596 	if (userDevice != NULL
8597 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8598 			< B_OK)
8599 		return B_BAD_ADDRESS;
8600 
8601 	if (userArgs != NULL && argsLength > 0) {
8602 		// this is a safety restriction
8603 		if (argsLength >= 65536)
8604 			return B_NAME_TOO_LONG;
8605 
8606 		args = (char*)malloc(argsLength + 1);
8607 		if (args == NULL)
8608 			return B_NO_MEMORY;
8609 
8610 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8611 			free(args);
8612 			return B_BAD_ADDRESS;
8613 		}
8614 	}
8615 	path.UnlockBuffer();
8616 	device.UnlockBuffer();
8617 
8618 	status = fs_mount(path.LockBuffer(),
8619 		userDevice != NULL ? device.Path() : NULL,
8620 		userFileSystem ? fileSystem : NULL, flags, args, false);
8621 
8622 	free(args);
8623 	return status;
8624 }
8625 
8626 
8627 status_t
8628 _user_unmount(const char* userPath, uint32 flags)
8629 {
8630 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8631 	if (pathBuffer.InitCheck() != B_OK)
8632 		return B_NO_MEMORY;
8633 
8634 	char* path = pathBuffer.LockBuffer();
8635 
8636 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8637 		return B_BAD_ADDRESS;
8638 
8639 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8640 }
8641 
8642 
8643 status_t
8644 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8645 {
8646 	struct fs_info info;
8647 	status_t status;
8648 
8649 	if (userInfo == NULL)
8650 		return B_BAD_VALUE;
8651 
8652 	if (!IS_USER_ADDRESS(userInfo))
8653 		return B_BAD_ADDRESS;
8654 
8655 	status = fs_read_info(device, &info);
8656 	if (status != B_OK)
8657 		return status;
8658 
8659 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8660 		return B_BAD_ADDRESS;
8661 
8662 	return B_OK;
8663 }
8664 
8665 
8666 status_t
8667 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8668 {
8669 	struct fs_info info;
8670 
8671 	if (userInfo == NULL)
8672 		return B_BAD_VALUE;
8673 
8674 	if (!IS_USER_ADDRESS(userInfo)
8675 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8676 		return B_BAD_ADDRESS;
8677 
8678 	return fs_write_info(device, &info, mask);
8679 }
8680 
8681 
8682 dev_t
8683 _user_next_device(int32* _userCookie)
8684 {
8685 	int32 cookie;
8686 	dev_t device;
8687 
8688 	if (!IS_USER_ADDRESS(_userCookie)
8689 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8690 		return B_BAD_ADDRESS;
8691 
8692 	device = fs_next_device(&cookie);
8693 
8694 	if (device >= B_OK) {
8695 		// update user cookie
8696 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8697 			return B_BAD_ADDRESS;
8698 	}
8699 
8700 	return device;
8701 }
8702 
8703 
8704 status_t
8705 _user_sync(void)
8706 {
8707 	return _kern_sync();
8708 }
8709 
8710 
8711 status_t
8712 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8713 	size_t infoSize)
8714 {
8715 	struct fd_info info;
8716 	uint32 cookie;
8717 
8718 	// only root can do this (or should root's group be enough?)
8719 	if (geteuid() != 0)
8720 		return B_NOT_ALLOWED;
8721 
8722 	if (infoSize != sizeof(fd_info))
8723 		return B_BAD_VALUE;
8724 
8725 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8726 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8727 		return B_BAD_ADDRESS;
8728 
8729 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8730 	if (status != B_OK)
8731 		return status;
8732 
8733 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8734 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8735 		return B_BAD_ADDRESS;
8736 
8737 	return status;
8738 }
8739 
8740 
8741 status_t
8742 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8743 	char* userPath, size_t pathLength)
8744 {
8745 	if (!IS_USER_ADDRESS(userPath))
8746 		return B_BAD_ADDRESS;
8747 
8748 	KPath path(B_PATH_NAME_LENGTH + 1);
8749 	if (path.InitCheck() != B_OK)
8750 		return B_NO_MEMORY;
8751 
8752 	// copy the leaf name onto the stack
8753 	char stackLeaf[B_FILE_NAME_LENGTH];
8754 	if (leaf) {
8755 		if (!IS_USER_ADDRESS(leaf))
8756 			return B_BAD_ADDRESS;
8757 
8758 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8759 		if (length < 0)
8760 			return length;
8761 		if (length >= B_FILE_NAME_LENGTH)
8762 			return B_NAME_TOO_LONG;
8763 
8764 		leaf = stackLeaf;
8765 	}
8766 
8767 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8768 		false, path.LockBuffer(), path.BufferSize());
8769 	if (status != B_OK)
8770 		return status;
8771 
8772 	path.UnlockBuffer();
8773 
8774 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8775 	if (length < 0)
8776 		return length;
8777 	if (length >= (int)pathLength)
8778 		return B_BUFFER_OVERFLOW;
8779 
8780 	return B_OK;
8781 }
8782 
8783 
8784 status_t
8785 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8786 {
8787 	if (userPath == NULL || buffer == NULL)
8788 		return B_BAD_VALUE;
8789 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8790 		return B_BAD_ADDRESS;
8791 
8792 	// copy path from userland
8793 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8794 	if (pathBuffer.InitCheck() != B_OK)
8795 		return B_NO_MEMORY;
8796 	char* path = pathBuffer.LockBuffer();
8797 
8798 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8799 		return B_BAD_ADDRESS;
8800 
8801 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8802 		false);
8803 	if (error != B_OK)
8804 		return error;
8805 
8806 	// copy back to userland
8807 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8808 	if (len < 0)
8809 		return len;
8810 	if (len >= B_PATH_NAME_LENGTH)
8811 		return B_BUFFER_OVERFLOW;
8812 
8813 	return B_OK;
8814 }
8815 
8816 
8817 int
8818 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8819 	int openMode, int perms)
8820 {
8821 	char name[B_FILE_NAME_LENGTH];
8822 
8823 	if (userName == NULL || device < 0 || inode < 0)
8824 		return B_BAD_VALUE;
8825 	if (!IS_USER_ADDRESS(userName)
8826 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8827 		return B_BAD_ADDRESS;
8828 
8829 	if ((openMode & O_CREAT) != 0) {
8830 		return file_create_entry_ref(device, inode, name, openMode, perms,
8831 			false);
8832 	}
8833 
8834 	return file_open_entry_ref(device, inode, name, openMode, false);
8835 }
8836 
8837 
8838 int
8839 _user_open(int fd, const char* userPath, int openMode, int perms)
8840 {
8841 	KPath path(B_PATH_NAME_LENGTH + 1);
8842 	if (path.InitCheck() != B_OK)
8843 		return B_NO_MEMORY;
8844 
8845 	char* buffer = path.LockBuffer();
8846 
8847 	if (!IS_USER_ADDRESS(userPath)
8848 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8849 		return B_BAD_ADDRESS;
8850 
8851 	if ((openMode & O_CREAT) != 0)
8852 		return file_create(fd, buffer, openMode, perms, false);
8853 
8854 	return file_open(fd, buffer, openMode, false);
8855 }
8856 
8857 
8858 int
8859 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8860 {
8861 	if (userName != NULL) {
8862 		char name[B_FILE_NAME_LENGTH];
8863 
8864 		if (!IS_USER_ADDRESS(userName)
8865 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8866 			return B_BAD_ADDRESS;
8867 
8868 		return dir_open_entry_ref(device, inode, name, false);
8869 	}
8870 	return dir_open_entry_ref(device, inode, NULL, false);
8871 }
8872 
8873 
8874 int
8875 _user_open_dir(int fd, const char* userPath)
8876 {
8877 	if (userPath == NULL)
8878 		return dir_open(fd, NULL, false);
8879 
8880 	KPath path(B_PATH_NAME_LENGTH + 1);
8881 	if (path.InitCheck() != B_OK)
8882 		return B_NO_MEMORY;
8883 
8884 	char* buffer = path.LockBuffer();
8885 
8886 	if (!IS_USER_ADDRESS(userPath)
8887 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8888 		return B_BAD_ADDRESS;
8889 
8890 	return dir_open(fd, buffer, false);
8891 }
8892 
8893 
8894 /*!	\brief Opens a directory's parent directory and returns the entry name
8895 		   of the former.
8896 
8897 	Aside from also returning the directory's entry name, this method is
8898 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8899 	equivalent if \a userName is \c NULL.
8900 
8901 	If a name buffer is supplied and the name does not fit the buffer, the
8902 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8903 
8904 	\param fd A FD referring to a directory.
8905 	\param userName Buffer the directory's entry name shall be written into.
8906 		   May be \c NULL.
8907 	\param nameLength Size of the name buffer.
8908 	\return The file descriptor of the opened parent directory, if everything
8909 			went fine, an error code otherwise.
8910 */
8911 int
8912 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8913 {
8914 	bool kernel = false;
8915 
8916 	if (userName && !IS_USER_ADDRESS(userName))
8917 		return B_BAD_ADDRESS;
8918 
8919 	// open the parent dir
8920 	int parentFD = dir_open(fd, (char*)"..", kernel);
8921 	if (parentFD < 0)
8922 		return parentFD;
8923 	FDCloser fdCloser(parentFD, kernel);
8924 
8925 	if (userName) {
8926 		// get the vnodes
8927 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8928 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8929 		VNodePutter parentVNodePutter(parentVNode);
8930 		VNodePutter dirVNodePutter(dirVNode);
8931 		if (!parentVNode || !dirVNode)
8932 			return B_FILE_ERROR;
8933 
8934 		// get the vnode name
8935 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8936 		struct dirent* buffer = (struct dirent*)_buffer;
8937 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8938 			sizeof(_buffer), get_current_io_context(false));
8939 		if (status != B_OK)
8940 			return status;
8941 
8942 		// copy the name to the userland buffer
8943 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8944 		if (len < 0)
8945 			return len;
8946 		if (len >= (int)nameLength)
8947 			return B_BUFFER_OVERFLOW;
8948 	}
8949 
8950 	return fdCloser.Detach();
8951 }
8952 
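// A minimal sketch of the intended use: reconstructing a path by walking
// towards the root while collecting entry names. Shown against the syscall
// for clarity; a real caller lives in userland (userName must be a userland
// address) and would use the libroot wrappers.
#if 0
static void
walk_to_root_example(int dirFD, char* userName, size_t nameLength)
{
	while (true) {
		int parentFD = _user_open_parent_dir(dirFD, userName, nameLength);
		if (parentFD < 0)
			break;
			// e.g. when dirFD already refers to the root directory
		// userName now contains dirFD's entry name within parentFD
		_user_close(dirFD);
		dirFD = parentFD;
	}
	_user_close(dirFD);
}
#endif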
8953 
8954 status_t
8955 _user_fcntl(int fd, int op, size_t argument)
8956 {
8957 	status_t status = common_fcntl(fd, op, argument, false);
8958 	if (op == F_SETLKW)
8959 		syscall_restart_handle_post(status);
8960 
8961 	return status;
8962 }
8963 
8964 
8965 status_t
8966 _user_fsync(int fd)
8967 {
8968 	return common_sync(fd, false);
8969 }
8970 
8971 
8972 status_t
8973 _user_flock(int fd, int operation)
8974 {
8975 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8976 
8977 	// Check if the operation is valid
8978 	switch (operation & ~LOCK_NB) {
8979 		case LOCK_UN:
8980 		case LOCK_SH:
8981 		case LOCK_EX:
8982 			break;
8983 
8984 		default:
8985 			return B_BAD_VALUE;
8986 	}
8987 
8988 	struct file_descriptor* descriptor;
8989 	struct vnode* vnode;
8990 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8991 	if (descriptor == NULL)
8992 		return B_FILE_ERROR;
8993 
8994 	if (descriptor->type != FDTYPE_FILE) {
8995 		put_fd(descriptor);
8996 		return B_BAD_VALUE;
8997 	}
8998 
8999 	struct flock flock;
9000 	flock.l_start = 0;
9001 	flock.l_len = OFF_MAX;
9002 	flock.l_whence = 0;
9003 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9004 
9005 	status_t status;
9006 	if ((operation & LOCK_UN) != 0)
9007 		status = release_advisory_lock(vnode, &flock);
9008 	else {
9009 		status = acquire_advisory_lock(vnode,
9010 			thread_get_current_thread()->team->session_id, &flock,
9011 			(operation & LOCK_NB) == 0);
9012 	}
9013 
9014 	syscall_restart_handle_post(status);
9015 
9016 	put_fd(descriptor);
9017 	return status;
9018 }
9019 
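// A minimal sketch of the mapping performed above: each flock() operation
// corresponds to a whole-file fcntl() style lock. The owner semantics --
// session vs. team -- differ, and the userland address checks are glossed
// over here.
#if 0
static status_t
flock_as_fcntl_example(int fd, int operation)
{
	struct flock flock;
	flock.l_start = 0;
	flock.l_len = OFF_MAX;
	flock.l_whence = 0;
	flock.l_type = (operation & LOCK_UN) != 0 ? F_UNLCK
		: (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;

	// LOCK_NB selects the non-blocking variant
	int op = (operation & LOCK_NB) != 0 ? F_SETLK : F_SETLKW;
	return _user_fcntl(fd, op, (size_t)&flock);
}
#endif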
9020 
9021 status_t
9022 _user_lock_node(int fd)
9023 {
9024 	return common_lock_node(fd, false);
9025 }
9026 
9027 
9028 status_t
9029 _user_unlock_node(int fd)
9030 {
9031 	return common_unlock_node(fd, false);
9032 }
9033 
9034 
9035 status_t
9036 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9037 	int perms)
9038 {
9039 	char name[B_FILE_NAME_LENGTH];
9040 	status_t status;
9041 
9042 	if (!IS_USER_ADDRESS(userName))
9043 		return B_BAD_ADDRESS;
9044 
9045 	status = user_strlcpy(name, userName, sizeof(name));
9046 	if (status < 0)
9047 		return status;
9048 
9049 	return dir_create_entry_ref(device, inode, name, perms, false);
9050 }
9051 
9052 
9053 status_t
9054 _user_create_dir(int fd, const char* userPath, int perms)
9055 {
9056 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9057 	if (pathBuffer.InitCheck() != B_OK)
9058 		return B_NO_MEMORY;
9059 
9060 	char* path = pathBuffer.LockBuffer();
9061 
9062 	if (!IS_USER_ADDRESS(userPath)
9063 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9064 		return B_BAD_ADDRESS;
9065 
9066 	return dir_create(fd, path, perms, false);
9067 }
9068 
9069 
9070 status_t
9071 _user_remove_dir(int fd, const char* userPath)
9072 {
9073 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9074 	if (pathBuffer.InitCheck() != B_OK)
9075 		return B_NO_MEMORY;
9076 
9077 	char* path = pathBuffer.LockBuffer();
9078 
9079 	if (userPath != NULL) {
9080 		if (!IS_USER_ADDRESS(userPath)
9081 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9082 			return B_BAD_ADDRESS;
9083 	}
9084 
9085 	return dir_remove(fd, userPath ? path : NULL, false);
9086 }
9087 
9088 
9089 status_t
9090 _user_read_link(int fd, const char* userPath, char* userBuffer,
9091 	size_t* userBufferSize)
9092 {
9093 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9094 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9095 		return B_NO_MEMORY;
9096 
9097 	size_t bufferSize;
9098 
9099 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9100 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9101 		return B_BAD_ADDRESS;
9102 
9103 	char* path = pathBuffer.LockBuffer();
9104 	char* buffer = linkBuffer.LockBuffer();
9105 
9106 	if (userPath) {
9107 		if (!IS_USER_ADDRESS(userPath)
9108 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9109 			return B_BAD_ADDRESS;
9110 
9111 		if (bufferSize > B_PATH_NAME_LENGTH)
9112 			bufferSize = B_PATH_NAME_LENGTH;
9113 	}
9114 
9115 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9116 		&bufferSize, false);
9117 
9118 	// we also update the bufferSize in case of errors
9119 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9120 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9121 		return B_BAD_ADDRESS;
9122 
9123 	if (status != B_OK)
9124 		return status;
9125 
9126 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9127 		return B_BAD_ADDRESS;
9128 
9129 	return B_OK;
9130 }
9131 
9132 
9133 status_t
9134 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9135 	int mode)
9136 {
9137 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9138 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9139 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9140 		return B_NO_MEMORY;
9141 
9142 	char* path = pathBuffer.LockBuffer();
9143 	char* toPath = toPathBuffer.LockBuffer();
9144 
9145 	if (!IS_USER_ADDRESS(userPath)
9146 		|| !IS_USER_ADDRESS(userToPath)
9147 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9148 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9149 		return B_BAD_ADDRESS;
9150 
9151 	return common_create_symlink(fd, path, toPath, mode, false);
9152 }
9153 
9154 
9155 status_t
9156 _user_create_link(int pathFD, const char* userPath, int toFD,
9157 	const char* userToPath, bool traverseLeafLink)
9158 {
9159 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9160 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9161 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9162 		return B_NO_MEMORY;
9163 
9164 	char* path = pathBuffer.LockBuffer();
9165 	char* toPath = toPathBuffer.LockBuffer();
9166 
9167 	if (!IS_USER_ADDRESS(userPath)
9168 		|| !IS_USER_ADDRESS(userToPath)
9169 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9170 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9171 		return B_BAD_ADDRESS;
9172 
9173 	status_t status = check_path(toPath);
9174 	if (status != B_OK)
9175 		return status;
9176 
9177 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9178 		false);
9179 }
9180 
9181 
9182 status_t
9183 _user_unlink(int fd, const char* userPath)
9184 {
9185 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9186 	if (pathBuffer.InitCheck() != B_OK)
9187 		return B_NO_MEMORY;
9188 
9189 	char* path = pathBuffer.LockBuffer();
9190 
9191 	if (!IS_USER_ADDRESS(userPath)
9192 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9193 		return B_BAD_ADDRESS;
9194 
9195 	return common_unlink(fd, path, false);
9196 }
9197 
9198 
9199 status_t
9200 _user_rename(int oldFD, const char* userOldPath, int newFD,
9201 	const char* userNewPath)
9202 {
9203 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9204 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9205 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9206 		return B_NO_MEMORY;
9207 
9208 	char* oldPath = oldPathBuffer.LockBuffer();
9209 	char* newPath = newPathBuffer.LockBuffer();
9210 
9211 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9212 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9213 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9214 		return B_BAD_ADDRESS;
9215 
9216 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9217 }
9218 
9219 
9220 status_t
9221 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9222 {
9223 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9224 	if (pathBuffer.InitCheck() != B_OK)
9225 		return B_NO_MEMORY;
9226 
9227 	char* path = pathBuffer.LockBuffer();
9228 
9229 	if (!IS_USER_ADDRESS(userPath)
9230 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9231 		return B_BAD_ADDRESS;
9232 	}
9233 
9234 	// split into directory vnode and filename path
9235 	char filename[B_FILE_NAME_LENGTH];
9236 	struct vnode* dir;
9237 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9238 	if (status != B_OK)
9239 		return status;
9240 
9241 	VNodePutter _(dir);
9242 
9243 	// the underlying FS needs to support creating FIFOs
9244 	if (!HAS_FS_CALL(dir, create_special_node))
9245 		return B_UNSUPPORTED;
9246 
9247 	// create the entry	-- the FIFO sub node is set up automatically
9248 	fs_vnode superVnode;
9249 	ino_t nodeID;
9250 	status = FS_CALL(dir, create_special_node, filename, NULL,
9251 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9252 
9253 	// create_special_node() acquired a reference for us that we don't need.
9254 	if (status == B_OK)
9255 		put_vnode(dir->mount->volume, nodeID);
9256 
9257 	return status;
9258 }
9259 
9260 
9261 status_t
9262 _user_create_pipe(int* userFDs)
9263 {
9264 	// rootfs should support creating FIFOs, but let's be sure
9265 	if (!HAS_FS_CALL(sRoot, create_special_node))
9266 		return B_UNSUPPORTED;
9267 
9268 	// create the node	-- the FIFO sub node is set up automatically
9269 	fs_vnode superVnode;
9270 	ino_t nodeID;
9271 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9272 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9273 	if (status != B_OK)
9274 		return status;
9275 
9276 	// We've got one reference to the node and need another one.
9277 	struct vnode* vnode;
9278 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9279 	if (status != B_OK) {
9280 		// that should not happen
9281 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9282 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9283 		return status;
9284 	}
9285 
9286 	// Everything looks good so far. Open two FDs for reading respectively
9287 	// writing.
9288 	int fds[2];
9289 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9290 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9291 
9292 	FDCloser closer0(fds[0], false);
9293 	FDCloser closer1(fds[1], false);
9294 
9295 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9296 
9297 	// copy FDs to userland
9298 	if (status == B_OK) {
9299 		if (!IS_USER_ADDRESS(userFDs)
9300 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9301 			status = B_BAD_ADDRESS;
9302 		}
9303 	}
9304 
9305 	// keep FDs, if everything went fine
9306 	if (status == B_OK) {
9307 		closer0.Detach();
9308 		closer1.Detach();
9309 	}
9310 
9311 	return status;
9312 }
9313 
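// A minimal sketch of the userland view: this syscall backs POSIX pipe(),
// yielding a rootfs FIFO node that has no entry in any directory.
#if 0
static void
pipe_example()
{
	int fds[2];
	if (pipe(fds) == 0) {
		write(fds[1], "ping", 4);

		char buffer[4];
		read(fds[0], buffer, sizeof(buffer));
			// reads "ping"

		close(fds[0]);
		close(fds[1]);
	}
}
#endif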
9314 
9315 status_t
9316 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9317 {
9318 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9319 	if (pathBuffer.InitCheck() != B_OK)
9320 		return B_NO_MEMORY;
9321 
9322 	char* path = pathBuffer.LockBuffer();
9323 
9324 	if (!IS_USER_ADDRESS(userPath)
9325 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9326 		return B_BAD_ADDRESS;
9327 
9328 	return common_access(fd, path, mode, effectiveUserGroup, false);
9329 }
9330 
9331 
9332 status_t
9333 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9334 	struct stat* userStat, size_t statSize)
9335 {
9336 	struct stat stat;
9337 	status_t status;
9338 
9339 	if (statSize > sizeof(struct stat))
9340 		return B_BAD_VALUE;
9341 
9342 	if (!IS_USER_ADDRESS(userStat))
9343 		return B_BAD_ADDRESS;
9344 
9345 	if (userPath) {
9346 		// path given: get the stat of the node referred to by (fd, path)
9347 		if (!IS_USER_ADDRESS(userPath))
9348 			return B_BAD_ADDRESS;
9349 
9350 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9351 		if (pathBuffer.InitCheck() != B_OK)
9352 			return B_NO_MEMORY;
9353 
9354 		char* path = pathBuffer.LockBuffer();
9355 
9356 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9357 		if (length < B_OK)
9358 			return length;
9359 		if (length >= B_PATH_NAME_LENGTH)
9360 			return B_NAME_TOO_LONG;
9361 
9362 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9363 	} else {
9364 		// no path given: get the FD and use the FD operation
9365 		struct file_descriptor* descriptor
9366 			= get_fd(get_current_io_context(false), fd);
9367 		if (descriptor == NULL)
9368 			return B_FILE_ERROR;
9369 
9370 		if (descriptor->ops->fd_read_stat)
9371 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9372 		else
9373 			status = B_UNSUPPORTED;
9374 
9375 		put_fd(descriptor);
9376 	}
9377 
9378 	if (status != B_OK)
9379 		return status;
9380 
9381 	return user_memcpy(userStat, &stat, statSize);
9382 }
9383 
9384 
9385 status_t
9386 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9387 	const struct stat* userStat, size_t statSize, int statMask)
9388 {
9389 	if (statSize > sizeof(struct stat))
9390 		return B_BAD_VALUE;
9391 
9392 	struct stat stat;
9393 
9394 	if (!IS_USER_ADDRESS(userStat)
9395 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9396 		return B_BAD_ADDRESS;
9397 
9398 	// clear additional stat fields
9399 	if (statSize < sizeof(struct stat))
9400 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9401 
9402 	status_t status;
9403 
9404 	if (userPath) {
9405 		// path given: write the stat of the node referred to by (fd, path)
9406 		if (!IS_USER_ADDRESS(userPath))
9407 			return B_BAD_ADDRESS;
9408 
9409 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9410 		if (pathBuffer.InitCheck() != B_OK)
9411 			return B_NO_MEMORY;
9412 
9413 		char* path = pathBuffer.LockBuffer();
9414 
9415 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9416 		if (length < B_OK)
9417 			return length;
9418 		if (length >= B_PATH_NAME_LENGTH)
9419 			return B_NAME_TOO_LONG;
9420 
9421 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9422 			statMask, false);
9423 	} else {
9424 		// no path given: get the FD and use the FD operation
9425 		struct file_descriptor* descriptor
9426 			= get_fd(get_current_io_context(false), fd);
9427 		if (descriptor == NULL)
9428 			return B_FILE_ERROR;
9429 
9430 		if (descriptor->ops->fd_write_stat) {
9431 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9432 				statMask);
9433 		} else
9434 			status = B_UNSUPPORTED;
9435 
9436 		put_fd(descriptor);
9437 	}
9438 
9439 	return status;
9440 }
9441 
9442 
9443 int
9444 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9445 {
9446 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9447 	if (pathBuffer.InitCheck() != B_OK)
9448 		return B_NO_MEMORY;
9449 
9450 	char* path = pathBuffer.LockBuffer();
9451 
9452 	if (userPath != NULL) {
9453 		if (!IS_USER_ADDRESS(userPath)
9454 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9455 			return B_BAD_ADDRESS;
9456 	}
9457 
9458 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9459 }
9460 
9461 
ssize_t
_user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
	size_t readBytes)
{
	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;

	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
	_user_close(attr);

	return bytes;
}


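/*!	\brief Writes \a writeBytes from \a buffer to attribute \a attribute of
	the node referred to by \a fd, (re)creating the attribute with \a type.
*/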
ssize_t
_user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
	const void* buffer, size_t writeBytes)
{
	// Support the BeOS-typical truncation semantics as well as the position
	// argument: the attribute is only truncated when writing starts at
	// offset 0.
	int attr = attr_create(fd, NULL, attribute, type,
		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
	if (attr < 0)
		return attr;

	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
	_user_close(attr);

	return bytes;
}


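/*!	\brief Retrieves type and size of attribute \a attribute of the node
	referred to by \a fd.

	Implemented on top of attr_open() and the descriptor's fd_read_stat()
	hook: the attribute's stat is read, and only st_type and st_size are
	copied out into the userland attr_info.
*/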
status_t
_user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
{
	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
	if (attr < 0)
		return attr;

	struct file_descriptor* descriptor
		= get_fd(get_current_io_context(false), attr);
	if (descriptor == NULL) {
		_user_close(attr);
		return B_FILE_ERROR;
	}

	struct stat stat;
	status_t status;
	if (descriptor->ops->fd_read_stat)
		status = descriptor->ops->fd_read_stat(descriptor, &stat);
	else
		status = B_UNSUPPORTED;

	put_fd(descriptor);
	_user_close(attr);

	if (status == B_OK) {
		attr_info info;
		info.type = stat.st_type;
		info.size = stat.st_size;

		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
			return B_BAD_ADDRESS;
	}

	return status;
}


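/*!	\brief Opens attribute \a userName of the node specified by
	(\a fd, \a userPath) and returns a new attribute FD.

	The name and the optional path are copied in from userland first; the
	call then dispatches to attr_create() or attr_open() depending on
	whether O_CREAT is set in \a openMode.
*/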
int
_user_open_attr(int fd, const char* userPath, const char* userName,
	uint32 type, int openMode)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;
	}

	if ((openMode & O_CREAT) != 0) {
		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
			false);
	}

	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
}


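/*!	\brief Removes attribute \a userName from the node referred to by \a fd.
*/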
status_t
_user_remove_attr(int fd, const char* userName)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return attr_remove(fd, name, false);
}


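/*!	\brief Renames attribute \a userFromName of the node referred to by
	\a fromFile to \a userToName on the node referred to by \a toFile.

	Both names are copied into kernel buffers before attr_rename() is
	invoked; whether cross-node renames succeed is up to the file system's
	rename hook.
*/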
status_t
_user_rename_attr(int fromFile, const char* userFromName, int toFile,
	const char* userToName)
{
	if (!IS_USER_ADDRESS(userFromName)
		|| !IS_USER_ADDRESS(userToName))
		return B_BAD_ADDRESS;

	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
	KPath toNameBuffer(B_FILE_NAME_LENGTH);
	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* fromName = fromNameBuffer.LockBuffer();
	char* toName = toNameBuffer.LockBuffer();

	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return attr_rename(fromFile, fromName, toFile, toName, false);
}


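/*!	\brief Opens the index directory of volume \a device and returns a new
	FD referring to it.
*/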
int
_user_open_index_dir(dev_t device)
{
	return index_dir_open(device, false);
}


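/*!	\brief Creates an index named \a userName of type \a type on volume
	\a device.

	The userland counterpart is fs_create_index(); an illustrative sketch
	(real Haiku API, hypothetical volume path and index name):

		#include <fs_index.h>
		#include <fs_info.h>
		#include <stdio.h>
		#include <TypeConstants.h>

		dev_t volume = dev_for_path("/boot/home");
		if (fs_create_index(volume, "Example:keyword", B_STRING_TYPE, 0) != 0)
			perror("fs_create_index");
*/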
status_t
_user_create_index(dev_t device, const char* userName, uint32 type,
	uint32 flags)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return index_create(device, name, type, flags, false);
}


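/*!	\brief Reads the stat of the index named \a userName on volume \a device
	into \a userStat.

	The index name is copied in from userland, index_name_read_stat() does
	the actual work, and on success the resulting stat is copied back out.
*/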
status_t
_user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
{
	char name[B_FILE_NAME_LENGTH];
	struct stat stat;
	status_t status;

	if (!IS_USER_ADDRESS(userName)
		|| !IS_USER_ADDRESS(userStat)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	status = index_name_read_stat(device, name, &stat, false);
	if (status == B_OK) {
		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
			return B_BAD_ADDRESS;
	}

	return status;
}


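/*!	\brief Removes the index named \a userName from volume \a device.
*/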
status_t
_user_remove_index(dev_t device, const char* userName)
{
	char name[B_FILE_NAME_LENGTH];

	if (!IS_USER_ADDRESS(userName)
		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	return index_remove(device, name, false);
}


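/*!	\brief Copies the path of the current working directory into
	\a userBuffer.

	The requested \a size is clamped to kMaxPathLength before the kernel
	buffer is allocated; get_cwd() computes the path, which is then copied
	back to userland.
*/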
status_t
_user_getcwd(char* userBuffer, size_t size)
{
	if (size == 0)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userBuffer))
		return B_BAD_ADDRESS;

	if (size > kMaxPathLength)
		size = kMaxPathLength;

	KPath pathBuffer(size);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	TRACE(("user_getcwd: buf %p, %lu\n", userBuffer, (unsigned long)size));

	char* path = pathBuffer.LockBuffer();

	status_t status = get_cwd(path, size, false);
	if (status != B_OK)
		return status;

	// copy the result back to userland
	if (user_strlcpy(userBuffer, path, size) < B_OK)
		return B_BAD_ADDRESS;

	return status;
}


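/*!	\brief Sets the current working directory to the node specified by
	(\a fd, \a userPath).

	With a NULL \a userPath the directory \a fd refers to becomes the cwd
	itself, which is presumably how fchdir() is implemented on top of this
	syscall.
*/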
status_t
_user_setcwd(int fd, const char* userPath)
{
	TRACE(("user_setcwd: path = %p\n", userPath));

	KPath pathBuffer(B_PATH_NAME_LENGTH);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char* path = pathBuffer.LockBuffer();

	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;
	}

	return set_cwd(fd, userPath != NULL ? path : NULL, false);
}


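/*!	\brief Sets the calling team's I/O context root to the node at
	\a userPath, i.e. implements chroot().

	Restricted to the superuser. The new root vnode is resolved with symlink
	traversal; swapping it into the io_context happens under
	sIOContextRootLock, and the reference to the previous root is released
	afterwards.
*/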
status_t
_user_change_root(const char* userPath)
{
	// only the root user is allowed to chroot()
	if (geteuid() != 0)
		return B_NOT_ALLOWED;

	// allocate a path buffer
	KPath pathBuffer(B_PATH_NAME_LENGTH);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// copy the userland path into the kernel buffer
	char* path = pathBuffer.LockBuffer();
	if (userPath != NULL) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;
	}

	// get the vnode, traversing symlinks
	struct vnode* vnode;
	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
	if (status != B_OK)
		return status;

	// set the new root under the I/O context root lock
	struct io_context* context = get_current_io_context(false);
	mutex_lock(&sIOContextRootLock);
	struct vnode* oldRoot = context->root;
	context->root = vnode;
	mutex_unlock(&sIOContextRootLock);

	// release our reference to the previous root
	put_vnode(oldRoot);

	return B_OK;
}


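/*!	\brief Opens a query \a userQuery on volume \a device and returns a new
	FD for reading the results.

	The query string is limited to 64 KB. For live queries, updates are
	delivered to (\a port, \a token). From userland this backs the fs_query.h
	wrappers; an illustrative sketch (real Haiku API, hypothetical query
	expression):

		#include <dirent.h>
		#include <fs_info.h>
		#include <fs_query.h>
		#include <stdio.h>

		dev_t volume = dev_for_path("/boot/home");
		DIR* query = fs_open_query(volume, "name==\"*.cpp\"", 0);
		if (query != NULL) {
			while (struct dirent* entry = fs_read_query(query))
				printf("match: %s\n", entry->d_name);
			fs_close_query(query);
		}
*/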
int
_user_open_query(dev_t device, const char* userQuery, size_t queryLength,
	uint32 flags, port_id port, int32 token)
{
	char* query;

	if (device < 0 || userQuery == NULL || queryLength == 0)
		return B_BAD_VALUE;

	// safety restriction: limit the query string to 64 KB
	if (queryLength >= 65536)
		return B_NAME_TOO_LONG;

	query = (char*)malloc(queryLength + 1);
	if (query == NULL)
		return B_NO_MEMORY;
	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
		free(query);
		return B_BAD_ADDRESS;
	}

	int fd = query_open(device, query, flags, port, token, false);

	free(query);
	return fd;
}


#include "vfs_request_io.cpp"