xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 5b189b0e1e2f51f367bfcb126b2f00a3702f352d)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <NodeMonitor.h>
30 #include <OS.h>
31 #include <StorageDefs.h>
32 
33 #include <AutoDeleter.h>
34 #include <block_cache.h>
35 #include <boot/kernel_args.h>
36 #include <debug_heap.h>
37 #include <disk_device_manager/KDiskDevice.h>
38 #include <disk_device_manager/KDiskDeviceManager.h>
39 #include <disk_device_manager/KDiskDeviceUtils.h>
40 #include <disk_device_manager/KDiskSystem.h>
41 #include <fd.h>
42 #include <file_cache.h>
43 #include <fs/node_monitor.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 #include <wait_for_objects.h>
57 
58 #include "EntryCache.h"
59 #include "fifo.h"
60 #include "IORequest.h"
61 #include "unused_vnodes.h"
62 #include "vfs_tracing.h"
63 #include "Vnode.h"
64 #include "../cache/vnode_store.h"
65 
66 
67 //#define TRACE_VFS
68 #ifdef TRACE_VFS
69 #	define TRACE(x) dprintf x
70 #	define FUNCTION(x) dprintf x
71 #else
72 #	define TRACE(x) ;
73 #	define FUNCTION(x) ;
74 #endif
75 
76 #define ADD_DEBUGGER_COMMANDS
77 
78 
79 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
80 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
81 
82 #if KDEBUG
83 #	define FS_CALL(vnode, op, params...) \
84 		( HAS_FS_CALL(vnode, op) ? \
85 			vnode->ops->op(vnode->mount->volume, vnode, params) \
86 			: (panic("FS_CALL op " #op " is NULL"), 0))
87 #	define FS_CALL_NO_PARAMS(vnode, op) \
88 		( HAS_FS_CALL(vnode, op) ? \
89 			vnode->ops->op(vnode->mount->volume, vnode) \
90 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
91 #	define FS_MOUNT_CALL(mount, op, params...) \
92 		( HAS_FS_MOUNT_CALL(mount, op) ? \
93 			mount->volume->ops->op(mount->volume, params) \
94 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
95 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
96 		( HAS_FS_MOUNT_CALL(mount, op) ? \
97 			mount->volume->ops->op(mount->volume) \
98 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
99 #else
100 #	define FS_CALL(vnode, op, params...) \
101 			vnode->ops->op(vnode->mount->volume, vnode, params)
102 #	define FS_CALL_NO_PARAMS(vnode, op) \
103 			vnode->ops->op(vnode->mount->volume, vnode)
104 #	define FS_MOUNT_CALL(mount, op, params...) \
105 			mount->volume->ops->op(mount->volume, params)
106 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
107 			mount->volume->ops->op(mount->volume)
108 #endif
109 
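// A typical dispatch through these macros first checks that the FS actually
// implements the hook; an illustrative sketch (mirroring uses further down in
// this file, e.g. in normalize_flock()):
//
//	if (!HAS_FS_CALL(vnode, read_stat))
//		return B_UNSUPPORTED;
//	status_t status = FS_CALL(vnode, read_stat, &stat);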
110 
const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()) -- this does not
	// depend on PATH_MAX.
114 
115 
116 typedef DoublyLinkedList<vnode> VnodeList;
117 
118 /*!	\brief Structure to manage a mounted file system
119 
	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted, the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
129 */
130 struct fs_mount {
131 	fs_mount()
132 		:
133 		volume(NULL),
134 		device_name(NULL)
135 	{
136 		mutex_init(&lock, "mount lock");
137 	}
138 
139 	~fs_mount()
140 	{
141 		mutex_destroy(&lock);
142 		free(device_name);
143 
144 		while (volume) {
145 			fs_volume* superVolume = volume->super_volume;
146 
147 			if (volume->file_system != NULL)
148 				put_module(volume->file_system->info.name);
149 
150 			free(volume->file_system_name);
151 			free(volume);
152 			volume = superVolume;
153 		}
154 	}
155 
156 	struct fs_mount* next;
157 	dev_t			id;
158 	fs_volume*		volume;
159 	char*			device_name;
160 	mutex			lock;	// guards the vnodes list
161 	struct vnode*	root_vnode;
162 	struct vnode*	covers_vnode;	// immutable
163 	KPartition*		partition;
164 	VnodeList		vnodes;
165 	EntryCache		entry_cache;
166 	bool			unmounting;
167 	bool			owns_file_device;
168 };
169 
170 
171 namespace {
172 
173 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
174 	list_link		link;
175 	void*			bound_to;
176 	team_id			team;
177 	pid_t			session;
178 	off_t			start;
179 	off_t			end;
180 	bool			shared;
181 };
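
// bound_to distinguishes the two flavors of advisory locks: it points at the
// owning file_descriptor for BSD flock() style locks and at the owning
// io_context for POSIX fcntl() style locks (see acquire_advisory_lock()
// further down in this file).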
182 
183 typedef DoublyLinkedList<advisory_lock> LockList;
184 
185 } // namespace
186 
187 
188 struct advisory_locking {
189 	sem_id			lock;
190 	sem_id			wait_sem;
191 	LockList		locks;
192 
193 	advisory_locking()
194 		:
195 		lock(-1),
196 		wait_sem(-1)
197 	{
198 	}
199 
200 	~advisory_locking()
201 	{
202 		if (lock >= 0)
203 			delete_sem(lock);
204 		if (wait_sem >= 0)
205 			delete_sem(wait_sem);
206 	}
207 };
208 
209 /*!	\brief Guards sMountsTable.
210 
	The holder is allowed read/write access to sMountsTable.
212 	Manipulation of the fs_mount structures themselves
213 	(and their destruction) requires different locks though.
214 */
215 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
216 
217 /*!	\brief Guards mount/unmount operations.
218 
	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are
	  immutable after initialization will not be modified.
225 
226 	The thread trying to lock the lock must not hold sVnodeLock or
227 	sMountMutex.
228 */
229 static recursive_lock sMountOpLock;
230 
231 /*!	\brief Guards sVnodeTable.
232 
	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type can also
	be write accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write locking
	sVnodeLock.
241 
242 	The thread trying to acquire the lock must not hold sMountMutex.
243 	You must not hold this lock when calling create_sem(), as this might call
244 	vfs_free_unused_vnodes() and thus cause a deadlock.
245 */
246 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
247 
248 /*!	\brief Guards io_context::root.
249 
250 	Must be held when setting or getting the io_context::root field.
251 	The only operation allowed while holding this lock besides getting or
252 	setting the field is inc_vnode_ref_count() on io_context::root.
253 */
254 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
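
// To summarize the notes above, the implied lock acquisition order is
// (outermost first): sMountOpLock, then sVnodeLock, then sMountMutex; the
// per-vnode lock nests inside an (at least read-) locked sVnodeLock.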
255 
256 
257 namespace {
258 
259 struct vnode_hash_key {
260 	dev_t	device;
261 	ino_t	vnode;
262 };
263 
264 struct VnodeHash {
265 	typedef vnode_hash_key	KeyType;
266 	typedef	struct vnode	ValueType;
267 
268 #define VHASH(mountid, vnodeid) \
269 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
270 
271 	size_t HashKey(KeyType key) const
272 	{
273 		return VHASH(key.device, key.vnode);
274 	}
275 
276 	size_t Hash(ValueType* vnode) const
277 	{
278 		return VHASH(vnode->device, vnode->id);
279 	}
280 
281 #undef VHASH
282 
283 	bool Compare(KeyType key, ValueType* vnode) const
284 	{
285 		return vnode->device == key.device && vnode->id == key.vnode;
286 	}
287 
288 	ValueType*& GetLink(ValueType* value) const
289 	{
290 		return value->next;
291 	}
292 };
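
// The hash folds the 64-bit node ID into 32 bits (high word + low word) and
// XORs in the mount ID. For example, device 3 with node ID 0x200000001
// hashes to (2 + 1) ^ 3 == 0.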
293 
294 typedef BOpenHashTable<VnodeHash> VnodeTable;
295 
296 
297 struct MountHash {
298 	typedef dev_t			KeyType;
299 	typedef	struct fs_mount	ValueType;
300 
301 	size_t HashKey(KeyType key) const
302 	{
303 		return key;
304 	}
305 
306 	size_t Hash(ValueType* mount) const
307 	{
308 		return mount->id;
309 	}
310 
311 	bool Compare(KeyType key, ValueType* mount) const
312 	{
313 		return mount->id == key;
314 	}
315 
316 	ValueType*& GetLink(ValueType* value) const
317 	{
318 		return value->next;
319 	}
320 };
321 
322 typedef BOpenHashTable<MountHash> MountTable;
323 
324 } // namespace
325 
326 
327 #define VNODE_HASH_TABLE_SIZE 1024
328 static VnodeTable* sVnodeTable;
329 static struct vnode* sRoot;
330 
331 #define MOUNTS_HASH_TABLE_SIZE 16
332 static MountTable* sMountsTable;
333 static dev_t sNextMountID = 1;
334 
335 #define MAX_TEMP_IO_VECS 8
336 
// How long to wait for busy vnodes: 2000 retries at 5000 µs each (10s total)
338 #define BUSY_VNODE_RETRIES 2000
339 #define BUSY_VNODE_DELAY 5000
340 
341 mode_t __gUmask = 022;
342 
343 /* function declarations */
344 
345 static void free_unused_vnodes();
346 
347 // file descriptor operation prototypes
348 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
349 	void* buffer, size_t* _bytes);
350 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
351 	const void* buffer, size_t* _bytes);
352 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
353 	int seekType);
354 static void file_free_fd(struct file_descriptor* descriptor);
355 static status_t file_close(struct file_descriptor* descriptor);
356 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
357 	struct selectsync* sync);
358 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
359 	struct selectsync* sync);
360 static status_t dir_read(struct io_context* context,
361 	struct file_descriptor* descriptor, struct dirent* buffer,
362 	size_t bufferSize, uint32* _count);
363 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
364 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
365 static status_t dir_rewind(struct file_descriptor* descriptor);
366 static void dir_free_fd(struct file_descriptor* descriptor);
367 static status_t dir_close(struct file_descriptor* descriptor);
368 static status_t attr_dir_read(struct io_context* context,
369 	struct file_descriptor* descriptor, struct dirent* buffer,
370 	size_t bufferSize, uint32* _count);
371 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
372 static void attr_dir_free_fd(struct file_descriptor* descriptor);
373 static status_t attr_dir_close(struct file_descriptor* descriptor);
374 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
375 	void* buffer, size_t* _bytes);
376 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
377 	const void* buffer, size_t* _bytes);
378 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
379 	int seekType);
380 static void attr_free_fd(struct file_descriptor* descriptor);
381 static status_t attr_close(struct file_descriptor* descriptor);
382 static status_t attr_read_stat(struct file_descriptor* descriptor,
383 	struct stat* statData);
384 static status_t attr_write_stat(struct file_descriptor* descriptor,
385 	const struct stat* stat, int statMask);
386 static status_t index_dir_read(struct io_context* context,
387 	struct file_descriptor* descriptor, struct dirent* buffer,
388 	size_t bufferSize, uint32* _count);
389 static status_t index_dir_rewind(struct file_descriptor* descriptor);
390 static void index_dir_free_fd(struct file_descriptor* descriptor);
391 static status_t index_dir_close(struct file_descriptor* descriptor);
392 static status_t query_read(struct io_context* context,
393 	struct file_descriptor* descriptor, struct dirent* buffer,
394 	size_t bufferSize, uint32* _count);
395 static status_t query_rewind(struct file_descriptor* descriptor);
396 static void query_free_fd(struct file_descriptor* descriptor);
397 static status_t query_close(struct file_descriptor* descriptor);
398 
399 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
400 	void* buffer, size_t length);
401 static status_t common_read_stat(struct file_descriptor* descriptor,
402 	struct stat* statData);
403 static status_t common_write_stat(struct file_descriptor* descriptor,
404 	const struct stat* statData, int statMask);
405 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
406 	struct stat* stat, bool kernel);
407 
408 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
409 	bool traverseLeafLink, int count, bool kernel,
410 	struct vnode** _vnode, ino_t* _parentID);
411 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
412 	size_t bufferSize, bool kernel);
413 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
414 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
415 static void inc_vnode_ref_count(struct vnode* vnode);
416 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
417 	bool reenter);
418 static inline void put_vnode(struct vnode* vnode);
419 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
420 	bool kernel);
421 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
422 
423 
424 static struct fd_ops sFileOps = {
425 	file_read,
426 	file_write,
427 	file_seek,
428 	common_ioctl,
429 	NULL,		// set_flags
430 	file_select,
431 	file_deselect,
432 	NULL,		// read_dir()
433 	NULL,		// rewind_dir()
434 	common_read_stat,
435 	common_write_stat,
436 	file_close,
437 	file_free_fd
438 };
439 
440 static struct fd_ops sDirectoryOps = {
441 	NULL,		// read()
442 	NULL,		// write()
443 	NULL,		// seek()
444 	common_ioctl,
445 	NULL,		// set_flags
446 	NULL,		// select()
447 	NULL,		// deselect()
448 	dir_read,
449 	dir_rewind,
450 	common_read_stat,
451 	common_write_stat,
452 	dir_close,
453 	dir_free_fd
454 };
455 
456 static struct fd_ops sAttributeDirectoryOps = {
457 	NULL,		// read()
458 	NULL,		// write()
459 	NULL,		// seek()
460 	common_ioctl,
461 	NULL,		// set_flags
462 	NULL,		// select()
463 	NULL,		// deselect()
464 	attr_dir_read,
465 	attr_dir_rewind,
466 	common_read_stat,
467 	common_write_stat,
468 	attr_dir_close,
469 	attr_dir_free_fd
470 };
471 
472 static struct fd_ops sAttributeOps = {
473 	attr_read,
474 	attr_write,
475 	attr_seek,
476 	common_ioctl,
477 	NULL,		// set_flags
478 	NULL,		// select()
479 	NULL,		// deselect()
480 	NULL,		// read_dir()
481 	NULL,		// rewind_dir()
482 	attr_read_stat,
483 	attr_write_stat,
484 	attr_close,
485 	attr_free_fd
486 };
487 
488 static struct fd_ops sIndexDirectoryOps = {
489 	NULL,		// read()
490 	NULL,		// write()
491 	NULL,		// seek()
492 	NULL,		// ioctl()
493 	NULL,		// set_flags
494 	NULL,		// select()
495 	NULL,		// deselect()
496 	index_dir_read,
497 	index_dir_rewind,
498 	NULL,		// read_stat()
499 	NULL,		// write_stat()
500 	index_dir_close,
501 	index_dir_free_fd
502 };
503 
504 #if 0
505 static struct fd_ops sIndexOps = {
506 	NULL,		// read()
507 	NULL,		// write()
508 	NULL,		// seek()
509 	NULL,		// ioctl()
510 	NULL,		// set_flags
511 	NULL,		// select()
512 	NULL,		// deselect()
513 	NULL,		// dir_read()
514 	NULL,		// dir_rewind()
515 	index_read_stat,	// read_stat()
516 	NULL,		// write_stat()
517 	NULL,		// dir_close()
518 	NULL		// free_fd()
519 };
520 #endif
521 
522 static struct fd_ops sQueryOps = {
523 	NULL,		// read()
524 	NULL,		// write()
525 	NULL,		// seek()
526 	NULL,		// ioctl()
527 	NULL,		// set_flags
528 	NULL,		// select()
529 	NULL,		// deselect()
530 	query_read,
531 	query_rewind,
532 	NULL,		// read_stat()
533 	NULL,		// write_stat()
534 	query_close,
535 	query_free_fd
536 };
537 
538 
539 namespace {
540 
541 class VNodePutter {
542 public:
543 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
544 
545 	~VNodePutter()
546 	{
547 		Put();
548 	}
549 
550 	void SetTo(struct vnode* vnode)
551 	{
552 		Put();
553 		fVNode = vnode;
554 	}
555 
556 	void Put()
557 	{
558 		if (fVNode) {
559 			put_vnode(fVNode);
560 			fVNode = NULL;
561 		}
562 	}
563 
564 	struct vnode* Detach()
565 	{
566 		struct vnode* vnode = fVNode;
567 		fVNode = NULL;
568 		return vnode;
569 	}
570 
571 private:
572 	struct vnode* fVNode;
573 };
574 
575 
576 class FDCloser {
577 public:
578 	FDCloser() : fFD(-1), fKernel(true) {}
579 
580 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
581 
582 	~FDCloser()
583 	{
584 		Close();
585 	}
586 
587 	void SetTo(int fd, bool kernel)
588 	{
589 		Close();
590 		fFD = fd;
591 		fKernel = kernel;
592 	}
593 
594 	void Close()
595 	{
596 		if (fFD >= 0) {
597 			if (fKernel)
598 				_kern_close(fFD);
599 			else
600 				_user_close(fFD);
601 			fFD = -1;
602 		}
603 	}
604 
605 	int Detach()
606 	{
607 		int fd = fFD;
608 		fFD = -1;
609 		return fd;
610 	}
611 
612 private:
613 	int		fFD;
614 	bool	fKernel;
615 };
616 
617 } // namespace
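
// Both helpers are used pervasively below; a hypothetical usage sketch:
//
//	struct vnode* vnode;
//	if (get_vnode(device, nodeID, &vnode, true, 0) == B_OK) {
//		VNodePutter vnodePutter(vnode);
//		// ... work with vnode; put_vnode() runs when vnodePutter goes out
//		// of scope, unless Detach() is called to keep the reference.
//	}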
618 
619 
620 #if VFS_PAGES_IO_TRACING
621 
622 namespace VFSPagesIOTracing {
623 
624 class PagesIOTraceEntry : public AbstractTraceEntry {
625 protected:
626 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
627 		const generic_io_vec* vecs, uint32 count, uint32 flags,
628 		generic_size_t bytesRequested, status_t status,
629 		generic_size_t bytesTransferred)
630 		:
631 		fVnode(vnode),
632 		fMountID(vnode->mount->id),
633 		fNodeID(vnode->id),
634 		fCookie(cookie),
635 		fPos(pos),
636 		fCount(count),
637 		fFlags(flags),
638 		fBytesRequested(bytesRequested),
639 		fStatus(status),
640 		fBytesTransferred(bytesTransferred)
641 	{
642 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
643 			sizeof(generic_io_vec) * count, false);
644 	}
645 
646 	void AddDump(TraceOutput& out, const char* mode)
647 	{
648 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
649 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
650 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
651 			(uint64)fBytesRequested);
652 
653 		if (fVecs != NULL) {
654 			for (uint32 i = 0; i < fCount; i++) {
655 				if (i > 0)
656 					out.Print(", ");
657 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
658 					(uint64)fVecs[i].length);
659 			}
660 		}
661 
662 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
663 			"transferred: %" B_PRIu64, fFlags, fStatus,
664 			(uint64)fBytesTransferred);
665 	}
666 
667 protected:
668 	struct vnode*	fVnode;
669 	dev_t			fMountID;
670 	ino_t			fNodeID;
671 	void*			fCookie;
672 	off_t			fPos;
673 	generic_io_vec*	fVecs;
674 	uint32			fCount;
675 	uint32			fFlags;
676 	generic_size_t	fBytesRequested;
677 	status_t		fStatus;
678 	generic_size_t	fBytesTransferred;
679 };
680 
681 
682 class ReadPages : public PagesIOTraceEntry {
683 public:
684 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
685 		const generic_io_vec* vecs, uint32 count, uint32 flags,
686 		generic_size_t bytesRequested, status_t status,
687 		generic_size_t bytesTransferred)
688 		:
689 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
690 			bytesRequested, status, bytesTransferred)
691 	{
692 		Initialized();
693 	}
694 
695 	virtual void AddDump(TraceOutput& out)
696 	{
697 		PagesIOTraceEntry::AddDump(out, "read");
698 	}
699 };
700 
701 
702 class WritePages : public PagesIOTraceEntry {
703 public:
704 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
705 		const generic_io_vec* vecs, uint32 count, uint32 flags,
706 		generic_size_t bytesRequested, status_t status,
707 		generic_size_t bytesTransferred)
708 		:
709 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
710 			bytesRequested, status, bytesTransferred)
711 	{
712 		Initialized();
713 	}
714 
715 	virtual void AddDump(TraceOutput& out)
716 	{
717 		PagesIOTraceEntry::AddDump(out, "write");
718 	}
719 };
720 
721 }	// namespace VFSPagesIOTracing
722 
723 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
724 #else
725 #	define TPIO(x) ;
726 #endif	// VFS_PAGES_IO_TRACING
727 
728 
729 /*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
731 */
732 static struct fs_mount*
733 find_mount(dev_t id)
734 {
735 	ASSERT_LOCKED_MUTEX(&sMountMutex);
736 
737 	return sMountsTable->Lookup(id);
738 }
739 
740 
741 static status_t
742 get_mount(dev_t id, struct fs_mount** _mount)
743 {
744 	struct fs_mount* mount;
745 
746 	ReadLocker nodeLocker(sVnodeLock);
747 	MutexLocker mountLocker(sMountMutex);
748 
749 	mount = find_mount(id);
750 	if (mount == NULL)
751 		return B_BAD_VALUE;
752 
753 	struct vnode* rootNode = mount->root_vnode;
754 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
755 		|| rootNode->ref_count == 0) {
756 		// might have been called during a mount/unmount operation
757 		return B_BUSY;
758 	}
759 
760 	inc_vnode_ref_count(rootNode);
761 	*_mount = mount;
762 	return B_OK;
763 }
764 
765 
766 static void
767 put_mount(struct fs_mount* mount)
768 {
769 	if (mount)
770 		put_vnode(mount->root_vnode);
771 }
772 
773 
774 /*!	Tries to open the specified file system module.
775 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
778 */
779 static file_system_module_info*
780 get_file_system(const char* fsName)
781 {
782 	char name[B_FILE_NAME_LENGTH];
783 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
784 		// construct module name if we didn't get one
785 		// (we currently support only one API)
786 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
787 		fsName = NULL;
788 	}
789 
790 	file_system_module_info* info;
791 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
792 		return NULL;
793 
794 	return info;
795 }
796 
797 
798 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
799 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
800 	The name is allocated for you, and you have to free() it when you're
801 	done with it.
802 	Returns NULL if the required memory is not available.
803 */
804 static char*
805 get_file_system_name(const char* fsName)
806 {
807 	const size_t length = strlen("file_systems/");
808 
809 	if (strncmp(fsName, "file_systems/", length)) {
810 		// the name already seems to be the module's file name
811 		return strdup(fsName);
812 	}
813 
814 	fsName += length;
815 	const char* end = strchr(fsName, '/');
816 	if (end == NULL) {
817 		// this doesn't seem to be a valid name, but well...
818 		return strdup(fsName);
819 	}
820 
821 	// cut off the trailing /v1
822 
823 	char* name = (char*)malloc(end + 1 - fsName);
824 	if (name == NULL)
825 		return NULL;
826 
827 	strlcpy(name, fsName, end + 1 - fsName);
828 	return name;
829 }
830 
831 
/*!	Accepts a list of file system names separated by colons, one for each
	layer, and returns the file system name for the specified layer.
834 	The name is allocated for you, and you have to free() it when you're
835 	done with it.
836 	Returns NULL if the required memory is not available or if there is no
837 	name for the specified layer.
838 */
839 static char*
840 get_file_system_name_for_layer(const char* fsNames, int32 layer)
841 {
842 	while (layer >= 0) {
843 		const char* end = strchr(fsNames, ':');
844 		if (end == NULL) {
845 			if (layer == 0)
846 				return strdup(fsNames);
847 			return NULL;
848 		}
849 
850 		if (layer == 0) {
851 			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
854 			return result;
855 		}
856 
857 		fsNames = end + 1;
858 		layer--;
859 	}
860 
861 	return NULL;
862 }
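
// For example, with fsNames "bfs:write_overlay" layer 0 yields "bfs",
// layer 1 yields "write_overlay", and layer 2 yields NULL.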
863 
864 
865 static void
866 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
867 {
868 	MutexLocker _(mount->lock);
869 	mount->vnodes.Add(vnode);
870 }
871 
872 
873 static void
874 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
875 {
876 	MutexLocker _(mount->lock);
877 	mount->vnodes.Remove(vnode);
878 }
879 
880 
881 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
882 
883 	The caller must hold the sVnodeLock (read lock at least).
884 
885 	\param mountID the mount ID.
886 	\param vnodeID the node ID.
887 
888 	\return The vnode structure, if it was found in the hash table, \c NULL
889 			otherwise.
890 */
891 static struct vnode*
892 lookup_vnode(dev_t mountID, ino_t vnodeID)
893 {
894 	struct vnode_hash_key key;
895 
896 	key.device = mountID;
897 	key.vnode = vnodeID;
898 
899 	return sVnodeTable->Lookup(key);
900 }
901 
902 
903 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
904 
	This will also wait for BUSY_VNODE_DELAY before returning, if one should
	still wait for the vnode to become unbusy.
907 
908 	\return \c true if one should retry, \c false if not.
909 */
910 static bool
911 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
912 {
913 	if (--tries < 0) {
914 		// vnode doesn't seem to become unbusy
915 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
916 			" is not becoming unbusy!\n", mountID, vnodeID);
917 		return false;
918 	}
919 	snooze(BUSY_VNODE_DELAY);
920 	return true;
921 }
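
// Callers initialize a counter to BUSY_VNODE_RETRIES and invoke this in their
// retry loop (see get_vnode() below); at 5000 µs per retry that amounts to
// roughly 10 seconds of waiting before giving up on a busy vnode.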
922 
923 
924 /*!	Creates a new vnode with the given mount and node ID.
925 	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
927 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
928 	error the lock is not held on return.
929 
930 	\param mountID The mount ID.
931 	\param vnodeID The vnode ID.
932 	\param _vnode Will be set to the new vnode on success.
933 	\param _nodeCreated Will be set to \c true when the returned vnode has
934 		been newly created, \c false when it already existed. Will not be
935 		changed on error.
936 	\return \c B_OK, when the vnode was successfully created and inserted or
937 		a node with the given ID was found, \c B_NO_MEMORY or
938 		\c B_ENTRY_NOT_FOUND on error.
939 */
940 static status_t
941 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
942 	bool& _nodeCreated)
943 {
944 	FUNCTION(("create_new_vnode_and_lock()\n"));
945 
946 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
947 	if (vnode == NULL)
948 		return B_NO_MEMORY;
949 
950 	// initialize basic values
951 	memset(vnode, 0, sizeof(struct vnode));
952 	vnode->device = mountID;
953 	vnode->id = vnodeID;
954 	vnode->ref_count = 1;
955 	vnode->SetBusy(true);
956 
957 	// look up the node -- it might have been added by someone else in the
958 	// meantime
959 	rw_lock_write_lock(&sVnodeLock);
960 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
961 	if (existingVnode != NULL) {
962 		free(vnode);
963 		_vnode = existingVnode;
964 		_nodeCreated = false;
965 		return B_OK;
966 	}
967 
968 	// get the mount structure
969 	mutex_lock(&sMountMutex);
970 	vnode->mount = find_mount(mountID);
971 	if (!vnode->mount || vnode->mount->unmounting) {
972 		mutex_unlock(&sMountMutex);
973 		rw_lock_write_unlock(&sVnodeLock);
974 		free(vnode);
975 		return B_ENTRY_NOT_FOUND;
976 	}
977 
978 	// add the vnode to the mount's node list and the hash table
979 	sVnodeTable->Insert(vnode);
980 	add_vnode_to_mount_list(vnode, vnode->mount);
981 
982 	mutex_unlock(&sMountMutex);
983 
984 	_vnode = vnode;
985 	_nodeCreated = true;
986 
	// keep sVnodeLock write-locked for the caller
988 	return B_OK;
989 }
990 
991 
992 /*!	Frees the vnode and all resources it has acquired, and removes
993 	it from the vnode hash as well as from its mount structure.
994 	Will also make sure that any cache modifications are written back.
995 */
996 static void
997 free_vnode(struct vnode* vnode, bool reenter)
998 {
999 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1000 		vnode);
1001 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1002 
1003 	// write back any changes in this vnode's cache -- but only
1004 	// if the vnode won't be deleted, in which case the changes
1005 	// will be discarded
1006 
1007 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1008 		FS_CALL_NO_PARAMS(vnode, fsync);
1009 
1010 	// Note: If this vnode has a cache attached, there will still be two
1011 	// references to that cache at this point. The last one belongs to the vnode
1012 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1013 	// cache. Each but the last reference to a cache also includes a reference
1014 	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count ever has a chance
	// to drop to 0. Deleting the file cache now will cause the next-to-last
1017 	// cache reference to be released, which will also release a (no longer
1018 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1019 	// count, so that it will neither become negative nor 0.
1020 	vnode->ref_count = 2;
1021 
1022 	if (!vnode->IsUnpublished()) {
1023 		if (vnode->IsRemoved())
1024 			FS_CALL(vnode, remove_vnode, reenter);
1025 		else
1026 			FS_CALL(vnode, put_vnode, reenter);
1027 	}
1028 
1029 	// If the vnode has a VMCache attached, make sure that it won't try to get
1030 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1031 	// long as the vnode is busy and in the hash, that won't happen, but as
1032 	// soon as we've removed it from the hash, it could reload the vnode -- with
1033 	// a new cache attached!
1034 	if (vnode->cache != NULL)
1035 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1036 
1037 	// The file system has removed the resources of the vnode now, so we can
1038 	// make it available again (by removing the busy vnode from the hash).
1039 	rw_lock_write_lock(&sVnodeLock);
1040 	sVnodeTable->Remove(vnode);
1041 	rw_lock_write_unlock(&sVnodeLock);
1042 
1043 	// if we have a VMCache attached, remove it
1044 	if (vnode->cache)
1045 		vnode->cache->ReleaseRef();
1046 
1047 	vnode->cache = NULL;
1048 
1049 	remove_vnode_from_mount_list(vnode, vnode->mount);
1050 
1051 	free(vnode);
1052 }
1053 
1054 
1055 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1056 	if the counter dropped to 0.
1057 
1058 	The caller must, of course, own a reference to the vnode to call this
1059 	function.
1060 	The caller must not hold the sVnodeLock or the sMountMutex.
1061 
1062 	\param vnode the vnode.
1063 	\param alwaysFree don't move this vnode into the unused list, but really
1064 		   delete it if possible.
1065 	\param reenter \c true, if this function is called (indirectly) from within
1066 		   a file system. This will be passed to file system hooks only.
1067 	\return \c B_OK, if everything went fine, an error code otherwise.
1068 */
1069 static status_t
1070 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1071 {
1072 	ReadLocker locker(sVnodeLock);
1073 	AutoLocker<Vnode> nodeLocker(vnode);
1074 
1075 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1076 
1077 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1078 
1079 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1080 		vnode->ref_count));
1081 
1082 	if (oldRefCount != 1)
1083 		return B_OK;
1084 
1085 	if (vnode->IsBusy())
1086 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1087 
1088 	bool freeNode = false;
1089 	bool freeUnusedNodes = false;
1090 
1091 	// Just insert the vnode into an unused list if we don't need
1092 	// to delete it
1093 	if (vnode->IsRemoved() || alwaysFree) {
1094 		vnode_to_be_freed(vnode);
1095 		vnode->SetBusy(true);
1096 		freeNode = true;
1097 	} else
1098 		freeUnusedNodes = vnode_unused(vnode);
1099 
1100 	nodeLocker.Unlock();
1101 	locker.Unlock();
1102 
1103 	if (freeNode)
1104 		free_vnode(vnode, reenter);
1105 	else if (freeUnusedNodes)
1106 		free_unused_vnodes();
1107 
1108 	return B_OK;
1109 }
1110 
1111 
1112 /*!	\brief Increments the reference counter of the given vnode.
1113 
1114 	The caller must make sure that the node isn't deleted while this function
1115 	is called. This can be done either:
1116 	- by ensuring that a reference to the node exists and remains in existence,
1117 	  or
1118 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1119 	  or by holding sVnodeLock write locked.
1120 
1121 	In the second case the caller is responsible for dealing with the ref count
1122 	0 -> 1 transition. That is 1. this function must not be invoked when the
	0 -> 1 transition. That is: 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for
	the node.
1126 	\param vnode the vnode.
1127 */
1128 static void
1129 inc_vnode_ref_count(struct vnode* vnode)
1130 {
1131 	atomic_add(&vnode->ref_count, 1);
1132 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1133 		vnode->ref_count));
1134 }
1135 
1136 
1137 static bool
1138 is_special_node_type(int type)
1139 {
1140 	// at the moment only FIFOs are supported
1141 	return S_ISFIFO(type);
1142 }
1143 
1144 
1145 static status_t
1146 create_special_sub_node(struct vnode* vnode, uint32 flags)
1147 {
1148 	if (S_ISFIFO(vnode->Type()))
1149 		return create_fifo_vnode(vnode->mount->volume, vnode);
1150 
1151 	return B_BAD_VALUE;
1152 }
1153 
1154 
1155 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1156 
1157 	If the node is not yet in memory, it will be loaded.
1158 
1159 	The caller must not hold the sVnodeLock or the sMountMutex.
1160 
1161 	\param mountID the mount ID.
1162 	\param vnodeID the node ID.
1163 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1164 		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function may wait for a busy vnode to
		   become unbusy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
1168 */
1169 static status_t
1170 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1171 	int reenter)
1172 {
1173 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1174 		mountID, vnodeID, _vnode));
1175 
1176 	rw_lock_read_lock(&sVnodeLock);
1177 
1178 	int32 tries = BUSY_VNODE_RETRIES;
1179 restart:
1180 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1181 	AutoLocker<Vnode> nodeLocker(vnode);
1182 
1183 	if (vnode && vnode->IsBusy()) {
1184 		nodeLocker.Unlock();
1185 		rw_lock_read_unlock(&sVnodeLock);
1186 		if (!canWait) {
1187 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1188 				mountID, vnodeID);
1189 			return B_BUSY;
1190 		}
1191 		if (!retry_busy_vnode(tries, mountID, vnodeID))
1192 			return B_BUSY;
1193 
1194 		rw_lock_read_lock(&sVnodeLock);
1195 		goto restart;
1196 	}
1197 
1198 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1199 
1200 	status_t status;
1201 
1202 	if (vnode) {
1203 		if (vnode->ref_count == 0) {
1204 			// this vnode has been unused before
1205 			vnode_used(vnode);
1206 		}
1207 		inc_vnode_ref_count(vnode);
1208 
1209 		nodeLocker.Unlock();
1210 		rw_lock_read_unlock(&sVnodeLock);
1211 	} else {
1212 		// we need to create a new vnode and read it in
1213 		rw_lock_read_unlock(&sVnodeLock);
1214 			// unlock -- create_new_vnode_and_lock() write-locks on success
1215 		bool nodeCreated;
1216 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1217 			nodeCreated);
1218 		if (status != B_OK)
1219 			return status;
1220 
1221 		if (!nodeCreated) {
1222 			rw_lock_read_lock(&sVnodeLock);
1223 			rw_lock_write_unlock(&sVnodeLock);
1224 			goto restart;
1225 		}
1226 
1227 		rw_lock_write_unlock(&sVnodeLock);
1228 
1229 		int type;
1230 		uint32 flags;
1231 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1232 			&flags, reenter);
1233 		if (status == B_OK && vnode->private_node == NULL)
1234 			status = B_BAD_VALUE;
1235 
1236 		bool gotNode = status == B_OK;
1237 		bool publishSpecialSubNode = false;
1238 		if (gotNode) {
1239 			vnode->SetType(type);
1240 			publishSpecialSubNode = is_special_node_type(type)
1241 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1242 		}
1243 
1244 		if (gotNode && publishSpecialSubNode)
1245 			status = create_special_sub_node(vnode, flags);
1246 
1247 		if (status != B_OK) {
1248 			if (gotNode)
1249 				FS_CALL(vnode, put_vnode, reenter);
1250 
1251 			rw_lock_write_lock(&sVnodeLock);
1252 			sVnodeTable->Remove(vnode);
1253 			remove_vnode_from_mount_list(vnode, vnode->mount);
1254 			rw_lock_write_unlock(&sVnodeLock);
1255 
1256 			free(vnode);
1257 			return status;
1258 		}
1259 
1260 		rw_lock_read_lock(&sVnodeLock);
1261 		vnode->Lock();
1262 
1263 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1264 		vnode->SetBusy(false);
1265 
1266 		vnode->Unlock();
1267 		rw_lock_read_unlock(&sVnodeLock);
1268 	}
1269 
1270 	TRACE(("get_vnode: returning %p\n", vnode));
1271 
1272 	*_vnode = vnode;
1273 	return B_OK;
1274 }
1275 
1276 
1277 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1278 	if the counter dropped to 0.
1279 
1280 	The caller must, of course, own a reference to the vnode to call this
1281 	function.
1282 	The caller must not hold the sVnodeLock or the sMountMutex.
1283 
1284 	\param vnode the vnode.
1285 */
1286 static inline void
1287 put_vnode(struct vnode* vnode)
1288 {
1289 	dec_vnode_ref_count(vnode, false, false);
1290 }
1291 
1292 
1293 static void
1294 free_unused_vnodes(int32 level)
1295 {
1296 	unused_vnodes_check_started();
1297 
1298 	if (level == B_NO_LOW_RESOURCE) {
1299 		unused_vnodes_check_done();
1300 		return;
1301 	}
1302 
1303 	flush_hot_vnodes();
1304 
1305 	// determine how many nodes to free
1306 	uint32 count = 1;
1307 	{
1308 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1309 
1310 		switch (level) {
1311 			case B_LOW_RESOURCE_NOTE:
1312 				count = sUnusedVnodes / 100;
1313 				break;
1314 			case B_LOW_RESOURCE_WARNING:
1315 				count = sUnusedVnodes / 10;
1316 				break;
1317 			case B_LOW_RESOURCE_CRITICAL:
1318 				count = sUnusedVnodes;
1319 				break;
1320 		}
1321 
1322 		if (count > sUnusedVnodes)
1323 			count = sUnusedVnodes;
1324 	}
1325 
1326 	// Write back the modified pages of some unused vnodes and free them.
1327 
1328 	for (uint32 i = 0; i < count; i++) {
1329 		ReadLocker vnodesReadLocker(sVnodeLock);
1330 
1331 		// get the first node
1332 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1333 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1334 			&sUnusedVnodeList);
1335 		unusedVnodesLocker.Unlock();
1336 
1337 		if (vnode == NULL)
1338 			break;
1339 
1340 		// lock the node
1341 		AutoLocker<Vnode> nodeLocker(vnode);
1342 
1343 		// Check whether the node is still unused -- since we only append to the
1344 		// tail of the unused queue, the vnode should still be at its head.
1345 		// Alternatively we could check its ref count for 0 and its busy flag,
1346 		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we'd rather not free it.
1349 		unusedVnodesLocker.Lock();
1350 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1351 			continue;
1352 		unusedVnodesLocker.Unlock();
1353 
1354 		ASSERT(!vnode->IsBusy());
1355 
1356 		// grab a reference
1357 		inc_vnode_ref_count(vnode);
1358 		vnode_used(vnode);
1359 
1360 		// write back changes and free the node
1361 		nodeLocker.Unlock();
1362 		vnodesReadLocker.Unlock();
1363 
1364 		if (vnode->cache != NULL)
1365 			vnode->cache->WriteModified();
1366 
1367 		dec_vnode_ref_count(vnode, true, false);
1368 			// this should free the vnode when it's still unused
1369 	}
1370 
1371 	unused_vnodes_check_done();
1372 }
1373 
1374 
1375 /*!	Gets the vnode the given vnode is covering.
1376 
1377 	The caller must have \c sVnodeLock read-locked at least.
1378 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.
1381 
1382 	\param vnode The vnode whose covered node shall be returned.
1383 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1384 		vnode.
1385 */
1386 static inline Vnode*
1387 get_covered_vnode_locked(Vnode* vnode)
1388 {
1389 	if (Vnode* coveredNode = vnode->covers) {
1390 		while (coveredNode->covers != NULL)
1391 			coveredNode = coveredNode->covers;
1392 
1393 		inc_vnode_ref_count(coveredNode);
1394 		return coveredNode;
1395 	}
1396 
1397 	return NULL;
1398 }
1399 
1400 
1401 /*!	Gets the vnode the given vnode is covering.
1402 
1403 	The caller must not hold \c sVnodeLock. Note that this implies a race
1404 	condition, since the situation can change at any time.
1405 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.
1408 
1409 	\param vnode The vnode whose covered node shall be returned.
1410 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1411 		vnode.
1412 */
1413 static inline Vnode*
1414 get_covered_vnode(Vnode* vnode)
1415 {
1416 	if (!vnode->IsCovering())
1417 		return NULL;
1418 
1419 	ReadLocker vnodeReadLocker(sVnodeLock);
1420 	return get_covered_vnode_locked(vnode);
1421 }
1422 
1423 
1424 /*!	Gets the vnode the given vnode is covered by.
1425 
1426 	The caller must have \c sVnodeLock read-locked at least.
1427 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.
1430 
1431 	\param vnode The vnode whose covering node shall be returned.
1432 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1433 		any vnode.
1434 */
1435 static Vnode*
1436 get_covering_vnode_locked(Vnode* vnode)
1437 {
1438 	if (Vnode* coveringNode = vnode->covered_by) {
1439 		while (coveringNode->covered_by != NULL)
1440 			coveringNode = coveringNode->covered_by;
1441 
1442 		inc_vnode_ref_count(coveringNode);
1443 		return coveringNode;
1444 	}
1445 
1446 	return NULL;
1447 }
1448 
1449 
1450 /*!	Gets the vnode the given vnode is covered by.
1451 
1452 	The caller must not hold \c sVnodeLock. Note that this implies a race
1453 	condition, since the situation can change at any time.
1454 
	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.
1457 
1458 	\param vnode The vnode whose covering node shall be returned.
1459 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1460 		any vnode.
1461 */
1462 static inline Vnode*
1463 get_covering_vnode(Vnode* vnode)
1464 {
1465 	if (!vnode->IsCovered())
1466 		return NULL;
1467 
1468 	ReadLocker vnodeReadLocker(sVnodeLock);
1469 	return get_covering_vnode_locked(vnode);
1470 }
1471 
1472 
1473 static void
1474 free_unused_vnodes()
1475 {
1476 	free_unused_vnodes(
1477 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1478 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1479 }
1480 
1481 
1482 static void
1483 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1484 {
1485 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1486 
1487 	free_unused_vnodes(level);
1488 }
1489 
1490 
1491 static inline void
1492 put_advisory_locking(struct advisory_locking* locking)
1493 {
1494 	release_sem(locking->lock);
1495 }
1496 
1497 
1498 /*!	Returns the advisory_locking object of the \a vnode in case it
1499 	has one, and locks it.
1500 	You have to call put_advisory_locking() when you're done with
1501 	it.
1502 	Note, you must not have the vnode mutex locked when calling
1503 	this function.
1504 */
1505 static struct advisory_locking*
1506 get_advisory_locking(struct vnode* vnode)
1507 {
1508 	rw_lock_read_lock(&sVnodeLock);
1509 	vnode->Lock();
1510 
1511 	struct advisory_locking* locking = vnode->advisory_locking;
1512 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1513 
1514 	vnode->Unlock();
1515 	rw_lock_read_unlock(&sVnodeLock);
1516 
1517 	if (lock >= 0)
1518 		lock = acquire_sem(lock);
1519 	if (lock < 0) {
		// This means the locking has been deleted in the meantime
1521 		// or had never existed in the first place - otherwise, we
1522 		// would get the lock at some point.
1523 		return NULL;
1524 	}
1525 
1526 	return locking;
1527 }
1528 
1529 
1530 /*!	Creates a locked advisory_locking object, and attaches it to the
1531 	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the meantime; you'll still get this
	one locked then.
1535 */
1536 static status_t
1537 create_advisory_locking(struct vnode* vnode)
1538 {
1539 	if (vnode == NULL)
1540 		return B_FILE_ERROR;
1541 
1542 	ObjectDeleter<advisory_locking> lockingDeleter;
1543 	struct advisory_locking* locking = NULL;
1544 
1545 	while (get_advisory_locking(vnode) == NULL) {
1546 		// no locking object set on the vnode yet, create one
1547 		if (locking == NULL) {
1548 			locking = new(std::nothrow) advisory_locking;
1549 			if (locking == NULL)
1550 				return B_NO_MEMORY;
1551 			lockingDeleter.SetTo(locking);
1552 
1553 			locking->wait_sem = create_sem(0, "advisory lock");
1554 			if (locking->wait_sem < 0)
1555 				return locking->wait_sem;
1556 
1557 			locking->lock = create_sem(0, "advisory locking");
1558 			if (locking->lock < 0)
1559 				return locking->lock;
1560 		}
1561 
1562 		// set our newly created locking object
1563 		ReadLocker _(sVnodeLock);
1564 		AutoLocker<Vnode> nodeLocker(vnode);
1565 		if (vnode->advisory_locking == NULL) {
1566 			vnode->advisory_locking = locking;
1567 			lockingDeleter.Detach();
1568 			return B_OK;
1569 		}
1570 	}
1571 
1572 	// The vnode already had a locking object. That's just as well.
1573 
1574 	return B_OK;
1575 }
1576 
1577 
1578 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1579 	with the advisory_lock \a lock.
1580 */
1581 static bool
1582 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1583 {
1584 	if (flock == NULL)
1585 		return true;
1586 
1587 	return lock->start <= flock->l_start - 1 + flock->l_len
1588 		&& lock->end >= flock->l_start;
1589 }
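
// Both ranges are treated as closed intervals: the lock covers [start, end],
// the flock covers [l_start, l_start + l_len - 1]. For example, a lock
// spanning [0, 9] does not intersect an flock with l_start = 10, l_len = 5
// (9 >= 10 fails), but does intersect one with l_start = 5 (0 <= 9 and
// 9 >= 5 both hold).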
1590 
1591 
1592 /*!	Tests whether acquiring a lock would block.
1593 */
1594 static status_t
1595 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1596 {
	// remember the requested lock type before overwriting it for the
	// "no collision" answer below
	const bool shared = flock->l_type == F_RDLCK;
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (!shared || !lock->shared) {
1612 				// collision
1613 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1614 				flock->l_whence = SEEK_SET;
1615 				flock->l_start = lock->start;
1616 				flock->l_len = lock->end - lock->start + 1;
1617 				flock->l_pid = lock->team;
1618 				break;
1619 			}
1620 		}
1621 	}
1622 
1623 	put_advisory_locking(locking);
1624 	return B_OK;
1625 }
1626 
1627 
1628 /*!	Removes the specified lock, or all locks of the calling team
1629 	if \a flock is NULL.
1630 */
1631 static status_t
1632 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1633 	struct file_descriptor* descriptor, struct flock* flock)
1634 {
1635 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1636 
1637 	struct advisory_locking* locking = get_advisory_locking(vnode);
1638 	if (locking == NULL)
1639 		return B_OK;
1640 
1641 	// find matching lock entries
1642 
1643 	LockList::Iterator iterator = locking->locks.GetIterator();
1644 	while (iterator.HasNext()) {
1645 		struct advisory_lock* lock = iterator.Next();
1646 		bool removeLock = false;
1647 
1648 		if (descriptor != NULL && lock->bound_to == descriptor) {
1649 			// Remove flock() locks
1650 			removeLock = true;
1651 		} else if (lock->bound_to == context
1652 				&& advisory_lock_intersects(lock, flock)) {
1653 			// Remove POSIX locks
1654 			bool endsBeyond = false;
1655 			bool startsBefore = false;
1656 			if (flock != NULL) {
1657 				startsBefore = lock->start < flock->l_start;
1658 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1659 			}
1660 
1661 			if (!startsBefore && !endsBeyond) {
1662 				// lock is completely contained in flock
1663 				removeLock = true;
1664 			} else if (startsBefore && !endsBeyond) {
1665 				// cut the end of the lock
1666 				lock->end = flock->l_start - 1;
1667 			} else if (!startsBefore && endsBeyond) {
1668 				// cut the start of the lock
1669 				lock->start = flock->l_start + flock->l_len;
1670 			} else {
1671 				// divide the lock into two locks
				// allocated with malloc() to match the free() in the
				// removal path above
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
				if (secondLock == NULL) {
1674 					// TODO: we should probably revert the locks we already
1675 					// changed... (ie. allocate upfront)
1676 					put_advisory_locking(locking);
1677 					return B_NO_MEMORY;
1678 				}
1679 
				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
				secondLock->shared = lock->shared;

				// only now cut the end of the first lock, so that secondLock
				// inherited the original end of the locked region above
				lock->end = flock->l_start - 1;
1689 
1690 				locking->locks.Add(secondLock);
1691 			}
1692 		}
1693 
1694 		if (removeLock) {
1695 			// this lock is no longer used
1696 			iterator.Remove();
1697 			free(lock);
1698 		}
1699 	}
1700 
1701 	bool removeLocking = locking->locks.IsEmpty();
1702 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1703 
1704 	put_advisory_locking(locking);
1705 
1706 	if (removeLocking) {
1707 		// We can remove the whole advisory locking structure; it's no
1708 		// longer used
1709 		locking = get_advisory_locking(vnode);
1710 		if (locking != NULL) {
1711 			ReadLocker locker(sVnodeLock);
1712 			AutoLocker<Vnode> nodeLocker(vnode);
1713 
1714 			// the locking could have been changed in the mean time
1715 			if (locking->locks.IsEmpty()) {
1716 				vnode->advisory_locking = NULL;
1717 				nodeLocker.Unlock();
1718 				locker.Unlock();
1719 
1720 				// we've detached the locking from the vnode, so we can
1721 				// safely delete it
1722 				delete locking;
1723 			} else {
1724 				// the locking is in use again
1725 				nodeLocker.Unlock();
1726 				locker.Unlock();
1727 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1728 			}
1729 		}
1730 	}
1731 
1732 	return B_OK;
1733 }
1734 
1735 
/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available if there are any collisions
	(if \a wait is \c false, it returns B_PERMISSION_DENIED in this case,
	or B_WOULD_BLOCK for flock() style locks).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, but that seems to
	be in line with what the BSDs are doing).
1744 */
1745 static status_t
1746 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1747 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1748 {
1749 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1750 		vnode, flock, wait ? "yes" : "no"));
1753 
1754 	bool shared = flock->l_type == F_RDLCK;
1755 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1756 	status_t status = B_OK;
1757 
1758 	// TODO: do deadlock detection!
1759 
1760 	struct advisory_locking* locking;
1761 
1762 	while (true) {
1763 		// if this vnode has an advisory_locking structure attached,
1764 		// lock that one and search for any colliding file lock
1765 		status = create_advisory_locking(vnode);
1766 		if (status != B_OK)
1767 			return status;
1768 
1769 		locking = vnode->advisory_locking;
1770 		team_id team = team_get_current_team_id();
1771 		sem_id waitForLock = -1;
1772 
1773 		// test for collisions
1774 		LockList::Iterator iterator = locking->locks.GetIterator();
1775 		while (iterator.HasNext()) {
1776 			struct advisory_lock* lock = iterator.Next();
1777 
1778 			// TODO: locks from the same team might be joinable!
1779 			if ((lock->team != team || lock->bound_to != boundTo)
1780 					&& advisory_lock_intersects(lock, flock)) {
1781 				// locks do overlap
1782 				if (!shared || !lock->shared) {
1783 					// we need to wait
1784 					waitForLock = locking->wait_sem;
1785 					break;
1786 				}
1787 			}
1788 		}
1789 
1790 		if (waitForLock < 0)
1791 			break;
1792 
1793 		// We need to wait. Do that or fail now, if we've been asked not to.
1794 
1795 		if (!wait) {
1796 			put_advisory_locking(locking);
1797 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1798 		}
1799 
1800 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1801 			B_CAN_INTERRUPT, 0);
1802 		if (status != B_OK && status != B_BAD_SEM_ID)
1803 			return status;
1804 
1805 		// We have been notified, but we need to re-lock the locking object. So
1806 		// go another round...
1807 	}
1808 
1809 	// install new lock
1810 
1811 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1812 		sizeof(struct advisory_lock));
1813 	if (lock == NULL) {
1814 		put_advisory_locking(locking);
1815 		return B_NO_MEMORY;
1816 	}
1817 
1818 	lock->bound_to = boundTo;
1819 	lock->team = team_get_current_team_id();
1820 	lock->session = thread_get_current_thread()->team->session_id;
1821 	// values must already be normalized when getting here
1822 	lock->start = flock->l_start;
1823 	lock->end = flock->l_start - 1 + flock->l_len;
1824 	lock->shared = shared;
1825 
1826 	locking->locks.Add(lock);
1827 	put_advisory_locking(locking);
1828 
1829 	return status;
1830 }
1831 
1832 
1833 /*!	Normalizes the \a flock structure to make it easier to compare the
1834 	structure with others. The l_start and l_len fields are set to absolute
1835 	values according to the l_whence field.
1836 */
1837 static status_t
1838 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1839 {
1840 	switch (flock->l_whence) {
1841 		case SEEK_SET:
1842 			break;
1843 		case SEEK_CUR:
1844 			flock->l_start += descriptor->pos;
1845 			break;
1846 		case SEEK_END:
1847 		{
1848 			struct vnode* vnode = descriptor->u.vnode;
1849 			struct stat stat;
1850 			status_t status;
1851 
1852 			if (!HAS_FS_CALL(vnode, read_stat))
1853 				return B_UNSUPPORTED;
1854 
1855 			status = FS_CALL(vnode, read_stat, &stat);
1856 			if (status != B_OK)
1857 				return status;
1858 
1859 			flock->l_start += stat.st_size;
1860 			break;
1861 		}
1862 		default:
1863 			return B_BAD_VALUE;
1864 	}
1865 
1866 	if (flock->l_start < 0)
1867 		flock->l_start = 0;
1868 	if (flock->l_len == 0)
1869 		flock->l_len = OFF_MAX;
1870 
1871 	// don't let the offset and length overflow
1872 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1873 		flock->l_len = OFF_MAX - flock->l_start;
1874 
1875 	if (flock->l_len < 0) {
1876 		// a negative length reverses the region
1877 		flock->l_start += flock->l_len;
1878 		flock->l_len = -flock->l_len;
1879 	}
1880 
1881 	return B_OK;
1882 }
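
// Worked example for normalize_flock() (editor's sketch): with the
// descriptor positioned at offset 100, the request
//
//	struct flock fl = {};
//	fl.l_whence = SEEK_CUR;
//	fl.l_start = 50;
//	fl.l_len = -30;
//
// first becomes l_start = 150 (SEEK_CUR adds descriptor->pos), and the
// negative length then reverses the region to l_start = 120, l_len = 30.
// An l_len of 0 would instead have been expanded to OFF_MAX ("up to EOF").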
1883 
1884 
1885 static void
1886 replace_vnode_if_disconnected(struct fs_mount* mount,
1887 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1888 	struct vnode* fallBack, bool lockRootLock)
1889 {
1890 	struct vnode* givenVnode = vnode;
1891 	bool vnodeReplaced = false;
1892 
1893 	ReadLocker vnodeReadLocker(sVnodeLock);
1894 
1895 	if (lockRootLock)
1896 		mutex_lock(&sIOContextRootLock);
1897 
1898 	while (vnode != NULL && vnode->mount == mount
1899 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1900 		if (vnode->covers != NULL) {
1901 			// redirect the vnode to the covered vnode
1902 			vnode = vnode->covers;
1903 		} else
1904 			vnode = fallBack;
1905 
1906 		vnodeReplaced = true;
1907 	}
1908 
1909 	// If we've replaced the node, grab a reference for the new one.
1910 	if (vnodeReplaced && vnode != NULL)
1911 		inc_vnode_ref_count(vnode);
1912 
1913 	if (lockRootLock)
1914 		mutex_unlock(&sIOContextRootLock);
1915 
1916 	vnodeReadLocker.Unlock();
1917 
1918 	if (vnodeReplaced)
1919 		put_vnode(givenVnode);
1920 }
1921 
1922 
1923 /*!	Disconnects all file descriptors that are associated with the
1924 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1925 	\a mount object.
1926 
1927 	Note that after you've called this function, there might still be ongoing
1928 	accesses - those already in progress won't be interrupted.
1929 	However, any subsequent access will fail.
1930 
1931 	This is not a cheap function and should be used with care and rarely.
1932 	TODO: there is currently no means to stop a blocking read/write!
1933 */
1934 static void
1935 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1936 	struct vnode* vnodeToDisconnect)
1937 {
1938 	// iterate over all teams and peek into their file descriptors
1939 	TeamListIterator teamIterator;
1940 	while (Team* team = teamIterator.Next()) {
1941 		BReference<Team> teamReference(team, true);
1942 		TeamLocker teamLocker(team);
1943 
1944 		// lock the I/O context
1945 		io_context* context = team->io_context;
1946 		if (context == NULL)
1947 			continue;
1948 		MutexLocker contextLocker(context->io_mutex);
1949 
1950 		teamLocker.Unlock();
1951 
1952 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1953 			sRoot, true);
1954 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1955 			sRoot, false);
1956 
1957 		for (uint32 i = 0; i < context->table_size; i++) {
1958 			struct file_descriptor* descriptor = context->fds[i];
1959 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1960 				continue;
1961 
1962 			inc_fd_ref_count(descriptor);
1963 
1964 			// if this descriptor points at this mount, we
1965 			// need to disconnect it to be able to unmount
1966 			struct vnode* vnode = fd_vnode(descriptor);
1967 			if (vnodeToDisconnect != NULL) {
1968 				if (vnode == vnodeToDisconnect)
1969 					disconnect_fd(descriptor);
1970 			} else if ((vnode != NULL && vnode->mount == mount)
1971 				|| (vnode == NULL && descriptor->u.mount == mount))
1972 				disconnect_fd(descriptor);
1973 
1974 			put_fd(descriptor);
1975 		}
1976 	}
1977 }
1978 
1979 
1980 /*!	\brief Gets the root node of the current IO context.
1981 	If \a kernel is \c true, the kernel IO context will be used.
1982 	The caller obtains a reference to the returned node.
1983 */
1984 struct vnode*
1985 get_root_vnode(bool kernel)
1986 {
1987 	if (!kernel) {
1988 		// Get the root of the current IO context
1989 		struct io_context* context = get_current_io_context(kernel);
1990 
1991 		mutex_lock(&sIOContextRootLock);
1992 
1993 		struct vnode* root = context->root;
1994 		if (root != NULL)
1995 			inc_vnode_ref_count(root);
1996 
1997 		mutex_unlock(&sIOContextRootLock);
1998 
1999 		if (root != NULL)
2000 			return root;
2001 
2002 		// That should never happen.
2003 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2004 			"have a root\n", team_get_current_team_id());
2005 	}
2006 
2007 	inc_vnode_ref_count(sRoot);
2008 	return sRoot;
2009 }
2010 
2011 
2012 /*!	\brief Gets the directory path and leaf name for a given path.
2013 
2014 	The supplied \a path is transformed to refer to the directory part of
2015 	the entry identified by the original path, and into the buffer \a filename
2016 	the leaf name of the original entry is written.
2017 	Neither the returned path nor the leaf name can be expected to be
2018 	canonical.
2019 
2020 	\param path The path to be analyzed. Must be able to store at least one
2021 		   additional character.
2022 	\param filename The buffer into which the leaf name will be written.
2023 		   Must be of size B_FILE_NAME_LENGTH at least.
2024 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2025 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2026 		   if the given path name is empty.
2027 */
2028 static status_t
2029 get_dir_path_and_leaf(char* path, char* filename)
2030 {
2031 	if (*path == '\0')
2032 		return B_ENTRY_NOT_FOUND;
2033 
2034 	char* last = strrchr(path, '/');
2035 		// '/' is not allowed in file names!
2036 
2037 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2038 
2039 	if (last == NULL) {
2040 		// this path is a single segment with no '/' in it
2041 		// ex. "foo"
2042 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2043 			return B_NAME_TOO_LONG;
2044 
2045 		strcpy(path, ".");
2046 	} else {
2047 		last++;
2048 		if (last[0] == '\0') {
2049 			// special case: the path ends in one or more '/' - remove them
2050 			while (*--last == '/' && last != path);
2051 			last[1] = '\0';
2052 
2053 			if (last == path && last[0] == '/') {
2054 				// This path points to the root of the file system
2055 				strcpy(filename, ".");
2056 				return B_OK;
2057 			}
2058 			for (; last != path && *(last - 1) != '/'; last--);
2059 				// rewind to the start of the leaf before the '/'
2060 		}
2061 
2062 		// normal leaf: replace the leaf portion of the path with a '.'
2063 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2064 			return B_NAME_TOO_LONG;
2065 
2066 		last[0] = '.';
2067 		last[1] = '\0';
2068 	}
2069 	return B_OK;
2070 }
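
// Some input/output examples for get_dir_path_and_leaf() (editor's sketch):
//
//	path in			path out	filename
//	"foo"			"."			"foo"
//	"/a/b/c"		"/a/b/."	"c"
//	"/a/b/c///"		"/a/b/."	"c"
//	"/"				"/"			"."
//
// Since the leaf is replaced by "." in the buffer, the caller can resolve
// the directory by passing the modified path on to path_to_vnode().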
2071 
2072 
2073 static status_t
2074 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2075 	bool traverse, bool kernel, struct vnode** _vnode)
2076 {
2077 	char clonedName[B_FILE_NAME_LENGTH + 1];
2078 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2079 		return B_NAME_TOO_LONG;
2080 
2081 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2082 	struct vnode* directory;
2083 
2084 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2085 	if (status < 0)
2086 		return status;
2087 
2088 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2089 		_vnode, NULL);
2090 }
2091 
2092 
2093 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2094 	and returns the respective vnode.
2095 	On success a reference to the vnode is acquired for the caller.
2096 */
2097 static status_t
2098 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2099 {
2100 	ino_t id;
2101 	bool missing;
2102 
2103 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2104 		return missing ? B_ENTRY_NOT_FOUND
2105 			: get_vnode(dir->device, id, _vnode, true, false);
2106 	}
2107 
2108 	status_t status = FS_CALL(dir, lookup, name, &id);
2109 	if (status != B_OK)
2110 		return status;
2111 
2112 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2113 	// have a reference and just need to look the node up.
2114 	rw_lock_read_lock(&sVnodeLock);
2115 	*_vnode = lookup_vnode(dir->device, id);
2116 	rw_lock_read_unlock(&sVnodeLock);
2117 
2118 	if (*_vnode == NULL) {
2119 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2120 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2121 		return B_ENTRY_NOT_FOUND;
2122 	}
2123 
2124 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2125 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2126 //		(*_vnode)->mount->id, (*_vnode)->id);
2127 
2128 	return B_OK;
2129 }
2130 
2131 
2132 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2133 	\a path must not be NULL.
2134 	If it returns successfully, \a path contains the name of the last path
2135 	component. This function clobbers the buffer pointed to by \a path only
2136 	if it contains more than one component.
2137 	Note that this call reduces the ref_count of the starting \a vnode, no
2138 	matter whether it succeeds or not!
2139 */
2140 static status_t
2141 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2142 	int count, struct io_context* ioContext, struct vnode** _vnode,
2143 	ino_t* _parentID)
2144 {
2145 	status_t status = B_OK;
2146 	ino_t lastParentID = vnode->id;
2147 
2148 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2149 
2150 	if (path == NULL) {
2151 		put_vnode(vnode);
2152 		return B_BAD_VALUE;
2153 	}
2154 
2155 	if (*path == '\0') {
2156 		put_vnode(vnode);
2157 		return B_ENTRY_NOT_FOUND;
2158 	}
2159 
2160 	while (true) {
2161 		struct vnode* nextVnode;
2162 		char* nextPath;
2163 
2164 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2165 			path));
2166 
2167 		// done?
2168 		if (path[0] == '\0')
2169 			break;
2170 
2171 		// walk to find the next path component ("path" will point to a single
2172 		// path component), and filter out multiple slashes
2173 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2174 				nextPath++);
2175 
2176 		if (*nextPath == '/') {
2177 			*nextPath = '\0';
2178 			do
2179 				nextPath++;
2180 			while (*nextPath == '/');
2181 		}
2182 
2183 		// If the '..' is at a covering vnode, move to the covered vnode so
2184 		// we pass the '..' path to the underlying file system.
2185 		// Also prevent escaping the root of the IO context.
2186 		if (strcmp("..", path) == 0) {
2187 			if (vnode == ioContext->root) {
2188 				// Attempted prison break! Keep it contained.
2189 				path = nextPath;
2190 				continue;
2191 			}
2192 
2193 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2194 				nextVnode = coveredVnode;
2195 				put_vnode(vnode);
2196 				vnode = nextVnode;
2197 			}
2198 		}
2199 
2200 		// check if vnode is really a directory
2201 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2202 			status = B_NOT_A_DIRECTORY;
2203 
2204 		// Check if we have the right to search the current directory vnode.
2205 		// If a file system doesn't have the access() function, we assume that
2206 		// searching a directory is always allowed
2207 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2208 			status = FS_CALL(vnode, access, X_OK);
2209 
2210 		// Tell the filesystem to get the vnode of this path component (if we
2211 		// got the permission from the call above)
2212 		if (status == B_OK)
2213 			status = lookup_dir_entry(vnode, path, &nextVnode);
2214 
2215 		if (status != B_OK) {
2216 			put_vnode(vnode);
2217 			return status;
2218 		}
2219 
2220 		// If the new node is a symbolic link, resolve it (if we've been told
2221 		// to do it)
2222 		if (S_ISLNK(nextVnode->Type())
2223 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2224 			size_t bufferSize;
2225 			char* buffer;
2226 
2227 			TRACE(("traverse link\n"));
2228 
2229 			// it's not exactly nice style using goto in this way, but hey,
2230 			// it works :-/
2231 			if (count + 1 > B_MAX_SYMLINKS) {
2232 				status = B_LINK_LIMIT;
2233 				goto resolve_link_error;
2234 			}
2235 
2236 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2237 			if (buffer == NULL) {
2238 				status = B_NO_MEMORY;
2239 				goto resolve_link_error;
2240 			}
2241 
2242 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2243 				bufferSize--;
2244 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2245 				// null-terminate
2246 				if (status >= 0)
2247 					buffer[bufferSize] = '\0';
2248 			} else
2249 				status = B_BAD_VALUE;
2250 
2251 			if (status != B_OK) {
2252 				free(buffer);
2253 
2254 		resolve_link_error:
2255 				put_vnode(vnode);
2256 				put_vnode(nextVnode);
2257 
2258 				return status;
2259 			}
2260 			put_vnode(nextVnode);
2261 
2262 			// Check if we start from the root directory or the current
2263 			// directory ("vnode" still points to that one).
2264 			// Cut off all leading slashes if it's the root directory
2265 			path = buffer;
2266 			bool absoluteSymlink = false;
2267 			if (path[0] == '/') {
2268 				// we don't need the old directory anymore
2269 				put_vnode(vnode);
2270 
2271 				while (*++path == '/')
2272 					;
2273 
2274 				mutex_lock(&sIOContextRootLock);
2275 				vnode = ioContext->root;
2276 				inc_vnode_ref_count(vnode);
2277 				mutex_unlock(&sIOContextRootLock);
2278 
2279 				absoluteSymlink = true;
2280 			}
2281 
2282 			inc_vnode_ref_count(vnode);
2283 				// balance the next recursion - we will decrement the
2284 				// ref_count of the vnode, no matter if we succeeded or not
2285 
2286 			if (absoluteSymlink && *path == '\0') {
2287 				// symlink was just "/"
2288 				nextVnode = vnode;
2289 			} else {
2290 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2291 					ioContext, &nextVnode, &lastParentID);
2292 			}
2293 
2294 			free(buffer);
2295 
2296 			if (status != B_OK) {
2297 				put_vnode(vnode);
2298 				return status;
2299 			}
2300 		} else
2301 			lastParentID = vnode->id;
2302 
2303 		// decrease the ref count on the old dir we just looked up into
2304 		put_vnode(vnode);
2305 
2306 		path = nextPath;
2307 		vnode = nextVnode;
2308 
2309 		// see if we ended up on a covered node; if so, move to the covering one
2310 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2311 			put_vnode(vnode);
2312 			vnode = coveringNode;
2313 		}
2314 	}
2315 
2316 	*_vnode = vnode;
2317 	if (_parentID)
2318 		*_parentID = lastParentID;
2319 
2320 	return B_OK;
2321 }
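
// Note on the reference counting contract (editor's illustration, using an
// assumed variable name): since vnode_path_to_vnode() always consumes one
// reference to the starting vnode, a caller that wants to keep its own
// reference has to add one first:
//
//	inc_vnode_ref_count(dir);
//		// balances the reference vnode_path_to_vnode() will put
//	status_t status = vnode_path_to_vnode(dir, path, true, 0, kernel,
//		&resultVnode, NULL);
//
// This is the pattern used by vnode_and_path_to_dir_vnode() and
// normalize_path() further below.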
2322 
2323 
2324 static status_t
2325 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2326 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2327 {
2328 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2329 		get_current_io_context(kernel), _vnode, _parentID);
2330 }
2331 
2332 
2333 static status_t
2334 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2335 	ino_t* _parentID, bool kernel)
2336 {
2337 	struct vnode* start = NULL;
2338 
2339 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2340 
2341 	if (!path)
2342 		return B_BAD_VALUE;
2343 
2344 	if (*path == '\0')
2345 		return B_ENTRY_NOT_FOUND;
2346 
2347 	// figure out if we need to start at root or at cwd
2348 	if (*path == '/') {
2349 		if (sRoot == NULL) {
2350 			// we're a bit early, aren't we?
2351 			return B_ERROR;
2352 		}
2353 
2354 		while (*++path == '/')
2355 			;
2356 		start = get_root_vnode(kernel);
2357 
2358 		if (*path == '\0') {
2359 			*_vnode = start;
2360 			return B_OK;
2361 		}
2362 
2363 	} else {
2364 		struct io_context* context = get_current_io_context(kernel);
2365 
2366 		mutex_lock(&context->io_mutex);
2367 		start = context->cwd;
2368 		if (start != NULL)
2369 			inc_vnode_ref_count(start);
2370 		mutex_unlock(&context->io_mutex);
2371 
2372 		if (start == NULL)
2373 			return B_ERROR;
2374 	}
2375 
2376 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2377 		_parentID);
2378 }
2379 
2380 
2381 /*! Returns the vnode in the next to last segment of the path, and returns
2382 	the last portion in filename.
2383 	The path buffer must be able to store at least one additional character.
2384 */
2385 static status_t
2386 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2387 	bool kernel)
2388 {
2389 	status_t status = get_dir_path_and_leaf(path, filename);
2390 	if (status != B_OK)
2391 		return status;
2392 
2393 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2394 }
2395 
2396 
2397 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2398 		   to by a FD + path pair.
2399 
2400 	\a path must be given in either case. \a fd might be omitted, in which
2401 	case \a path is either an absolute path or one relative to the current
2402 	directory. If both are supplied and \a path is relative, it is reckoned off
2403 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2404 	ignored.
2405 
2406 	The caller has the responsibility to call put_vnode() on the returned
2407 	directory vnode.
2408 
2409 	\param fd The FD. May be < 0.
2410 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2411 	       is modified by this function. It must have at least room for a
2412 	       string one character longer than the path it contains.
2413 	\param _vnode A pointer to a variable the directory vnode shall be written
2414 		   into.
2415 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2416 		   the leaf name of the specified entry will be written.
2417 	\param kernel \c true, if invoked from inside the kernel, \c false if
2418 		   invoked from userland.
2419 	\return \c B_OK, if everything went fine, another error code otherwise.
2420 */
2421 static status_t
2422 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2423 	char* filename, bool kernel)
2424 {
2425 	if (!path)
2426 		return B_BAD_VALUE;
2427 	if (*path == '\0')
2428 		return B_ENTRY_NOT_FOUND;
2429 	if (fd < 0)
2430 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2431 
2432 	status_t status = get_dir_path_and_leaf(path, filename);
2433 	if (status != B_OK)
2434 		return status;
2435 
2436 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2437 }
2438 
2439 
2440 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2441 		   to by a vnode + path pair.
2442 
2443 	\a path must be given in either case. \a vnode might be omitted, in which
2444 	case \a path is either an absolute path or one relative to the current
2445 	directory. If both are supplied and \a path is relative, it is reckoned off
2446 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2447 	ignored.
2448 
2449 	The caller has the responsibility to call put_vnode() on the returned
2450 	directory vnode.
2451 
2452 	\param vnode The vnode. May be \c NULL.
2453 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2454 	       is modified by this function. It must have at least room for a
2455 	       string one character longer than the path it contains.
2456 	\param _vnode A pointer to a variable the directory vnode shall be written
2457 		   into.
2458 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2459 		   the leaf name of the specified entry will be written.
2460 	\param kernel \c true, if invoked from inside the kernel, \c false if
2461 		   invoked from userland.
2462 	\return \c B_OK, if everything went fine, another error code otherwise.
2463 */
2464 static status_t
2465 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2466 	struct vnode** _vnode, char* filename, bool kernel)
2467 {
2468 	if (!path)
2469 		return B_BAD_VALUE;
2470 	if (*path == '\0')
2471 		return B_ENTRY_NOT_FOUND;
2472 	if (vnode == NULL || path[0] == '/')
2473 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2474 
2475 	status_t status = get_dir_path_and_leaf(path, filename);
2476 	if (status != B_OK)
2477 		return status;
2478 
2479 	inc_vnode_ref_count(vnode);
2480 		// vnode_path_to_vnode() always decrements the ref count
2481 
2482 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2483 }
2484 
2485 
2486 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2487 */
2488 static status_t
2489 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2490 	size_t bufferSize, struct io_context* ioContext)
2491 {
2492 	if (bufferSize < sizeof(struct dirent))
2493 		return B_BAD_VALUE;
2494 
2495 	// See if the vnode is covering another vnode and move to the covered
2496 	// vnode so we get the underlying file system
2497 	VNodePutter vnodePutter;
2498 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2499 		vnode = coveredVnode;
2500 		vnodePutter.SetTo(vnode);
2501 	}
2502 
2503 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2504 		// The FS supports getting the name of a vnode.
2505 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2506 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2507 			return B_OK;
2508 	}
2509 
2510 	// The FS doesn't support getting the name of a vnode. So we search the
2511 	// parent directory for the vnode, if the caller let us.
2512 
2513 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2514 		return B_UNSUPPORTED;
2515 
2516 	void* cookie;
2517 
2518 	status_t status = FS_CALL(parent, open_dir, &cookie);
2519 	if (status >= B_OK) {
2520 		while (true) {
2521 			uint32 num = 1;
2522 			// We use the FS hook directly instead of dir_read(), since we don't
2523 			// want the entries to be fixed. We have already resolved vnode to
2524 			// the covered node.
2525 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2526 				&num);
2527 			if (status != B_OK)
2528 				break;
2529 			if (num == 0) {
2530 				status = B_ENTRY_NOT_FOUND;
2531 				break;
2532 			}
2533 
2534 			if (vnode->id == buffer->d_ino) {
2535 				// found correct entry!
2536 				break;
2537 			}
2538 		}
2539 
2540 		FS_CALL(parent, close_dir, cookie);
2541 		FS_CALL(parent, free_dir_cookie, cookie);
2542 	}
2543 	return status;
2544 }
2545 
2546 
2547 static status_t
2548 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2549 	size_t nameSize, bool kernel)
2550 {
2551 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2552 	struct dirent* dirent = (struct dirent*)buffer;
2553 
2554 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2555 		get_current_io_context(kernel));
2556 	if (status != B_OK)
2557 		return status;
2558 
2559 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2560 		return B_BUFFER_OVERFLOW;
2561 
2562 	return B_OK;
2563 }
2564 
2565 
2566 /*!	Gets the full path to a given directory vnode.
2567 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2568 	file system doesn't support this call, it will fall back to iterating
2569 	through the parent directory to get the name of the child.
2570 
2571 	To protect against loops, it supports a maximum tree depth
2572 	of 256 levels.
2573 
2574 	Note that the path might no longer be correct by the time this function
2575 	returns! It doesn't use any locking to guarantee a correct path, since
2576 	paths aren't stable anyway: the path to a file can change at any time.
2577 
2578 	It might be a good idea, though, to check in the calling function whether
2579 	the returned path exists (it's not done here for efficiency reasons).
2580 */
2581 static status_t
2582 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2583 	bool kernel)
2584 {
2585 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2586 
2587 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2588 		return B_BAD_VALUE;
2589 
2590 	if (!S_ISDIR(vnode->Type()))
2591 		return B_NOT_A_DIRECTORY;
2592 
2593 	char* path = buffer;
2594 	int32 insert = bufferSize;
2595 	int32 maxLevel = 256;
2596 	int32 length;
2597 	status_t status = B_OK;
2598 	struct io_context* ioContext = get_current_io_context(kernel);
2599 
2600 	// we don't use get_vnode() here because this call is more
2601 	// efficient and does all we need from get_vnode()
2602 	inc_vnode_ref_count(vnode);
2603 
2604 	path[--insert] = '\0';
2605 		// the path is filled right to left
2606 
2607 	while (true) {
2608 		// If the node is the context's root, bail out. Otherwise resolve mount
2609 		// points.
2610 		if (vnode == ioContext->root)
2611 			break;
2612 
2613 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2614 			put_vnode(vnode);
2615 			vnode = coveredVnode;
2616 		}
2617 
2618 		// lookup the parent vnode
2619 		struct vnode* parentVnode;
2620 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2621 		if (status != B_OK)
2622 			goto out;
2623 
2624 		if (parentVnode == vnode) {
2625 			// The caller apparently got their hands on a node outside of their
2626 			// context's root. Now we've hit the global root.
2627 			put_vnode(parentVnode);
2628 			break;
2629 		}
2630 
2631 		// get the node's name
2632 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2633 			// also used for fs_read_dir()
2634 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2635 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2636 			sizeof(nameBuffer), ioContext);
2637 
2638 		// release the current vnode, we only need its parent from now on
2639 		put_vnode(vnode);
2640 		vnode = parentVnode;
2641 
2642 		if (status != B_OK)
2643 			goto out;
2644 
2645 		// TODO: add an explicit check for loops in about 10 levels to do
2646 		// real loop detection
2647 
2648 		// don't go deeper than 'maxLevel' to prevent getting caught in loops
2649 		if (maxLevel-- < 0) {
2650 			status = B_LINK_LIMIT;
2651 			goto out;
2652 		}
2653 
2654 		// add the name in front of the current path
2655 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2656 		length = strlen(name);
2657 		insert -= length;
2658 		if (insert <= 0) {
2659 			status = B_RESULT_NOT_REPRESENTABLE;
2660 			goto out;
2661 		}
2662 		memcpy(path + insert, name, length);
2663 		path[--insert] = '/';
2664 	}
2665 
2666 	// the root dir will result in an empty path: fix it
2667 	if (path[insert] == '\0')
2668 		path[--insert] = '/';
2669 
2670 	TRACE(("  path is: %s\n", path + insert));
2671 
2672 	// move the path to the start of the buffer
2673 	length = bufferSize - insert;
2674 	memmove(buffer, path + insert, length);
2675 
2676 out:
2677 	put_vnode(vnode);
2678 	return status;
2679 }
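
// How dir_vnode_to_path() fills its buffer (editor's sketch, with assumed
// directory names): the path is built right to left while walking ".."
// entries up to the IO context root. For a vnode at "/boot/home" and a
// 16 byte buffer the iterations look like this ('~' marks unused bytes,
// '0' the terminator):
//
//	~~~~~~~~~~~~~~~0	insert = 15, terminator written
//	~~~~~~~~~~/home0	after resolving "home" in its parent
//	~~~~~/boot/home0	after resolving "boot" in the root
//
// The final memmove() then shifts "/boot/home" to the start of the buffer.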
2680 
2681 
2682 /*!	Checks the length of every path component, and adds a '.'
2683 	if the path ends in a slash.
2684 	The given path buffer must be able to store at least one
2685 	additional character.
2686 */
2687 static status_t
2688 check_path(char* to)
2689 {
2690 	int32 length = 0;
2691 
2692 	// check length of every path component
2693 
2694 	while (*to) {
2695 		char* begin;
2696 		if (*to == '/')
2697 			to++, length++;
2698 
2699 		begin = to;
2700 		while (*to != '/' && *to)
2701 			to++, length++;
2702 
2703 		if (to - begin > B_FILE_NAME_LENGTH)
2704 			return B_NAME_TOO_LONG;
2705 	}
2706 
2707 	if (length == 0)
2708 		return B_ENTRY_NOT_FOUND;
2709 
2710 	// complete path if there is a slash at the end
2711 
2712 	if (*(to - 1) == '/') {
2713 		if (length > B_PATH_NAME_LENGTH - 2)
2714 			return B_NAME_TOO_LONG;
2715 
2716 		to[0] = '.';
2717 		to[1] = '\0';
2718 	}
2719 
2720 	return B_OK;
2721 }
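
// check_path() by example (editor's sketch):
//
//	"/a/b/c"	-> B_OK, path unchanged
//	"/a/b/"		-> B_OK, path becomes "/a/b/." (trailing slash completed)
//	""			-> B_ENTRY_NOT_FOUND
//	a component longer than B_FILE_NAME_LENGTH -> B_NAME_TOO_LONG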
2722 
2723 
2724 static struct file_descriptor*
2725 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2726 {
2727 	struct file_descriptor* descriptor
2728 		= get_fd(get_current_io_context(kernel), fd);
2729 	if (descriptor == NULL)
2730 		return NULL;
2731 
2732 	struct vnode* vnode = fd_vnode(descriptor);
2733 	if (vnode == NULL) {
2734 		put_fd(descriptor);
2735 		return NULL;
2736 	}
2737 
2738 	// ToDo: when we can close a file descriptor at any point, investigate
2739 	//	if this is still valid to do (accessing the vnode without ref_count
2740 	//	or locking)
2741 	*_vnode = vnode;
2742 	return descriptor;
2743 }
2744 
2745 
2746 static struct vnode*
2747 get_vnode_from_fd(int fd, bool kernel)
2748 {
2749 	struct file_descriptor* descriptor;
2750 	struct vnode* vnode;
2751 
2752 	descriptor = get_fd(get_current_io_context(kernel), fd);
2753 	if (descriptor == NULL)
2754 		return NULL;
2755 
2756 	vnode = fd_vnode(descriptor);
2757 	if (vnode != NULL)
2758 		inc_vnode_ref_count(vnode);
2759 
2760 	put_fd(descriptor);
2761 	return vnode;
2762 }
2763 
2764 
2765 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2766 	only the path will be considered. In this case, the \a path must not be
2767 	NULL.
2768 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2769 	and should be NULL for files.
2770 */
2771 static status_t
2772 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2773 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2774 {
2775 	if (fd < 0 && !path)
2776 		return B_BAD_VALUE;
2777 
2778 	if (path != NULL && *path == '\0')
2779 		return B_ENTRY_NOT_FOUND;
2780 
2781 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2782 		// no FD or absolute path
2783 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2784 	}
2785 
2786 	// FD only, or FD + relative path
2787 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2788 	if (vnode == NULL)
2789 		return B_FILE_ERROR;
2790 
2791 	if (path != NULL) {
2792 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2793 			_vnode, _parentID);
2794 	}
2795 
2796 	// there is no relative path to take into account
2797 
2798 	*_vnode = vnode;
2799 	if (_parentID)
2800 		*_parentID = -1;
2801 
2802 	return B_OK;
2803 }
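
// Dispatch summary for fd_and_path_to_vnode() (editor's sketch):
//
//	fd		path		resolved via
//	< 0		NULL		B_BAD_VALUE
//	< 0		"/x/y"		path_to_vnode() (absolute)
//	< 0		"x/y"		path_to_vnode() (relative to the cwd)
//	valid	"/x/y"		path_to_vnode() (the FD is ignored)
//	valid	"x/y"		vnode_path_to_vnode() starting at the FD's vnode
//	valid	NULL		the FD's vnode itself (parent ID set to -1)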
2804 
2805 
2806 static int
2807 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2808 	void* cookie, int openMode, bool kernel)
2809 {
2810 	struct file_descriptor* descriptor;
2811 	int fd;
2812 
2813 	// If the vnode is locked, we don't allow creating a new file/directory
2814 	// file_descriptor for it
2815 	if (vnode && vnode->mandatory_locked_by != NULL
2816 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2817 		return B_BUSY;
2818 
2819 	descriptor = alloc_fd();
2820 	if (!descriptor)
2821 		return B_NO_MEMORY;
2822 
2823 	if (vnode)
2824 		descriptor->u.vnode = vnode;
2825 	else
2826 		descriptor->u.mount = mount;
2827 	descriptor->cookie = cookie;
2828 
2829 	switch (type) {
2830 		// vnode types
2831 		case FDTYPE_FILE:
2832 			descriptor->ops = &sFileOps;
2833 			break;
2834 		case FDTYPE_DIR:
2835 			descriptor->ops = &sDirectoryOps;
2836 			break;
2837 		case FDTYPE_ATTR:
2838 			descriptor->ops = &sAttributeOps;
2839 			break;
2840 		case FDTYPE_ATTR_DIR:
2841 			descriptor->ops = &sAttributeDirectoryOps;
2842 			break;
2843 
2844 		// mount types
2845 		case FDTYPE_INDEX_DIR:
2846 			descriptor->ops = &sIndexDirectoryOps;
2847 			break;
2848 		case FDTYPE_QUERY:
2849 			descriptor->ops = &sQueryOps;
2850 			break;
2851 
2852 		default:
2853 			panic("get_new_fd() called with unknown type %d\n", type);
2854 			break;
2855 	}
2856 	descriptor->type = type;
2857 	descriptor->open_mode = openMode;
2858 
2859 	io_context* context = get_current_io_context(kernel);
2860 	fd = new_fd(context, descriptor);
2861 	if (fd < 0) {
2862 		free(descriptor);
2863 		return B_NO_MORE_FDS;
2864 	}
2865 
2866 	mutex_lock(&context->io_mutex);
2867 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2868 	mutex_unlock(&context->io_mutex);
2869 
2870 	return fd;
2871 }
2872 
2873 
2874 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2875 	vfs_normalize_path(). See there for more documentation.
2876 */
2877 static status_t
2878 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2879 {
2880 	VNodePutter dirPutter;
2881 	struct vnode* dir = NULL;
2882 	status_t error;
2883 
2884 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2885 		// get dir vnode + leaf name
2886 		struct vnode* nextDir;
2887 		char leaf[B_FILE_NAME_LENGTH];
2888 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2889 		if (error != B_OK)
2890 			return error;
2891 
2892 		dir = nextDir;
2893 		strcpy(path, leaf);
2894 		dirPutter.SetTo(dir);
2895 
2896 		// get file vnode, if we shall resolve links
2897 		bool fileExists = false;
2898 		struct vnode* fileVnode;
2899 		VNodePutter fileVnodePutter;
2900 		if (traverseLink) {
2901 			inc_vnode_ref_count(dir);
2902 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2903 					NULL) == B_OK) {
2904 				fileVnodePutter.SetTo(fileVnode);
2905 				fileExists = true;
2906 			}
2907 		}
2908 
2909 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2910 			// we're done -- construct the path
2911 			bool hasLeaf = true;
2912 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2913 				// special cases "." and ".." -- get the dir, forget the leaf
2914 				inc_vnode_ref_count(dir);
2915 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2916 					&nextDir, NULL);
2917 				if (error != B_OK)
2918 					return error;
2919 				dir = nextDir;
2920 				dirPutter.SetTo(dir);
2921 				hasLeaf = false;
2922 			}
2923 
2924 			// get the directory path
2925 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2926 			if (error != B_OK)
2927 				return error;
2928 
2929 			// append the leaf name
2930 			if (hasLeaf) {
2931 				// insert a directory separator if this is not the file system
2932 				// root
2933 				if ((strcmp(path, "/") != 0
2934 					&& strlcat(path, "/", pathSize) >= pathSize)
2935 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2936 					return B_NAME_TOO_LONG;
2937 				}
2938 			}
2939 
2940 			return B_OK;
2941 		}
2942 
2943 		// read link
2944 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2945 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2946 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2947 			if (error != B_OK)
2948 				return error;
2949 			path[bufferSize] = '\0';
2950 		} else
2951 			return B_BAD_VALUE;
2952 	}
2953 
2954 	return B_LINK_LIMIT;
2955 }
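
// Example of what normalize_path() produces (editor's sketch, assuming
// "/boot/home/Desktop" exists): a buffer containing
//
//	"/boot/home/../home//Desktop/"
//
// is resolved dir + leaf pair by pair and rebuilt as "/boot/home/Desktop".
// With traverseLink == true, a leaf that is a symlink to, say, "/boot"
// would be replaced by its target in the next loop iteration, yielding
// "/boot".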
2956 
2957 
2958 static status_t
2959 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2960 	struct io_context* ioContext)
2961 {
2962 	// Make sure the IO context root is not bypassed.
2963 	if (parent == ioContext->root) {
2964 		*_device = parent->device;
2965 		*_node = parent->id;
2966 		return B_OK;
2967 	}
2968 
2969 	inc_vnode_ref_count(parent);
2970 		// vnode_path_to_vnode() puts the node
2971 
2972 	// ".." is guaranteed not to be clobbered by this call
2973 	struct vnode* vnode;
2974 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2975 		ioContext, &vnode, NULL);
2976 	if (status == B_OK) {
2977 		*_device = vnode->device;
2978 		*_node = vnode->id;
2979 		put_vnode(vnode);
2980 	}
2981 
2982 	return status;
2983 }
2984 
2985 
2986 #ifdef ADD_DEBUGGER_COMMANDS
2987 
2988 
2989 static void
2990 _dump_advisory_locking(advisory_locking* locking)
2991 {
2992 	if (locking == NULL)
2993 		return;
2994 
2995 	kprintf("   lock:        %" B_PRId32, locking->lock);
2996 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2997 
2998 	int32 index = 0;
2999 	LockList::Iterator iterator = locking->locks.GetIterator();
3000 	while (iterator.HasNext()) {
3001 		struct advisory_lock* lock = iterator.Next();
3002 
3003 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3004 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3005 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3006 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3007 	}
3008 }
3009 
3010 
3011 static void
3012 _dump_mount(struct fs_mount* mount)
3013 {
3014 	kprintf("MOUNT: %p\n", mount);
3015 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3016 	kprintf(" device_name:   %s\n", mount->device_name);
3017 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3018 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3019 	kprintf(" partition:     %p\n", mount->partition);
3020 	kprintf(" lock:          %p\n", &mount->lock);
3021 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3022 		mount->owns_file_device ? " owns_file_device" : "");
3023 
3024 	fs_volume* volume = mount->volume;
3025 	while (volume != NULL) {
3026 		kprintf(" volume %p:\n", volume);
3027 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3028 		kprintf("  private_volume:   %p\n", volume->private_volume);
3029 		kprintf("  ops:              %p\n", volume->ops);
3030 		kprintf("  file_system:      %p\n", volume->file_system);
3031 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3032 		volume = volume->super_volume;
3033 	}
3034 
3035 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3036 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3037 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3038 	set_debug_variable("_partition", (addr_t)mount->partition);
3039 }
3040 
3041 
3042 static bool
3043 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3044 	const char* name)
3045 {
3046 	bool insertSlash = buffer[bufferSize] != '\0';
3047 	size_t nameLength = strlen(name);
3048 
3049 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3050 		return false;
3051 
3052 	if (insertSlash)
3053 		buffer[--bufferSize] = '/';
3054 
3055 	bufferSize -= nameLength;
3056 	memcpy(buffer + bufferSize, name, nameLength);
3057 
3058 	return true;
3059 }
3060 
3061 
3062 static bool
3063 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3064 	ino_t nodeID)
3065 {
3066 	if (bufferSize == 0)
3067 		return false;
3068 
3069 	bool insertSlash = buffer[bufferSize] != '\0';
3070 	if (insertSlash)
3071 		buffer[--bufferSize] = '/';
3072 
3073 	size_t size = snprintf(buffer, bufferSize,
3074 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3075 	if (size > bufferSize) {
3076 		if (insertSlash)
3077 			bufferSize++;
3078 		return false;
3079 	}
3080 
3081 	if (size < bufferSize)
3082 		memmove(buffer + bufferSize - size, buffer, size);
3083 
3084 	bufferSize -= size;
3085 	return true;
3086 }
3087 
3088 
3089 static char*
3090 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3091 	bool& _truncated)
3092 {
3093 	// null-terminate the path
3094 	buffer[--bufferSize] = '\0';
3095 
3096 	while (true) {
3097 		while (vnode->covers != NULL)
3098 			vnode = vnode->covers;
3099 
3100 		if (vnode == sRoot) {
3101 			_truncated = bufferSize == 0;
3102 			if (!_truncated)
3103 				buffer[--bufferSize] = '/';
3104 			return buffer + bufferSize;
3105 		}
3106 
3107 		// resolve the name
3108 		ino_t dirID;
3109 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3110 			vnode->id, dirID);
3111 		if (name == NULL) {
3112 			// Failed to resolve the name -- prepend "<dev,node>/".
3113 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3114 				vnode->mount->id, vnode->id);
3115 			return buffer + bufferSize;
3116 		}
3117 
3118 		// prepend the name
3119 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3120 			_truncated = true;
3121 			return buffer + bufferSize;
3122 		}
3123 
3124 		// resolve the directory node
3125 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3126 		if (nextVnode == NULL) {
3127 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3128 				vnode->mount->id, dirID);
3129 			return buffer + bufferSize;
3130 		}
3131 
3132 		vnode = nextVnode;
3133 	}
3134 }
3135 
3136 
3137 static void
3138 _dump_vnode(struct vnode* vnode, bool printPath)
3139 {
3140 	kprintf("VNODE: %p\n", vnode);
3141 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3142 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3143 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3144 	kprintf(" private_node:  %p\n", vnode->private_node);
3145 	kprintf(" mount:         %p\n", vnode->mount);
3146 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3147 	kprintf(" covers:        %p\n", vnode->covers);
3148 	kprintf(" cache:         %p\n", vnode->cache);
3149 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3150 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3151 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3152 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3153 
3154 	_dump_advisory_locking(vnode->advisory_locking);
3155 
3156 	if (printPath) {
3157 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3158 		if (buffer != NULL) {
3159 			bool truncated;
3160 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3161 				B_PATH_NAME_LENGTH, truncated);
3162 			if (path != NULL) {
3163 				kprintf(" path:          ");
3164 				if (truncated)
3165 					kputs("<truncated>/");
3166 				kputs(path);
3167 				kputs("\n");
3168 			} else
3169 				kprintf("Failed to resolve vnode path.\n");
3170 
3171 			debug_free(buffer);
3172 		} else
3173 			kprintf("Failed to allocate memory for constructing the path.\n");
3174 	}
3175 
3176 	set_debug_variable("_node", (addr_t)vnode->private_node);
3177 	set_debug_variable("_mount", (addr_t)vnode->mount);
3178 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3179 	set_debug_variable("_covers", (addr_t)vnode->covers);
3180 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3181 }
3182 
3183 
3184 static int
3185 dump_mount(int argc, char** argv)
3186 {
3187 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3188 		kprintf("usage: %s [id|address]\n", argv[0]);
3189 		return 0;
3190 	}
3191 
3192 	ulong val = parse_expression(argv[1]);
3193 	uint32 id = val;
3194 
3195 	struct fs_mount* mount = sMountsTable->Lookup(id);
3196 	if (mount == NULL) {
3197 		if (IS_USER_ADDRESS(id)) {
3198 			kprintf("fs_mount not found\n");
3199 			return 0;
3200 		}
3201 		mount = (fs_mount*)val;
3202 	}
3203 
3204 	_dump_mount(mount);
3205 	return 0;
3206 }
3207 
3208 
3209 static int
3210 dump_mounts(int argc, char** argv)
3211 {
3212 	if (argc != 1) {
3213 		kprintf("usage: %s\n", argv[0]);
3214 		return 0;
3215 	}
3216 
3217 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3218 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3219 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3220 
3221 	struct fs_mount* mount;
3222 
3223 	MountTable::Iterator iterator(sMountsTable);
3224 	while (iterator.HasNext()) {
3225 		mount = iterator.Next();
3226 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3227 			mount->root_vnode->covers, mount->volume->private_volume,
3228 			mount->volume->file_system_name);
3229 
3230 		fs_volume* volume = mount->volume;
3231 		while (volume->super_volume != NULL) {
3232 			volume = volume->super_volume;
3233 			kprintf("                                     %p %s\n",
3234 				volume->private_volume, volume->file_system_name);
3235 		}
3236 	}
3237 
3238 	return 0;
3239 }
3240 
3241 
3242 static int
3243 dump_vnode(int argc, char** argv)
3244 {
3245 	bool printPath = false;
3246 	int argi = 1;
3247 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3248 		printPath = true;
3249 		argi++;
3250 	}
3251 
3252 	if (argi >= argc || argi + 2 < argc) {
3253 		print_debugger_command_usage(argv[0]);
3254 		return 0;
3255 	}
3256 
3257 	struct vnode* vnode = NULL;
3258 
3259 	if (argi + 1 == argc) {
3260 		vnode = (struct vnode*)parse_expression(argv[argi]);
3261 		if (IS_USER_ADDRESS(vnode)) {
3262 			kprintf("invalid vnode address\n");
3263 			return 0;
3264 		}
3265 		_dump_vnode(vnode, printPath);
3266 		return 0;
3267 	}
3268 
3269 	dev_t device = parse_expression(argv[argi]);
3270 	ino_t id = parse_expression(argv[argi + 1]);
3271 
3272 	VnodeTable::Iterator iterator(sVnodeTable);
3273 	while (iterator.HasNext()) {
3274 		vnode = iterator.Next();
3275 		if (vnode->id != id || vnode->device != device)
3276 			continue;
3277 
3278 		_dump_vnode(vnode, printPath);
3279 	}
3280 
3281 	return 0;
3282 }
3283 
3284 
3285 static int
3286 dump_vnodes(int argc, char** argv)
3287 {
3288 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3289 		kprintf("usage: %s [device]\n", argv[0]);
3290 		return 0;
3291 	}
3292 
3293 	// restrict dumped nodes to a certain device if requested
3294 	dev_t device = parse_expression(argv[1]);
3295 
3296 	struct vnode* vnode;
3297 
3298 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3299 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3300 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3301 
3302 	VnodeTable::Iterator iterator(sVnodeTable);
3303 	while (iterator.HasNext()) {
3304 		vnode = iterator.Next();
3305 		if (vnode->device != device)
3306 			continue;
3307 
3308 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3309 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3310 			vnode->private_node, vnode->advisory_locking,
3311 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3312 			vnode->IsUnpublished() ? "u" : "-");
3313 	}
3314 
3315 	return 0;
3316 }
3317 
3318 
3319 static int
3320 dump_vnode_caches(int argc, char** argv)
3321 {
3322 	struct vnode* vnode;
3323 
3324 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3325 		kprintf("usage: %s [device]\n", argv[0]);
3326 		return 0;
3327 	}
3328 
3329 	// restrict dumped nodes to a certain device if requested
3330 	dev_t device = -1;
3331 	if (argc > 1)
3332 		device = parse_expression(argv[1]);
3333 
3334 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3335 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3336 
3337 	VnodeTable::Iterator iterator(sVnodeTable);
3338 	while (iterator.HasNext()) {
3339 		vnode = iterator.Next();
3340 		if (vnode->cache == NULL)
3341 			continue;
3342 		if (device != -1 && vnode->device != device)
3343 			continue;
3344 
3345 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3346 			vnode, vnode->device, vnode->id, vnode->cache,
3347 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3348 			vnode->cache->page_count);
3349 	}
3350 
3351 	return 0;
3352 }
3353 
3354 
3355 int
3356 dump_io_context(int argc, char** argv)
3357 {
3358 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3359 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3360 		return 0;
3361 	}
3362 
3363 	struct io_context* context = NULL;
3364 
3365 	if (argc > 1) {
3366 		ulong num = parse_expression(argv[1]);
3367 		if (IS_KERNEL_ADDRESS(num))
3368 			context = (struct io_context*)num;
3369 		else {
3370 			Team* team = team_get_team_struct_locked(num);
3371 			if (team == NULL) {
3372 				kprintf("could not find team with ID %lu\n", num);
3373 				return 0;
3374 			}
3375 			context = (struct io_context*)team->io_context;
3376 		}
3377 	} else
3378 		context = get_current_io_context(true);
3379 
3380 	kprintf("I/O CONTEXT: %p\n", context);
3381 	kprintf(" root vnode:\t%p\n", context->root);
3382 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3383 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3384 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3385 
3386 	if (context->num_used_fds) {
3387 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3388 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3389 	}
3390 
3391 	for (uint32 i = 0; i < context->table_size; i++) {
3392 		struct file_descriptor* fd = context->fds[i];
3393 		if (fd == NULL)
3394 			continue;
3395 
3396 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3397 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3398 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3399 			fd->pos, fd->cookie,
3400 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3401 				? "mount" : "vnode",
3402 			fd->u.vnode);
3403 	}
3404 
3405 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3406 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3407 
3408 	set_debug_variable("_cwd", (addr_t)context->cwd);
3409 
3410 	return 0;
3411 }
3412 
3413 
3414 int
3415 dump_vnode_usage(int argc, char** argv)
3416 {
3417 	if (argc != 1) {
3418 		kprintf("usage: %s\n", argv[0]);
3419 		return 0;
3420 	}
3421 
3422 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3423 		sUnusedVnodes, kMaxUnusedVnodes);
3424 
3425 	uint32 count = sVnodeTable->CountElements();
3426 
3427 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3428 		count - sUnusedVnodes);
3429 	return 0;
3430 }
3431 
3432 #endif	// ADD_DEBUGGER_COMMANDS
3433 
3434 
3435 /*!	Clears memory specified by an iovec array.
3436 */
3437 static void
3438 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3439 {
3440 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3441 		size_t length = std::min(vecs[i].iov_len, bytes);
3442 		memset(vecs[i].iov_base, 0, length);
3443 		bytes -= length;
3444 	}
3445 }
3446 
3447 
3448 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3449 	and calls the file system hooks to read/write the request to disk.
3450 */
3451 static status_t
3452 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3453 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3454 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3455 	bool doWrite)
3456 {
3457 	if (fileVecCount == 0) {
3458 		// There are no file vecs at this offset, so we're obviously trying
3459 		// to access the file outside of its bounds
3460 		return B_BAD_VALUE;
3461 	}
3462 
3463 	size_t numBytes = *_numBytes;
3464 	uint32 fileVecIndex;
3465 	size_t vecOffset = *_vecOffset;
3466 	uint32 vecIndex = *_vecIndex;
3467 	status_t status;
3468 	size_t size;
3469 
3470 	if (!doWrite && vecOffset == 0) {
3471 		// now read the data directly from the device:
3472 		// the first file_io_vec can be read without splitting it up
3473 
3474 		if (fileVecs[0].length < (off_t)numBytes)
3475 			size = fileVecs[0].length;
3476 		else
3477 			size = numBytes;
3478 
3479 		if (fileVecs[0].offset >= 0) {
3480 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3481 				&vecs[vecIndex], vecCount - vecIndex, &size);
3482 		} else {
3483 			// sparse read
3484 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3485 			status = B_OK;
3486 		}
3487 		if (status != B_OK)
3488 			return status;
3489 
3490 		// TODO: this is a work-around for buggy device drivers!
3491 		//	When our own drivers honour the length, we can:
3492 		//	a) also use this direct I/O for writes (otherwise, it would
3493 		//	   overwrite precious data)
3494 		//	b) panic if the term below is true (at least for writes)
3495 		if ((off_t)size > fileVecs[0].length) {
3496 			//dprintf("warning: device driver %p doesn't respect total length "
3497 			//	"in read_pages() call!\n", ref->device);
3498 			size = fileVecs[0].length;
3499 		}
3500 
3501 		ASSERT((off_t)size <= fileVecs[0].length);
3502 
3503 		// If the file portion was contiguous, we're already done now
3504 		if (size == numBytes)
3505 			return B_OK;
3506 
3507 		// if we reached the end of the file, we can return as well
3508 		if ((off_t)size != fileVecs[0].length) {
3509 			*_numBytes = size;
3510 			return B_OK;
3511 		}
3512 
3513 		fileVecIndex = 1;
3514 
3515 		// first, find out where we have to continue in our iovecs
3516 		for (; vecIndex < vecCount; vecIndex++) {
3517 			if (size < vecs[vecIndex].iov_len)
3518 				break;
3519 
3520 			size -= vecs[vecIndex].iov_len;
3521 		}
3522 
3523 		vecOffset = size;
3524 	} else {
3525 		fileVecIndex = 0;
3526 		size = 0;
3527 	}
3528 
3529 	// Too bad, let's process the rest of the file_io_vecs
3530 
3531 	size_t totalSize = size;
3532 	size_t bytesLeft = numBytes - size;
3533 
3534 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3535 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3536 		off_t fileOffset = fileVec.offset;
3537 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3538 
3539 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3540 			fileLeft));
3541 
3542 		// process the complete fileVec
3543 		while (fileLeft > 0) {
3544 			iovec tempVecs[MAX_TEMP_IO_VECS];
3545 			uint32 tempCount = 0;
3546 
3547 			// size tracks how much of what is left of the current fileVec
3548 			// (fileLeft) has been assigned to tempVecs
3549 			size = 0;
3550 
3551 			// assign what is left of the current fileVec to the tempVecs
3552 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3553 					&& tempCount < MAX_TEMP_IO_VECS;) {
3554 				// try to satisfy one iovec per iteration (or as much as
3555 				// possible)
3556 
3557 				// bytes left of the current iovec
3558 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3559 				if (vecLeft == 0) {
3560 					vecOffset = 0;
3561 					vecIndex++;
3562 					continue;
3563 				}
3564 
3565 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3566 					vecIndex, vecOffset, size));
3567 
3568 				// actually available bytes
3569 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3570 
3571 				tempVecs[tempCount].iov_base
3572 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3573 				tempVecs[tempCount].iov_len = tempVecSize;
3574 				tempCount++;
3575 
3576 				size += tempVecSize;
3577 				vecOffset += tempVecSize;
3578 			}
3579 
3580 			size_t bytes = size;
3581 
3582 			if (fileOffset == -1) {
3583 				if (doWrite) {
3584 					panic("sparse write attempt: vnode %p", vnode);
3585 					status = B_IO_ERROR;
3586 				} else {
3587 					// sparse read
3588 					zero_iovecs(tempVecs, tempCount, bytes);
3589 					status = B_OK;
3590 				}
3591 			} else if (doWrite) {
3592 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3593 					tempVecs, tempCount, &bytes);
3594 			} else {
3595 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3596 					tempVecs, tempCount, &bytes);
3597 			}
3598 			if (status != B_OK)
3599 				return status;
3600 
3601 			totalSize += bytes;
3602 			bytesLeft -= size;
3603 			if (fileOffset >= 0)
3604 				fileOffset += size;
3605 			fileLeft -= size;
3606 			//dprintf("-> file left = %Lu\n", fileLeft);
3607 
3608 			if (size != bytes || vecIndex >= vecCount) {
3609 				// there are no more bytes or iovecs, let's bail out
3610 				*_numBytes = totalSize;
3611 				return B_OK;
3612 			}
3613 		}
3614 	}
3615 
3616 	*_vecIndex = vecIndex;
3617 	*_vecOffset = vecOffset;
3618 	*_numBytes = totalSize;
3619 	return B_OK;
3620 }
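
// Worked example for common_file_io_vec_pages() (editor's sketch): a read
// of numBytes = 8192 into a single 8192 byte iovec, against the file vecs
//
//	fileVecs[0] = { offset = 1000, length = 4096 }
//	fileVecs[1] = { offset = 9000, length = 4096 }
//
// first reads 4096 bytes directly at offset 1000 (the fast path above),
// notices that the first file run is exhausted while bytes remain, and then
// builds tempVecs covering the second half of the iovec (vecOffset = 4096)
// for the read_pages() call at offset 9000. A sparse run (offset == -1)
// would be zero-filled via zero_iovecs() instead.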
3621 
3622 
3623 static bool
3624 is_user_in_group(gid_t gid)
3625 {
3626 	if (gid == getegid())
3627 		return true;
3628 
3629 	gid_t groups[NGROUPS_MAX];
3630 	int groupCount = getgroups(NGROUPS_MAX, groups);
3631 	for (int i = 0; i < groupCount; i++) {
3632 		if (gid == groups[i])
3633 			return true;
3634 	}
3635 
3636 	return false;
3637 }
3638 
3639 
3640 static status_t
3641 free_io_context(io_context* context)
3642 {
3643 	uint32 i;
3644 
3645 	TIOC(FreeIOContext(context));
3646 
3647 	if (context->root)
3648 		put_vnode(context->root);
3649 
3650 	if (context->cwd)
3651 		put_vnode(context->cwd);
3652 
3653 	mutex_lock(&context->io_mutex);
3654 
3655 	for (i = 0; i < context->table_size; i++) {
3656 		if (struct file_descriptor* descriptor = context->fds[i]) {
3657 			close_fd(context, descriptor);
3658 			put_fd(descriptor);
3659 		}
3660 	}
3661 
3662 	mutex_destroy(&context->io_mutex);
3663 
3664 	remove_node_monitors(context);
3665 	free(context->fds);
3666 	free(context);
3667 
3668 	return B_OK;
3669 }
3670 
3671 
3672 static status_t
3673 resize_monitor_table(struct io_context* context, const int newSize)
3674 {
3675 	status_t status = B_OK;
3676 
3677 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3678 		return B_BAD_VALUE;
3679 
3680 	mutex_lock(&context->io_mutex);
3681 
3682 	if ((size_t)newSize < context->num_monitors) {
3683 		status = B_BUSY;
3684 		goto out;
3685 	}
3686 	context->max_monitors = newSize;
3687 
3688 out:
3689 	mutex_unlock(&context->io_mutex);
3690 	return status;
3691 }
3692 
3693 
3694 //	#pragma mark - public API for file systems
3695 
3696 
3697 extern "C" status_t
3698 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3699 	fs_vnode_ops* ops)
3700 {
3701 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3702 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3703 
3704 	if (privateNode == NULL)
3705 		return B_BAD_VALUE;
3706 
3707 	int32 tries = BUSY_VNODE_RETRIES;
3708 restart:
3709 	// create the node
3710 	bool nodeCreated;
3711 	struct vnode* vnode;
3712 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3713 		nodeCreated);
3714 	if (status != B_OK)
3715 		return status;
3716 
3717 	WriteLocker nodeLocker(sVnodeLock, true);
3718 		// create_new_vnode_and_lock() has locked for us
3719 
3720 	if (!nodeCreated && vnode->IsBusy()) {
3721 		nodeLocker.Unlock();
3722 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3723 			return B_BUSY;
3724 		goto restart;
3725 	}
3726 
3727 	// file system integrity check:
3728 	// test if the vnode already exists and bail out if this is the case!
3729 	if (!nodeCreated) {
3730 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3731 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3732 			vnode->private_node);
3733 		return B_ERROR;
3734 	}
3735 
3736 	vnode->private_node = privateNode;
3737 	vnode->ops = ops;
3738 	vnode->SetUnpublished(true);
3739 
3740 	TRACE(("returns: %s\n", strerror(status)));
3741 
3742 	return status;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3748 	fs_vnode_ops* ops, int type, uint32 flags)
3749 {
3750 	FUNCTION(("publish_vnode()\n"));
3751 
3752 	int32 tries = BUSY_VNODE_RETRIES;
3753 restart:
3754 	WriteLocker locker(sVnodeLock);
3755 
3756 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3757 
3758 	bool nodeCreated = false;
3759 	if (vnode == NULL) {
3760 		if (privateNode == NULL)
3761 			return B_BAD_VALUE;
3762 
3763 		// create the node
3764 		locker.Unlock();
3765 			// create_new_vnode_and_lock() will re-lock for us on success
3766 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3767 			nodeCreated);
3768 		if (status != B_OK)
3769 			return status;
3770 
3771 		locker.SetTo(sVnodeLock, true);
3772 	}
3773 
3774 	if (nodeCreated) {
3775 		vnode->private_node = privateNode;
3776 		vnode->ops = ops;
3777 		vnode->SetUnpublished(true);
3778 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3779 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3780 		// already known, but not published
3781 	} else if (vnode->IsBusy()) {
3782 		locker.Unlock();
3783 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3784 			return B_BUSY;
3785 		goto restart;
3786 	} else
3787 		return B_BAD_VALUE;
3788 
3789 	bool publishSpecialSubNode = false;
3790 
3791 	vnode->SetType(type);
3792 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3793 	publishSpecialSubNode = is_special_node_type(type)
3794 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3795 
3796 	status_t status = B_OK;
3797 
3798 	// create sub vnodes, if necessary
3799 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3800 		locker.Unlock();
3801 
3802 		fs_volume* subVolume = volume;
3803 		if (volume->sub_volume != NULL) {
3804 			while (status == B_OK && subVolume->sub_volume != NULL) {
3805 				subVolume = subVolume->sub_volume;
3806 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3807 					vnode);
3808 			}
3809 		}
3810 
3811 		if (status == B_OK && publishSpecialSubNode)
3812 			status = create_special_sub_node(vnode, flags);
3813 
3814 		if (status != B_OK) {
3815 			// error -- clean up the created sub vnodes
3816 			while (subVolume->super_volume != volume) {
3817 				subVolume = subVolume->super_volume;
3818 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3819 			}
3820 		}
3821 
3822 		if (status == B_OK) {
3823 			ReadLocker vnodesReadLocker(sVnodeLock);
3824 			AutoLocker<Vnode> nodeLocker(vnode);
3825 			vnode->SetBusy(false);
3826 			vnode->SetUnpublished(false);
3827 		} else {
3828 			locker.Lock();
3829 			sVnodeTable->Remove(vnode);
3830 			remove_vnode_from_mount_list(vnode, vnode->mount);
3831 			free(vnode);
3832 		}
3833 	} else {
3834 		// we still hold the write lock -- mark the node unbusy and published
3835 		vnode->SetBusy(false);
3836 		vnode->SetUnpublished(false);
3837 	}
3838 
3839 	TRACE(("returns: %s\n", strerror(status)));
3840 
3841 	return status;
3842 }
3843 
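// Usage sketch (illustrative only, not part of this file): a file system
// typically publishes its root node from its mount hook. my_fs_mount(),
// my_inode, allocate_root_inode() and gMyVnodeOps are hypothetical names;
// the hook signature is assumed to match file_system_module_info::mount.
#if 0
static status_t
my_fs_mount(fs_volume* volume, const char* device, uint32 flags,
	const char* args, ino_t* _rootID)
{
	my_inode* root = allocate_root_inode();
	if (root == NULL)
		return B_NO_MEMORY;

	// hands the private node to the VFS; the type must match the node's
	// stat mode -- the root node is published directly, without new_vnode()
	status_t status = publish_vnode(volume, root->id, root, &gMyVnodeOps,
		S_IFDIR, 0);
	if (status != B_OK)
		return status;

	*_rootID = root->id;
	return B_OK;
}
#endif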
3844 
3845 extern "C" status_t
3846 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3847 {
3848 	struct vnode* vnode;
3849 
3850 	if (volume == NULL)
3851 		return B_BAD_VALUE;
3852 
3853 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3854 	if (status != B_OK)
3855 		return status;
3856 
3857 	// If this is a layered FS, we need to get the node cookie for the requested
3858 	// layer.
3859 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3860 		fs_vnode resolvedNode;
3861 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3862 			&resolvedNode);
3863 		if (status != B_OK) {
3864 			panic("get_vnode(): Failed to get super node for vnode %p, "
3865 				"volume: %p", vnode, volume);
3866 			put_vnode(vnode);
3867 			return status;
3868 		}
3869 
3870 		if (_privateNode != NULL)
3871 			*_privateNode = resolvedNode.private_node;
3872 	} else if (_privateNode != NULL)
3873 		*_privateNode = vnode->private_node;
3874 
3875 	return B_OK;
3876 }
3877 
3878 
3879 extern "C" status_t
3880 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3881 {
3882 	struct vnode* vnode;
3883 
3884 	rw_lock_read_lock(&sVnodeLock);
3885 	vnode = lookup_vnode(volume->id, vnodeID);
3886 	rw_lock_read_unlock(&sVnodeLock);
3887 
3888 	if (vnode == NULL)
3889 		return B_BAD_VALUE;
3890 
3891 	inc_vnode_ref_count(vnode);
3892 	return B_OK;
3893 }
3894 
3895 
3896 extern "C" status_t
3897 put_vnode(fs_volume* volume, ino_t vnodeID)
3898 {
3899 	struct vnode* vnode;
3900 
3901 	rw_lock_read_lock(&sVnodeLock);
3902 	vnode = lookup_vnode(volume->id, vnodeID);
3903 	rw_lock_read_unlock(&sVnodeLock);
3904 
3905 	if (vnode == NULL)
3906 		return B_BAD_VALUE;
3907 
3908 	dec_vnode_ref_count(vnode, false, true);
3909 	return B_OK;
3910 }
3911 
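// Usage sketch (illustrative only): a file system temporarily working with
// another node of its own volume must balance every successful get_vnode()
// with a put_vnode(). my_inode and someID are hypothetical.
#if 0
my_inode* inode;
if (get_vnode(volume, someID, (void**)&inode) == B_OK) {
	// ... use inode ...
	put_vnode(volume, someID);
}
#endif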
3912 
3913 extern "C" status_t
3914 remove_vnode(fs_volume* volume, ino_t vnodeID)
3915 {
3916 	ReadLocker locker(sVnodeLock);
3917 
3918 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3919 	if (vnode == NULL)
3920 		return B_ENTRY_NOT_FOUND;
3921 
3922 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3923 		// this vnode is in use
3924 		return B_BUSY;
3925 	}
3926 
3927 	vnode->Lock();
3928 
3929 	vnode->SetRemoved(true);
3930 	bool removeUnpublished = false;
3931 
3932 	if (vnode->IsUnpublished()) {
3933 		// prepare the vnode for deletion
3934 		removeUnpublished = true;
3935 		vnode->SetBusy(true);
3936 	}
3937 
3938 	vnode->Unlock();
3939 	locker.Unlock();
3940 
3941 	if (removeUnpublished) {
3942 		// If the vnode hasn't been published yet, we delete it here
3943 		atomic_add(&vnode->ref_count, -1);
3944 		free_vnode(vnode, true);
3945 	}
3946 
3947 	return B_OK;
3948 }
3949 
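// Usage sketch (illustrative only): an FS's unlink() hook typically removes
// the directory entry itself and then asks the VFS to delete the node once
// the last reference to it is gone. my_fs_unlink() and remove_entry() are
// hypothetical names.
#if 0
static status_t
my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
{
	ino_t id;
	status_t status = remove_entry(dir, name, &id);
	if (status != B_OK)
		return status;

	// marks the node removed; it is actually deleted when the last
	// reference to it has been released
	return remove_vnode(volume, id);
}
#endif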
3950 
3951 extern "C" status_t
3952 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3953 {
3954 	struct vnode* vnode;
3955 
3956 	rw_lock_read_lock(&sVnodeLock);
3957 
3958 	vnode = lookup_vnode(volume->id, vnodeID);
3959 	if (vnode) {
3960 		AutoLocker<Vnode> nodeLocker(vnode);
3961 		vnode->SetRemoved(false);
3962 	}
3963 
3964 	rw_lock_read_unlock(&sVnodeLock);
3965 	return B_OK;
3966 }
3967 
3968 
3969 extern "C" status_t
3970 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3971 {
3972 	ReadLocker _(sVnodeLock);
3973 
3974 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3975 		if (_removed != NULL)
3976 			*_removed = vnode->IsRemoved();
3977 		return B_OK;
3978 	}
3979 
3980 	return B_BAD_VALUE;
3981 }
3982 
3983 
3984 extern "C" status_t
3985 mark_vnode_busy(fs_volume* volume, ino_t vnodeID, bool busy)
3986 {
3987 	ReadLocker locker(sVnodeLock);
3988 
3989 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3990 	if (vnode == NULL)
3991 		return B_ENTRY_NOT_FOUND;
3992 
3993 	// are we trying to mark an already busy node busy again?
3994 	if (busy && vnode->IsBusy())
3995 		return B_BUSY;
3996 
3997 	vnode->Lock();
3998 	vnode->SetBusy(busy);
3999 	vnode->Unlock();
4000 
4001 	return B_OK;
4002 }
4003 
4004 
4005 extern "C" status_t
4006 change_vnode_id(fs_volume* volume, ino_t vnodeID, ino_t newID)
4007 {
4008 	WriteLocker locker(sVnodeLock);
4009 
4010 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
4011 	if (vnode == NULL)
4012 		return B_ENTRY_NOT_FOUND;
4013 
4014 	sVnodeTable->Remove(vnode);
4015 	vnode->id = newID;
4016 	sVnodeTable->Insert(vnode);
4017 
4018 	if (vnode->cache != NULL)
4019 		((VMVnodeCache*)vnode->cache)->SetVnodeID(newID);
4020 
4021 	return B_OK;
4022 }
4023 
4024 
4025 extern "C" fs_volume*
4026 volume_for_vnode(fs_vnode* _vnode)
4027 {
4028 	if (_vnode == NULL)
4029 		return NULL;
4030 
4031 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4032 	return vnode->mount->volume;
4033 }
4034 
4035 
4036 extern "C" status_t
4037 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4038 	uid_t nodeUserID)
4039 {
4040 	// get node permissions
4041 	int userPermissions = (mode & S_IRWXU) >> 6;
4042 	int groupPermissions = (mode & S_IRWXG) >> 3;
4043 	int otherPermissions = mode & S_IRWXO;
4044 
4045 	// get the node permissions for this uid/gid
4046 	int permissions = 0;
4047 	uid_t uid = geteuid();
4048 
4049 	if (uid == 0) {
4050 		// user is root
4051 		// root always has read/write permission, but at least one of the
4052 		// X bits must be set for execute permission
4053 		permissions = userPermissions | groupPermissions | otherPermissions
4054 			| S_IROTH | S_IWOTH;
4055 		if (S_ISDIR(mode))
4056 			permissions |= S_IXOTH;
4057 	} else if (uid == nodeUserID) {
4058 		// user is node owner
4059 		permissions = userPermissions;
4060 	} else if (is_user_in_group(nodeGroupID)) {
4061 		// user is in owning group
4062 		permissions = groupPermissions;
4063 	} else {
4064 		// user is one of the others
4065 		permissions = otherPermissions;
4066 	}
4067 
4068 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4069 }
4070 
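// Usage sketch (illustrative only): a file system with POSIX-style
// permissions can implement its access() hook directly on top of this
// helper. my_fs_access() and my_inode are hypothetical; the hook signature
// is assumed to match fs_vnode_ops::access.
#if 0
static status_t
my_fs_access(fs_volume* volume, fs_vnode* vnode, int accessMode)
{
	my_inode* inode = (my_inode*)vnode->private_node;
	return check_access_permissions(accessMode, inode->mode, inode->gid,
		inode->uid);
}
#endif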
4071 
4072 #if 0
4073 extern "C" status_t
4074 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4075 	size_t* _numBytes)
4076 {
4077 	struct file_descriptor* descriptor;
4078 	struct vnode* vnode;
4079 
4080 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4081 	if (descriptor == NULL)
4082 		return B_FILE_ERROR;
4083 
4084 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4085 		count, 0, _numBytes);
4086 
4087 	put_fd(descriptor);
4088 	return status;
4089 }
4090 
4091 
4092 extern "C" status_t
4093 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4094 	size_t* _numBytes)
4095 {
4096 	struct file_descriptor* descriptor;
4097 	struct vnode* vnode;
4098 
4099 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4100 	if (descriptor == NULL)
4101 		return B_FILE_ERROR;
4102 
4103 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4104 		count, 0, _numBytes);
4105 
4106 	put_fd(descriptor);
4107 	return status;
4108 }
4109 #endif
4110 
4111 
4112 extern "C" status_t
4113 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4114 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4115 	size_t* _bytes)
4116 {
4117 	struct file_descriptor* descriptor;
4118 	struct vnode* vnode;
4119 
4120 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4121 	if (descriptor == NULL)
4122 		return B_FILE_ERROR;
4123 
4124 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4125 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4126 		false);
4127 
4128 	put_fd(descriptor);
4129 	return status;
4130 }
4131 
4132 
4133 extern "C" status_t
4134 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4135 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4136 	size_t* _bytes)
4137 {
4138 	struct file_descriptor* descriptor;
4139 	struct vnode* vnode;
4140 
4141 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4142 	if (descriptor == NULL)
4143 		return B_FILE_ERROR;
4144 
4145 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4146 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4147 		true);
4148 
4149 	put_fd(descriptor);
4150 	return status;
4151 }
4152 
4153 
4154 extern "C" status_t
4155 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4156 {
4157 	// lookup mount -- the caller is required to make sure that the mount
4158 	// won't go away
4159 	MutexLocker locker(sMountMutex);
4160 	struct fs_mount* mount = find_mount(mountID);
4161 	if (mount == NULL)
4162 		return B_BAD_VALUE;
4163 	locker.Unlock();
4164 
4165 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4166 }
4167 
4168 
4169 extern "C" status_t
4170 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4171 {
4172 	// lookup mount -- the caller is required to make sure that the mount
4173 	// won't go away
4174 	MutexLocker locker(sMountMutex);
4175 	struct fs_mount* mount = find_mount(mountID);
4176 	if (mount == NULL)
4177 		return B_BAD_VALUE;
4178 	locker.Unlock();
4179 
4180 	return mount->entry_cache.Add(dirID, name, -1, true);
4181 }
4182 
4183 
4184 extern "C" status_t
4185 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4186 {
4187 	// lookup mount -- the caller is required to make sure that the mount
4188 	// won't go away
4189 	MutexLocker locker(sMountMutex);
4190 	struct fs_mount* mount = find_mount(mountID);
4191 	if (mount == NULL)
4192 		return B_BAD_VALUE;
4193 	locker.Unlock();
4194 
4195 	return mount->entry_cache.Remove(dirID, name);
4196 }
4197 
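// Usage sketch (illustrative only): a file system with immutable contents
// can pre-populate the entry cache so that repeated lookups never reach the
// FS; "missing" entries remember that a name does not exist. The dirID and
// nodeID values are assumed inputs.
#if 0
entry_cache_add(volume->id, dirID, "existing-file", nodeID);
entry_cache_add_missing(volume->id, dirID, "no-such-file");
// later, when the entry goes away:
entry_cache_remove(volume->id, dirID, "existing-file");
#endif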
4198 
4199 //	#pragma mark - private VFS API
4200 //	Functions the VFS exports for other parts of the kernel
4201 
4202 
4203 /*! Acquires another reference to the vnode that has to be released
4204 	by calling vfs_put_vnode().
4205 */
4206 void
4207 vfs_acquire_vnode(struct vnode* vnode)
4208 {
4209 	inc_vnode_ref_count(vnode);
4210 }
4211 
4212 
4213 /*! This is currently called from file_cache_create() only.
4214 	It's probably a temporary solution as long as devfs requires that
4215 	fs_read_pages()/fs_write_pages() are called with the standard
4216 	open cookie and not with a device cookie.
4217 	If that's done differently, remove this call; it has no other
4218 	purpose.
4219 */
4220 extern "C" status_t
4221 vfs_get_cookie_from_fd(int fd, void** _cookie)
4222 {
4223 	struct file_descriptor* descriptor;
4224 
4225 	descriptor = get_fd(get_current_io_context(true), fd);
4226 	if (descriptor == NULL)
4227 		return B_FILE_ERROR;
4228 
4229 	*_cookie = descriptor->cookie;
4230 	return B_OK;
4231 }
4232 
4233 
4234 extern "C" status_t
4235 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4236 {
4237 	*vnode = get_vnode_from_fd(fd, kernel);
4238 
4239 	if (*vnode == NULL)
4240 		return B_FILE_ERROR;
4241 
4242 	return B_OK;
4243 }
4244 
4245 
4246 extern "C" status_t
4247 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4248 {
4249 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4250 		path, kernel));
4251 
4252 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4253 	if (pathBuffer.InitCheck() != B_OK)
4254 		return B_NO_MEMORY;
4255 
4256 	char* buffer = pathBuffer.LockBuffer();
4257 	strlcpy(buffer, path, pathBuffer.BufferSize());
4258 
4259 	struct vnode* vnode;
4260 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4261 	if (status != B_OK)
4262 		return status;
4263 
4264 	*_vnode = vnode;
4265 	return B_OK;
4266 }
4267 
4268 
4269 extern "C" status_t
4270 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4271 {
4272 	struct vnode* vnode = NULL;
4273 
4274 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4275 	if (status != B_OK)
4276 		return status;
4277 
4278 	*_vnode = vnode;
4279 	return B_OK;
4280 }
4281 
4282 
4283 extern "C" status_t
4284 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4285 	const char* name, struct vnode** _vnode)
4286 {
4287 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4288 }
4289 
4290 
4291 extern "C" void
4292 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4293 {
4294 	*_mountID = vnode->device;
4295 	*_vnodeID = vnode->id;
4296 }
4297 
4298 
4299 /*!
4300 	Helper function abstracting the process of "converting" a given
4301 	vnode-pointer to a fs_vnode-pointer.
4302 	Currently only used in bindfs.
4303 */
4304 extern "C" fs_vnode*
4305 vfs_fsnode_for_vnode(struct vnode* vnode)
4306 {
4307 	return vnode;
4308 }
4309 
4310 
4311 /*!
4312 	Calls fs_open() on the given vnode and returns a new
4313 	file descriptor for it
4314 */
4315 int
4316 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4317 {
4318 	return open_vnode(vnode, openMode, kernel);
4319 }
4320 
4321 
4322 /*!	Looks up a vnode with the given mount and vnode ID.
4323 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4324 	to the node.
4325 	It's currently only used by file_cache_create().
4326 */
4327 extern "C" status_t
4328 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4329 {
4330 	rw_lock_read_lock(&sVnodeLock);
4331 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4332 	rw_lock_read_unlock(&sVnodeLock);
4333 
4334 	if (vnode == NULL)
4335 		return B_ERROR;
4336 
4337 	*_vnode = vnode;
4338 	return B_OK;
4339 }
4340 
4341 
4342 extern "C" status_t
4343 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4344 	bool traverseLeafLink, bool kernel, void** _node)
4345 {
4346 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4347 		volume, path, kernel));
4348 
4349 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4350 	if (pathBuffer.InitCheck() != B_OK)
4351 		return B_NO_MEMORY;
4352 
4353 	fs_mount* mount;
4354 	status_t status = get_mount(volume->id, &mount);
4355 	if (status != B_OK)
4356 		return status;
4357 
4358 	char* buffer = pathBuffer.LockBuffer();
4359 	strlcpy(buffer, path, pathBuffer.BufferSize());
4360 
4361 	struct vnode* vnode = mount->root_vnode;
4362 
4363 	if (buffer[0] == '/')
4364 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4365 	else {
4366 		inc_vnode_ref_count(vnode);
4367 			// vnode_path_to_vnode() releases a reference to the starting vnode
4368 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4369 			kernel, &vnode, NULL);
4370 	}
4371 
4372 	put_mount(mount);
4373 
4374 	if (status != B_OK)
4375 		return status;
4376 
4377 	if (vnode->device != volume->id) {
4378 		// wrong mount ID - must not gain access on foreign file system nodes
4379 		put_vnode(vnode);
4380 		return B_BAD_VALUE;
4381 	}
4382 
4383 	// Use get_vnode() to resolve the cookie for the right layer.
4384 	status = get_vnode(volume, vnode->id, _node);
4385 	put_vnode(vnode);
4386 
4387 	return status;
4388 }
4389 
4390 
4391 status_t
4392 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4393 	struct stat* stat, bool kernel)
4394 {
4395 	status_t status;
4396 
4397 	if (path != NULL) {
4398 		// path given: get the stat of the node referred to by (fd, path)
4399 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4400 		if (pathBuffer.InitCheck() != B_OK)
4401 			return B_NO_MEMORY;
4402 
4403 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4404 			traverseLeafLink, stat, kernel);
4405 	} else {
4406 		// no path given: get the FD and use the FD operation
4407 		struct file_descriptor* descriptor
4408 			= get_fd(get_current_io_context(kernel), fd);
4409 		if (descriptor == NULL)
4410 			return B_FILE_ERROR;
4411 
4412 		if (descriptor->ops->fd_read_stat)
4413 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4414 		else
4415 			status = B_UNSUPPORTED;
4416 
4417 		put_fd(descriptor);
4418 	}
4419 
4420 	return status;
4421 }
4422 
4423 
4424 /*!	Finds the full path to the file that contains the module \a moduleName,
4425 	puts it into \a pathBuffer, and returns B_OK for success.
4426 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4427 	\c B_ENTRY_NOT_FOUND if no file could be found.
4428 	\a pathBuffer is clobbered in any case and must not be relied on if this
4429 	function returns unsuccessfully.
4430 	\a basePath and \a pathBuffer must not point to the same space.
4431 */
4432 status_t
4433 vfs_get_module_path(const char* basePath, const char* moduleName,
4434 	char* pathBuffer, size_t bufferSize)
4435 {
4436 	struct vnode* dir;
4437 	struct vnode* file;
4438 	status_t status;
4439 	size_t length;
4440 	char* path;
4441 
4442 	if (bufferSize == 0
4443 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4444 		return B_BUFFER_OVERFLOW;
4445 
4446 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4447 	if (status != B_OK)
4448 		return status;
4449 
4450 	// the path buffer had been clobbered by the above call
4451 	length = strlcpy(pathBuffer, basePath, bufferSize);
4452 	if (pathBuffer[length - 1] != '/')
4453 		pathBuffer[length++] = '/';
4454 
4455 	path = pathBuffer + length;
4456 	bufferSize -= length;
4457 
4458 	while (moduleName) {
4459 		char* nextPath = strchr(moduleName, '/');
4460 		if (nextPath == NULL)
4461 			length = strlen(moduleName);
4462 		else {
4463 			length = nextPath - moduleName;
4464 			nextPath++;
4465 		}
4466 
4467 		if (length + 1 >= bufferSize) {
4468 			status = B_BUFFER_OVERFLOW;
4469 			goto err;
4470 		}
4471 
4472 		memcpy(path, moduleName, length);
4473 		path[length] = '\0';
4474 		moduleName = nextPath;
4475 
4476 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4477 		if (status != B_OK) {
4478 			// vnode_path_to_vnode() has already released the reference to dir
4479 			return status;
4480 		}
4481 
4482 		if (S_ISDIR(file->Type())) {
4483 			// go to the next directory
4484 			path[length] = '/';
4485 			path[length + 1] = '\0';
4486 			path += length + 1;
4487 			bufferSize -= length + 1;
4488 
4489 			dir = file;
4490 		} else if (S_ISREG(file->Type())) {
4491 			// it's a file so it should be what we've searched for
4492 			put_vnode(file);
4493 
4494 			return B_OK;
4495 		} else {
4496 			TRACE(("vfs_get_module_path(): something is strange here: "
4497 				"0x%08" B_PRIx32 "...\n", file->Type()));
4498 			status = B_ERROR;
4499 			dir = file;
4500 			goto err;
4501 		}
4502 	}
4503 
4504 	// if we got here, the moduleName just pointed to a directory, not to
4505 	// a real module - what should we do in this case?
4506 	status = B_ENTRY_NOT_FOUND;
4507 
4508 err:
4509 	put_vnode(dir);
4510 	return status;
4511 }
4512 
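// Worked example (illustrative): with basePath "/boot/system/add-ons/kernel"
// and moduleName "bus_managers/pci/v1", the loop above appends and resolves
// "bus_managers" (a directory, so it descends) and then "pci"; since that
// component resolves to a regular file -- the add-on binary exporting the
// module -- the function returns B_OK with pathBuffer containing
// "/boot/system/add-ons/kernel/bus_managers/pci".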
4513 
4514 /*!	\brief Normalizes a given path.
4515 
4516 	The path must refer to an existing or non-existing entry in an existing
4517 	directory; that is, after chopping off the leaf component, the remaining
4518 	path must refer to an existing directory.
4519 
4520 	The returned path will be canonical in that it will be absolute, will not
4521 	contain any "." or ".." components or duplicate occurrences of '/'s,
4522 	and none of the directory components will be symbolic links.
4523 
4524 	Any two paths referring to the same entry will result in the same
4525 	normalized path (well, that is pretty much the definition of `normalized',
4526 	isn't it :-).
4527 
4528 	\param path The path to be normalized.
4529 	\param buffer The buffer into which the normalized path will be written.
4530 		   May be the same one as \a path.
4531 	\param bufferSize The size of \a buffer.
4532 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4533 	\param kernel \c true, if the IO context of the kernel shall be used,
4534 		   otherwise that of the team this thread belongs to. Only relevant,
4535 		   if the path is relative (to get the CWD).
4536 	\return \c B_OK if everything went fine, another error code otherwise.
4537 */
4538 status_t
4539 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4540 	bool traverseLink, bool kernel)
4541 {
4542 	if (!path || !buffer || bufferSize < 1)
4543 		return B_BAD_VALUE;
4544 
4545 	if (path != buffer) {
4546 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4547 			return B_BUFFER_OVERFLOW;
4548 	}
4549 
4550 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4551 }
4552 
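// Usage sketch (illustrative only): the buffer may double as input and
// output, so a path can be normalized in place.
#if 0
char path[B_PATH_NAME_LENGTH];
strlcpy(path, "/boot/home/../home/./Desktop", sizeof(path));
if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
	// assuming no symlinks are involved, path now reads
	// "/boot/home/Desktop"
}
#endif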
4553 
4554 /*!	\brief Gets the parent of the passed in node.
4555 
4556 	Gets the parent of the passed in node, and correctly resolves covered
4557 	nodes.
4558 */
4559 extern "C" status_t
4560 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4561 {
4562 	return resolve_covered_parent(parent, device, node,
4563 		get_current_io_context(true));
4564 }
4565 
4566 
4567 /*!	\brief Creates a special node in the file system.
4568 
4569 	The caller gets a reference to the newly created node (which is passed
4570 	back through \a _createdVnode) and is responsible for releasing it.
4571 
4572 	\param path The path where to create the entry for the node. Can be \c NULL,
4573 		in which case the node is created without an entry in the root FS -- it
4574 		will automatically be deleted when the last reference has been released.
4575 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4576 		the target file system will just create the node with its standard
4577 		operations. Depending on the type of the node a subnode might be created
4578 		automatically, though.
4579 	\param mode The type and permissions for the node to be created.
4580 	\param flags Flags to be passed to the creating FS.
4581 	\param kernel \c true, if called in the kernel context (relevant only if
4582 		\a path is not \c NULL and not absolute).
4583 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4584 		file system creating the node, with the private data pointer and
4585 		operations for the super node. Can be \c NULL.
4586 	\param _createdVnode Pointer to pre-allocated storage in which to store
4587 		the pointer to the newly created node.
4588 	\return \c B_OK, if everything went fine, another error code otherwise.
4589 */
4590 status_t
4591 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4592 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4593 	struct vnode** _createdVnode)
4594 {
4595 	struct vnode* dirNode;
4596 	char _leaf[B_FILE_NAME_LENGTH];
4597 	char* leaf = NULL;
4598 
4599 	if (path) {
4600 		// We've got a path. Get the dir vnode and the leaf name.
4601 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4602 		if (tmpPathBuffer.InitCheck() != B_OK)
4603 			return B_NO_MEMORY;
4604 
4605 		char* tmpPath = tmpPathBuffer.LockBuffer();
4606 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4607 			return B_NAME_TOO_LONG;
4608 
4609 		// get the dir vnode and the leaf name
4610 		leaf = _leaf;
4611 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4612 		if (error != B_OK)
4613 			return error;
4614 	} else {
4615 		// No path. Create the node in the root FS.
4616 		dirNode = sRoot;
4617 		inc_vnode_ref_count(dirNode);
4618 	}
4619 
4620 	VNodePutter _(dirNode);
4621 
4622 	// check support for creating special nodes
4623 	if (!HAS_FS_CALL(dirNode, create_special_node))
4624 		return B_UNSUPPORTED;
4625 
4626 	// create the node
4627 	fs_vnode superVnode;
4628 	ino_t nodeID;
4629 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4630 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4631 	if (status != B_OK)
4632 		return status;
4633 
4634 	// lookup the node
4635 	rw_lock_read_lock(&sVnodeLock);
4636 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4637 	rw_lock_read_unlock(&sVnodeLock);
4638 
4639 	if (*_createdVnode == NULL) {
4640 		panic("vfs_create_special_node(): lookup of node failed");
4641 		return B_ERROR;
4642 	}
4643 
4644 	return B_OK;
4645 }
4646 
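// Usage sketch (illustrative only): creating an anonymous FIFO roughly the
// way a pipe() implementation might -- with a NULL path the node gets no
// entry and lives only as long as references to it exist.
#if 0
struct vnode* node;
status_t status = vfs_create_special_node(NULL, NULL, S_IFIFO | 0600, 0,
	true, NULL, &node);
if (status == B_OK) {
	// ... create FDs referring to the node ...
	put_vnode(node);
		// release the reference we got from vfs_create_special_node()
}
#endif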
4647 
4648 extern "C" void
4649 vfs_put_vnode(struct vnode* vnode)
4650 {
4651 	put_vnode(vnode);
4652 }
4653 
4654 
4655 extern "C" status_t
4656 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4657 {
4658 	// Get current working directory from io context
4659 	struct io_context* context = get_current_io_context(false);
4660 	status_t status = B_OK;
4661 
4662 	mutex_lock(&context->io_mutex);
4663 
4664 	if (context->cwd != NULL) {
4665 		*_mountID = context->cwd->device;
4666 		*_vnodeID = context->cwd->id;
4667 	} else
4668 		status = B_ERROR;
4669 
4670 	mutex_unlock(&context->io_mutex);
4671 	return status;
4672 }
4673 
4674 
4675 status_t
4676 vfs_unmount(dev_t mountID, uint32 flags)
4677 {
4678 	return fs_unmount(NULL, mountID, flags, true);
4679 }
4680 
4681 
4682 extern "C" status_t
4683 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4684 {
4685 	struct vnode* vnode;
4686 
4687 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4688 	if (status != B_OK)
4689 		return status;
4690 
4691 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4692 	put_vnode(vnode);
4693 	return B_OK;
4694 }
4695 
4696 
4697 extern "C" void
4698 vfs_free_unused_vnodes(int32 level)
4699 {
4700 	vnode_low_resource_handler(NULL,
4701 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4702 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4703 		level);
4704 }
4705 
4706 
4707 extern "C" bool
4708 vfs_can_page(struct vnode* vnode, void* cookie)
4709 {
4710 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4711 
4712 	if (HAS_FS_CALL(vnode, can_page))
4713 		return FS_CALL(vnode, can_page, cookie);
4714 	return false;
4715 }
4716 
4717 
4718 extern "C" status_t
4719 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4720 	const generic_io_vec* vecs, size_t count, uint32 flags,
4721 	generic_size_t* _numBytes)
4722 {
4723 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4724 		vecs, pos));
4725 
4726 #if VFS_PAGES_IO_TRACING
4727 	generic_size_t bytesRequested = *_numBytes;
4728 #endif
4729 
4730 	IORequest request;
4731 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4732 	if (status == B_OK) {
4733 		status = vfs_vnode_io(vnode, cookie, &request);
4734 		if (status == B_OK)
4735 			status = request.Wait();
4736 		*_numBytes = request.TransferredBytes();
4737 	}
4738 
4739 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4740 		status, *_numBytes));
4741 
4742 	return status;
4743 }
4744 
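// Usage sketch (illustrative only): reading one page worth of data into a
// physical page, similar to what the file cache does. physicalAddress,
// vnode, cookie and offset are assumed to be provided by the caller; the
// B_PHYSICAL_IO_REQUEST flag tells the I/O layer that the vec holds a
// physical address.
#if 0
generic_io_vec vec;
vec.base = physicalAddress;
vec.length = B_PAGE_SIZE;

generic_size_t bytes = vec.length;
status_t status = vfs_read_pages(vnode, cookie, offset, &vec, 1,
	B_PHYSICAL_IO_REQUEST, &bytes);
// on success, bytes holds the number of bytes actually transferred
#endif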
4745 
4746 extern "C" status_t
4747 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4748 	const generic_io_vec* vecs, size_t count, uint32 flags,
4749 	generic_size_t* _numBytes)
4750 {
4751 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4752 		vecs, pos));
4753 
4754 #if VFS_PAGES_IO_TRACING
4755 	generic_size_t bytesRequested = *_numBytes;
4756 #endif
4757 
4758 	IORequest request;
4759 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4760 	if (status == B_OK) {
4761 		status = vfs_vnode_io(vnode, cookie, &request);
4762 		if (status == B_OK)
4763 			status = request.Wait();
4764 		*_numBytes = request.TransferredBytes();
4765 	}
4766 
4767 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4768 		status, *_numBytes));
4769 
4770 	return status;
4771 }
4772 
4773 
4774 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, one will
4775 	be created if \a allocate is \c true.
4776 	On success, a reference to the returned cache is acquired on behalf of
4777 	the caller.
4778 */
4779 extern "C" status_t
4780 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4781 {
4782 	if (vnode->cache != NULL) {
4783 		vnode->cache->AcquireRef();
4784 		*_cache = vnode->cache;
4785 		return B_OK;
4786 	}
4787 
4788 	rw_lock_read_lock(&sVnodeLock);
4789 	vnode->Lock();
4790 
4791 	status_t status = B_OK;
4792 
4793 	// The cache could have been created in the meantime
4794 	if (vnode->cache == NULL) {
4795 		if (allocate) {
4796 			// TODO: actually the vnode needs to be busy already here, or
4797 			//	else this won't work...
4798 			bool wasBusy = vnode->IsBusy();
4799 			vnode->SetBusy(true);
4800 
4801 			vnode->Unlock();
4802 			rw_lock_read_unlock(&sVnodeLock);
4803 
4804 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4805 
4806 			rw_lock_read_lock(&sVnodeLock);
4807 			vnode->Lock();
4808 			vnode->SetBusy(wasBusy);
4809 		} else
4810 			status = B_BAD_VALUE;
4811 	}
4812 
4813 	vnode->Unlock();
4814 	rw_lock_read_unlock(&sVnodeLock);
4815 
4816 	if (status == B_OK) {
4817 		vnode->cache->AcquireRef();
4818 		*_cache = vnode->cache;
4819 	}
4820 
4821 	return status;
4822 }
4823 
4824 
4825 status_t
4826 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4827 	file_io_vec* vecs, size_t* _count)
4828 {
4829 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4830 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4831 
4832 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4833 }
4834 
4835 
4836 status_t
4837 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4838 {
4839 	status_t status = FS_CALL(vnode, read_stat, stat);
4840 
4841 	// fill in the st_dev and st_ino fields
4842 	if (status == B_OK) {
4843 		stat->st_dev = vnode->device;
4844 		stat->st_ino = vnode->id;
4845 		// the rdev field must stay unset for non-special files
4846 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4847 			stat->st_rdev = -1;
4848 	}
4849 
4850 	return status;
4851 }
4852 
4853 
4854 status_t
4855 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4856 {
4857 	struct vnode* vnode;
4858 	status_t status = get_vnode(device, inode, &vnode, true, false);
4859 	if (status != B_OK)
4860 		return status;
4861 
4862 	status = vfs_stat_vnode(vnode, stat);
4863 
4864 	put_vnode(vnode);
4865 	return status;
4866 }
4867 
4868 
4869 status_t
4870 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4871 {
4872 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4873 }
4874 
4875 
4876 status_t
4877 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4878 	bool kernel, char* path, size_t pathLength)
4879 {
4880 	struct vnode* vnode;
4881 	status_t status;
4882 
4883 	// filter invalid leaf names
4884 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4885 		return B_BAD_VALUE;
4886 
4887 	// get the vnode matching the dir's node_ref
4888 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4889 		// special cases "." and "..": we can directly get the vnode of the
4890 		// referenced directory
4891 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4892 		leaf = NULL;
4893 	} else
4894 		status = get_vnode(device, inode, &vnode, true, false);
4895 	if (status != B_OK)
4896 		return status;
4897 
4898 	// get the directory path
4899 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4900 	put_vnode(vnode);
4901 		// we don't need the vnode anymore
4902 	if (status != B_OK)
4903 		return status;
4904 
4905 	// append the leaf name
4906 	if (leaf) {
4907 		// insert a directory separator if this is not the file system root
4908 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4909 				>= pathLength)
4910 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4911 			return B_NAME_TOO_LONG;
4912 		}
4913 	}
4914 
4915 	return B_OK;
4916 }
4917 
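// Usage sketch (illustrative only): turning an entry-ref style triple, e.g.
// from a node monitor message, back into an absolute path. device and
// directory are assumed inputs.
#if 0
char path[B_PATH_NAME_LENGTH];
status_t status = vfs_entry_ref_to_path(device, directory, "file.txt",
	true, path, sizeof(path));
#endif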
4918 
4919 /*!	If the given descriptor locked its vnode, that lock will be released. */
4920 void
4921 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4922 {
4923 	struct vnode* vnode = fd_vnode(descriptor);
4924 
4925 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4926 		vnode->mandatory_locked_by = NULL;
4927 }
4928 
4929 
4930 /*!	Releases any POSIX locks on the file descriptor. */
4931 status_t
4932 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4933 {
4934 	struct vnode* vnode = descriptor->u.vnode;
4935 	if (vnode == NULL)
4936 		return B_OK;
4937 
4938 	if (HAS_FS_CALL(vnode, release_lock))
4939 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4940 
4941 	return release_advisory_lock(vnode, context, NULL, NULL);
4942 }
4943 
4944 
4945 /*!	Closes all file descriptors of the specified I/O context that
4946 	have the O_CLOEXEC flag set.
4947 */
4948 void
4949 vfs_exec_io_context(io_context* context)
4950 {
4951 	uint32 i;
4952 
4953 	for (i = 0; i < context->table_size; i++) {
4954 		mutex_lock(&context->io_mutex);
4955 
4956 		struct file_descriptor* descriptor = context->fds[i];
4957 		bool remove = false;
4958 
4959 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4960 			context->fds[i] = NULL;
4961 			context->num_used_fds--;
4962 
4963 			remove = true;
4964 		}
4965 
4966 		mutex_unlock(&context->io_mutex);
4967 
4968 		if (remove) {
4969 			close_fd(context, descriptor);
4970 			put_fd(descriptor);
4971 		}
4972 	}
4973 }
4974 
4975 
4976 /*! Sets up a new io_context structure, and inherits the properties
4977 	of the parent io_context if it is given.
4978 */
4979 io_context*
4980 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4981 {
4982 	io_context* context = (io_context*)malloc(sizeof(io_context));
4983 	if (context == NULL)
4984 		return NULL;
4985 
4986 	TIOC(NewIOContext(context, parentContext));
4987 
4988 	memset(context, 0, sizeof(io_context));
4989 	context->ref_count = 1;
4990 
4991 	MutexLocker parentLocker;
4992 
4993 	size_t tableSize;
4994 	if (parentContext != NULL) {
4995 		parentLocker.SetTo(parentContext->io_mutex, false);
4996 		tableSize = parentContext->table_size;
4997 	} else
4998 		tableSize = DEFAULT_FD_TABLE_SIZE;
4999 
5000 	// allocate space for FDs and their close-on-exec flag
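	// The three tables share a single allocation, laid out back to back:
	//   file_descriptor*  fds[tableSize]
	//   select_info*      select_infos[tableSize]
	//   uint8             fds_close_on_exec[(tableSize + 7) / 8]
	// i.e. one close-on-exec bit per FD, rounded up to whole bytes.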
5001 	context->fds = (file_descriptor**)malloc(
5002 		sizeof(struct file_descriptor*) * tableSize
5003 		+ sizeof(struct select_sync*) * tableSize
5004 		+ (tableSize + 7) / 8);
5005 	if (context->fds == NULL) {
5006 		free(context);
5007 		return NULL;
5008 	}
5009 
5010 	context->select_infos = (select_info**)(context->fds + tableSize);
5011 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5012 
5013 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5014 		+ sizeof(struct select_sync*) * tableSize
5015 		+ (tableSize + 7) / 8);
5016 
5017 	mutex_init(&context->io_mutex, "I/O context");
5018 
5019 	// Copy all parent file descriptors
5020 
5021 	if (parentContext != NULL) {
5022 		size_t i;
5023 
5024 		mutex_lock(&sIOContextRootLock);
5025 		context->root = parentContext->root;
5026 		if (context->root)
5027 			inc_vnode_ref_count(context->root);
5028 		mutex_unlock(&sIOContextRootLock);
5029 
5030 		context->cwd = parentContext->cwd;
5031 		if (context->cwd)
5032 			inc_vnode_ref_count(context->cwd);
5033 
5034 		if (parentContext->inherit_fds) {
5035 			for (i = 0; i < tableSize; i++) {
5036 				struct file_descriptor* descriptor = parentContext->fds[i];
5037 
5038 				if (descriptor != NULL
5039 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5040 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5041 					if (closeOnExec && purgeCloseOnExec)
5042 						continue;
5043 
5044 					TFD(InheritFD(context, i, descriptor, parentContext));
5045 
5046 					context->fds[i] = descriptor;
5047 					context->num_used_fds++;
5048 					atomic_add(&descriptor->ref_count, 1);
5049 					atomic_add(&descriptor->open_count, 1);
5050 
5051 					if (closeOnExec)
5052 						fd_set_close_on_exec(context, i, true);
5053 				}
5054 			}
5055 		}
5056 
5057 		parentLocker.Unlock();
5058 	} else {
5059 		context->root = sRoot;
5060 		context->cwd = sRoot;
5061 
5062 		if (context->root)
5063 			inc_vnode_ref_count(context->root);
5064 
5065 		if (context->cwd)
5066 			inc_vnode_ref_count(context->cwd);
5067 	}
5068 
5069 	context->table_size = tableSize;
5070 	context->inherit_fds = parentContext != NULL;
5071 
5072 	list_init(&context->node_monitors);
5073 	context->max_monitors = DEFAULT_NODE_MONITORS;
5074 
5075 	return context;
5076 }
5077 
5078 
5079 void
5080 vfs_get_io_context(io_context* context)
5081 {
5082 	atomic_add(&context->ref_count, 1);
5083 }
5084 
5085 
5086 void
5087 vfs_put_io_context(io_context* context)
5088 {
5089 	if (atomic_add(&context->ref_count, -1) == 1)
5090 		free_io_context(context);
5091 }
5092 
5093 
5094 status_t
5095 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5096 {
5097 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5098 		return B_BAD_VALUE;
5099 
5100 	TIOC(ResizeIOContext(context, newSize));
5101 
5102 	MutexLocker _(context->io_mutex);
5103 
5104 	uint32 oldSize = context->table_size;
5105 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5106 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5107 
5108 	// If the tables shrink, make sure none of the fds being dropped are in use.
5109 	if (newSize < oldSize) {
5110 		for (uint32 i = oldSize; i-- > newSize;) {
5111 			if (context->fds[i])
5112 				return B_BUSY;
5113 		}
5114 	}
5115 
5116 	// store pointers to the old tables
5117 	file_descriptor** oldFDs = context->fds;
5118 	select_info** oldSelectInfos = context->select_infos;
5119 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5120 
5121 	// allocate new tables
5122 	file_descriptor** newFDs = (file_descriptor**)malloc(
5123 		sizeof(struct file_descriptor*) * newSize
5124 		+ sizeof(struct select_sync*) * newSize
5125 		+ newCloseOnExitBitmapSize);
5126 	if (newFDs == NULL)
5127 		return B_NO_MEMORY;
5128 
5129 	context->fds = newFDs;
5130 	context->select_infos = (select_info**)(context->fds + newSize);
5131 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5132 	context->table_size = newSize;
5133 
5134 	// copy entries from old tables
5135 	uint32 toCopy = min_c(oldSize, newSize);
5136 
5137 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5138 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5139 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5140 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5141 
5142 	// clear additional entries, if the tables grow
5143 	if (newSize > oldSize) {
5144 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5145 		memset(context->select_infos + oldSize, 0,
5146 			sizeof(void*) * (newSize - oldSize));
5147 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5148 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5149 	}
5150 
5151 	free(oldFDs);
5152 
5153 	return B_OK;
5154 }
5155 
5156 
5157 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5158 
5159 	Given an arbitrary vnode (identified by mount and node ID), the function
5160 	checks whether the vnode is covered by another vnode. If it is, the
5161 	function returns the mount and node ID of the covering vnode. Otherwise
5162 	it simply returns the supplied mount and node ID.
5163 
5164 	In case of error (e.g. the supplied node could not be found) the variables
5165 	for storing the resolved mount and node ID remain untouched and an error
5166 	code is returned.
5167 
5168 	\param mountID The mount ID of the vnode in question.
5169 	\param nodeID The node ID of the vnode in question.
5170 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5171 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5172 	\return
5173 	- \c B_OK, if everything went fine,
5174 	- another error code, if something went wrong.
5175 */
5176 status_t
5177 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5178 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5179 {
5180 	// get the node
5181 	struct vnode* node;
5182 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5183 	if (error != B_OK)
5184 		return error;
5185 
5186 	// resolve the node
5187 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5188 		put_vnode(node);
5189 		node = coveringNode;
5190 	}
5191 
5192 	// set the return values
5193 	*resolvedMountID = node->device;
5194 	*resolvedNodeID = node->id;
5195 
5196 	put_vnode(node);
5197 
5198 	return B_OK;
5199 }
5200 
5201 
5202 status_t
5203 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5204 	ino_t* _mountPointNodeID)
5205 {
5206 	ReadLocker nodeLocker(sVnodeLock);
5207 	MutexLocker mountLocker(sMountMutex);
5208 
5209 	struct fs_mount* mount = find_mount(mountID);
5210 	if (mount == NULL)
5211 		return B_BAD_VALUE;
5212 
5213 	Vnode* mountPoint = mount->covers_vnode;
5214 
5215 	*_mountPointMountID = mountPoint->device;
5216 	*_mountPointNodeID = mountPoint->id;
5217 
5218 	return B_OK;
5219 }
5220 
5221 
5222 status_t
5223 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5224 	ino_t coveredNodeID)
5225 {
5226 	// get the vnodes
5227 	Vnode* vnode;
5228 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5229 	if (error != B_OK)
5230 		return B_BAD_VALUE;
5231 	VNodePutter vnodePutter(vnode);
5232 
5233 	Vnode* coveredVnode;
5234 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5235 		false);
5236 	if (error != B_OK)
5237 		return B_BAD_VALUE;
5238 	VNodePutter coveredVnodePutter(coveredVnode);
5239 
5240 	// establish the covered/covering links
5241 	WriteLocker locker(sVnodeLock);
5242 
5243 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5244 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5245 		return B_BUSY;
5246 	}
5247 
5248 	vnode->covers = coveredVnode;
5249 	vnode->SetCovering(true);
5250 
5251 	coveredVnode->covered_by = vnode;
5252 	coveredVnode->SetCovered(true);
5253 
5254 	// the vnodes now reference each other
5255 	inc_vnode_ref_count(vnode);
5256 	inc_vnode_ref_count(coveredVnode);
5257 
5258 	return B_OK;
5259 }
5260 
5261 
5262 int
5263 vfs_getrlimit(int resource, struct rlimit* rlp)
5264 {
5265 	if (!rlp)
5266 		return B_BAD_ADDRESS;
5267 
5268 	switch (resource) {
5269 		case RLIMIT_NOFILE:
5270 		{
5271 			struct io_context* context = get_current_io_context(false);
5272 			MutexLocker _(context->io_mutex);
5273 
5274 			rlp->rlim_cur = context->table_size;
5275 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5276 			return 0;
5277 		}
5278 
5279 		case RLIMIT_NOVMON:
5280 		{
5281 			struct io_context* context = get_current_io_context(false);
5282 			MutexLocker _(context->io_mutex);
5283 
5284 			rlp->rlim_cur = context->max_monitors;
5285 			rlp->rlim_max = MAX_NODE_MONITORS;
5286 			return 0;
5287 		}
5288 
5289 		default:
5290 			return B_BAD_VALUE;
5291 	}
5292 }
5293 
5294 
5295 int
5296 vfs_setrlimit(int resource, const struct rlimit* rlp)
5297 {
5298 	if (!rlp)
5299 		return B_BAD_ADDRESS;
5300 
5301 	switch (resource) {
5302 		case RLIMIT_NOFILE:
5303 			/* TODO: check getuid() */
5304 			if (rlp->rlim_max != RLIM_SAVED_MAX
5305 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5306 				return B_NOT_ALLOWED;
5307 
5308 			return vfs_resize_fd_table(get_current_io_context(false),
5309 				rlp->rlim_cur);
5310 
5311 		case RLIMIT_NOVMON:
5312 			/* TODO: check getuid() */
5313 			if (rlp->rlim_max != RLIM_SAVED_MAX
5314 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5315 				return B_NOT_ALLOWED;
5316 
5317 			return resize_monitor_table(get_current_io_context(false),
5318 				rlp->rlim_cur);
5319 
5320 		default:
5321 			return B_BAD_VALUE;
5322 	}
5323 }
5324 
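// Usage sketch (illustrative only): how userland reaches these functions --
// the POSIX calls end up in vfs_getrlimit()/vfs_setrlimit() via the
// respective syscalls. Note that vfs_setrlimit() above accepts rlim_max
// only if it equals RLIM_SAVED_MAX or the compile-time maximum, which the
// preceding getrlimit() satisfies.
#if 0
struct rlimit rl;
getrlimit(RLIMIT_NOFILE, &rl);
rl.rlim_cur = 1024;		// ask for a larger FD table
if (setrlimit(RLIMIT_NOFILE, &rl) == 0) {
	// vfs_resize_fd_table() has resized this team's FD table
}
#endif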
5325 
5326 status_t
5327 vfs_init(kernel_args* args)
5328 {
5329 	vnode::StaticInit();
5330 
5331 	sVnodeTable = new(std::nothrow) VnodeTable();
5332 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5333 		panic("vfs_init: error creating vnode hash table\n");
5334 
5335 	struct vnode dummy_vnode;
5336 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5337 
5338 	struct fs_mount dummyMount;
5339 	sMountsTable = new(std::nothrow) MountTable();
5340 	if (sMountsTable == NULL
5341 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5342 		panic("vfs_init: error creating mounts hash table\n");
5343 
5344 	node_monitor_init();
5345 
5346 	sRoot = NULL;
5347 
5348 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5349 
5350 	if (block_cache_init() != B_OK)
5351 		return B_ERROR;
5352 
5353 #ifdef ADD_DEBUGGER_COMMANDS
5354 	// add some debugger commands
5355 	add_debugger_command_etc("vnode", &dump_vnode,
5356 		"Print info about the specified vnode",
5357 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5358 		"Prints information about the vnode specified by address <vnode> or\n"
5359 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5360 		"constructed and printed. It might not be possible to construct a\n"
5361 		"complete path, though.\n",
5362 		0);
5363 	add_debugger_command("vnodes", &dump_vnodes,
5364 		"list all vnodes (from the specified device)");
5365 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5366 		"list all vnode caches");
5367 	add_debugger_command("mount", &dump_mount,
5368 		"info about the specified fs_mount");
5369 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5370 	add_debugger_command("io_context", &dump_io_context,
5371 		"info about the I/O context");
5372 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5373 		"info about vnode usage");
5374 #endif
5375 
5376 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5377 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5378 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5379 		0);
5380 
5381 	fifo_init();
5382 	file_map_init();
5383 
5384 	return file_cache_init();
5385 }
5386 
5387 
5388 //	#pragma mark - fd_ops implementations
5389 
5390 
5391 /*!
5392 	Calls fs_open() on the given vnode and returns a new
5393 	file descriptor for it
5394 */
5395 static int
5396 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5397 {
5398 	void* cookie;
5399 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5400 	if (status != B_OK)
5401 		return status;
5402 
5403 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5404 	if (fd < 0) {
5405 		FS_CALL(vnode, close, cookie);
5406 		FS_CALL(vnode, free_cookie, cookie);
5407 	}
5408 	return fd;
5409 }
5410 
5411 
5412 /*!
5413 	Creates (if necessary) and opens the entry with the given name in the
5414 	given directory, and returns a new file descriptor for it
5415 */
5416 static int
5417 create_vnode(struct vnode* directory, const char* name, int openMode,
5418 	int perms, bool kernel)
5419 {
5420 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5421 	status_t status = B_ERROR;
5422 	struct vnode* vnode;
5423 	void* cookie;
5424 	ino_t newID;
5425 
5426 	// This is somewhat tricky: If the entry already exists, the FS responsible
5427 	// for the directory might not necessarily also be the one responsible for
5428 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5429 	// we can actually never call the create() hook without O_EXCL. Instead we
5430 	// try to look the entry up first. If it already exists, we just open the
5431 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5432 	// introduces a race condition, since someone else might have created the
5433 	// entry in the meantime. We hope the respective FS returns the correct
5434 	// error code, in which case we retry (up to 3 times).
5435 
5436 	for (int i = 0; i < 3 && status != B_OK; i++) {
5437 		// look the node up
5438 		status = lookup_dir_entry(directory, name, &vnode);
5439 		if (status == B_OK) {
5440 			VNodePutter putter(vnode);
5441 
5442 			if ((openMode & O_EXCL) != 0)
5443 				return B_FILE_EXISTS;
5444 
5445 			// If the node is a symlink, we have to follow it, unless
5446 			// O_NOTRAVERSE is set.
5447 			if (S_ISLNK(vnode->Type()) && traverse) {
5448 				putter.Put();
5449 				char clonedName[B_FILE_NAME_LENGTH + 1];
5450 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5451 						>= B_FILE_NAME_LENGTH) {
5452 					return B_NAME_TOO_LONG;
5453 				}
5454 
5455 				inc_vnode_ref_count(directory);
5456 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5457 					kernel, &vnode, NULL);
5458 				if (status != B_OK)
5459 					return status;
5460 
5461 				putter.SetTo(vnode);
5462 			}
5463 
5464 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5465 				return B_LINK_LIMIT;
5466 
5467 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5468 			// on success keep the vnode reference for the FD
5469 			if (fd >= 0)
5470 				putter.Detach();
5471 
5472 			return fd;
5473 		}
5474 
5475 		// it doesn't exist yet -- try to create it
5476 
5477 		if (!HAS_FS_CALL(directory, create))
5478 			return B_READ_ONLY_DEVICE;
5479 
5480 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5481 			&cookie, &newID);
5482 		if (status != B_OK
5483 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5484 			return status;
5485 		}
5486 	}
5487 
5488 	if (status != B_OK)
5489 		return status;
5490 
5491 	// the node has been created successfully
5492 
5493 	rw_lock_read_lock(&sVnodeLock);
5494 	vnode = lookup_vnode(directory->device, newID);
5495 	rw_lock_read_unlock(&sVnodeLock);
5496 
5497 	if (vnode == NULL) {
5498 		panic("vfs: fs_create() returned success but there is no vnode, "
5499 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5500 		return B_BAD_VALUE;
5501 	}
5502 
5503 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5504 	if (fd >= 0)
5505 		return fd;
5506 
5507 	status = fd;
5508 
5509 	// something went wrong, clean up
5510 
5511 	FS_CALL(vnode, close, cookie);
5512 	FS_CALL(vnode, free_cookie, cookie);
5513 	put_vnode(vnode);
5514 
5515 	FS_CALL(directory, unlink, name);
5516 
5517 	return status;
5518 }
5519 
5520 
5521 /*! Calls fs open_dir() on the given vnode and returns a new
5522 	file descriptor for it
5523 */
5524 static int
5525 open_dir_vnode(struct vnode* vnode, bool kernel)
5526 {
5527 	void* cookie;
5528 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5529 	if (status != B_OK)
5530 		return status;
5531 
5532 	// directory is opened, create a fd
5533 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5534 	if (status >= 0)
5535 		return status;
5536 
5537 	FS_CALL(vnode, close_dir, cookie);
5538 	FS_CALL(vnode, free_dir_cookie, cookie);
5539 
5540 	return status;
5541 }
5542 
5543 
5544 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5545 	file descriptor for it.
5546 	Used by attr_dir_open(), and attr_dir_open_fd().
5547 */
5548 static int
5549 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5550 {
5551 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5552 		return B_UNSUPPORTED;
5553 
5554 	void* cookie;
5555 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5556 	if (status != B_OK)
5557 		return status;
5558 
5559 	// directory is opened, create a fd
5560 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5561 		kernel);
5562 	if (status >= 0)
5563 		return status;
5564 
5565 	FS_CALL(vnode, close_attr_dir, cookie);
5566 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5567 
5568 	return status;
5569 }
5570 
5571 
5572 static int
5573 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5574 	int openMode, int perms, bool kernel)
5575 {
5576 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5577 		"kernel %d\n", name, openMode, perms, kernel));
5578 
5579 	// get directory to put the new file in
5580 	struct vnode* directory;
5581 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5582 	if (status != B_OK)
5583 		return status;
5584 
5585 	status = create_vnode(directory, name, openMode, perms, kernel);
5586 	put_vnode(directory);
5587 
5588 	return status;
5589 }
5590 
5591 
5592 static int
5593 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5594 {
5595 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5596 		openMode, perms, kernel));
5597 
5598 	// get directory to put the new file in
5599 	char name[B_FILE_NAME_LENGTH];
5600 	struct vnode* directory;
5601 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5602 		kernel);
5603 	if (status < 0)
5604 		return status;
5605 
5606 	status = create_vnode(directory, name, openMode, perms, kernel);
5607 
5608 	put_vnode(directory);
5609 	return status;
5610 }
5611 
5612 
5613 static int
5614 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5615 	int openMode, bool kernel)
5616 {
5617 	if (name == NULL || *name == '\0')
5618 		return B_BAD_VALUE;
5619 
5620 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5621 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5622 
5623 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5624 
5625 	// get the vnode matching the entry_ref
5626 	struct vnode* vnode;
5627 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5628 		kernel, &vnode);
5629 	if (status != B_OK)
5630 		return status;
5631 
5632 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5633 		put_vnode(vnode);
5634 		return B_LINK_LIMIT;
5635 	}
5636 
5637 	int newFD = open_vnode(vnode, openMode, kernel);
5638 	if (newFD >= 0) {
5639 		// The vnode reference has been transferred to the FD
5640 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5641 			directoryID, vnode->id, name);
5642 	} else
5643 		put_vnode(vnode);
5644 
5645 	return newFD;
5646 }
5647 
5648 
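/*! Opens the node at the given fd/path combination, with the same
	symlink semantics as file_open_entry_ref(); returns a new file
	descriptor on success.
*/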
5649 static int
5650 file_open(int fd, char* path, int openMode, bool kernel)
5651 {
5652 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5653 
5654 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5655 		fd, path, openMode, kernel));
5656 
5657 	// get the vnode matching the vnode + path combination
5658 	struct vnode* vnode;
5659 	ino_t parentID;
5660 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5661 		&parentID, kernel);
5662 	if (status != B_OK)
5663 		return status;
5664 
5665 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5666 		put_vnode(vnode);
5667 		return B_LINK_LIMIT;
5668 	}
5669 
5670 	// open the vnode
5671 	int newFD = open_vnode(vnode, openMode, kernel);
5672 	if (newFD >= 0) {
5673 		// The vnode reference has been transferred to the FD
5674 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5675 			vnode->device, parentID, vnode->id, NULL);
5676 	} else
5677 		put_vnode(vnode);
5678 
5679 	return newFD;
5680 }
5681 
5682 
5683 static status_t
5684 file_close(struct file_descriptor* descriptor)
5685 {
5686 	struct vnode* vnode = descriptor->u.vnode;
5687 	status_t status = B_OK;
5688 
5689 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5690 
5691 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5692 		vnode->id);
5693 	if (HAS_FS_CALL(vnode, close)) {
5694 		status = FS_CALL(vnode, close, descriptor->cookie);
5695 	}
5696 
5697 	if (status == B_OK) {
5698 		// remove all outstanding locks for this team
5699 		if (HAS_FS_CALL(vnode, release_lock))
5700 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5701 		else
5702 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5703 	}
5704 	return status;
5705 }
5706 
5707 
5708 static void
5709 file_free_fd(struct file_descriptor* descriptor)
5710 {
5711 	struct vnode* vnode = descriptor->u.vnode;
5712 
5713 	if (vnode != NULL) {
5714 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5715 		put_vnode(vnode);
5716 	}
5717 }
5718 
5719 
5720 static status_t
5721 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5722 	size_t* length)
5723 {
5724 	struct vnode* vnode = descriptor->u.vnode;
5725 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5726 		pos, length, *length));
5727 
5728 	if (S_ISDIR(vnode->Type()))
5729 		return B_IS_A_DIRECTORY;
5730 
5731 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5732 }
5733 
5734 
5735 static status_t
5736 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5737 	size_t* length)
5738 {
5739 	struct vnode* vnode = descriptor->u.vnode;
5740 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5741 		length));
5742 
5743 	if (S_ISDIR(vnode->Type()))
5744 		return B_IS_A_DIRECTORY;
5745 	if (!HAS_FS_CALL(vnode, write))
5746 		return B_READ_ONLY_DEVICE;
5747 
5748 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5749 }
5750 
5751 
5752 static off_t
5753 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5754 {
5755 	struct vnode* vnode = descriptor->u.vnode;
5756 	off_t offset;
5757 	bool isDevice = false;
5758 
5759 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5760 		seekType));
5761 
5762 	// some kinds of files are not seekable
5763 	switch (vnode->Type() & S_IFMT) {
5764 		case S_IFIFO:
5765 		case S_IFSOCK:
5766 			return ESPIPE;
5767 
5768 		// drivers publish block devices as character devices, so check both
5769 		case S_IFBLK:
5770 		case S_IFCHR:
5771 			isDevice = true;
5772 			break;
5773 		// The Open Group Base Specs don't treat any file types besides
5774 		// pipes, FIFOs, and sockets specially, so we allow seeking them.
5775 		case S_IFREG:
5776 		case S_IFDIR:
5777 		case S_IFLNK:
5778 			break;
5779 	}
5780 
5781 	switch (seekType) {
5782 		case SEEK_SET:
5783 			offset = 0;
5784 			break;
5785 		case SEEK_CUR:
5786 			offset = descriptor->pos;
5787 			break;
5788 		case SEEK_END:
5789 		{
5790 			// stat() the node
5791 			if (!HAS_FS_CALL(vnode, read_stat))
5792 				return B_UNSUPPORTED;
5793 
5794 			struct stat stat;
5795 			status_t status = FS_CALL(vnode, read_stat, &stat);
5796 			if (status != B_OK)
5797 				return status;
5798 
5799 			offset = stat.st_size;
5800 
5801 			if (offset == 0 && isDevice) {
5802 				// stat() on regular drivers doesn't report size
5803 				device_geometry geometry;
5804 
5805 				if (HAS_FS_CALL(vnode, ioctl)) {
5806 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5807 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5808 					if (status == B_OK)
5809 						offset = (off_t)geometry.bytes_per_sector
5810 							* geometry.sectors_per_track
5811 							* geometry.cylinder_count
5812 							* geometry.head_count;
5813 				}
5814 			}
5815 
5816 			break;
5817 		}
5818 		default:
5819 			return B_BAD_VALUE;
5820 	}
5821 
5822 	// assumes off_t is 64 bits wide
5823 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5824 		return B_BUFFER_OVERFLOW;
5825 
5826 	pos += offset;
5827 	if (pos < 0)
5828 		return B_BAD_VALUE;
5829 
5830 	return descriptor->pos = pos;
5831 }
5832 
5833 
5834 static status_t
5835 file_select(struct file_descriptor* descriptor, uint8 event,
5836 	struct selectsync* sync)
5837 {
5838 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5839 
5840 	struct vnode* vnode = descriptor->u.vnode;
5841 
5842 	// If the FS has no select() hook, notify select() now.
5843 	if (!HAS_FS_CALL(vnode, select)) {
5844 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5845 			return notify_select_event(sync, event);
5846 		else
5847 			return B_OK;
5848 	}
5849 
5850 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5851 }
5852 
5853 
5854 static status_t
5855 file_deselect(struct file_descriptor* descriptor, uint8 event,
5856 	struct selectsync* sync)
5857 {
5858 	struct vnode* vnode = descriptor->u.vnode;
5859 
5860 	if (!HAS_FS_CALL(vnode, deselect))
5861 		return B_OK;
5862 
5863 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5864 }
5865 
5866 
5867 static status_t
5868 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5869 	bool kernel)
5870 {
5871 	struct vnode* vnode;
5872 	status_t status;
5873 
5874 	if (name == NULL || *name == '\0')
5875 		return B_BAD_VALUE;
5876 
5877 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5878 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5879 
5880 	status = get_vnode(mountID, parentID, &vnode, true, false);
5881 	if (status != B_OK)
5882 		return status;
5883 
5884 	if (HAS_FS_CALL(vnode, create_dir))
5885 		status = FS_CALL(vnode, create_dir, name, perms);
5886 	else
5887 		status = B_READ_ONLY_DEVICE;
5888 
5889 	put_vnode(vnode);
5890 	return status;
5891 }
5892 
5893 
5894 static status_t
5895 dir_create(int fd, char* path, int perms, bool kernel)
5896 {
5897 	char filename[B_FILE_NAME_LENGTH];
5898 	struct vnode* vnode;
5899 	status_t status;
5900 
5901 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5902 		kernel));
5903 
5904 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5905 	if (status < 0)
5906 		return status;
5907 
5908 	if (HAS_FS_CALL(vnode, create_dir)) {
5909 		status = FS_CALL(vnode, create_dir, filename, perms);
5910 	} else
5911 		status = B_READ_ONLY_DEVICE;
5912 
5913 	put_vnode(vnode);
5914 	return status;
5915 }
5916 
5917 
5918 static int
5919 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5920 {
5921 	FUNCTION(("dir_open_entry_ref()\n"));
5922 
5923 	if (name && name[0] == '\0')
5924 		return B_BAD_VALUE;
5925 
5926 	// get the vnode matching the entry_ref/node_ref
5927 	struct vnode* vnode;
5928 	status_t status;
5929 	if (name) {
5930 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5931 			&vnode);
5932 	} else
5933 		status = get_vnode(mountID, parentID, &vnode, true, false);
5934 	if (status != B_OK)
5935 		return status;
5936 
5937 	int newFD = open_dir_vnode(vnode, kernel);
5938 	if (newFD >= 0) {
5939 		// The vnode reference has been transferred to the FD
5940 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5941 			vnode->id, name);
5942 	} else
5943 		put_vnode(vnode);
5944 
5945 	return newFD;
5946 }
5947 
5948 
5949 static int
5950 dir_open(int fd, char* path, bool kernel)
5951 {
5952 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5953 		kernel));
5954 
5955 	// get the vnode matching the vnode + path combination
5956 	struct vnode* vnode = NULL;
5957 	ino_t parentID;
5958 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5959 		kernel);
5960 	if (status != B_OK)
5961 		return status;
5962 
5963 	// open the dir
5964 	int newFD = open_dir_vnode(vnode, kernel);
5965 	if (newFD >= 0) {
5966 		// The vnode reference has been transferred to the FD
5967 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5968 			parentID, vnode->id, NULL);
5969 	} else
5970 		put_vnode(vnode);
5971 
5972 	return newFD;
5973 }
5974 
5975 
5976 static status_t
5977 dir_close(struct file_descriptor* descriptor)
5978 {
5979 	struct vnode* vnode = descriptor->u.vnode;
5980 
5981 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5982 
5983 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5984 		vnode->id);
5985 	if (HAS_FS_CALL(vnode, close_dir))
5986 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5987 
5988 	return B_OK;
5989 }
5990 
5991 
5992 static void
5993 dir_free_fd(struct file_descriptor* descriptor)
5994 {
5995 	struct vnode* vnode = descriptor->u.vnode;
5996 
5997 	if (vnode != NULL) {
5998 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5999 		put_vnode(vnode);
6000 	}
6001 }
6002 
6003 
6004 static status_t
6005 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6006 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6007 {
6008 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6009 		bufferSize, _count);
6010 }
6011 
6012 
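/*! Fixes up a dirent as returned by a file system: fills in the parent
	directory's device and node IDs (d_pdev/d_pino), resolves ".." for
	directories that cover another vnode, and replaces the IDs of covered
	vnodes with those of their topmost covering vnode, so that mount
	points appear as the root of the volume mounted on them.
*/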
6013 static status_t
6014 fix_dirent(struct vnode* parent, struct dirent* entry,
6015 	struct io_context* ioContext)
6016 {
6017 	// set d_pdev and d_pino
6018 	entry->d_pdev = parent->device;
6019 	entry->d_pino = parent->id;
6020 
6021 	// If this is the ".." entry and the directory is covering another vnode,
6022 	// we need to replace d_dev and d_ino with the actual values.
6023 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6024 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6025 			ioContext);
6026 	}
6027 
6028 	// resolve covered vnodes
6029 	ReadLocker _(&sVnodeLock);
6030 
6031 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6032 	if (vnode != NULL && vnode->covered_by != NULL) {
6033 		do {
6034 			vnode = vnode->covered_by;
6035 		} while (vnode->covered_by != NULL);
6036 
6037 		entry->d_dev = vnode->device;
6038 		entry->d_ino = vnode->id;
6039 	}
6040 
6041 	return B_OK;
6042 }
6043 
6044 
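/*! Reads directory entries via the file system's read_dir() hook and
	adjusts each returned dirent with fix_dirent(), so that mount
	boundaries are resolved transparently.
*/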
6045 static status_t
6046 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6047 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6048 {
6049 	if (!HAS_FS_CALL(vnode, read_dir))
6050 		return B_UNSUPPORTED;
6051 
6052 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6053 		_count);
6054 	if (error != B_OK)
6055 		return error;
6056 
6057 	// we need to adjust the read dirents
6058 	uint32 count = *_count;
6059 	for (uint32 i = 0; i < count; i++) {
6060 		error = fix_dirent(vnode, buffer, ioContext);
6061 		if (error != B_OK)
6062 			return error;
6063 
6064 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6065 	}
6066 
6067 	return error;
6068 }
6069 
6070 
6071 static status_t
6072 dir_rewind(struct file_descriptor* descriptor)
6073 {
6074 	struct vnode* vnode = descriptor->u.vnode;
6075 
6076 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6077 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6078 	}
6079 
6080 	return B_UNSUPPORTED;
6081 }
6082 
6083 
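/*! Removes the directory at the given fd/path combination. Trailing
	slashes and "/." components are stripped first; paths whose leaf is
	"..", or that resolve to "." or "..", are rejected with
	B_NOT_ALLOWED.
*/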
6084 static status_t
6085 dir_remove(int fd, char* path, bool kernel)
6086 {
6087 	char name[B_FILE_NAME_LENGTH];
6088 	struct vnode* directory;
6089 	status_t status;
6090 
6091 	if (path != NULL) {
6092 		// we need to make sure our path name doesn't end in "/", ".",
6093 		// or ".."
6094 		char* lastSlash;
6095 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6096 			char* leaf = lastSlash + 1;
6097 			if (!strcmp(leaf, ".."))
6098 				return B_NOT_ALLOWED;
6099 
6100 			// omit multiple slashes
6101 			while (lastSlash > path && lastSlash[-1] == '/')
6102 				lastSlash--;
6103 
6104 			if (leaf[0] != '\0'
6105 				&& strcmp(leaf, ".") != 0) {
6106 				break;
6107 			}
6108 			// "name/" -> "name", or "name/." -> "name"
6109 			lastSlash[0] = '\0';
6110 		}
6111 
6112 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6113 			return B_NOT_ALLOWED;
6114 	}
6115 
6116 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6117 	if (status != B_OK)
6118 		return status;
6119 
6120 	if (HAS_FS_CALL(directory, remove_dir))
6121 		status = FS_CALL(directory, remove_dir, name);
6122 	else
6123 		status = B_READ_ONLY_DEVICE;
6124 
6125 	put_vnode(directory);
6126 	return status;
6127 }
6128 
6129 
6130 static status_t
6131 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6132 	size_t length)
6133 {
6134 	struct vnode* vnode = descriptor->u.vnode;
6135 
6136 	if (HAS_FS_CALL(vnode, ioctl))
6137 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6138 
6139 	return B_DEV_INVALID_IOCTL;
6140 }
6141 
6142 
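/*! Implements fcntl() for both kernel and userland callers: file
	descriptor flags (F_SETFD/F_GETFD), open mode flags (F_SETFL/F_GETFL,
	restricted to O_APPEND and O_NONBLOCK), descriptor duplication
	(F_DUPFD/F_DUPFD_CLOEXEC), and advisory locking
	(F_GETLK/F_SETLK/F_SETLKW).
*/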
6143 static status_t
6144 common_fcntl(int fd, int op, size_t argument, bool kernel)
6145 {
6146 	struct flock flock;
6147 
6148 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6149 		fd, op, argument, kernel ? "kernel" : "user"));
6150 
6151 	struct io_context* context = get_current_io_context(kernel);
6152 
6153 	struct file_descriptor* descriptor = get_fd(context, fd);
6154 	if (descriptor == NULL)
6155 		return B_FILE_ERROR;
6156 
6157 	struct vnode* vnode = fd_vnode(descriptor);
6158 
6159 	status_t status = B_OK;
6160 
6161 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6162 		if (descriptor->type != FDTYPE_FILE)
6163 			status = B_BAD_VALUE;
6164 		else if (kernel)
6165 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6166 		else if (user_memcpy(&flock, (struct flock*)argument,
6167 				sizeof(struct flock)) != B_OK)
6168 			status = B_BAD_ADDRESS;
6169 		if (status != B_OK) {
6170 			put_fd(descriptor);
6171 			return status;
6172 		}
6173 	}
6174 
6175 	switch (op) {
6176 		case F_SETFD:
6177 		{
6178 			// Set file descriptor flags
6179 
6180 			// O_CLOEXEC is the only flag available at this time
6181 			mutex_lock(&context->io_mutex);
6182 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6183 			mutex_unlock(&context->io_mutex);
6184 
6185 			status = B_OK;
6186 			break;
6187 		}
6188 
6189 		case F_GETFD:
6190 		{
6191 			// Get file descriptor flags
6192 			mutex_lock(&context->io_mutex);
6193 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6194 			mutex_unlock(&context->io_mutex);
6195 			break;
6196 		}
6197 
6198 		case F_SETFL:
6199 			// Set file descriptor open mode
6200 
6201 			// we only accept changes to O_APPEND and O_NONBLOCK
6202 			argument &= O_APPEND | O_NONBLOCK;
6203 			if (descriptor->ops->fd_set_flags != NULL) {
6204 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6205 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6206 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6207 					(int)argument);
6208 			} else
6209 				status = B_UNSUPPORTED;
6210 
6211 			if (status == B_OK) {
6212 				// update this descriptor's open_mode field
6213 				descriptor->open_mode = (descriptor->open_mode
6214 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6215 			}
6216 
6217 			break;
6218 
6219 		case F_GETFL:
6220 			// Get file descriptor open mode
6221 			status = descriptor->open_mode;
6222 			break;
6223 
6224 		case F_DUPFD:
6225 		case F_DUPFD_CLOEXEC:
6226 		{
6227 			status = new_fd_etc(context, descriptor, (int)argument);
6228 			if (status >= 0) {
6229 				mutex_lock(&context->io_mutex);
6230 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6231 				mutex_unlock(&context->io_mutex);
6232 
6233 				atomic_add(&descriptor->ref_count, 1);
6234 			}
6235 			break;
6236 		}
6237 
6238 		case F_GETLK:
6239 			if (vnode != NULL) {
6240 				struct flock normalizedLock;
6241 
6242 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6243 				status = normalize_flock(descriptor, &normalizedLock);
6244 				if (status != B_OK)
6245 					break;
6246 
6247 				if (HAS_FS_CALL(vnode, test_lock)) {
6248 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6249 						&normalizedLock);
6250 				} else
6251 					status = test_advisory_lock(vnode, &normalizedLock);
6252 				if (status == B_OK) {
6253 					if (normalizedLock.l_type == F_UNLCK) {
6254 						// no conflicting lock found, copy back the same struct
6255 						// we were given except change type to F_UNLCK
6256 						flock.l_type = F_UNLCK;
6257 						if (kernel) {
6258 							memcpy((struct flock*)argument, &flock,
6259 								sizeof(struct flock));
6260 						} else {
6261 							status = user_memcpy((struct flock*)argument,
6262 								&flock, sizeof(struct flock));
6263 						}
6264 					} else {
6265 						// a conflicting lock was found, copy back its range and
6266 						// type
6267 						if (normalizedLock.l_len == OFF_MAX)
6268 							normalizedLock.l_len = 0;
6269 
6270 						if (kernel) {
6271 							memcpy((struct flock*)argument,
6272 								&normalizedLock, sizeof(struct flock));
6273 						} else {
6274 							status = user_memcpy((struct flock*)argument,
6275 								&normalizedLock, sizeof(struct flock));
6276 						}
6277 					}
6278 				}
6279 			} else
6280 				status = B_BAD_VALUE;
6281 			break;
6282 
6283 		case F_SETLK:
6284 		case F_SETLKW:
6285 			status = normalize_flock(descriptor, &flock);
6286 			if (status != B_OK)
6287 				break;
6288 
6289 			if (vnode == NULL) {
6290 				status = B_BAD_VALUE;
6291 			} else if (flock.l_type == F_UNLCK) {
6292 				if (HAS_FS_CALL(vnode, release_lock)) {
6293 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6294 						&flock);
6295 				} else {
6296 					status = release_advisory_lock(vnode, context, NULL,
6297 						&flock);
6298 				}
6299 			} else {
6300 				// the open mode must match the lock type
6301 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6302 						&& flock.l_type == F_WRLCK)
6303 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6304 						&& flock.l_type == F_RDLCK))
6305 					status = B_FILE_ERROR;
6306 				else {
6307 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6308 						status = FS_CALL(vnode, acquire_lock,
6309 							descriptor->cookie, &flock, op == F_SETLKW);
6310 					} else {
6311 						status = acquire_advisory_lock(vnode, context, NULL,
6312 							&flock, op == F_SETLKW);
6313 					}
6314 				}
6315 			}
6316 			break;
6317 
6318 		// ToDo: add support for more ops?
6319 
6320 		default:
6321 			status = B_BAD_VALUE;
6322 	}
6323 
6324 	put_fd(descriptor);
6325 	return status;
6326 }
6327 
6328 
6329 static status_t
6330 common_sync(int fd, bool kernel)
6331 {
6332 	struct file_descriptor* descriptor;
6333 	struct vnode* vnode;
6334 	status_t status;
6335 
6336 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6337 
6338 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6339 	if (descriptor == NULL)
6340 		return B_FILE_ERROR;
6341 
6342 	if (HAS_FS_CALL(vnode, fsync))
6343 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6344 	else
6345 		status = B_UNSUPPORTED;
6346 
6347 	put_fd(descriptor);
6348 	return status;
6349 }
6350 
6351 
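/*! Locks the node referred to by the given file descriptor exclusively
	by atomically storing the descriptor in the vnode's
	mandatory_locked_by field; fails with B_BUSY if another descriptor
	has already locked the node.
*/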
6352 static status_t
6353 common_lock_node(int fd, bool kernel)
6354 {
6355 	struct file_descriptor* descriptor;
6356 	struct vnode* vnode;
6357 
6358 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6359 	if (descriptor == NULL)
6360 		return B_FILE_ERROR;
6361 
6362 	status_t status = B_OK;
6363 
6364 	// We need to set the lock atomically - someone
6365 	// else might set one at the same time
6366 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6367 			(file_descriptor*)NULL) != NULL)
6368 		status = B_BUSY;
6369 
6370 	put_fd(descriptor);
6371 	return status;
6372 }
6373 
6374 
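/*! Reverses common_lock_node(): atomically clears the vnode's
	mandatory_locked_by field, but only if it was set by this very
	descriptor; otherwise B_BAD_VALUE is returned.
*/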
6375 static status_t
6376 common_unlock_node(int fd, bool kernel)
6377 {
6378 	struct file_descriptor* descriptor;
6379 	struct vnode* vnode;
6380 
6381 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6382 	if (descriptor == NULL)
6383 		return B_FILE_ERROR;
6384 
6385 	status_t status = B_OK;
6386 
6387 	// We need to clear the lock atomically - someone
6388 	// else might clear it at the same time
6389 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6390 			(file_descriptor*)NULL, descriptor) != descriptor)
6391 		status = B_BAD_VALUE;
6392 
6393 	put_fd(descriptor);
6394 	return status;
6395 }
6396 
6397 
6398 static status_t
6399 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6400 	bool kernel)
6401 {
6402 	struct vnode* vnode;
6403 	status_t status;
6404 
6405 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6406 	if (status != B_OK)
6407 		return status;
6408 
6409 	if (HAS_FS_CALL(vnode, read_symlink)) {
6410 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6411 	} else
6412 		status = B_BAD_VALUE;
6413 
6414 	put_vnode(vnode);
6415 	return status;
6416 }
6417 
6418 
6419 static status_t
6420 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6421 	bool kernel)
6422 {
6423 	// path validity checks have to be in the calling function!
6424 	char name[B_FILE_NAME_LENGTH];
6425 	struct vnode* vnode;
6426 	status_t status;
6427 
6428 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6429 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6430 
6431 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6432 	if (status != B_OK)
6433 		return status;
6434 
6435 	if (HAS_FS_CALL(vnode, create_symlink))
6436 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6437 	else {
6438 		status = HAS_FS_CALL(vnode, write)
6439 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6440 	}
6441 
6442 	put_vnode(vnode);
6443 
6444 	return status;
6445 }
6446 
6447 
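/*! Creates a hard link at the pathFD/path combination that refers to the
	node at toFD/toPath. Both entries must reside on the same volume, or
	B_CROSS_DEVICE_LINK is returned.
*/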
6448 static status_t
6449 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6450 	bool traverseLeafLink, bool kernel)
6451 {
6452 	// path validity checks have to be in the calling function!
6453 
6454 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6455 		toPath, kernel));
6456 
6457 	char name[B_FILE_NAME_LENGTH];
6458 	struct vnode* directory;
6459 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6460 		kernel);
6461 	if (status != B_OK)
6462 		return status;
6463 
6464 	struct vnode* vnode;
6465 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6466 		kernel);
6467 	if (status != B_OK)
6468 		goto err;
6469 
6470 	if (directory->mount != vnode->mount) {
6471 		status = B_CROSS_DEVICE_LINK;
6472 		goto err1;
6473 	}
6474 
6475 	if (HAS_FS_CALL(directory, link))
6476 		status = FS_CALL(directory, link, name, vnode);
6477 	else
6478 		status = B_READ_ONLY_DEVICE;
6479 
6480 err1:
6481 	put_vnode(vnode);
6482 err:
6483 	put_vnode(directory);
6484 
6485 	return status;
6486 }
6487 
6488 
6489 static status_t
6490 common_unlink(int fd, char* path, bool kernel)
6491 {
6492 	char filename[B_FILE_NAME_LENGTH];
6493 	struct vnode* vnode;
6494 	status_t status;
6495 
6496 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6497 		kernel));
6498 
6499 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6500 	if (status < 0)
6501 		return status;
6502 
6503 	if (HAS_FS_CALL(vnode, unlink))
6504 		status = FS_CALL(vnode, unlink, filename);
6505 	else
6506 		status = B_READ_ONLY_DEVICE;
6507 
6508 	put_vnode(vnode);
6509 
6510 	return status;
6511 }
6512 
6513 
6514 static status_t
6515 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6516 {
6517 	struct vnode* vnode;
6518 	status_t status;
6519 
6520 	// TODO: honor effectiveUserGroup argument
6521 
6522 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6523 	if (status != B_OK)
6524 		return status;
6525 
6526 	if (HAS_FS_CALL(vnode, access))
6527 		status = FS_CALL(vnode, access, mode);
6528 	else
6529 		status = B_OK;
6530 
6531 	put_vnode(vnode);
6532 
6533 	return status;
6534 }
6535 
6536 
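/*! Renames the entry at fd/path to newFD/newPath. Both entries must
	reside on the same volume (B_CROSS_DEVICE_LINK otherwise); empty
	names, "." and "..", and renaming an entry to itself are rejected
	with B_BAD_VALUE.
*/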
6537 static status_t
6538 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6539 {
6540 	struct vnode* fromVnode;
6541 	struct vnode* toVnode;
6542 	char fromName[B_FILE_NAME_LENGTH];
6543 	char toName[B_FILE_NAME_LENGTH];
6544 	status_t status;
6545 
6546 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6547 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6548 
6549 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6550 	if (status != B_OK)
6551 		return status;
6552 
6553 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6554 	if (status != B_OK)
6555 		goto err1;
6556 
6557 	if (fromVnode->device != toVnode->device) {
6558 		status = B_CROSS_DEVICE_LINK;
6559 		goto err2;
6560 	}
6561 
6562 	if (fromName[0] == '\0' || toName[0] == '\0'
6563 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6564 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6565 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6566 		status = B_BAD_VALUE;
6567 		goto err2;
6568 	}
6569 
6570 	if (HAS_FS_CALL(fromVnode, rename))
6571 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6572 	else
6573 		status = B_READ_ONLY_DEVICE;
6574 
6575 err2:
6576 	put_vnode(toVnode);
6577 err1:
6578 	put_vnode(fromVnode);
6579 
6580 	return status;
6581 }
6582 
6583 
6584 static status_t
6585 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6586 {
6587 	struct vnode* vnode = descriptor->u.vnode;
6588 
6589 	FUNCTION(("common_read_stat: stat %p\n", stat));
6590 
6591 	// TODO: remove this once all file systems properly set them!
6592 	stat->st_crtim.tv_nsec = 0;
6593 	stat->st_ctim.tv_nsec = 0;
6594 	stat->st_mtim.tv_nsec = 0;
6595 	stat->st_atim.tv_nsec = 0;
6596 
6597 	return vfs_stat_vnode(vnode, stat);
6598 }
6599 
6600 
6601 static status_t
6602 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6603 	int statMask)
6604 {
6605 	struct vnode* vnode = descriptor->u.vnode;
6606 
6607 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6608 		vnode, stat, statMask));
6609 
6610 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6611 		&& (statMask & B_STAT_SIZE) != 0) {
6612 		return B_BAD_VALUE;
6613 	}
6614 
6615 	if (!HAS_FS_CALL(vnode, write_stat))
6616 		return B_READ_ONLY_DEVICE;
6617 
6618 	return FS_CALL(vnode, write_stat, stat, statMask);
6619 }
6620 
6621 
6622 static status_t
6623 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6624 	struct stat* stat, bool kernel)
6625 {
6626 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6627 		stat));
6628 
6629 	struct vnode* vnode;
6630 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6631 		NULL, kernel);
6632 	if (status != B_OK)
6633 		return status;
6634 
6635 	status = vfs_stat_vnode(vnode, stat);
6636 
6637 	put_vnode(vnode);
6638 	return status;
6639 }
6640 
6641 
6642 static status_t
6643 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6644 	const struct stat* stat, int statMask, bool kernel)
6645 {
6646 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6647 		"kernel %d\n", fd, path, stat, statMask, kernel));
6648 
6649 	struct vnode* vnode;
6650 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6651 		NULL, kernel);
6652 	if (status != B_OK)
6653 		return status;
6654 
6655 	if (HAS_FS_CALL(vnode, write_stat))
6656 		status = FS_CALL(vnode, write_stat, stat, statMask);
6657 	else
6658 		status = B_READ_ONLY_DEVICE;
6659 
6660 	put_vnode(vnode);
6661 
6662 	return status;
6663 }
6664 
6665 
6666 static int
6667 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6668 {
6669 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6670 		kernel));
6671 
6672 	struct vnode* vnode;
6673 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6674 		NULL, kernel);
6675 	if (status != B_OK)
6676 		return status;
6677 
6678 	status = open_attr_dir_vnode(vnode, kernel);
6679 	if (status < 0)
6680 		put_vnode(vnode);
6681 
6682 	return status;
6683 }
6684 
6685 
6686 static status_t
6687 attr_dir_close(struct file_descriptor* descriptor)
6688 {
6689 	struct vnode* vnode = descriptor->u.vnode;
6690 
6691 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6692 
6693 	if (HAS_FS_CALL(vnode, close_attr_dir))
6694 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6695 
6696 	return B_OK;
6697 }
6698 
6699 
6700 static void
6701 attr_dir_free_fd(struct file_descriptor* descriptor)
6702 {
6703 	struct vnode* vnode = descriptor->u.vnode;
6704 
6705 	if (vnode != NULL) {
6706 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6707 		put_vnode(vnode);
6708 	}
6709 }
6710 
6711 
6712 static status_t
6713 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6714 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6715 {
6716 	struct vnode* vnode = descriptor->u.vnode;
6717 
6718 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6719 
6720 	if (HAS_FS_CALL(vnode, read_attr_dir))
6721 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6722 			bufferSize, _count);
6723 
6724 	return B_UNSUPPORTED;
6725 }
6726 
6727 
6728 static status_t
6729 attr_dir_rewind(struct file_descriptor* descriptor)
6730 {
6731 	struct vnode* vnode = descriptor->u.vnode;
6732 
6733 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6734 
6735 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6736 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6737 
6738 	return B_UNSUPPORTED;
6739 }
6740 
6741 
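/*! Creates and opens an attribute of the given name on the node at the
	given fd/path combination. If allocating a file descriptor fails
	afterwards, the attribute cookie is closed and freed, and the
	just-created attribute is removed again.
*/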
6742 static int
6743 attr_create(int fd, char* path, const char* name, uint32 type,
6744 	int openMode, bool kernel)
6745 {
6746 	if (name == NULL || *name == '\0')
6747 		return B_BAD_VALUE;
6748 
6749 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6750 	struct vnode* vnode;
6751 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6752 		kernel);
6753 	if (status != B_OK)
6754 		return status;
6755 
6756 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6757 		status = B_LINK_LIMIT;
6758 		goto err;
6759 	}
6760 
6761 	if (!HAS_FS_CALL(vnode, create_attr)) {
6762 		status = B_READ_ONLY_DEVICE;
6763 		goto err;
6764 	}
6765 
6766 	void* cookie;
6767 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6768 	if (status != B_OK)
6769 		goto err;
6770 
6771 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6772 	if (fd >= 0)
6773 		return fd;
6774 
6775 	status = fd;
6776 
6777 	FS_CALL(vnode, close_attr, cookie);
6778 	FS_CALL(vnode, free_attr_cookie, cookie);
6779 
6780 	FS_CALL(vnode, remove_attr, name);
6781 
6782 err:
6783 	put_vnode(vnode);
6784 
6785 	return status;
6786 }
6787 
6788 
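/*! Opens the named attribute of the node at the given fd/path
	combination and returns a new file descriptor for it.
*/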
6789 static int
6790 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6791 {
6792 	if (name == NULL || *name == '\0')
6793 		return B_BAD_VALUE;
6794 
6795 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6796 	struct vnode* vnode;
6797 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6798 		kernel);
6799 	if (status != B_OK)
6800 		return status;
6801 
6802 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6803 		status = B_LINK_LIMIT;
6804 		goto err;
6805 	}
6806 
6807 	if (!HAS_FS_CALL(vnode, open_attr)) {
6808 		status = B_UNSUPPORTED;
6809 		goto err;
6810 	}
6811 
6812 	void* cookie;
6813 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6814 	if (status != B_OK)
6815 		goto err;
6816 
6817 	// now we only need a file descriptor for this attribute and we're done
6818 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6819 	if (fd >= 0)
6820 		return fd;
6821 
6822 	status = fd;
6823 
6824 	FS_CALL(vnode, close_attr, cookie);
6825 	FS_CALL(vnode, free_attr_cookie, cookie);
6826 
6827 err:
6828 	put_vnode(vnode);
6829 
6830 	return status;
6831 }
6832 
6833 
6834 static status_t
6835 attr_close(struct file_descriptor* descriptor)
6836 {
6837 	struct vnode* vnode = descriptor->u.vnode;
6838 
6839 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6840 
6841 	if (HAS_FS_CALL(vnode, close_attr))
6842 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6843 
6844 	return B_OK;
6845 }
6846 
6847 
6848 static void
6849 attr_free_fd(struct file_descriptor* descriptor)
6850 {
6851 	struct vnode* vnode = descriptor->u.vnode;
6852 
6853 	if (vnode != NULL) {
6854 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6855 		put_vnode(vnode);
6856 	}
6857 }
6858 
6859 
6860 static status_t
6861 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6862 	size_t* length)
6863 {
6864 	struct vnode* vnode = descriptor->u.vnode;
6865 
6866 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6867 		pos, length, *length));
6868 
6869 	if (!HAS_FS_CALL(vnode, read_attr))
6870 		return B_UNSUPPORTED;
6871 
6872 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6873 }
6874 
6875 
6876 static status_t
6877 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6878 	size_t* length)
6879 {
6880 	struct vnode* vnode = descriptor->u.vnode;
6881 
6882 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6883 		length));
6884 
6885 	if (!HAS_FS_CALL(vnode, write_attr))
6886 		return B_UNSUPPORTED;
6887 
6888 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6889 }
6890 
6891 
6892 static off_t
6893 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6894 {
6895 	off_t offset;
6896 
6897 	switch (seekType) {
6898 		case SEEK_SET:
6899 			offset = 0;
6900 			break;
6901 		case SEEK_CUR:
6902 			offset = descriptor->pos;
6903 			break;
6904 		case SEEK_END:
6905 		{
6906 			struct vnode* vnode = descriptor->u.vnode;
6907 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6908 				return B_UNSUPPORTED;
6909 
6910 			struct stat stat;
6911 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6912 				&stat);
6913 			if (status != B_OK)
6914 				return status;
6915 
6916 			offset = stat.st_size;
6917 			break;
6918 		}
6919 		default:
6920 			return B_BAD_VALUE;
6921 	}
6922 
6923 	// assumes off_t is 64 bits wide
6924 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6925 		return B_BUFFER_OVERFLOW;
6926 
6927 	pos += offset;
6928 	if (pos < 0)
6929 		return B_BAD_VALUE;
6930 
6931 	return descriptor->pos = pos;
6932 }
6933 
6934 
6935 static status_t
6936 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6937 {
6938 	struct vnode* vnode = descriptor->u.vnode;
6939 
6940 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6941 
6942 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6943 		return B_UNSUPPORTED;
6944 
6945 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6946 }
6947 
6948 
6949 static status_t
6950 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6951 	int statMask)
6952 {
6953 	struct vnode* vnode = descriptor->u.vnode;
6954 
6955 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6956 
6957 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6958 		return B_READ_ONLY_DEVICE;
6959 
6960 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6961 }
6962 
6963 
6964 static status_t
6965 attr_remove(int fd, const char* name, bool kernel)
6966 {
6967 	struct file_descriptor* descriptor;
6968 	struct vnode* vnode;
6969 	status_t status;
6970 
6971 	if (name == NULL || *name == '\0')
6972 		return B_BAD_VALUE;
6973 
6974 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6975 		kernel));
6976 
6977 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6978 	if (descriptor == NULL)
6979 		return B_FILE_ERROR;
6980 
6981 	if (HAS_FS_CALL(vnode, remove_attr))
6982 		status = FS_CALL(vnode, remove_attr, name);
6983 	else
6984 		status = B_READ_ONLY_DEVICE;
6985 
6986 	put_fd(descriptor);
6987 
6988 	return status;
6989 }
6990 
6991 
6992 static status_t
6993 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6994 	bool kernel)
6995 {
6996 	struct file_descriptor* fromDescriptor;
6997 	struct file_descriptor* toDescriptor;
6998 	struct vnode* fromVnode;
6999 	struct vnode* toVnode;
7000 	status_t status;
7001 
7002 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7003 		|| *toName == '\0')
7004 		return B_BAD_VALUE;
7005 
7006 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7007 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7008 
7009 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7010 	if (fromDescriptor == NULL)
7011 		return B_FILE_ERROR;
7012 
7013 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7014 	if (toDescriptor == NULL) {
7015 		status = B_FILE_ERROR;
7016 		goto err;
7017 	}
7018 
7019 	// are the files on the same volume?
7020 	if (fromVnode->device != toVnode->device) {
7021 		status = B_CROSS_DEVICE_LINK;
7022 		goto err1;
7023 	}
7024 
7025 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7026 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7027 	} else
7028 		status = B_READ_ONLY_DEVICE;
7029 
7030 err1:
7031 	put_fd(toDescriptor);
7032 err:
7033 	put_fd(fromDescriptor);
7034 
7035 	return status;
7036 }
7037 
7038 
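/*! Opens the index directory of the volume specified by mountID via the
	mount's open_index_dir() hook and returns a new file descriptor
	for it.
*/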
7039 static int
7040 index_dir_open(dev_t mountID, bool kernel)
7041 {
7042 	struct fs_mount* mount;
7043 	void* cookie;
7044 
7045 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7046 		kernel));
7047 
7048 	status_t status = get_mount(mountID, &mount);
7049 	if (status != B_OK)
7050 		return status;
7051 
7052 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7053 		status = B_UNSUPPORTED;
7054 		goto error;
7055 	}
7056 
7057 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7058 	if (status != B_OK)
7059 		goto error;
7060 
7061 	// get fd for the index directory
7062 	int fd;
7063 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7064 	if (fd >= 0)
7065 		return fd;
7066 
7067 	// something went wrong
7068 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7069 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7070 
7071 	status = fd;
7072 
7073 error:
7074 	put_mount(mount);
7075 	return status;
7076 }
7077 
7078 
7079 static status_t
7080 index_dir_close(struct file_descriptor* descriptor)
7081 {
7082 	struct fs_mount* mount = descriptor->u.mount;
7083 
7084 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7085 
7086 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7087 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7088 
7089 	return B_OK;
7090 }
7091 
7092 
7093 static void
7094 index_dir_free_fd(struct file_descriptor* descriptor)
7095 {
7096 	struct fs_mount* mount = descriptor->u.mount;
7097 
7098 	if (mount != NULL) {
7099 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7100 		put_mount(mount);
7101 	}
7102 }
7103 
7104 
7105 static status_t
7106 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7107 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7108 {
7109 	struct fs_mount* mount = descriptor->u.mount;
7110 
7111 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7112 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7113 			bufferSize, _count);
7114 	}
7115 
7116 	return B_UNSUPPORTED;
7117 }
7118 
7119 
7120 static status_t
7121 index_dir_rewind(struct file_descriptor* descriptor)
7122 {
7123 	struct fs_mount* mount = descriptor->u.mount;
7124 
7125 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7126 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7127 
7128 	return B_UNSUPPORTED;
7129 }
7130 
7131 
7132 static status_t
7133 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7134 	bool kernel)
7135 {
7136 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7137 		mountID, name, kernel));
7138 
7139 	struct fs_mount* mount;
7140 	status_t status = get_mount(mountID, &mount);
7141 	if (status != B_OK)
7142 		return status;
7143 
7144 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7145 		status = B_READ_ONLY_DEVICE;
7146 		goto out;
7147 	}
7148 
7149 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7150 
7151 out:
7152 	put_mount(mount);
7153 	return status;
7154 }
7155 
7156 
7157 #if 0
7158 static status_t
7159 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7160 {
7161 	struct vnode* vnode = descriptor->u.vnode;
7162 
7163 	// ToDo: currently unused!
7164 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7165 	if (!HAS_FS_CALL(vnode, read_index_stat))
7166 		return B_UNSUPPORTED;
7167 
7168 	return B_UNSUPPORTED;
7169 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7170 }
7171 
7172 
7173 static void
7174 index_free_fd(struct file_descriptor* descriptor)
7175 {
7176 	struct vnode* vnode = descriptor->u.vnode;
7177 
7178 	if (vnode != NULL) {
7179 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7180 		put_vnode(vnode);
7181 	}
7182 }
7183 #endif
7184 
7185 
7186 static status_t
7187 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7188 	bool kernel)
7189 {
7190 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7191 		"kernel = %d)\n", mountID, name, kernel));
7192 
7193 	struct fs_mount* mount;
7194 	status_t status = get_mount(mountID, &mount);
7195 	if (status != B_OK)
7196 		return status;
7197 
7198 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7199 		status = B_UNSUPPORTED;
7200 		goto out;
7201 	}
7202 
7203 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7204 
7205 out:
7206 	put_mount(mount);
7207 	return status;
7208 }
7209 
7210 
7211 static status_t
7212 index_remove(dev_t mountID, const char* name, bool kernel)
7213 {
7214 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7215 		mountID, name, kernel));
7216 
7217 	struct fs_mount* mount;
7218 	status_t status = get_mount(mountID, &mount);
7219 	if (status != B_OK)
7220 		return status;
7221 
7222 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7223 		status = B_READ_ONLY_DEVICE;
7224 		goto out;
7225 	}
7226 
7227 	status = FS_MOUNT_CALL(mount, remove_index, name);
7228 
7229 out:
7230 	put_mount(mount);
7231 	return status;
7232 }
7233 
7234 
7235 /*!	TODO: the query FS API is still pretty much the same as in R5.
7236 		It would be nice if queries would get some more kernel
7237 		support.
7238 		For example, query parsing should be moved into the kernel.
7239 */
7240 static int
7241 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7242 	int32 token, bool kernel)
7243 {
7244 	struct fs_mount* mount;
7245 	void* cookie;
7246 
7247 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7248 		device, query, kernel));
7249 
7250 	status_t status = get_mount(device, &mount);
7251 	if (status != B_OK)
7252 		return status;
7253 
7254 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7255 		status = B_UNSUPPORTED;
7256 		goto error;
7257 	}
7258 
7259 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7260 		&cookie);
7261 	if (status != B_OK)
7262 		goto error;
7263 
7264 	// get fd for the query
7265 	int fd;
7266 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7267 	if (fd >= 0)
7268 		return fd;
7269 
7270 	status = fd;
7271 
7272 	// something went wrong
7273 	FS_MOUNT_CALL(mount, close_query, cookie);
7274 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7275 
7276 error:
7277 	put_mount(mount);
7278 	return status;
7279 }
7280 
7281 
7282 static status_t
7283 query_close(struct file_descriptor* descriptor)
7284 {
7285 	struct fs_mount* mount = descriptor->u.mount;
7286 
7287 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7288 
7289 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7290 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7291 
7292 	return B_OK;
7293 }
7294 
7295 
7296 static void
7297 query_free_fd(struct file_descriptor* descriptor)
7298 {
7299 	struct fs_mount* mount = descriptor->u.mount;
7300 
7301 	if (mount != NULL) {
7302 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7303 		put_mount(mount);
7304 	}
7305 }
7306 
7307 
7308 static status_t
7309 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7310 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7311 {
7312 	struct fs_mount* mount = descriptor->u.mount;
7313 
7314 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7315 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7316 			bufferSize, _count);
7317 	}
7318 
7319 	return B_UNSUPPORTED;
7320 }
7321 
7322 
7323 static status_t
7324 query_rewind(struct file_descriptor* descriptor)
7325 {
7326 	struct fs_mount* mount = descriptor->u.mount;
7327 
7328 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7329 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7330 
7331 	return B_UNSUPPORTED;
7332 }
7333 
7334 
7335 //	#pragma mark - General File System functions
7336 
7337 
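/*! Mounts the file system fsName (or, if NULL, the one recognized by the
	disk device manager) at the given path. If given, the device path is
	normalized and resolved to a partition; for plain image files, a file
	device is created on the fly. Layered file systems are supported by
	chaining one fs_volume per layer. On success, the new mount's root
	vnode is linked with the vnode it covers, and the new mount ID is
	returned.
*/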
7338 static dev_t
7339 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7340 	const char* args, bool kernel)
7341 {
7342 	struct ::fs_mount* mount;
7343 	status_t status = B_OK;
7344 	fs_volume* volume = NULL;
7345 	int32 layer = 0;
7346 	Vnode* coveredNode = NULL;
7347 
7348 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7349 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7350 
7351 	// The path is always safe; we just have to make sure that fsName is at
7352 	// least superficially valid - we can't make any assumptions about args.
7353 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7354 	// We'll get it from the DDM later.
7355 	if (fsName == NULL) {
7356 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7357 			return B_BAD_VALUE;
7358 	} else if (fsName[0] == '\0')
7359 		return B_BAD_VALUE;
7360 
7361 	RecursiveLocker mountOpLocker(sMountOpLock);
7362 
7363 	// Helper to delete a newly created file device on failure.
7364 	// Not exactly beautiful, but helps to keep the code below cleaner.
7365 	struct FileDeviceDeleter {
7366 		FileDeviceDeleter() : id(-1) {}
7367 		~FileDeviceDeleter()
7368 		{
7369 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7370 		}
7371 
7372 		partition_id id;
7373 	} fileDeviceDeleter;
7374 
7375 	// If the file system is not a "virtual" one, the device argument should
7376 	// point to a real file/device (if given at all).
7377 	// get the partition
7378 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7379 	KPartition* partition = NULL;
7380 	KPath normalizedDevice;
7381 	bool newlyCreatedFileDevice = false;
7382 
7383 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7384 		// normalize the device path
7385 		status = normalizedDevice.SetTo(device, true);
7386 		if (status != B_OK)
7387 			return status;
7388 
7389 		// get a corresponding partition from the DDM
7390 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7391 		if (partition == NULL) {
7392 			// Partition not found: this either means that the user supplied
7393 			// an invalid path, or that the path refers to an image file. We
7394 			// try to let the DDM create a file device for the path.
7395 			partition_id deviceID = ddm->CreateFileDevice(
7396 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7397 			if (deviceID >= 0) {
7398 				partition = ddm->RegisterPartition(deviceID);
7399 				if (newlyCreatedFileDevice)
7400 					fileDeviceDeleter.id = deviceID;
7401 			}
7402 		}
7403 
7404 		if (!partition) {
7405 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7406 				normalizedDevice.Path()));
7407 			return B_ENTRY_NOT_FOUND;
7408 		}
7409 
7410 		device = normalizedDevice.Path();
7411 			// correct path to file device
7412 	}
7413 	PartitionRegistrar partitionRegistrar(partition, true);
7414 
7415 	// Write lock the partition's device. For the time being, we keep the lock
7416 	// until we're done mounting -- not nice, but it ensures that no one is
7417 	// interfering.
7418 	// TODO: Just mark the partition busy while mounting!
7419 	KDiskDevice* diskDevice = NULL;
7420 	if (partition) {
7421 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7422 		if (!diskDevice) {
7423 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7424 			return B_ERROR;
7425 		}
7426 	}
7427 
7428 	DeviceWriteLocker writeLocker(diskDevice, true);
7429 		// this takes over the write lock acquired before
7430 
7431 	if (partition != NULL) {
7432 		// make sure that the partition is not busy
7433 		if (partition->IsBusy()) {
7434 			TRACE(("fs_mount(): Partition is busy.\n"));
7435 			return B_BUSY;
7436 		}
7437 
7438 		// if no FS name had been supplied, we get it from the partition
7439 		if (fsName == NULL) {
7440 			KDiskSystem* diskSystem = partition->DiskSystem();
7441 			if (!diskSystem) {
7442 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7443 					"recognize it.\n"));
7444 				return B_BAD_VALUE;
7445 			}
7446 
7447 			if (!diskSystem->IsFileSystem()) {
7448 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7449 					"partitioning system.\n"));
7450 				return B_BAD_VALUE;
7451 			}
7452 
7453 			// The disk system name will not change, and the KDiskSystem
7454 			// object will not go away while the disk device is locked (and
7455 			// the partition has a reference to it), so this is safe.
7456 			fsName = diskSystem->Name();
7457 		}
7458 	}
7459 
7460 	mount = new(std::nothrow) (struct ::fs_mount);
7461 	if (mount == NULL)
7462 		return B_NO_MEMORY;
7463 
7464 	mount->device_name = strdup(device);
7465 		// "device" can be NULL
7466 
7467 	status = mount->entry_cache.Init();
7468 	if (status != B_OK)
7469 		goto err1;
7470 
7471 	// initialize structure
7472 	mount->id = sNextMountID++;
7473 	mount->partition = NULL;
7474 	mount->root_vnode = NULL;
7475 	mount->covers_vnode = NULL;
7476 	mount->unmounting = false;
7477 	mount->owns_file_device = false;
7478 	mount->volume = NULL;
7479 
7480 	// build up the volume(s)
7481 	while (true) {
7482 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7483 		if (layerFSName == NULL) {
7484 			if (layer == 0) {
7485 				status = B_NO_MEMORY;
7486 				goto err1;
7487 			}
7488 
7489 			break;
7490 		}
7491 		MemoryDeleter layerFSNameDeleter(layerFSName);
7492 
7493 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7494 		if (volume == NULL) {
7495 			status = B_NO_MEMORY;
7496 			goto err1;
7497 		}
7498 
7499 		volume->id = mount->id;
7500 		volume->partition = partition != NULL ? partition->ID() : -1;
7501 		volume->layer = layer++;
7502 		volume->private_volume = NULL;
7503 		volume->ops = NULL;
7504 		volume->sub_volume = NULL;
7505 		volume->super_volume = NULL;
7506 		volume->file_system = NULL;
7507 		volume->file_system_name = NULL;
7508 
7509 		volume->file_system_name = get_file_system_name(layerFSName);
7510 		if (volume->file_system_name == NULL) {
7511 			status = B_NO_MEMORY;
7512 			free(volume);
7513 			goto err1;
7514 		}
7515 
7516 		volume->file_system = get_file_system(layerFSName);
7517 		if (volume->file_system == NULL) {
7518 			status = B_DEVICE_NOT_FOUND;
7519 			free(volume->file_system_name);
7520 			free(volume);
7521 			goto err1;
7522 		}
7523 
7524 		if (mount->volume == NULL)
7525 			mount->volume = volume;
7526 		else {
7527 			volume->super_volume = mount->volume;
7528 			mount->volume->sub_volume = volume;
7529 			mount->volume = volume;
7530 		}
7531 	}
7532 
7533 	// insert mount struct into list before we call FS's mount() function
7534 	// so that vnodes can be created for this mount
7535 	mutex_lock(&sMountMutex);
7536 	sMountsTable->Insert(mount);
7537 	mutex_unlock(&sMountMutex);
7538 
7539 	ino_t rootID;
7540 
7541 	if (!sRoot) {
7542 		// we haven't mounted anything yet
7543 		if (strcmp(path, "/") != 0) {
7544 			status = B_ERROR;
7545 			goto err2;
7546 		}
7547 
7548 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7549 			args, &rootID);
7550 		if (status != B_OK || mount->volume->ops == NULL)
7551 			goto err2;
7552 	} else {
7553 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7554 		if (status != B_OK)
7555 			goto err2;
7556 
7557 		mount->covers_vnode = coveredNode;
7558 
7559 		// make sure coveredNode is a directory
7560 		if (!S_ISDIR(coveredNode->Type())) {
7561 			status = B_NOT_A_DIRECTORY;
7562 			goto err3;
7563 		}
7564 
7565 		if (coveredNode->IsCovered()) {
7566 			// this is already a covered vnode
7567 			status = B_BUSY;
7568 			goto err3;
7569 		}
7570 
7571 		// mount it/them
7572 		fs_volume* volume = mount->volume;
7573 		while (volume) {
7574 			status = volume->file_system->mount(volume, device, flags, args,
7575 				&rootID);
7576 			if (status != B_OK || volume->ops == NULL) {
7577 				if (status == B_OK && volume->ops == NULL)
7578 					panic("fs_mount: mount() succeeded but ops is NULL!");
7579 				if (volume->sub_volume)
7580 					goto err4;
7581 				goto err3;
7582 			}
7583 
7584 			volume = volume->super_volume;
7585 		}
7586 
7587 		volume = mount->volume;
7588 		while (volume) {
7589 			if (volume->ops->all_layers_mounted != NULL)
7590 				volume->ops->all_layers_mounted(volume);
7591 			volume = volume->super_volume;
7592 		}
7593 	}
7594 
7595 	// the root node is supposed to be owned by the file system - it must
7596 	// exist at this point
7597 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7598 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7599 		panic("fs_mount: file system does not own its root node!\n");
7600 		status = B_ERROR;
7601 		goto err4;
7602 	}
7603 
7604 	// set up the links between the root vnode and the vnode it covers
7605 	rw_lock_write_lock(&sVnodeLock);
7606 	if (coveredNode != NULL) {
7607 		if (coveredNode->IsCovered()) {
7608 			// the vnode is covered now
7609 			status = B_BUSY;
7610 			rw_lock_write_unlock(&sVnodeLock);
7611 			goto err4;
7612 		}
7613 
7614 		mount->root_vnode->covers = coveredNode;
7615 		mount->root_vnode->SetCovering(true);
7616 
7617 		coveredNode->covered_by = mount->root_vnode;
7618 		coveredNode->SetCovered(true);
7619 	}
7620 	rw_lock_write_unlock(&sVnodeLock);
7621 
7622 	if (!sRoot) {
7623 		sRoot = mount->root_vnode;
7624 		mutex_lock(&sIOContextRootLock);
7625 		get_current_io_context(true)->root = sRoot;
7626 		mutex_unlock(&sIOContextRootLock);
7627 		inc_vnode_ref_count(sRoot);
7628 	}
7629 
7630 	// supply the partition (if any) with the mount cookie and mark it mounted
7631 	if (partition) {
7632 		partition->SetMountCookie(mount->volume->private_volume);
7633 		partition->SetVolumeID(mount->id);
7634 
7635 		// keep a partition reference as long as the partition is mounted
7636 		partitionRegistrar.Detach();
7637 		mount->partition = partition;
7638 		mount->owns_file_device = newlyCreatedFileDevice;
7639 		fileDeviceDeleter.id = -1;
7640 	}
7641 
7642 	notify_mount(mount->id,
7643 		coveredNode != NULL ? coveredNode->device : -1,
7644 		coveredNode ? coveredNode->id : -1);
7645 
7646 	return mount->id;
7647 
7648 err4:
7649 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7650 err3:
7651 	if (coveredNode != NULL)
7652 		put_vnode(coveredNode);
7653 err2:
7654 	mutex_lock(&sMountMutex);
7655 	sMountsTable->Remove(mount);
7656 	mutex_unlock(&sMountMutex);
7657 err1:
7658 	delete mount;
7659 
7660 	return status;
7661 }
7662 
7663 
7664 static status_t
7665 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7666 {
7667 	struct fs_mount* mount;
7668 	status_t err;
7669 
7670 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7671 		mountID, kernel));
7672 
7673 	struct vnode* pathVnode = NULL;
7674 	if (path != NULL) {
7675 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7676 		if (err != B_OK)
7677 			return B_ENTRY_NOT_FOUND;
7678 	}
7679 
7680 	RecursiveLocker mountOpLocker(sMountOpLock);
7681 
	// This lock is not strictly necessary, but is taken in the KDEBUG case
	// to keep the ASSERT in find_mount() working.
7684 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7685 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7686 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7687 	if (mount == NULL) {
7688 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7689 			pathVnode);
7690 	}
7691 
7692 	if (path != NULL) {
7693 		put_vnode(pathVnode);
7694 
7695 		if (mount->root_vnode != pathVnode) {
			// not a mount point
7697 			return B_BAD_VALUE;
7698 		}
7699 	}
7700 
7701 	// if the volume is associated with a partition, lock the device of the
7702 	// partition as long as we are unmounting
7703 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7704 	KPartition* partition = mount->partition;
7705 	KDiskDevice* diskDevice = NULL;
7706 	if (partition != NULL) {
7707 		if (partition->Device() == NULL) {
7708 			dprintf("fs_unmount(): There is no device!\n");
7709 			return B_ERROR;
7710 		}
7711 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7712 		if (!diskDevice) {
7713 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7714 			return B_ERROR;
7715 		}
7716 	}
7717 	DeviceWriteLocker writeLocker(diskDevice, true);
7718 
	// make sure that the partition is not busy
7720 	if (partition != NULL) {
7721 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7722 			TRACE(("fs_unmount(): Partition is busy.\n"));
7723 			return B_BUSY;
7724 		}
7725 	}
7726 
7727 	// grab the vnode master mutex to keep someone from creating
7728 	// a vnode while we're figuring out if we can continue
7729 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7730 
7731 	bool disconnectedDescriptors = false;
7732 
7733 	while (true) {
7734 		bool busy = false;
7735 
		// cycle through the list of vnodes associated with this mount and
		// make sure none of them is busy or still referenced
7738 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7739 		while (struct vnode* vnode = iterator.Next()) {
7740 			if (vnode->IsBusy()) {
7741 				busy = true;
7742 				break;
7743 			}
7744 
7745 			// check the vnode's ref count -- subtract additional references for
7746 			// covering
7747 			int32 refCount = vnode->ref_count;
7748 			if (vnode->covers != NULL)
7749 				refCount--;
7750 			if (vnode->covered_by != NULL)
7751 				refCount--;
7752 
7753 			if (refCount != 0) {
7754 				// there are still vnodes in use on this mount, so we cannot
7755 				// unmount yet
7756 				busy = true;
7757 				break;
7758 			}
7759 		}
7760 
7761 		if (!busy)
7762 			break;
7763 
7764 		if ((flags & B_FORCE_UNMOUNT) == 0)
7765 			return B_BUSY;
7766 
7767 		if (disconnectedDescriptors) {
7768 			// wait a bit until the last access is finished, and then try again
7769 			vnodesWriteLocker.Unlock();
7770 			snooze(100000);
7771 			// TODO: if there is some kind of bug that prevents the ref counts
7772 			// from getting back to zero, this will fall into an endless loop...
7773 			vnodesWriteLocker.Lock();
7774 			continue;
7775 		}
7776 
7777 		// the file system is still busy - but we're forced to unmount it,
7778 		// so let's disconnect all open file descriptors
7779 
7780 		mount->unmounting = true;
7781 			// prevent new vnodes from being created
7782 
7783 		vnodesWriteLocker.Unlock();
7784 
7785 		disconnect_mount_or_vnode_fds(mount, NULL);
7786 		disconnectedDescriptors = true;
7787 
7788 		vnodesWriteLocker.Lock();
7789 	}
7790 
7791 	// We can safely continue. Mark all of the vnodes busy and this mount
7792 	// structure in unmounting state. Also undo the vnode covers/covered_by
7793 	// links.
7794 	mount->unmounting = true;
7795 
7796 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7797 	while (struct vnode* vnode = iterator.Next()) {
7798 		// Remove all covers/covered_by links from other mounts' nodes to this
7799 		// vnode and adjust the node ref count accordingly. We will release the
7800 		// references to the external vnodes below.
7801 		if (Vnode* coveredNode = vnode->covers) {
7802 			if (Vnode* coveringNode = vnode->covered_by) {
7803 				// We have both covered and covering vnodes, so just remove us
7804 				// from the chain.
7805 				coveredNode->covered_by = coveringNode;
7806 				coveringNode->covers = coveredNode;
7807 				vnode->ref_count -= 2;
7808 
7809 				vnode->covered_by = NULL;
7810 				vnode->covers = NULL;
7811 				vnode->SetCovering(false);
7812 				vnode->SetCovered(false);
7813 			} else {
7814 				// We only have a covered vnode. Remove its link to us.
7815 				coveredNode->covered_by = NULL;
7816 				coveredNode->SetCovered(false);
7817 				vnode->ref_count--;
7818 
				// If the other node is an external vnode, we keep its link
				// around so we can put the reference later on. Otherwise
7821 				// we get rid of it right now.
7822 				if (coveredNode->mount == mount) {
7823 					vnode->covers = NULL;
7824 					coveredNode->ref_count--;
7825 				}
7826 			}
7827 		} else if (Vnode* coveringNode = vnode->covered_by) {
7828 			// We only have a covering vnode. Remove its link to us.
7829 			coveringNode->covers = NULL;
7830 			coveringNode->SetCovering(false);
7831 			vnode->ref_count--;
7832 
			// If the other node is an external vnode, we keep its link
			// around so we can put the reference later on. Otherwise
7835 			// we get rid of it right now.
7836 			if (coveringNode->mount == mount) {
7837 				vnode->covered_by = NULL;
7838 				coveringNode->ref_count--;
7839 			}
7840 		}
7841 
7842 		vnode->SetBusy(true);
7843 		vnode_to_be_freed(vnode);
7844 	}
7845 
7846 	vnodesWriteLocker.Unlock();
7847 
	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so we don't
	// have to do that ourselves.
7851 	while (struct vnode* vnode = mount->vnodes.Head()) {
7852 		// Put the references to external covered/covering vnodes we kept above.
7853 		if (Vnode* coveredNode = vnode->covers)
7854 			put_vnode(coveredNode);
7855 		if (Vnode* coveringNode = vnode->covered_by)
7856 			put_vnode(coveringNode);
7857 
7858 		free_vnode(vnode, false);
7859 	}
7860 
7861 	// remove the mount structure from the hash table
7862 	mutex_lock(&sMountMutex);
7863 	sMountsTable->Remove(mount);
7864 	mutex_unlock(&sMountMutex);
7865 
7866 	mountOpLocker.Unlock();
7867 
7868 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7869 	notify_unmount(mount->id);
7870 
7871 	// dereference the partition and mark it unmounted
7872 	if (partition) {
7873 		partition->SetVolumeID(-1);
7874 		partition->SetMountCookie(NULL);
7875 
7876 		if (mount->owns_file_device)
7877 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7878 		partition->Unregister();
7879 	}
7880 
7881 	delete mount;
7882 	return B_OK;
7883 }
7884 
7885 
7886 static status_t
7887 fs_sync(dev_t device)
7888 {
7889 	struct fs_mount* mount;
7890 	status_t status = get_mount(device, &mount);
7891 	if (status != B_OK)
7892 		return status;
7893 
7894 	struct vnode marker;
7895 	memset(&marker, 0, sizeof(marker));
7896 	marker.SetBusy(true);
7897 	marker.SetRemoved(true);
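	// The marker vnode only serves to remember our position in the mount's
	// vnode list between iterations; its "removed" flag tracks whether it is
	// currently inserted into the list.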
7898 
7899 	// First, synchronize all file caches
7900 
7901 	while (true) {
7902 		WriteLocker locker(sVnodeLock);
			// Note: That's the easy way, which is probably OK for sync(),
			// since it's a relatively rare call and doesn't need to allow for
			// a lot of concurrency. Using a read lock would be possible, but
			// also more involved, since we would have to lock the individual
			// nodes and take care of the locking order, which we might not
			// want to do while holding fs_mount::lock.
7909 
7910 		// synchronize access to vnode list
7911 		mutex_lock(&mount->lock);
7912 
7913 		struct vnode* vnode;
7914 		if (!marker.IsRemoved()) {
7915 			vnode = mount->vnodes.GetNext(&marker);
7916 			mount->vnodes.Remove(&marker);
7917 			marker.SetRemoved(true);
7918 		} else
7919 			vnode = mount->vnodes.First();
7920 
7921 		while (vnode != NULL && (vnode->cache == NULL
7922 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7923 			// TODO: we could track writes (and writable mapped vnodes)
7924 			//	and have a simple flag that we could test for here
7925 			vnode = mount->vnodes.GetNext(vnode);
7926 		}
7927 
7928 		if (vnode != NULL) {
7929 			// insert marker vnode again
7930 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7931 			marker.SetRemoved(false);
7932 		}
7933 
7934 		mutex_unlock(&mount->lock);
7935 
7936 		if (vnode == NULL)
7937 			break;
7938 
7939 		vnode = lookup_vnode(mount->id, vnode->id);
7940 		if (vnode == NULL || vnode->IsBusy())
7941 			continue;
7942 
7943 		if (vnode->ref_count == 0) {
7944 			// this vnode has been unused before
7945 			vnode_used(vnode);
7946 		}
7947 		inc_vnode_ref_count(vnode);
7948 
7949 		locker.Unlock();
7950 
7951 		if (vnode->cache != NULL && !vnode->IsRemoved())
7952 			vnode->cache->WriteModified();
7953 
7954 		put_vnode(vnode);
7955 	}
7956 
7957 	// Let the file systems do their synchronizing work
7958 	if (HAS_FS_MOUNT_CALL(mount, sync))
7959 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7960 
	// Finally, flush the underlying device's write cache (if possible).
7962 	if (mount->partition != NULL && mount->partition->Device() != NULL)
7963 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
7964 
7965 	put_mount(mount);
7966 	return status;
7967 }
7968 
7969 
7970 static status_t
7971 fs_read_info(dev_t device, struct fs_info* info)
7972 {
7973 	struct fs_mount* mount;
7974 	status_t status = get_mount(device, &mount);
7975 	if (status != B_OK)
7976 		return status;
7977 
7978 	memset(info, 0, sizeof(struct fs_info));
7979 
7980 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7981 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7982 
7983 	// fill in info the file system doesn't (have to) know about
7984 	if (status == B_OK) {
7985 		info->dev = mount->id;
7986 		info->root = mount->root_vnode->id;
7987 
7988 		fs_volume* volume = mount->volume;
7989 		while (volume->super_volume != NULL)
7990 			volume = volume->super_volume;
7991 
7992 		strlcpy(info->fsh_name, volume->file_system_name,
7993 			sizeof(info->fsh_name));
7994 		if (mount->device_name != NULL) {
7995 			strlcpy(info->device_name, mount->device_name,
7996 				sizeof(info->device_name));
7997 		}
7998 	}
7999 
	// Even if the call is not supported by the file system, the caller still
	// gets the parts that we filled in ourselves above.
8002 
8003 	put_mount(mount);
8004 	return status;
8005 }
8006 
8007 
8008 static status_t
8009 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8010 {
8011 	struct fs_mount* mount;
8012 	status_t status = get_mount(device, &mount);
8013 	if (status != B_OK)
8014 		return status;
8015 
8016 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8017 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8018 	else
8019 		status = B_READ_ONLY_DEVICE;
8020 
8021 	put_mount(mount);
8022 	return status;
8023 }
8024 
8025 
8026 static dev_t
8027 fs_next_device(int32* _cookie)
8028 {
8029 	struct fs_mount* mount = NULL;
8030 	dev_t device = *_cookie;
8031 
8032 	mutex_lock(&sMountMutex);
8033 
	// Since device IDs are assigned sequentially, this algorithm works well
	// enough. It makes sure that the device list returned is sorted, and
	// that no device is skipped when an already visited device gets
	// unmounted.
8038 
8039 	while (device < sNextMountID) {
8040 		mount = find_mount(device++);
8041 		if (mount != NULL && mount->volume->private_volume != NULL)
8042 			break;
8043 	}
8044 
8045 	*_cookie = device;
8046 
8047 	if (mount != NULL)
8048 		device = mount->id;
8049 	else
8050 		device = B_BAD_VALUE;
8051 
8052 	mutex_unlock(&sMountMutex);
8053 
8054 	return device;
8055 }
8056 
8057 
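/*!	Convenience wrapper for kernel code: opens the attribute \a attribute of
	the node referred to by \a fd, reads up to \a readBytes bytes starting at
	\a pos into \a buffer, and closes the attribute FD again. \a type is
	currently unused.
*/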
8058 ssize_t
8059 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8060 	void *buffer, size_t readBytes)
8061 {
8062 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8063 	if (attrFD < 0)
8064 		return attrFD;
8065 
8066 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8067 
8068 	_kern_close(attrFD);
8069 
8070 	return bytesRead;
8071 }
8072 
8073 
8074 static status_t
8075 get_cwd(char* buffer, size_t size, bool kernel)
8076 {
8077 	// Get current working directory from io context
8078 	struct io_context* context = get_current_io_context(kernel);
8079 	status_t status;
8080 
8081 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8082 
8083 	mutex_lock(&context->io_mutex);
8084 
8085 	struct vnode* vnode = context->cwd;
8086 	if (vnode)
8087 		inc_vnode_ref_count(vnode);
8088 
8089 	mutex_unlock(&context->io_mutex);
8090 
8091 	if (vnode) {
8092 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8093 		put_vnode(vnode);
8094 	} else
8095 		status = B_ERROR;
8096 
8097 	return status;
8098 }
8099 
8100 
8101 static status_t
8102 set_cwd(int fd, char* path, bool kernel)
8103 {
8104 	struct io_context* context;
8105 	struct vnode* vnode = NULL;
8106 	struct vnode* oldDirectory;
8107 	status_t status;
8108 
8109 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8110 
8111 	// Get vnode for passed path, and bail if it failed
8112 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8113 	if (status < 0)
8114 		return status;
8115 
8116 	if (!S_ISDIR(vnode->Type())) {
8117 		// nope, can't cwd to here
8118 		status = B_NOT_A_DIRECTORY;
8119 		goto err;
8120 	}
8121 
8122 	// We need to have the permission to enter the directory, too
8123 	if (HAS_FS_CALL(vnode, access)) {
8124 		status = FS_CALL(vnode, access, X_OK);
8125 		if (status != B_OK)
8126 			goto err;
8127 	}
8128 
8129 	// Get current io context and lock
8130 	context = get_current_io_context(kernel);
8131 	mutex_lock(&context->io_mutex);
8132 
8133 	// save the old current working directory first
8134 	oldDirectory = context->cwd;
8135 	context->cwd = vnode;
8136 
8137 	mutex_unlock(&context->io_mutex);
8138 
8139 	if (oldDirectory)
8140 		put_vnode(oldDirectory);
8141 
8142 	return B_NO_ERROR;
8143 
8144 err:
8145 	put_vnode(vnode);
8146 	return status;
8147 }
8148 
8149 
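/*!	Copies a NUL-terminated name from userland into \a to, failing with
	\c B_NAME_TOO_LONG instead of truncating when the name does not fit into
	the \a length bytes long buffer.
*/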
8150 static status_t
8151 user_copy_name(char* to, const char* from, size_t length)
8152 {
8153 	ssize_t len = user_strlcpy(to, from, length);
8154 	if (len < 0)
8155 		return len;
8156 	if (len >= (ssize_t)length)
8157 		return B_NAME_TOO_LONG;
8158 	return B_OK;
8159 }
8160 
8161 
8162 //	#pragma mark - kernel mirrored syscalls
8163 
8164 
8165 dev_t
8166 _kern_mount(const char* path, const char* device, const char* fsName,
8167 	uint32 flags, const char* args, size_t argsLength)
8168 {
8169 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8170 	if (pathBuffer.InitCheck() != B_OK)
8171 		return B_NO_MEMORY;
8172 
8173 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8174 }
8175 
8176 
8177 status_t
8178 _kern_unmount(const char* path, uint32 flags)
8179 {
8180 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8181 	if (pathBuffer.InitCheck() != B_OK)
8182 		return B_NO_MEMORY;
8183 
8184 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8185 }
8186 
8187 
8188 status_t
8189 _kern_read_fs_info(dev_t device, struct fs_info* info)
8190 {
8191 	if (info == NULL)
8192 		return B_BAD_VALUE;
8193 
8194 	return fs_read_info(device, info);
8195 }
8196 
8197 
8198 status_t
8199 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8200 {
8201 	if (info == NULL)
8202 		return B_BAD_VALUE;
8203 
8204 	return fs_write_info(device, info, mask);
8205 }
8206 
8207 
8208 status_t
8209 _kern_sync(void)
8210 {
8211 	// Note: _kern_sync() is also called from _user_sync()
8212 	int32 cookie = 0;
8213 	dev_t device;
8214 	while ((device = next_dev(&cookie)) >= 0) {
8215 		status_t status = fs_sync(device);
8216 		if (status != B_OK && status != B_BAD_VALUE) {
8217 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8218 				strerror(status));
8219 		}
8220 	}
8221 
8222 	return B_OK;
8223 }
8224 
8225 
8226 dev_t
8227 _kern_next_device(int32* _cookie)
8228 {
8229 	return fs_next_device(_cookie);
8230 }
8231 
8232 
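/*!	Iterates through the open file descriptors of the given team.
	\a *_cookie is the FD table slot at which to start the search; on
	success it is advanced to just past the slot of the returned descriptor.
*/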
8233 status_t
8234 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8235 	size_t infoSize)
8236 {
8237 	if (infoSize != sizeof(fd_info))
8238 		return B_BAD_VALUE;
8239 
8240 	// get the team
8241 	Team* team = Team::Get(teamID);
8242 	if (team == NULL)
8243 		return B_BAD_TEAM_ID;
8244 	BReference<Team> teamReference(team, true);
8245 
8246 	// now that we have a team reference, its I/O context won't go away
8247 	io_context* context = team->io_context;
8248 	MutexLocker contextLocker(context->io_mutex);
8249 
8250 	uint32 slot = *_cookie;
8251 
8252 	struct file_descriptor* descriptor;
8253 	while (slot < context->table_size
8254 		&& (descriptor = context->fds[slot]) == NULL) {
8255 		slot++;
8256 	}
8257 
8258 	if (slot >= context->table_size)
8259 		return B_ENTRY_NOT_FOUND;
8260 
8261 	info->number = slot;
8262 	info->open_mode = descriptor->open_mode;
8263 
8264 	struct vnode* vnode = fd_vnode(descriptor);
8265 	if (vnode != NULL) {
8266 		info->device = vnode->device;
8267 		info->node = vnode->id;
8268 	} else if (descriptor->u.mount != NULL) {
8269 		info->device = descriptor->u.mount->id;
8270 		info->node = -1;
8271 	}
8272 
8273 	*_cookie = slot + 1;
8274 	return B_OK;
8275 }
8276 
8277 
8278 int
8279 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8280 	int perms)
8281 {
8282 	if ((openMode & O_CREAT) != 0) {
8283 		return file_create_entry_ref(device, inode, name, openMode, perms,
8284 			true);
8285 	}
8286 
8287 	return file_open_entry_ref(device, inode, name, openMode, true);
8288 }
8289 
8290 
8291 /*!	\brief Opens a node specified by a FD + path pair.
8292 
8293 	At least one of \a fd and \a path must be specified.
8294 	If only \a fd is given, the function opens the node identified by this
8295 	FD. If only a path is given, this path is opened. If both are given and
8296 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8297 	of the directory (!) identified by \a fd.
8298 
8299 	\param fd The FD. May be < 0.
8300 	\param path The absolute or relative path. May be \c NULL.
8301 	\param openMode The open mode.
8302 	\return A FD referring to the newly opened node, or an error code,
8303 			if an error occurs.
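
	A minimal usage sketch (illustrative only, error handling elided):
	\code
	int fd = _kern_open(-1, "/boot/home/test.txt", O_RDONLY, 0);
	if (fd >= 0)
		_kern_close(fd);
	\endcode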
8304 */
8305 int
8306 _kern_open(int fd, const char* path, int openMode, int perms)
8307 {
8308 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8309 	if (pathBuffer.InitCheck() != B_OK)
8310 		return B_NO_MEMORY;
8311 
8312 	if ((openMode & O_CREAT) != 0)
8313 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8314 
8315 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8316 }
8317 
8318 
8319 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8320 
	The supplied name may be \c NULL, in which case the directory identified
	by \a device and \a inode will be opened. Otherwise \a device and
8323 	\a inode identify the parent directory of the directory to be opened
8324 	and \a name its entry name.
8325 
8326 	\param device If \a name is specified the ID of the device the parent
8327 		   directory of the directory to be opened resides on, otherwise
8328 		   the device of the directory itself.
8329 	\param inode If \a name is specified the node ID of the parent
8330 		   directory of the directory to be opened, otherwise node ID of the
8331 		   directory itself.
8332 	\param name The entry name of the directory to be opened. If \c NULL,
8333 		   the \a device + \a inode pair identify the node to be opened.
8334 	\return The FD of the newly opened directory or an error code, if
8335 			something went wrong.
8336 */
8337 int
8338 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8339 {
8340 	return dir_open_entry_ref(device, inode, name, true);
8341 }
8342 
8343 
8344 /*!	\brief Opens a directory specified by a FD + path pair.
8345 
8346 	At least one of \a fd and \a path must be specified.
8347 	If only \a fd is given, the function opens the directory identified by this
8348 	FD. If only a path is given, this path is opened. If both are given and
8349 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8350 	of the directory (!) identified by \a fd.
8351 
8352 	\param fd The FD. May be < 0.
8353 	\param path The absolute or relative path. May be \c NULL.
8354 	\return A FD referring to the newly opened directory, or an error code,
8355 			if an error occurs.
8356 */
8357 int
8358 _kern_open_dir(int fd, const char* path)
8359 {
8360 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8361 	if (pathBuffer.InitCheck() != B_OK)
8362 		return B_NO_MEMORY;
8363 
8364 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8365 }
8366 
8367 
8368 status_t
8369 _kern_fcntl(int fd, int op, size_t argument)
8370 {
8371 	return common_fcntl(fd, op, argument, true);
8372 }
8373 
8374 
8375 status_t
8376 _kern_fsync(int fd)
8377 {
8378 	return common_sync(fd, true);
8379 }
8380 
8381 
8382 status_t
8383 _kern_lock_node(int fd)
8384 {
8385 	return common_lock_node(fd, true);
8386 }
8387 
8388 
8389 status_t
8390 _kern_unlock_node(int fd)
8391 {
8392 	return common_unlock_node(fd, true);
8393 }
8394 
8395 
8396 status_t
8397 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8398 	int perms)
8399 {
8400 	return dir_create_entry_ref(device, inode, name, perms, true);
8401 }
8402 
8403 
8404 /*!	\brief Creates a directory specified by a FD + path pair.
8405 
8406 	\a path must always be specified (it contains the name of the new directory
8407 	at least). If only a path is given, this path identifies the location at
8408 	which the directory shall be created. If both \a fd and \a path are given
8409 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8410 	of the directory (!) identified by \a fd.
8411 
8412 	\param fd The FD. May be < 0.
8413 	\param path The absolute or relative path. Must not be \c NULL.
8414 	\param perms The access permissions the new directory shall have.
8415 	\return \c B_OK, if the directory has been created successfully, another
8416 			error code otherwise.
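
	A usage sketch (illustrative only):
	\code
	status_t error = _kern_create_dir(-1, "/boot/home/cache", 0755);
	\endcode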
8417 */
8418 status_t
8419 _kern_create_dir(int fd, const char* path, int perms)
8420 {
8421 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8422 	if (pathBuffer.InitCheck() != B_OK)
8423 		return B_NO_MEMORY;
8424 
8425 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8426 }
8427 
8428 
8429 status_t
8430 _kern_remove_dir(int fd, const char* path)
8431 {
8432 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8433 	if (pathBuffer.InitCheck() != B_OK)
8434 		return B_NO_MEMORY;
8435 
8436 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8437 }
8438 
8439 
8440 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8441 
8442 	At least one of \a fd and \a path must be specified.
	If only \a fd is given, the symlink to be read is the node
8444 	identified by this FD. If only a path is given, this path identifies the
8445 	symlink to be read. If both are given and the path is absolute, \a fd is
8446 	ignored; a relative path is reckoned off of the directory (!) identified
8447 	by \a fd.
8448 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8449 	will still be updated to reflect the required buffer size.
8450 
8451 	\param fd The FD. May be < 0.
8452 	\param path The absolute or relative path. May be \c NULL.
8453 	\param buffer The buffer into which the contents of the symlink shall be
8454 		   written.
8455 	\param _bufferSize A pointer to the size of the supplied buffer.
8456 	\return The length of the link on success or an appropriate error code
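
	A usage sketch (illustrative only):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t error = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	// on B_BUFFER_OVERFLOW, size has been set to the required buffer size
	\endcode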
8457 */
8458 status_t
8459 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8460 {
8461 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8462 	if (pathBuffer.InitCheck() != B_OK)
8463 		return B_NO_MEMORY;
8464 
8465 	return common_read_link(fd, pathBuffer.LockBuffer(),
8466 		buffer, _bufferSize, true);
8467 }
8468 
8469 
8470 /*!	\brief Creates a symlink specified by a FD + path pair.
8471 
8472 	\a path must always be specified (it contains the name of the new symlink
8473 	at least). If only a path is given, this path identifies the location at
8474 	which the symlink shall be created. If both \a fd and \a path are given and
8475 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8476 	of the directory (!) identified by \a fd.
8477 
8478 	\param fd The FD. May be < 0.
8479 	\param toPath The absolute or relative path. Must not be \c NULL.
8480 	\param mode The access permissions the new symlink shall have.
8481 	\return \c B_OK, if the symlink has been created successfully, another
8482 			error code otherwise.
8483 */
8484 status_t
8485 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8486 {
8487 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8488 	if (pathBuffer.InitCheck() != B_OK)
8489 		return B_NO_MEMORY;
8490 
8491 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8492 		toPath, mode, true);
8493 }
8494 
8495 
8496 status_t
8497 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8498 	bool traverseLeafLink)
8499 {
8500 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8501 	KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8502 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8503 		return B_NO_MEMORY;
8504 
8505 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8506 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8507 }
8508 
8509 
8510 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8511 
8512 	\a path must always be specified (it contains at least the name of the entry
8513 	to be deleted). If only a path is given, this path identifies the entry
8514 	directly. If both \a fd and \a path are given and the path is absolute,
8515 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8516 	identified by \a fd.
8517 
8518 	\param fd The FD. May be < 0.
8519 	\param path The absolute or relative path. Must not be \c NULL.
8520 	\return \c B_OK, if the entry has been removed successfully, another
8521 			error code otherwise.
8522 */
8523 status_t
8524 _kern_unlink(int fd, const char* path)
8525 {
8526 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8527 	if (pathBuffer.InitCheck() != B_OK)
8528 		return B_NO_MEMORY;
8529 
8530 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8531 }
8532 
8533 
8534 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8535 		   by another FD + path pair.
8536 
8537 	\a oldPath and \a newPath must always be specified (they contain at least
8538 	the name of the entry). If only a path is given, this path identifies the
8539 	entry directly. If both a FD and a path are given and the path is absolute,
8540 	the FD is ignored; a relative path is reckoned off of the directory (!)
8541 	identified by the respective FD.
8542 
8543 	\param oldFD The FD of the old location. May be < 0.
8544 	\param oldPath The absolute or relative path of the old location. Must not
8545 		   be \c NULL.
8546 	\param newFD The FD of the new location. May be < 0.
8547 	\param newPath The absolute or relative path of the new location. Must not
8548 		   be \c NULL.
8549 	\return \c B_OK, if the entry has been moved successfully, another
8550 			error code otherwise.
8551 */
8552 status_t
8553 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8554 {
8555 	KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8556 	KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8557 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8558 		return B_NO_MEMORY;
8559 
8560 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8561 		newFD, newPathBuffer.LockBuffer(), true);
8562 }
8563 
8564 
8565 status_t
8566 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8567 {
8568 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8569 	if (pathBuffer.InitCheck() != B_OK)
8570 		return B_NO_MEMORY;
8571 
8572 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8573 		true);
8574 }
8575 
8576 
8577 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8578 
8579 	If only \a fd is given, the stat operation associated with the type
8580 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8581 	given, this path identifies the entry for whose node to retrieve the
8582 	stat data. If both \a fd and \a path are given and the path is absolute,
8583 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8584 	identified by \a fd and specifies the entry whose stat data shall be
8585 	retrieved.
8586 
8587 	\param fd The FD. May be < 0.
	\param path The absolute or relative path. May be \c NULL.
8589 	\param traverseLeafLink If \a path is given, \c true specifies that the
8590 		   function shall not stick to symlinks, but traverse them.
8591 	\param stat The buffer the stat data shall be written into.
8592 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data have been read successfully, another
8594 			error code otherwise.
8595 */
8596 status_t
8597 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8598 	struct stat* stat, size_t statSize)
8599 {
8600 	struct stat completeStat;
8601 	struct stat* originalStat = NULL;
8602 	status_t status;
8603 
8604 	if (statSize > sizeof(struct stat))
8605 		return B_BAD_VALUE;
8606 
8607 	// this supports different stat extensions
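	// (a caller compiled against a smaller struct stat gets the stat data
	// read into a complete structure, truncated to statSize on the copy
	// back below)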
8608 	if (statSize < sizeof(struct stat)) {
8609 		originalStat = stat;
8610 		stat = &completeStat;
8611 	}
8612 
8613 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8614 
8615 	if (status == B_OK && originalStat != NULL)
8616 		memcpy(originalStat, stat, statSize);
8617 
8618 	return status;
8619 }
8620 
8621 
8622 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8623 
8624 	If only \a fd is given, the stat operation associated with the type
8625 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8626 	given, this path identifies the entry for whose node to write the
8627 	stat data. If both \a fd and \a path are given and the path is absolute,
8628 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8629 	identified by \a fd and specifies the entry whose stat data shall be
8630 	written.
8631 
8632 	\param fd The FD. May be < 0.
8633 	\param path The absolute or relative path. May be \c NULL.
8634 	\param traverseLeafLink If \a path is given, \c true specifies that the
8635 		   function shall not stick to symlinks, but traverse them.
8636 	\param stat The buffer containing the stat data to be written.
8637 	\param statSize The size of the supplied stat buffer.
8638 	\param statMask A mask specifying which parts of the stat data shall be
8639 		   written.
	\return \c B_OK, if the stat data have been written successfully,
8641 			another error code otherwise.
8642 */
8643 status_t
8644 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8645 	const struct stat* stat, size_t statSize, int statMask)
8646 {
8647 	struct stat completeStat;
8648 
8649 	if (statSize > sizeof(struct stat))
8650 		return B_BAD_VALUE;
8651 
8652 	// this supports different stat extensions
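	// (a smaller caller-supplied struct stat is zero-extended to a complete
	// structure before use)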
8653 	if (statSize < sizeof(struct stat)) {
8654 		memset((uint8*)&completeStat + statSize, 0,
8655 			sizeof(struct stat) - statSize);
8656 		memcpy(&completeStat, stat, statSize);
8657 		stat = &completeStat;
8658 	}
8659 
8660 	status_t status;
8661 
8662 	if (path != NULL) {
8663 		// path given: write the stat of the node referred to by (fd, path)
8664 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8665 		if (pathBuffer.InitCheck() != B_OK)
8666 			return B_NO_MEMORY;
8667 
8668 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8669 			traverseLeafLink, stat, statMask, true);
8670 	} else {
8671 		// no path given: get the FD and use the FD operation
8672 		struct file_descriptor* descriptor
8673 			= get_fd(get_current_io_context(true), fd);
8674 		if (descriptor == NULL)
8675 			return B_FILE_ERROR;
8676 
8677 		if (descriptor->ops->fd_write_stat)
8678 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8679 		else
8680 			status = B_UNSUPPORTED;
8681 
8682 		put_fd(descriptor);
8683 	}
8684 
8685 	return status;
8686 }
8687 
8688 
8689 int
8690 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8691 {
8692 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8693 	if (pathBuffer.InitCheck() != B_OK)
8694 		return B_NO_MEMORY;
8695 
8696 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8697 }
8698 
8699 
8700 int
8701 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8702 	int openMode)
8703 {
8704 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8705 	if (pathBuffer.InitCheck() != B_OK)
8706 		return B_NO_MEMORY;
8707 
8708 	if ((openMode & O_CREAT) != 0) {
8709 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8710 			true);
8711 	}
8712 
8713 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8714 }
8715 
8716 
8717 status_t
8718 _kern_remove_attr(int fd, const char* name)
8719 {
8720 	return attr_remove(fd, name, true);
8721 }
8722 
8723 
8724 status_t
8725 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8726 	const char* toName)
8727 {
8728 	return attr_rename(fromFile, fromName, toFile, toName, true);
8729 }
8730 
8731 
8732 int
8733 _kern_open_index_dir(dev_t device)
8734 {
8735 	return index_dir_open(device, true);
8736 }
8737 
8738 
8739 status_t
8740 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8741 {
8742 	return index_create(device, name, type, flags, true);
8743 }
8744 
8745 
8746 status_t
8747 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8748 {
8749 	return index_name_read_stat(device, name, stat, true);
8750 }
8751 
8752 
8753 status_t
8754 _kern_remove_index(dev_t device, const char* name)
8755 {
8756 	return index_remove(device, name, true);
8757 }
8758 
8759 
8760 status_t
8761 _kern_getcwd(char* buffer, size_t size)
8762 {
8763 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8764 
8765 	// Call vfs to get current working directory
8766 	return get_cwd(buffer, size, true);
8767 }
8768 
8769 
8770 status_t
8771 _kern_setcwd(int fd, const char* path)
8772 {
8773 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8774 	if (pathBuffer.InitCheck() != B_OK)
8775 		return B_NO_MEMORY;
8776 
8777 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8778 }
8779 
8780 
8781 //	#pragma mark - userland syscalls
8782 
8783 
8784 dev_t
8785 _user_mount(const char* userPath, const char* userDevice,
8786 	const char* userFileSystem, uint32 flags, const char* userArgs,
8787 	size_t argsLength)
8788 {
8789 	char fileSystem[B_FILE_NAME_LENGTH];
8790 	KPath path, device;
8791 	char* args = NULL;
8792 	status_t status;
8793 
8794 	if (!IS_USER_ADDRESS(userPath))
8795 		return B_BAD_ADDRESS;
8796 
8797 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8798 		return B_NO_MEMORY;
8799 
8800 	status = user_copy_name(path.LockBuffer(), userPath,
8801 		B_PATH_NAME_LENGTH);
8802 	if (status != B_OK)
8803 		return status;
8804 
8805 	if (userFileSystem != NULL) {
8806 		if (!IS_USER_ADDRESS(userFileSystem))
8807 			return B_BAD_ADDRESS;
8808 
8809 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8810 		if (status != B_OK)
8811 			return status;
8812 	}
8813 
8814 	if (userDevice != NULL) {
8815 		if (!IS_USER_ADDRESS(userDevice))
8816 			return B_BAD_ADDRESS;
8817 
8818 		status = user_copy_name(device.LockBuffer(), userDevice,
8819 			B_PATH_NAME_LENGTH);
8820 		if (status != B_OK)
8821 			return status;
8822 	}
8823 
8824 	if (userArgs != NULL && argsLength > 0) {
8825 		if (!IS_USER_ADDRESS(userArgs))
8826 			return B_BAD_ADDRESS;
8827 
8828 		// this is a safety restriction
8829 		if (argsLength >= 65536)
8830 			return B_NAME_TOO_LONG;
8831 
8832 		args = (char*)malloc(argsLength + 1);
8833 		if (args == NULL)
8834 			return B_NO_MEMORY;
8835 
8836 		status = user_copy_name(args, userArgs, argsLength + 1);
8837 		if (status != B_OK) {
8838 			free(args);
8839 			return status;
8840 		}
8841 	}
8842 	path.UnlockBuffer();
8843 	device.UnlockBuffer();
8844 
8845 	status = fs_mount(path.LockBuffer(),
8846 		userDevice != NULL ? device.Path() : NULL,
8847 		userFileSystem ? fileSystem : NULL, flags, args, false);
8848 
8849 	free(args);
8850 	return status;
8851 }
8852 
8853 
8854 status_t
8855 _user_unmount(const char* userPath, uint32 flags)
8856 {
8857 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8858 
8859 	if (!IS_USER_ADDRESS(userPath))
8860 		return B_BAD_ADDRESS;
8861 
8862 	if (pathBuffer.InitCheck() != B_OK)
8863 		return B_NO_MEMORY;
8864 
8865 	char* path = pathBuffer.LockBuffer();
8866 
8867 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8868 	if (status != B_OK)
8869 		return status;
8870 
8871 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8872 }
8873 
8874 
8875 status_t
8876 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8877 {
8878 	struct fs_info info;
8879 	status_t status;
8880 
8881 	if (userInfo == NULL)
8882 		return B_BAD_VALUE;
8883 
8884 	if (!IS_USER_ADDRESS(userInfo))
8885 		return B_BAD_ADDRESS;
8886 
8887 	status = fs_read_info(device, &info);
8888 	if (status != B_OK)
8889 		return status;
8890 
8891 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8892 		return B_BAD_ADDRESS;
8893 
8894 	return B_OK;
8895 }
8896 
8897 
8898 status_t
8899 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8900 {
8901 	struct fs_info info;
8902 
8903 	if (userInfo == NULL)
8904 		return B_BAD_VALUE;
8905 
8906 	if (!IS_USER_ADDRESS(userInfo)
8907 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8908 		return B_BAD_ADDRESS;
8909 
8910 	return fs_write_info(device, &info, mask);
8911 }
8912 
8913 
8914 dev_t
8915 _user_next_device(int32* _userCookie)
8916 {
8917 	int32 cookie;
8918 	dev_t device;
8919 
8920 	if (!IS_USER_ADDRESS(_userCookie)
8921 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8922 		return B_BAD_ADDRESS;
8923 
8924 	device = fs_next_device(&cookie);
8925 
8926 	if (device >= B_OK) {
8927 		// update user cookie
8928 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8929 			return B_BAD_ADDRESS;
8930 	}
8931 
8932 	return device;
8933 }
8934 
8935 
8936 status_t
8937 _user_sync(void)
8938 {
8939 	return _kern_sync();
8940 }
8941 
8942 
8943 status_t
8944 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8945 	size_t infoSize)
8946 {
8947 	struct fd_info info;
8948 	uint32 cookie;
8949 
8950 	// only root can do this (or should root's group be enough?)
8951 	if (geteuid() != 0)
8952 		return B_NOT_ALLOWED;
8953 
8954 	if (infoSize != sizeof(fd_info))
8955 		return B_BAD_VALUE;
8956 
8957 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8958 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8959 		return B_BAD_ADDRESS;
8960 
8961 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8962 	if (status != B_OK)
8963 		return status;
8964 
8965 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8966 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8967 		return B_BAD_ADDRESS;
8968 
8969 	return status;
8970 }
8971 
8972 
8973 status_t
8974 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8975 	char* userPath, size_t pathLength)
8976 {
8977 	if (!IS_USER_ADDRESS(userPath))
8978 		return B_BAD_ADDRESS;
8979 
8980 	KPath path(B_PATH_NAME_LENGTH + 1);
8981 	if (path.InitCheck() != B_OK)
8982 		return B_NO_MEMORY;
8983 
8984 	// copy the leaf name onto the stack
8985 	char stackLeaf[B_FILE_NAME_LENGTH];
8986 	if (leaf != NULL) {
8987 		if (!IS_USER_ADDRESS(leaf))
8988 			return B_BAD_ADDRESS;
8989 
8990 		int status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8991 		if (status != B_OK)
8992 			return status;
8993 
8994 		leaf = stackLeaf;
8995 	}
8996 
8997 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8998 		false, path.LockBuffer(), path.BufferSize());
8999 	if (status != B_OK)
9000 		return status;
9001 
9002 	path.UnlockBuffer();
9003 
9004 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9005 	if (length < 0)
9006 		return length;
9007 	if (length >= (int)pathLength)
9008 		return B_BUFFER_OVERFLOW;
9009 
9010 	return B_OK;
9011 }
9012 
9013 
9014 status_t
9015 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9016 {
9017 	if (userPath == NULL || buffer == NULL)
9018 		return B_BAD_VALUE;
9019 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9020 		return B_BAD_ADDRESS;
9021 
9022 	// copy path from userland
9023 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9024 	if (pathBuffer.InitCheck() != B_OK)
9025 		return B_NO_MEMORY;
9026 	char* path = pathBuffer.LockBuffer();
9027 
9028 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9029 	if (status != B_OK)
9030 		return status;
9031 
9032 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9033 		false);
9034 	if (error != B_OK)
9035 		return error;
9036 
9037 	// copy back to userland
9038 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9039 	if (len < 0)
9040 		return len;
9041 	if (len >= B_PATH_NAME_LENGTH)
9042 		return B_BUFFER_OVERFLOW;
9043 
9044 	return B_OK;
9045 }
9046 
9047 
9048 int
9049 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9050 	int openMode, int perms)
9051 {
9052 	char name[B_FILE_NAME_LENGTH];
9053 
9054 	if (userName == NULL || device < 0 || inode < 0)
9055 		return B_BAD_VALUE;
9056 	if (!IS_USER_ADDRESS(userName))
9057 		return B_BAD_ADDRESS;
9058 	status_t status = user_copy_name(name, userName, sizeof(name));
9059 	if (status != B_OK)
9060 		return status;
9061 
9062 	if ((openMode & O_CREAT) != 0) {
9063 		return file_create_entry_ref(device, inode, name, openMode, perms,
9064 			false);
9065 	}
9066 
9067 	return file_open_entry_ref(device, inode, name, openMode, false);
9068 }
9069 
9070 
9071 int
9072 _user_open(int fd, const char* userPath, int openMode, int perms)
9073 {
9074 	KPath path(B_PATH_NAME_LENGTH + 1);
9075 	if (path.InitCheck() != B_OK)
9076 		return B_NO_MEMORY;
9077 
9078 	char* buffer = path.LockBuffer();
9079 
9080 	if (!IS_USER_ADDRESS(userPath))
9081 		return B_BAD_ADDRESS;
9082 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9083 	if (status != B_OK)
9084 		return status;
9085 
9086 	if ((openMode & O_CREAT) != 0)
9087 		return file_create(fd, buffer, openMode, perms, false);
9088 
9089 	return file_open(fd, buffer, openMode, false);
9090 }
9091 
9092 
9093 int
9094 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9095 {
9096 	if (userName != NULL) {
9097 		char name[B_FILE_NAME_LENGTH];
9098 
9099 		if (!IS_USER_ADDRESS(userName))
9100 			return B_BAD_ADDRESS;
9101 		status_t status = user_copy_name(name, userName, sizeof(name));
9102 		if (status != B_OK)
9103 			return status;
9104 
9105 		return dir_open_entry_ref(device, inode, name, false);
9106 	}
9107 	return dir_open_entry_ref(device, inode, NULL, false);
9108 }
9109 
9110 
9111 int
9112 _user_open_dir(int fd, const char* userPath)
9113 {
9114 	if (userPath == NULL)
9115 		return dir_open(fd, NULL, false);
9116 
9117 	KPath path(B_PATH_NAME_LENGTH + 1);
9118 	if (path.InitCheck() != B_OK)
9119 		return B_NO_MEMORY;
9120 
9121 	char* buffer = path.LockBuffer();
9122 
9123 	if (!IS_USER_ADDRESS(userPath))
9124 		return B_BAD_ADDRESS;
9125 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9126 	if (status != B_OK)
9127 		return status;
9128 
9129 	return dir_open(fd, buffer, false);
9130 }
9131 
9132 
9133 /*!	\brief Opens a directory's parent directory and returns the entry name
9134 		   of the former.
9135 
	Aside from additionally returning the directory's entry name, this method
	is equivalent to \code _user_open_dir(fd, "..") \endcode. If \a userName
	is \c NULL, it really is equivalent.
9139 
9140 	If a name buffer is supplied and the name does not fit the buffer, the
9141 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9142 
9143 	\param fd A FD referring to a directory.
9144 	\param userName Buffer the directory's entry name shall be written into.
9145 		   May be \c NULL.
9146 	\param nameLength Size of the name buffer.
9147 	\return The file descriptor of the opened parent directory, if everything
9148 			went fine, an error code otherwise.
9149 */
9150 int
9151 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9152 {
9153 	bool kernel = false;
9154 
9155 	if (userName && !IS_USER_ADDRESS(userName))
9156 		return B_BAD_ADDRESS;
9157 
9158 	// open the parent dir
9159 	int parentFD = dir_open(fd, (char*)"..", kernel);
9160 	if (parentFD < 0)
9161 		return parentFD;
9162 	FDCloser fdCloser(parentFD, kernel);
9163 
9164 	if (userName) {
9165 		// get the vnodes
9166 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9167 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9168 		VNodePutter parentVNodePutter(parentVNode);
9169 		VNodePutter dirVNodePutter(dirVNode);
9170 		if (!parentVNode || !dirVNode)
9171 			return B_FILE_ERROR;
9172 
9173 		// get the vnode name
9174 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9175 		struct dirent* buffer = (struct dirent*)_buffer;
9176 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9177 			sizeof(_buffer), get_current_io_context(false));
9178 		if (status != B_OK)
9179 			return status;
9180 
9181 		// copy the name to the userland buffer
9182 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9183 		if (len < 0)
9184 			return len;
9185 		if (len >= (int)nameLength)
9186 			return B_BUFFER_OVERFLOW;
9187 	}
9188 
9189 	return fdCloser.Detach();
9190 }
9191 
9192 
9193 status_t
9194 _user_fcntl(int fd, int op, size_t argument)
9195 {
9196 	status_t status = common_fcntl(fd, op, argument, false);
9197 	if (op == F_SETLKW)
9198 		syscall_restart_handle_post(status);
9199 
9200 	return status;
9201 }
9202 
9203 
9204 status_t
9205 _user_fsync(int fd)
9206 {
9207 	return common_sync(fd, false);
9208 }
9209 
9210 
9211 status_t
9212 _user_flock(int fd, int operation)
9213 {
9214 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9215 
9216 	// Check if the operation is valid
9217 	switch (operation & ~LOCK_NB) {
9218 		case LOCK_UN:
9219 		case LOCK_SH:
9220 		case LOCK_EX:
9221 			break;
9222 
9223 		default:
9224 			return B_BAD_VALUE;
9225 	}
9226 
9227 	struct file_descriptor* descriptor;
9228 	struct vnode* vnode;
9229 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9230 	if (descriptor == NULL)
9231 		return B_FILE_ERROR;
9232 
9233 	if (descriptor->type != FDTYPE_FILE) {
9234 		put_fd(descriptor);
9235 		return B_BAD_VALUE;
9236 	}
9237 
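	// flock() locks always affect the whole file: build an advisory lock
	// record covering it from offset 0 with the maximum possible length.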
9238 	struct flock flock;
9239 	flock.l_start = 0;
9240 	flock.l_len = OFF_MAX;
9241 	flock.l_whence = 0;
9242 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9243 
9244 	status_t status;
9245 	if ((operation & LOCK_UN) != 0) {
9246 		if (HAS_FS_CALL(vnode, release_lock))
9247 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9248 		else
9249 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9250 	} else {
9251 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9252 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9253 				(operation & LOCK_NB) == 0);
9254 		} else {
9255 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9256 				(operation & LOCK_NB) == 0);
9257 		}
9258 	}
9259 
9260 	syscall_restart_handle_post(status);
9261 
9262 	put_fd(descriptor);
9263 	return status;
9264 }
9265 
9266 
9267 status_t
9268 _user_lock_node(int fd)
9269 {
9270 	return common_lock_node(fd, false);
9271 }
9272 
9273 
9274 status_t
9275 _user_unlock_node(int fd)
9276 {
9277 	return common_unlock_node(fd, false);
9278 }
9279 
9280 
9281 status_t
9282 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9283 	int perms)
9284 {
9285 	char name[B_FILE_NAME_LENGTH];
9286 	status_t status;
9287 
9288 	if (!IS_USER_ADDRESS(userName))
9289 		return B_BAD_ADDRESS;
9290 
9291 	status = user_copy_name(name, userName, sizeof(name));
9292 	if (status != B_OK)
9293 		return status;
9294 
9295 	return dir_create_entry_ref(device, inode, name, perms, false);
9296 }
9297 
9298 
9299 status_t
9300 _user_create_dir(int fd, const char* userPath, int perms)
9301 {
9302 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9303 	if (pathBuffer.InitCheck() != B_OK)
9304 		return B_NO_MEMORY;
9305 
9306 	char* path = pathBuffer.LockBuffer();
9307 
9308 	if (!IS_USER_ADDRESS(userPath))
9309 		return B_BAD_ADDRESS;
9310 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9311 	if (status != B_OK)
9312 		return status;
9313 
9314 	return dir_create(fd, path, perms, false);
9315 }
9316 
9317 
9318 status_t
9319 _user_remove_dir(int fd, const char* userPath)
9320 {
9321 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9322 	if (pathBuffer.InitCheck() != B_OK)
9323 		return B_NO_MEMORY;
9324 
9325 	char* path = pathBuffer.LockBuffer();
9326 
9327 	if (userPath != NULL) {
9328 		if (!IS_USER_ADDRESS(userPath))
9329 			return B_BAD_ADDRESS;
9330 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9331 		if (status != B_OK)
9332 			return status;
9333 	}
9334 
9335 	return dir_remove(fd, userPath ? path : NULL, false);
9336 }
9337 
9338 
9339 status_t
9340 _user_read_link(int fd, const char* userPath, char* userBuffer,
9341 	size_t* userBufferSize)
9342 {
9343 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9344 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9345 		return B_NO_MEMORY;
9346 
9347 	size_t bufferSize;
9348 
9349 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9350 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9351 		return B_BAD_ADDRESS;
9352 
9353 	char* path = pathBuffer.LockBuffer();
9354 	char* buffer = linkBuffer.LockBuffer();
9355 
9356 	if (userPath) {
9357 		if (!IS_USER_ADDRESS(userPath))
9358 			return B_BAD_ADDRESS;
9359 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9360 		if (status != B_OK)
9361 			return status;
9362 
9363 		if (bufferSize > B_PATH_NAME_LENGTH)
9364 			bufferSize = B_PATH_NAME_LENGTH;
9365 	}
9366 
9367 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9368 		&bufferSize, false);
9369 
9370 	// we also update the bufferSize in case of errors
9371 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9372 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9373 		return B_BAD_ADDRESS;
9374 
9375 	if (status != B_OK)
9376 		return status;
9377 
9378 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9379 		return B_BAD_ADDRESS;
9380 
9381 	return B_OK;
9382 }
9383 
9384 
9385 status_t
9386 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9387 	int mode)
9388 {
9389 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9390 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9391 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9392 		return B_NO_MEMORY;
9393 
9394 	char* path = pathBuffer.LockBuffer();
9395 	char* toPath = toPathBuffer.LockBuffer();
9396 
9397 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9398 		return B_BAD_ADDRESS;
9399 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9400 	if (status != B_OK)
9401 		return status;
9402 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9403 	if (status != B_OK)
9404 		return status;
9405 
9406 	return common_create_symlink(fd, path, toPath, mode, false);
9407 }
9408 
9409 
9410 status_t
9411 _user_create_link(int pathFD, const char* userPath, int toFD,
9412 	const char* userToPath, bool traverseLeafLink)
9413 {
9414 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9415 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9416 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9417 		return B_NO_MEMORY;
9418 
9419 	char* path = pathBuffer.LockBuffer();
9420 	char* toPath = toPathBuffer.LockBuffer();
9421 
9422 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9423 		return B_BAD_ADDRESS;
9424 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9425 	if (status != B_OK)
9426 		return status;
9427 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9428 	if (status != B_OK)
9429 		return status;
9430 
9431 	status = check_path(toPath);
9432 	if (status != B_OK)
9433 		return status;
9434 
9435 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9436 		false);
9437 }
9438 
9439 
9440 status_t
9441 _user_unlink(int fd, const char* userPath)
9442 {
9443 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9444 	if (pathBuffer.InitCheck() != B_OK)
9445 		return B_NO_MEMORY;
9446 
9447 	char* path = pathBuffer.LockBuffer();
9448 
9449 	if (!IS_USER_ADDRESS(userPath))
9450 		return B_BAD_ADDRESS;
9451 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9452 	if (status != B_OK)
9453 		return status;
9454 
9455 	return common_unlink(fd, path, false);
9456 }
9457 
9458 
9459 status_t
9460 _user_rename(int oldFD, const char* userOldPath, int newFD,
9461 	const char* userNewPath)
9462 {
9463 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9464 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9465 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9466 		return B_NO_MEMORY;
9467 
9468 	char* oldPath = oldPathBuffer.LockBuffer();
9469 	char* newPath = newPathBuffer.LockBuffer();
9470 
9471 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9472 		return B_BAD_ADDRESS;
9473 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9474 	if (status != B_OK)
9475 		return status;
9476 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9477 	if (status != B_OK)
9478 		return status;
9479 
9480 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9481 }
9482 
9483 
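/*!	The kernel side of mkfifo(): creates a FIFO entry at the path given by
	(\a fd, \a userPath) with the requested permissions, provided the
	underlying file system supports creating special nodes.
*/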
9484 status_t
9485 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9486 {
9487 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9488 	if (pathBuffer.InitCheck() != B_OK)
9489 		return B_NO_MEMORY;
9490 
9491 	char* path = pathBuffer.LockBuffer();
9492 
9493 	if (!IS_USER_ADDRESS(userPath))
9494 		return B_BAD_ADDRESS;
9495 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9496 	if (status != B_OK)
9497 		return status;
9498 
9499 	// split into directory vnode and filename path
9500 	char filename[B_FILE_NAME_LENGTH];
9501 	struct vnode* dir;
9502 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9503 	if (status != B_OK)
9504 		return status;
9505 
9506 	VNodePutter _(dir);
9507 
9508 	// the underlying FS needs to support creating FIFOs
9509 	if (!HAS_FS_CALL(dir, create_special_node))
9510 		return B_UNSUPPORTED;
9511 
	// create the entry -- the FIFO sub node is set up automatically
9513 	fs_vnode superVnode;
9514 	ino_t nodeID;
9515 	status = FS_CALL(dir, create_special_node, filename, NULL,
9516 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9517 
9518 	// create_special_node() acquired a reference for us that we don't need.
9519 	if (status == B_OK)
9520 		put_vnode(dir->mount->volume, nodeID);
9521 
9522 	return status;
9523 }
9524 
9525 
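/*!	The kernel side of pipe(): creates an unnamed FIFO on the root file
	system and opens two FDs for it, \a userFDs[0] for reading and
	\a userFDs[1] for writing.
*/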
9526 status_t
9527 _user_create_pipe(int* userFDs)
9528 {
9529 	// rootfs should support creating FIFOs, but let's be sure
9530 	if (!HAS_FS_CALL(sRoot, create_special_node))
9531 		return B_UNSUPPORTED;
9532 
	// create the node -- the FIFO sub node is set up automatically
9534 	fs_vnode superVnode;
9535 	ino_t nodeID;
9536 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9537 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9538 	if (status != B_OK)
9539 		return status;
9540 
9541 	// We've got one reference to the node and need another one.
9542 	struct vnode* vnode;
9543 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9544 	if (status != B_OK) {
9545 		// that should not happen
		dprintf("_user_create_pipe(): Failed to look up vnode (%" B_PRIdDEV
			", %" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9548 		return status;
9549 	}
9550 
	// Everything looks good so far. Open two FDs, one for reading and one
	// for writing.
9553 	int fds[2];
9554 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9555 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9556 
9557 	FDCloser closer0(fds[0], false);
9558 	FDCloser closer1(fds[1], false);
9559 
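	// open_vnode() returns a negative error code instead of an FD on
	// failure; report the first error encountered, if any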
9560 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9561 
9562 	// copy FDs to userland
9563 	if (status == B_OK) {
9564 		if (!IS_USER_ADDRESS(userFDs)
9565 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9566 			status = B_BAD_ADDRESS;
9567 		}
9568 	}
9569 
	// keep the FDs if everything went fine
9571 	if (status == B_OK) {
9572 		closer0.Detach();
9573 		closer1.Detach();
9574 	}
9575 
9576 	return status;
9577 }
9578 
9579 
9580 status_t
9581 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9582 {
9583 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9584 	if (pathBuffer.InitCheck() != B_OK)
9585 		return B_NO_MEMORY;
9586 
9587 	char* path = pathBuffer.LockBuffer();
9588 
9589 	if (!IS_USER_ADDRESS(userPath))
9590 		return B_BAD_ADDRESS;
9591 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9592 	if (status != B_OK)
9593 		return status;
9594 
9595 	return common_access(fd, path, mode, effectiveUserGroup, false);
9596 }
9597 
9598 
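/*!	Implements the stat() family of syscalls: if \a userPath is given, the
	node it names (relative to \a fd, following a leaf symlink if
	\a traverseLink is set) is stat()ed; otherwise \a fd itself is queried.
	\a statSize may be smaller than the kernel's struct stat -- presumably
	so that binaries built against a shorter struct stat keep working --
	and only that many bytes are copied back to \a userStat.
*/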
9599 status_t
9600 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9601 	struct stat* userStat, size_t statSize)
9602 {
9603 	struct stat stat;
9604 	status_t status;
9605 
9606 	if (statSize > sizeof(struct stat))
9607 		return B_BAD_VALUE;
9608 
9609 	if (!IS_USER_ADDRESS(userStat))
9610 		return B_BAD_ADDRESS;
9611 
9612 	if (userPath != NULL) {
9613 		// path given: get the stat of the node referred to by (fd, path)
9614 		if (!IS_USER_ADDRESS(userPath))
9615 			return B_BAD_ADDRESS;
9616 
9617 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9618 		if (pathBuffer.InitCheck() != B_OK)
9619 			return B_NO_MEMORY;
9620 
9621 		char* path = pathBuffer.LockBuffer();
9622 
9623 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9624 		if (status != B_OK)
9625 			return status;
9626 
9627 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9628 	} else {
9629 		// no path given: get the FD and use the FD operation
9630 		struct file_descriptor* descriptor
9631 			= get_fd(get_current_io_context(false), fd);
9632 		if (descriptor == NULL)
9633 			return B_FILE_ERROR;
9634 
9635 		if (descriptor->ops->fd_read_stat)
9636 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9637 		else
9638 			status = B_UNSUPPORTED;
9639 
9640 		put_fd(descriptor);
9641 	}
9642 
9643 	if (status != B_OK)
9644 		return status;
9645 
9646 	return user_memcpy(userStat, &stat, statSize);
9647 }
9648 
9649 
9650 status_t
9651 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9652 	const struct stat* userStat, size_t statSize, int statMask)
9653 {
9654 	if (statSize > sizeof(struct stat))
9655 		return B_BAD_VALUE;
9656 
9657 	struct stat stat;
9658 
9659 	if (!IS_USER_ADDRESS(userStat)
9660 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9661 		return B_BAD_ADDRESS;
9662 
	// clear the stat fields the caller's (smaller) struct didn't supply
9664 	if (statSize < sizeof(struct stat))
9665 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9666 
9667 	status_t status;
9668 
9669 	if (userPath != NULL) {
9670 		// path given: write the stat of the node referred to by (fd, path)
9671 		if (!IS_USER_ADDRESS(userPath))
9672 			return B_BAD_ADDRESS;
9673 
9674 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9675 		if (pathBuffer.InitCheck() != B_OK)
9676 			return B_NO_MEMORY;
9677 
9678 		char* path = pathBuffer.LockBuffer();
9679 
9680 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9681 		if (status != B_OK)
9682 			return status;
9683 
9684 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9685 			statMask, false);
9686 	} else {
9687 		// no path given: get the FD and use the FD operation
9688 		struct file_descriptor* descriptor
9689 			= get_fd(get_current_io_context(false), fd);
9690 		if (descriptor == NULL)
9691 			return B_FILE_ERROR;
9692 
9693 		if (descriptor->ops->fd_write_stat) {
9694 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9695 				statMask);
9696 		} else
9697 			status = B_UNSUPPORTED;
9698 
9699 		put_fd(descriptor);
9700 	}
9701 
9702 	return status;
9703 }
9704 
9705 
9706 int
9707 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9708 {
9709 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9710 	if (pathBuffer.InitCheck() != B_OK)
9711 		return B_NO_MEMORY;
9712 
9713 	char* path = pathBuffer.LockBuffer();
9714 
9715 	if (userPath != NULL) {
9716 		if (!IS_USER_ADDRESS(userPath))
9717 			return B_BAD_ADDRESS;
9718 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9719 		if (status != B_OK)
9720 			return status;
9721 	}
9722 
9723 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9724 }
9725 
9726 
9727 ssize_t
9728 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9729 	size_t readBytes)
9730 {
9731 	char attribute[B_FILE_NAME_LENGTH];
9732 
9733 	if (userAttribute == NULL)
9734 		return B_BAD_VALUE;
9735 	if (!IS_USER_ADDRESS(userAttribute))
9736 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9738 	if (status != B_OK)
9739 		return status;
9740 
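	// open the attribute of the node referred to by fd, and reuse the
	// regular FD read path for the actual transfer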
9741 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9742 	if (attr < 0)
9743 		return attr;
9744 
9745 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9746 	_user_close(attr);
9747 
9748 	return bytes;
9749 }
9750 
9751 
9752 ssize_t
9753 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9754 	const void* buffer, size_t writeBytes)
9755 {
9756 	char attribute[B_FILE_NAME_LENGTH];
9757 
9758 	if (userAttribute == NULL)
9759 		return B_BAD_VALUE;
9760 	if (!IS_USER_ADDRESS(userAttribute))
9761 		return B_BAD_ADDRESS;
	status_t status = user_copy_name(attribute, userAttribute,
		sizeof(attribute));
9763 	if (status != B_OK)
9764 		return status;
9765 
	// Try to support the BeOS-typical truncation as well as the position
	// argument: only truncate the attribute when writing from offset 0.
9768 	int attr = attr_create(fd, NULL, attribute, type,
9769 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9770 	if (attr < 0)
9771 		return attr;
9772 
9773 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9774 	_user_close(attr);
9775 
9776 	return bytes;
9777 }
9778 
9779 
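/*!	The kernel side of stat_attr(): opens the attribute, reads its stat via
	the descriptor's fd_read_stat() hook, and condenses the result into the
	userland attr_info (type and size).
*/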
9780 status_t
9781 _user_stat_attr(int fd, const char* userAttribute,
9782 	struct attr_info* userAttrInfo)
9783 {
9784 	char attribute[B_FILE_NAME_LENGTH];
9785 
9786 	if (userAttribute == NULL || userAttrInfo == NULL)
9787 		return B_BAD_VALUE;
9788 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9789 		return B_BAD_ADDRESS;
9790 	status_t status = user_copy_name(attribute, userAttribute,
9791 		sizeof(attribute));
9792 	if (status != B_OK)
9793 		return status;
9794 
9795 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9796 	if (attr < 0)
9797 		return attr;
9798 
9799 	struct file_descriptor* descriptor
9800 		= get_fd(get_current_io_context(false), attr);
9801 	if (descriptor == NULL) {
9802 		_user_close(attr);
9803 		return B_FILE_ERROR;
9804 	}
9805 
9806 	struct stat stat;
9807 	if (descriptor->ops->fd_read_stat)
9808 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9809 	else
9810 		status = B_UNSUPPORTED;
9811 
9812 	put_fd(descriptor);
9813 	_user_close(attr);
9814 
9815 	if (status == B_OK) {
9816 		attr_info info;
9817 		info.type = stat.st_type;
9818 		info.size = stat.st_size;
9819 
9820 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9821 			return B_BAD_ADDRESS;
9822 	}
9823 
9824 	return status;
9825 }
9826 
9827 
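/*!	Opens -- or, if \a openMode contains O_CREAT, creates -- the attribute
	\a userName of the node referred to by (\a fd, \a userPath). The
	returned attribute FD can be used with the regular read/write syscalls.
*/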
9828 int
9829 _user_open_attr(int fd, const char* userPath, const char* userName,
9830 	uint32 type, int openMode)
9831 {
9832 	char name[B_FILE_NAME_LENGTH];
9833 
9834 	if (!IS_USER_ADDRESS(userName))
9835 		return B_BAD_ADDRESS;
9836 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9837 	if (status != B_OK)
9838 		return status;
9839 
9840 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9841 	if (pathBuffer.InitCheck() != B_OK)
9842 		return B_NO_MEMORY;
9843 
9844 	char* path = pathBuffer.LockBuffer();
9845 
9846 	if (userPath != NULL) {
9847 		if (!IS_USER_ADDRESS(userPath))
9848 			return B_BAD_ADDRESS;
9849 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9850 		if (status != B_OK)
9851 			return status;
9852 	}
9853 
9854 	if ((openMode & O_CREAT) != 0) {
9855 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9856 			false);
9857 	}
9858 
9859 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9860 }
9861 
9862 
9863 status_t
9864 _user_remove_attr(int fd, const char* userName)
9865 {
9866 	char name[B_FILE_NAME_LENGTH];
9867 
9868 	if (!IS_USER_ADDRESS(userName))
9869 		return B_BAD_ADDRESS;
9870 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9871 	if (status != B_OK)
9872 		return status;
9873 
9874 	return attr_remove(fd, name, false);
9875 }
9876 
9877 
9878 status_t
9879 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9880 	const char* userToName)
9881 {
9882 	if (!IS_USER_ADDRESS(userFromName)
9883 		|| !IS_USER_ADDRESS(userToName))
9884 		return B_BAD_ADDRESS;
9885 
9886 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9887 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9888 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9889 		return B_NO_MEMORY;
9890 
9891 	char* fromName = fromNameBuffer.LockBuffer();
9892 	char* toName = toNameBuffer.LockBuffer();
9893 
	status_t status = user_copy_name(fromName, userFromName,
		B_FILE_NAME_LENGTH);
9895 	if (status != B_OK)
9896 		return status;
9897 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9898 	if (status != B_OK)
9899 		return status;
9900 
9901 	return attr_rename(fromFile, fromName, toFile, toName, false);
9902 }
9903 
9904 
9905 int
9906 _user_open_index_dir(dev_t device)
9907 {
9908 	return index_dir_open(device, false);
9909 }
9910 
9911 
9912 status_t
9913 _user_create_index(dev_t device, const char* userName, uint32 type,
9914 	uint32 flags)
9915 {
9916 	char name[B_FILE_NAME_LENGTH];
9917 
9918 	if (!IS_USER_ADDRESS(userName))
9919 		return B_BAD_ADDRESS;
9920 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9921 	if (status != B_OK)
9922 		return status;
9923 
9924 	return index_create(device, name, type, flags, false);
9925 }
9926 
9927 
9928 status_t
9929 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9930 {
9931 	char name[B_FILE_NAME_LENGTH];
9932 	struct stat stat;
9933 	status_t status;
9934 
9935 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9936 		return B_BAD_ADDRESS;
9937 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9938 	if (status != B_OK)
9939 		return status;
9940 
9941 	status = index_name_read_stat(device, name, &stat, false);
9942 	if (status == B_OK) {
9943 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9944 			return B_BAD_ADDRESS;
9945 	}
9946 
9947 	return status;
9948 }
9949 
9950 
9951 status_t
9952 _user_remove_index(dev_t device, const char* userName)
9953 {
9954 	char name[B_FILE_NAME_LENGTH];
9955 
9956 	if (!IS_USER_ADDRESS(userName))
9957 		return B_BAD_ADDRESS;
9958 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9959 	if (status != B_OK)
9960 		return status;
9961 
9962 	return index_remove(device, name, false);
9963 }
9964 
9965 
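/*!	Copies the path of the current working directory into \a userBuffer;
	\a size is clamped to kMaxPathLength before the kernel-side path buffer
	is allocated.
*/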
9966 status_t
9967 _user_getcwd(char* userBuffer, size_t size)
9968 {
9969 	if (size == 0)
9970 		return B_BAD_VALUE;
9971 	if (!IS_USER_ADDRESS(userBuffer))
9972 		return B_BAD_ADDRESS;
9973 
9974 	if (size > kMaxPathLength)
9975 		size = kMaxPathLength;
9976 
9977 	KPath pathBuffer(size);
9978 	if (pathBuffer.InitCheck() != B_OK)
9979 		return B_NO_MEMORY;
9980 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9982 
9983 	char* path = pathBuffer.LockBuffer();
9984 
9985 	status_t status = get_cwd(path, size, false);
9986 	if (status != B_OK)
9987 		return status;
9988 
9989 	// Copy back the result
9990 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9991 		return B_BAD_ADDRESS;
9992 
9993 	return status;
9994 }
9995 
9996 
9997 status_t
9998 _user_setcwd(int fd, const char* userPath)
9999 {
10000 	TRACE(("user_setcwd: path = %p\n", userPath));
10001 
10002 	KPath pathBuffer(B_PATH_NAME_LENGTH);
10003 	if (pathBuffer.InitCheck() != B_OK)
10004 		return B_NO_MEMORY;
10005 
10006 	char* path = pathBuffer.LockBuffer();
10007 
10008 	if (userPath != NULL) {
10009 		if (!IS_USER_ADDRESS(userPath))
10010 			return B_BAD_ADDRESS;
10011 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10012 		if (status != B_OK)
10013 			return status;
10014 	}
10015 
10016 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10017 }
10018 
10019 
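/*!	The kernel side of chroot(): resolves \a userPath and makes the
	resulting vnode the root of the calling team's IO context. Only the
	root user is allowed to do this.
*/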
10020 status_t
10021 _user_change_root(const char* userPath)
10022 {
10023 	// only root is allowed to chroot()
10024 	if (geteuid() != 0)
10025 		return B_NOT_ALLOWED;
10026 
10027 	// alloc path buffer
10028 	KPath pathBuffer(B_PATH_NAME_LENGTH);
10029 	if (pathBuffer.InitCheck() != B_OK)
10030 		return B_NO_MEMORY;
10031 
10032 	// copy userland path to kernel
10033 	char* path = pathBuffer.LockBuffer();
10034 	if (userPath != NULL) {
10035 		if (!IS_USER_ADDRESS(userPath))
10036 			return B_BAD_ADDRESS;
10037 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10038 		if (status != B_OK)
10039 			return status;
10040 	}
10041 
10042 	// get the vnode
10043 	struct vnode* vnode;
10044 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
10045 	if (status != B_OK)
10046 		return status;
10047 
10048 	// set the new root
10049 	struct io_context* context = get_current_io_context(false);
10050 	mutex_lock(&sIOContextRootLock);
10051 	struct vnode* oldRoot = context->root;
10052 	context->root = vnode;
10053 	mutex_unlock(&sIOContextRootLock);
10054 
10055 	put_vnode(oldRoot);
10056 
10057 	return B_OK;
10058 }
10059 
10060 
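/*!	Opens a query on the volume specified by \a device. \a port and
	\a token are passed through to query_open(); for live queries they
	identify where entry update notifications are sent.
*/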
10061 int
10062 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10063 	uint32 flags, port_id port, int32 token)
10064 {
10065 	char* query;
10066 
10067 	if (device < 0 || userQuery == NULL || queryLength == 0)
10068 		return B_BAD_VALUE;
10069 
10070 	if (!IS_USER_ADDRESS(userQuery))
10071 		return B_BAD_ADDRESS;
10072 
	// this is a safety restriction: bound the size of the kernel allocation
10074 	if (queryLength >= 65536)
10075 		return B_NAME_TOO_LONG;
10076 
10077 	query = (char*)malloc(queryLength + 1);
10078 	if (query == NULL)
10079 		return B_NO_MEMORY;
10080 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
10081 		free(query);
10082 		return B_BAD_ADDRESS;
10083 	}
10084 
10085 	int fd = query_open(device, query, flags, port, token, false);
10086 
10087 	free(query);
10088 	return fd;
10089 }
10090 
10091 
10092 #include "vfs_request_io.cpp"
10093