/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif

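// An illustrative sketch (not part of the build) of how these macros
// dispatch into a file system: a call like
//
//	size_t length = *_bytes;
//	status_t status = FS_CALL(vnode, read, cookie, pos, buffer, &length);
//
// expands to vnode->ops->read(vnode->mount->volume, vnode, cookie, pos,
// buffer, &length), i.e. the volume and the vnode are prepended to the
// arguments. Under KDEBUG a missing hook panics instead of jumping
// through a NULL pointer.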

const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd() -- this does not
	// depend on PATH_MAX).


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is ensured that it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};

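// A minimal sketch of the guarantee described above: once we own a
// reference to any vnode of a mount, following the immutable links is
// safe without further locking, e.g. (assuming a non-root mount, whose
// root_vnode->covers is non-NULL):
//
//	struct vnode* covered = vnode->mount->root_vnode->covers;
//	dev_t underlyingDevice = covered->mount->id;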

namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are
	  immutable after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;

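// Worked example for VHASH above: for mountID 3 and vnodeID
// 0x0000000200000001, the 64-bit node ID is folded into 32 bits and the
// mount ID is XORed in: ((uint32)(vnodeid >> 32) + (uint32)vnodeid)
// ^ (uint32)mountid = (2 + 1) ^ 3 = 0.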

struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes (10s)
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	struct vnode** _vnode, ino_t* _parentID);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	struct vnode** _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class VNodePutter {
public:
	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode* vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode* Detach()
	{
		struct vnode* vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode* fVNode;
};

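// Usage sketch (hypothetical caller): VNodePutter releases the vnode
// reference on every exit path unless it is detached.
//
//	struct vnode* vnode;
//	if (get_vnode(device, nodeID, &vnode, true, 0) != B_OK)
//		return B_ENTRY_NOT_FOUND;
//	VNodePutter vnodePutter(vnode);
//	if (!HAS_FS_CALL(vnode, read_stat))
//		return B_UNSUPPORTED;	// the putter calls put_vnode() here
//	...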

class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

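// Usage sketch (hypothetical caller): FDCloser guards a freshly allocated
// file descriptor so that it is closed with the matching kernel/userland
// close function if an error occurs before it is handed out; Detach()
// transfers ownership on success.
//
//	FDCloser fdCloser(fd, kernel);
//	if (status != B_OK)
//		return status;			// fdCloser closes fd
//	return fdCloser.Detach();	// success -- the caller keeps fd
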
} // namespace


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*!	Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold sMountLock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}

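// For example, both get_file_system_name("file_systems/bfs/v1") and
// get_file_system_name("bfs") return a malloc()ed "bfs", which the caller
// must free().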

/*!	Accepts a list of file system names separated by colons, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}

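// For example (with hypothetical layer names): given the list
// "bfs:overlay", layer 0 yields "bfs", layer 1 yields "overlay", and
// layer 2 returns NULL because there is no third name.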

static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);

	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning if one should
	still wait for the vnode to become unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}

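// Condensed sketch of the intended retry pattern (get_vnode() below does
// essentially this, re-acquiring the proper locks around each lookup):
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnode != NULL && vnode->IsBusy()) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;	// gave up after roughly 10 seconds
//		vnode = lookup_vnode(mountID, vnodeID);
//	}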

/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		object_cache_free(sVnodeCache, vnode, 0);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		object_cache_free(sVnodeCache, vnode, 0);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has a chance to
	// drop to 0 at all. Deleting the file cache now will cause the next to
	// last cache reference to be released, which will also release a (no
	// longer existing) vnode reference. To avoid problems, we set the vnode's
	// ref count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	object_cache_free(sVnodeCache, vnode, 0);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c false, if the function shall return \c B_BUSY
		   immediately when the node is busy instead of waiting for it.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		// vnodes in the Removed state (except ones still Unpublished)
		// which are also Busy will disappear soon, so we do not wait for them.
		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			object_cache_free(sVnodeCache, vnode, 0);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}

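// A minimal sketch of the usual pairing (hypothetical caller): every
// successful get_vnode() must be balanced by a put_vnode() (below).
//
//	struct vnode* vnode;
//	status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
//	if (status != B_OK)
//		return status;
//	// ... the node cannot go away while we hold the reference ...
//	put_vnode(vnode);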

/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we'd rather not free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success; even if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*!	Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}

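// Worked example: a normalized flock with l_start 100 and l_len 50 covers
// the closed range [100, 149]. A lock spanning [140, 200] intersects it
// (140 <= 149 and 200 >= 100), while one spanning [150, 200] does not
// (150 <= 149 fails). A NULL flock matches every lock, which
// release_advisory_lock() uses to drop all locks of the calling team.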

/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// copy the original end before truncating the first lock
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			delete lock;
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}

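// Condensed sketch of how an fcntl(F_SETLK/F_SETLKW) style caller is
// expected to drive these helpers (hypothetical, error handling elided):
//
//	normalize_flock(descriptor, &flock);	// defined below
//	if (flock.l_type == F_UNLCK)
//		release_advisory_lock(vnode, context, NULL, &flock);
//	else
//		acquire_advisory_lock(vnode, context, NULL, &flock,
//			op == F_SETLKW);	// wait only for F_SETLKW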
1843 
1844 /*!	Normalizes the \a flock structure to make it easier to compare the
1845 	structure with others. The l_start and l_len fields are set to absolute
1846 	values according to the l_whence field.
1847 */
1848 static status_t
1849 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1850 {
1851 	switch (flock->l_whence) {
1852 		case SEEK_SET:
1853 			break;
1854 		case SEEK_CUR:
1855 			flock->l_start += descriptor->pos;
1856 			break;
1857 		case SEEK_END:
1858 		{
1859 			struct vnode* vnode = descriptor->u.vnode;
1860 			struct stat stat;
1861 			status_t status;
1862 
1863 			if (!HAS_FS_CALL(vnode, read_stat))
1864 				return B_UNSUPPORTED;
1865 
1866 			status = FS_CALL(vnode, read_stat, &stat);
1867 			if (status != B_OK)
1868 				return status;
1869 
1870 			flock->l_start += stat.st_size;
1871 			break;
1872 		}
1873 		default:
1874 			return B_BAD_VALUE;
1875 	}
1876 
1877 	if (flock->l_start < 0)
1878 		flock->l_start = 0;
1879 	if (flock->l_len == 0)
1880 		flock->l_len = OFF_MAX;
1881 
1882 	// don't let the offset and length overflow
1883 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1884 		flock->l_len = OFF_MAX - flock->l_start;
1885 
1886 	if (flock->l_len < 0) {
1887 		// a negative length reverses the region
1888 		flock->l_start += flock->l_len;
1889 		flock->l_len = -flock->l_len;
1890 	}
1891 
1892 	return B_OK;
1893 }
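

/*	Worked example for normalize_flock() above (hypothetical values): with
	l_whence = SEEK_CUR, descriptor->pos = 1000, l_start = -100 and
	l_len = 50, the result is l_start = 900, l_len = 50. A negative length
	reverses the region, so l_start = 500, l_len = -100 becomes
	l_start = 400, l_len = 100, and l_len = 0 means "up to the end of the
	file" and is expanded to OFF_MAX.
*/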
1894 
1895 
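/*!	Replaces \a vnode, if it resides on the given \a mount (and, if not
	\c NULL, equals \a vnodeToDisconnect), by the vnode it covers, or by
	\a fallBack if it doesn't cover another vnode. On replacement a
	reference to the new node is acquired and the reference to the given
	node is released. If \a lockRootLock is \c true, sIOContextRootLock is
	held while swapping the node, as needed when replacing an IO context's
	root.
*/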
1896 static void
1897 replace_vnode_if_disconnected(struct fs_mount* mount,
1898 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1899 	struct vnode* fallBack, bool lockRootLock)
1900 {
1901 	struct vnode* givenVnode = vnode;
1902 	bool vnodeReplaced = false;
1903 
1904 	ReadLocker vnodeReadLocker(sVnodeLock);
1905 
1906 	if (lockRootLock)
1907 		mutex_lock(&sIOContextRootLock);
1908 
1909 	while (vnode != NULL && vnode->mount == mount
1910 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1911 		if (vnode->covers != NULL) {
1912 			// redirect the vnode to the covered vnode
1913 			vnode = vnode->covers;
1914 		} else
1915 			vnode = fallBack;
1916 
1917 		vnodeReplaced = true;
1918 	}
1919 
1920 	// If we've replaced the node, grab a reference for the new one.
1921 	if (vnodeReplaced && vnode != NULL)
1922 		inc_vnode_ref_count(vnode);
1923 
1924 	if (lockRootLock)
1925 		mutex_unlock(&sIOContextRootLock);
1926 
1927 	vnodeReadLocker.Unlock();
1928 
1929 	if (vnodeReplaced)
1930 		put_vnode(givenVnode);
1931 }
1932 
1933 
1934 /*!	Disconnects all file descriptors that are associated with the
1935 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1936 	\a mount object.
1937 
1938 	Note, after you've called this function, there might still be ongoing
1939 	accesses - they won't be interrupted if they were already in progress.
1940 	However, any subsequent access will fail.
1941 
1942 	This is not a cheap function and should be used with care and rarely.
1943 	TODO: there is currently no means to stop a blocking read/write!
1944 */
1945 static void
1946 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1947 	struct vnode* vnodeToDisconnect)
1948 {
1949 	// iterate over all teams and peek into their file descriptors
1950 	TeamListIterator teamIterator;
1951 	while (Team* team = teamIterator.Next()) {
1952 		BReference<Team> teamReference(team, true);
1953 		TeamLocker teamLocker(team);
1954 
1955 		// lock the I/O context
1956 		io_context* context = team->io_context;
1957 		if (context == NULL)
1958 			continue;
1959 		MutexLocker contextLocker(context->io_mutex);
1960 
1961 		teamLocker.Unlock();
1962 
1963 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1964 			sRoot, true);
1965 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1966 			sRoot, false);
1967 
1968 		for (uint32 i = 0; i < context->table_size; i++) {
1969 			struct file_descriptor* descriptor = context->fds[i];
1970 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1971 				continue;
1972 
1973 			inc_fd_ref_count(descriptor);
1974 
1975 			// if this descriptor points at this mount, we
1976 			// need to disconnect it to be able to unmount
1977 			struct vnode* vnode = fd_vnode(descriptor);
1978 			if (vnodeToDisconnect != NULL) {
1979 				if (vnode == vnodeToDisconnect)
1980 					disconnect_fd(descriptor);
1981 			} else if ((vnode != NULL && vnode->mount == mount)
1982 				|| (vnode == NULL && descriptor->u.mount == mount))
1983 				disconnect_fd(descriptor);
1984 
1985 			put_fd(descriptor);
1986 		}
1987 	}
1988 }
1989 
1990 
1991 /*!	\brief Gets the root node of the current IO context.
1992 	If \a kernel is \c true, the kernel IO context will be used.
1993 	The caller obtains a reference to the returned node.
1994 */
1995 struct vnode*
1996 get_root_vnode(bool kernel)
1997 {
1998 	if (!kernel) {
1999 		// Get current working directory from io context
2000 		struct io_context* context = get_current_io_context(kernel);
2001 
2002 		mutex_lock(&sIOContextRootLock);
2003 
2004 		struct vnode* root = context->root;
2005 		if (root != NULL)
2006 			inc_vnode_ref_count(root);
2007 
2008 		mutex_unlock(&sIOContextRootLock);
2009 
2010 		if (root != NULL)
2011 			return root;
2012 
2013 		// That should never happen.
2014 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2015 			"have a root\n", team_get_current_team_id());
2016 	}
2017 
2018 	inc_vnode_ref_count(sRoot);
2019 	return sRoot;
2020 }
2021 
2022 
2023 /*!	\brief Gets the directory path and leaf name for a given path.
2024 
2025 	The supplied \a path is transformed to refer to the directory part of
2026 	the entry identified by the original path, and into the buffer \a filename
2027 	the leaf name of the original entry is written.
2028 	Neither the returned path nor the leaf name can be expected to be
2029 	canonical.
2030 
2031 	\param path The path to be analyzed. Must be able to store at least one
2032 		   additional character.
2033 	\param filename The buffer into which the leaf name will be written.
2034 		   Must be of size B_FILE_NAME_LENGTH at least.
2035 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2036 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2037 		   if the given path name is empty.
2038 */
2039 static status_t
2040 get_dir_path_and_leaf(char* path, char* filename)
2041 {
2042 	if (*path == '\0')
2043 		return B_ENTRY_NOT_FOUND;
2044 
2045 	char* last = strrchr(path, '/');
2046 		// '/' are not allowed in file names!
2047 
2048 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2049 
2050 	if (last == NULL) {
2051 		// this path is a single segment with no '/' in it
2052 		// ex. "foo"
2053 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2054 			return B_NAME_TOO_LONG;
2055 
2056 		strcpy(path, ".");
2057 	} else {
2058 		last++;
2059 		if (last[0] == '\0') {
2060 			// special case: the path ends in one or more '/' - remove them
2061 			while (*--last == '/' && last != path);
2062 			last[1] = '\0';
2063 
2064 			if (last == path && last[0] == '/') {
2065 				// This path points to the root of the file system
2066 				strcpy(filename, ".");
2067 				return B_OK;
2068 			}
2069 			for (; last != path && *(last - 1) != '/'; last--);
2070 				// rewind to the start of the leaf before the '/'
2071 		}
2072 
2073 		// normal leaf: replace the leaf portion of the path with a '.'
2074 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2075 			return B_NAME_TOO_LONG;
2076 
2077 		last[0] = '.';
2078 		last[1] = '\0';
2079 	}
2080 	return B_OK;
2081 }
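

/*	Examples for get_dir_path_and_leaf() above:
		"a/b/c"  -> path "a/b/.", filename "c"
		"foo"    -> path ".",     filename "foo"
		"a/b///" -> path "a/.",   filename "b"
		"/"      -> path "/",     filename "."
*/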
2082 
2083 
2084 static status_t
2085 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2086 	bool traverse, bool kernel, struct vnode** _vnode)
2087 {
2088 	char clonedName[B_FILE_NAME_LENGTH + 1];
2089 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2090 		return B_NAME_TOO_LONG;
2091 
2092 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2093 	struct vnode* directory;
2094 
2095 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2096 	if (status < 0)
2097 		return status;
2098 
2099 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2100 		_vnode, NULL);
2101 }
2102 
2103 
2104 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2105 	and returns the respective vnode.
2106 	On success a reference to the vnode is acquired for the caller.
2107 */
2108 static status_t
2109 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2110 {
2111 	ino_t id;
2112 	bool missing;
2113 
2114 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2115 		return missing ? B_ENTRY_NOT_FOUND
2116 			: get_vnode(dir->device, id, _vnode, true, false);
2117 	}
2118 
2119 	status_t status = FS_CALL(dir, lookup, name, &id);
2120 	if (status != B_OK)
2121 		return status;
2122 
2123 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2124 	// have a reference and just need to look the node up.
2125 	rw_lock_read_lock(&sVnodeLock);
2126 	*_vnode = lookup_vnode(dir->device, id);
2127 	rw_lock_read_unlock(&sVnodeLock);
2128 
2129 	if (*_vnode == NULL) {
2130 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2131 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2132 		return B_ENTRY_NOT_FOUND;
2133 	}
2134 
2135 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2136 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2137 //		(*_vnode)->mount->id, (*_vnode)->id);
2138 
2139 	return B_OK;
2140 }
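

/*	Note on the entry cache consulted by lookup_dir_entry() above: a hit
	may also be negative ("missing"), in which case B_ENTRY_NOT_FOUND is
	returned without calling into the file system at all.
*/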
2141 
2142 
2143 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2144 	\a path must not be NULL.
2145 	If it returns successfully, \a path contains the name of the last path
2146 	component. This function clobbers the buffer pointed to by \a path only
2147 	if it contains more than one component.
2148 	Note, this reduces the ref_count of the starting \a vnode, whether it
2149 	is successful or not!
2150 */
2151 static status_t
2152 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2153 	int count, struct io_context* ioContext, struct vnode** _vnode,
2154 	ino_t* _parentID)
2155 {
2156 	status_t status = B_OK;
2157 	ino_t lastParentID = vnode->id;
2158 
2159 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2160 
2161 	if (path == NULL) {
2162 		put_vnode(vnode);
2163 		return B_BAD_VALUE;
2164 	}
2165 
2166 	if (*path == '\0') {
2167 		put_vnode(vnode);
2168 		return B_ENTRY_NOT_FOUND;
2169 	}
2170 
2171 	while (true) {
2172 		struct vnode* nextVnode;
2173 		char* nextPath;
2174 
2175 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2176 			path));
2177 
2178 		// done?
2179 		if (path[0] == '\0')
2180 			break;
2181 
2182 		// walk to find the next path component ("path" will point to a single
2183 		// path component), and filter out multiple slashes
2184 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2185 				nextPath++);
2186 
2187 		bool directoryFound = false;
2188 		if (*nextPath == '/') {
2189 			directoryFound = true;
2190 			*nextPath = '\0';
2191 			do
2192 				nextPath++;
2193 			while (*nextPath == '/');
2194 		}
2195 
2196 		// If the '..' is at a covering vnode, move to the covered vnode,
2197 		// so that we pass the '..' path to the underlying file system.
2198 		// Also prevent escaping the root of the IO context.
2199 		if (strcmp("..", path) == 0) {
2200 			if (vnode == ioContext->root) {
2201 				// Attempted prison break! Keep it contained.
2202 				path = nextPath;
2203 				continue;
2204 			}
2205 
2206 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2207 				nextVnode = coveredVnode;
2208 				put_vnode(vnode);
2209 				vnode = nextVnode;
2210 			}
2211 		}
2212 
2213 		// check if vnode is really a directory
2214 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2215 			status = B_NOT_A_DIRECTORY;
2216 
2217 		// Check if we have the right to search the current directory vnode.
2218 		// If a file system doesn't have the access() function, we assume that
2219 		// searching a directory is always allowed
2220 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2221 			status = FS_CALL(vnode, access, X_OK);
2222 
2223 		// Tell the filesystem to get the vnode of this path component (if we
2224 		// got the permission from the call above)
2225 		if (status == B_OK)
2226 			status = lookup_dir_entry(vnode, path, &nextVnode);
2227 
2228 		if (status != B_OK) {
2229 			put_vnode(vnode);
2230 			return status;
2231 		}
2232 
2233 		// If the new node is a symbolic link, resolve it (if we've been told
2234 		// to do it)
2235 		if (S_ISLNK(nextVnode->Type())
2236 			&& (traverseLeafLink || directoryFound)) {
2237 			size_t bufferSize;
2238 			char* buffer;
2239 
2240 			TRACE(("traverse link\n"));
2241 
2242 			// it's not exactly nice style using goto in this way, but hey,
2243 			// it works :-/
2244 			if (count + 1 > B_MAX_SYMLINKS) {
2245 				status = B_LINK_LIMIT;
2246 				goto resolve_link_error;
2247 			}
2248 
2249 			bufferSize = B_PATH_NAME_LENGTH;
2250 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2251 			if (buffer == NULL) {
2252 				status = B_NO_MEMORY;
2253 				goto resolve_link_error;
2254 			}
2255 
2256 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2257 				bufferSize--;
2258 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2259 				// null-terminate
2260 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2261 					buffer[bufferSize] = '\0';
2262 			} else
2263 				status = B_BAD_VALUE;
2264 
2265 			if (status != B_OK) {
2266 				free(buffer);
2267 
2268 		resolve_link_error:
2269 				put_vnode(vnode);
2270 				put_vnode(nextVnode);
2271 
2272 				return status;
2273 			}
2274 			put_vnode(nextVnode);
2275 
2276 			// Check if we start from the root directory or the current
2277 			// directory ("vnode" still points to that one).
2278 			// Cut off all leading slashes if it's the root directory
2279 			path = buffer;
2280 			bool absoluteSymlink = false;
2281 			if (path[0] == '/') {
2282 				// we don't need the old directory anymore
2283 				put_vnode(vnode);
2284 
2285 				while (*++path == '/')
2286 					;
2287 
2288 				mutex_lock(&sIOContextRootLock);
2289 				vnode = ioContext->root;
2290 				inc_vnode_ref_count(vnode);
2291 				mutex_unlock(&sIOContextRootLock);
2292 
2293 				absoluteSymlink = true;
2294 			}
2295 
2296 			inc_vnode_ref_count(vnode);
2297 				// balance the next recursion - we will decrement the
2298 				// ref_count of the vnode, no matter if we succeeded or not
2299 
2300 			if (absoluteSymlink && *path == '\0') {
2301 				// symlink was just "/"
2302 				nextVnode = vnode;
2303 			} else {
2304 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2305 					ioContext, &nextVnode, &lastParentID);
2306 			}
2307 
2308 			object_cache_free(sPathNameCache, buffer, 0);
2309 
2310 			if (status != B_OK) {
2311 				put_vnode(vnode);
2312 				return status;
2313 			}
2314 		} else
2315 			lastParentID = vnode->id;
2316 
2317 		// decrease the ref count on the old dir we just looked up into
2318 		put_vnode(vnode);
2319 
2320 		path = nextPath;
2321 		vnode = nextVnode;
2322 
2323 		// see if we hit a covered node
2324 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2325 			put_vnode(vnode);
2326 			vnode = coveringNode;
2327 		}
2328 	}
2329 
2330 	*_vnode = vnode;
2331 	if (_parentID)
2332 		*_parentID = lastParentID;
2333 
2334 	return B_OK;
2335 }
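

/*	The reference counting contract of vnode_path_to_vnode() above in
	short: the reference to the starting vnode is always consumed, and on
	success the caller receives one reference to *_vnode. A typical caller
	that wants to keep its own reference therefore looks roughly like this
	(sketch):

		inc_vnode_ref_count(dir);
			// the call below consumes one reference
		struct vnode* resolved;
		status_t error = vnode_path_to_vnode(dir, path, true, 0, kernel,
			&resolved, NULL);
		if (error == B_OK)
			put_vnode(resolved);	// ... once done with the node
*/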
2336 
2337 
2338 static status_t
2339 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2340 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2341 {
2342 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2343 		get_current_io_context(kernel), _vnode, _parentID);
2344 }
2345 
2346 
2347 static status_t
2348 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2349 	ino_t* _parentID, bool kernel)
2350 {
2351 	struct vnode* start = NULL;
2352 
2353 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2354 
2355 	if (!path)
2356 		return B_BAD_VALUE;
2357 
2358 	if (*path == '\0')
2359 		return B_ENTRY_NOT_FOUND;
2360 
2361 	// figure out if we need to start at root or at cwd
2362 	if (*path == '/') {
2363 		if (sRoot == NULL) {
2364 			// we're a bit early, aren't we?
2365 			return B_ERROR;
2366 		}
2367 
2368 		while (*++path == '/')
2369 			;
2370 		start = get_root_vnode(kernel);
2371 
2372 		if (*path == '\0') {
2373 			*_vnode = start;
2374 			return B_OK;
2375 		}
2376 
2377 	} else {
2378 		struct io_context* context = get_current_io_context(kernel);
2379 
2380 		mutex_lock(&context->io_mutex);
2381 		start = context->cwd;
2382 		if (start != NULL)
2383 			inc_vnode_ref_count(start);
2384 		mutex_unlock(&context->io_mutex);
2385 
2386 		if (start == NULL)
2387 			return B_ERROR;
2388 	}
2389 
2390 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2391 		_parentID);
2392 }
2393 
2394 
2395 /*! Returns the vnode for the next to last segment of the path, and returns
2396 	the last portion in \a filename.
2397 	The path buffer must be able to store at least one additional character.
2398 */
2399 static status_t
2400 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2401 	bool kernel)
2402 {
2403 	status_t status = get_dir_path_and_leaf(path, filename);
2404 	if (status != B_OK)
2405 		return status;
2406 
2407 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2408 }
2409 
2410 
2411 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2412 		   to by a FD + path pair.
2413 
2414 	\a path must be given in either case. \a fd might be omitted, in which
2415 	case \a path is either an absolute path or one relative to the current
2416 	directory. If both are supplied and \a path is relative, it is reckoned
2417 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2418 	is ignored.
2419 
2420 	The caller has the responsibility to call put_vnode() on the returned
2421 	directory vnode.
2422 
2423 	\param fd The FD. May be < 0.
2424 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2425 	       is modified by this function. It must have at least room for a
2426 	       string one character longer than the path it contains.
2427 	\param _vnode A pointer to a variable the directory vnode shall be written
2428 		   into.
2429 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2430 		   the leaf name of the specified entry will be written.
2431 	\param kernel \c true, if invoked from inside the kernel, \c false if
2432 		   invoked from userland.
2433 	\return \c B_OK, if everything went fine, another error code otherwise.
2434 */
2435 static status_t
2436 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2437 	char* filename, bool kernel)
2438 {
2439 	if (!path)
2440 		return B_BAD_VALUE;
2441 	if (*path == '\0')
2442 		return B_ENTRY_NOT_FOUND;
2443 	if (fd < 0)
2444 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2445 
2446 	status_t status = get_dir_path_and_leaf(path, filename);
2447 	if (status != B_OK)
2448 		return status;
2449 
2450 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2451 }
2452 
2453 
2454 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2455 		   to by a vnode + path pair.
2456 
2457 	\a path must be given in either case. \a vnode might be omitted, in which
2458 	case \a path is either an absolute path or one relative to the current
2459 	directory. If both are supplied and \a path is relative, it is reckoned
2460 	off of the directory referred to by \a vnode. If \a path is absolute,
2461 	\a vnode is ignored.
2462 
2463 	The caller has the responsibility to call put_vnode() on the returned
2464 	directory vnode.
2465 
2466 	\param vnode The vnode. May be \c NULL.
2467 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2468 	       is modified by this function. It must have at least room for a
2469 	       string one character longer than the path it contains.
2470 	\param _vnode A pointer to a variable the directory vnode shall be written
2471 		   into.
2472 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2473 		   the leaf name of the specified entry will be written.
2474 	\param kernel \c true, if invoked from inside the kernel, \c false if
2475 		   invoked from userland.
2476 	\return \c B_OK, if everything went fine, another error code otherwise.
2477 */
2478 static status_t
2479 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2480 	struct vnode** _vnode, char* filename, bool kernel)
2481 {
2482 	if (!path)
2483 		return B_BAD_VALUE;
2484 	if (*path == '\0')
2485 		return B_ENTRY_NOT_FOUND;
2486 	if (vnode == NULL || path[0] == '/')
2487 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2488 
2489 	status_t status = get_dir_path_and_leaf(path, filename);
2490 	if (status != B_OK)
2491 		return status;
2492 
2493 	inc_vnode_ref_count(vnode);
2494 		// vnode_path_to_vnode() always decrements the ref count
2495 
2496 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2497 }
2498 
2499 
2500 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2501 */
2502 static status_t
2503 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2504 	size_t bufferSize, struct io_context* ioContext)
2505 {
2506 	if (bufferSize < sizeof(struct dirent))
2507 		return B_BAD_VALUE;
2508 
2509 	// See if the vnode is covering another vnode and move to the covered
2510 	// vnode so we get the underlying file system
2511 	VNodePutter vnodePutter;
2512 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2513 		vnode = coveredVnode;
2514 		vnodePutter.SetTo(vnode);
2515 	}
2516 
2517 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2518 		// The FS supports getting the name of a vnode.
2519 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2520 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2521 			return B_OK;
2522 	}
2523 
2524 	// The FS doesn't support getting the name of a vnode. So we search the
2525 	// parent directory for the vnode, if the caller let us.
2526 
2527 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2528 		return B_UNSUPPORTED;
2529 
2530 	void* cookie;
2531 
2532 	status_t status = FS_CALL(parent, open_dir, &cookie);
2533 	if (status >= B_OK) {
2534 		while (true) {
2535 			uint32 num = 1;
2536 			// We use the FS hook directly instead of dir_read(), since we don't
2537 			// want the entries to be fixed up. We have already resolved vnode to
2538 			// the covered node.
2539 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2540 				&num);
2541 			if (status != B_OK)
2542 				break;
2543 			if (num == 0) {
2544 				status = B_ENTRY_NOT_FOUND;
2545 				break;
2546 			}
2547 
2548 			if (vnode->id == buffer->d_ino) {
2549 				// found correct entry!
2550 				break;
2551 			}
2552 		}
2553 
2554 		FS_CALL(parent, close_dir, cookie);
2555 		FS_CALL(parent, free_dir_cookie, cookie);
2556 	}
2557 	return status;
2558 }
2559 
2560 
2561 static status_t
2562 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2563 	size_t nameSize, bool kernel)
2564 {
2565 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2566 	struct dirent* dirent = (struct dirent*)buffer;
2567 
2568 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2569 		get_current_io_context(kernel));
2570 	if (status != B_OK)
2571 		return status;
2572 
2573 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2574 		return B_BUFFER_OVERFLOW;
2575 
2576 	return B_OK;
2577 }
2578 
2579 
2580 /*!	Gets the full path to a given directory vnode.
2581 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2582 	file system doesn't support this call, it will fall back to iterating
2583 	through the parent directory to get the name of the child.
2584 
2585 	To protect against circular loops, it supports a maximum tree depth
2586 	of 256 levels.
2587 
2588 	Note that the path may no longer be correct by the time this function
2589 	returns! It doesn't use any locking to ensure that the returned path
2590 	stays valid, as paths aren't stable anyway: the path can change anytime.
2591 
2592 	It might be a good idea, though, for the calling function to check
2593 	whether the returned path still exists (it's not done here for efficiency).
2594 */
2595 static status_t
2596 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2597 	bool kernel)
2598 {
2599 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2600 
2601 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2602 		return B_BAD_VALUE;
2603 
2604 	if (!S_ISDIR(vnode->Type()))
2605 		return B_NOT_A_DIRECTORY;
2606 
2607 	char* path = buffer;
2608 	int32 insert = bufferSize;
2609 	int32 maxLevel = 256;
2610 	int32 length;
2611 	status_t status = B_OK;
2612 	struct io_context* ioContext = get_current_io_context(kernel);
2613 
2614 	// we don't use get_vnode() here because this call is more
2615 	// efficient and does all we need from get_vnode()
2616 	inc_vnode_ref_count(vnode);
2617 
2618 	path[--insert] = '\0';
2619 		// the path is filled right to left
2620 
2621 	while (true) {
2622 		// If the node is the context's root, bail out. Otherwise resolve mount
2623 		// points.
2624 		if (vnode == ioContext->root)
2625 			break;
2626 
2627 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2628 			put_vnode(vnode);
2629 			vnode = coveredVnode;
2630 		}
2631 
2632 		// lookup the parent vnode
2633 		struct vnode* parentVnode;
2634 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2635 		if (status != B_OK)
2636 			goto out;
2637 
2638 		if (parentVnode == vnode) {
2639 			// The caller apparently got their hands on a node outside of their
2640 			// context's root. Now we've hit the global root.
2641 			put_vnode(parentVnode);
2642 			break;
2643 		}
2644 
2645 		// get the node's name
2646 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2647 			// also used for fs_read_dir()
2648 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2649 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2650 			sizeof(nameBuffer), ioContext);
2651 
2652 		// release the current vnode, we only need its parent from now on
2653 		put_vnode(vnode);
2654 		vnode = parentVnode;
2655 
2656 		if (status != B_OK)
2657 			goto out;
2658 
2659 		// TODO: add an explicit check for loops in about 10 levels to do
2660 		// real loop detection
2661 
2662 		// don't go deeper than 'maxLevel' to protect against circular loops
2663 		if (maxLevel-- < 0) {
2664 			status = B_LINK_LIMIT;
2665 			goto out;
2666 		}
2667 
2668 		// add the name in front of the current path
2669 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2670 		length = strlen(name);
2671 		insert -= length;
2672 		if (insert <= 0) {
2673 			status = B_RESULT_NOT_REPRESENTABLE;
2674 			goto out;
2675 		}
2676 		memcpy(path + insert, name, length);
2677 		path[--insert] = '/';
2678 	}
2679 
2680 	// the root dir will result in an empty path: fix it
2681 	if (path[insert] == '\0')
2682 		path[--insert] = '/';
2683 
2684 	TRACE(("  path is: %s\n", path + insert));
2685 
2686 	// move the path to the start of the buffer
2687 	length = bufferSize - insert;
2688 	memmove(buffer, path + insert, length);
2689 
2690 out:
2691 	put_vnode(vnode);
2692 	return status;
2693 }
2694 
2695 
2696 /*!	Checks the length of every path component, and adds a '.'
2697 	if the path ends in a slash.
2698 	The given path buffer must be able to store at least one
2699 	additional character.
2700 */
2701 static status_t
2702 check_path(char* to)
2703 {
2704 	int32 length = 0;
2705 
2706 	// check length of every path component
2707 
2708 	while (*to) {
2709 		char* begin;
2710 		if (*to == '/')
2711 			to++, length++;
2712 
2713 		begin = to;
2714 		while (*to != '/' && *to)
2715 			to++, length++;
2716 
2717 		if (to - begin > B_FILE_NAME_LENGTH)
2718 			return B_NAME_TOO_LONG;
2719 	}
2720 
2721 	if (length == 0)
2722 		return B_ENTRY_NOT_FOUND;
2723 
2724 	// complete path if there is a slash at the end
2725 
2726 	if (*(to - 1) == '/') {
2727 		if (length > B_PATH_NAME_LENGTH - 2)
2728 			return B_NAME_TOO_LONG;
2729 
2730 		to[0] = '.';
2731 		to[1] = '\0';
2732 	}
2733 
2734 	return B_OK;
2735 }
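

/*	Examples for check_path() above: "/foo/bar/" becomes "/foo/bar/.",
	while "/foo/bar" is left untouched; an empty path yields
	B_ENTRY_NOT_FOUND, and any single component longer than
	B_FILE_NAME_LENGTH yields B_NAME_TOO_LONG.
*/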
2736 
2737 
2738 static struct file_descriptor*
2739 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2740 {
2741 	struct file_descriptor* descriptor
2742 		= get_fd(get_current_io_context(kernel), fd);
2743 	if (descriptor == NULL)
2744 		return NULL;
2745 
2746 	struct vnode* vnode = fd_vnode(descriptor);
2747 	if (vnode == NULL) {
2748 		put_fd(descriptor);
2749 		return NULL;
2750 	}
2751 
2752 	// ToDo: when we can close a file descriptor at any point, investigate
2753 	//	if this is still valid to do (accessing the vnode without ref_count
2754 	//	or locking)
2755 	*_vnode = vnode;
2756 	return descriptor;
2757 }
2758 
2759 
2760 static struct vnode*
2761 get_vnode_from_fd(int fd, bool kernel)
2762 {
2763 	struct file_descriptor* descriptor;
2764 	struct vnode* vnode;
2765 
2766 	descriptor = get_fd(get_current_io_context(kernel), fd);
2767 	if (descriptor == NULL)
2768 		return NULL;
2769 
2770 	vnode = fd_vnode(descriptor);
2771 	if (vnode != NULL)
2772 		inc_vnode_ref_count(vnode);
2773 
2774 	put_fd(descriptor);
2775 	return vnode;
2776 }
2777 
2778 
2779 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2780 	only the path will be considered. In this case, the \a path must not be
2781 	NULL.
2782 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2783 	and should be NULL for files.
2784 */
2785 static status_t
2786 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2787 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2788 {
2789 	if (fd < 0 && !path)
2790 		return B_BAD_VALUE;
2791 
2792 	if (path != NULL && *path == '\0')
2793 		return B_ENTRY_NOT_FOUND;
2794 
2795 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2796 		// no FD or absolute path
2797 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2798 	}
2799 
2800 	// FD only, or FD + relative path
2801 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2802 	if (vnode == NULL)
2803 		return B_FILE_ERROR;
2804 
2805 	if (path != NULL) {
2806 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2807 			_vnode, _parentID);
2808 	}
2809 
2810 	// there is no relative path to take into account
2811 
2812 	*_vnode = vnode;
2813 	if (_parentID)
2814 		*_parentID = -1;
2815 
2816 	return B_OK;
2817 }
2818 
2819 
2820 static int
2821 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2822 	void* cookie, int openMode, bool kernel)
2823 {
2824 	struct file_descriptor* descriptor;
2825 	int fd;
2826 
2827 	// If the vnode is locked, we don't allow creating a new file/directory
2828 	// file_descriptor for it
2829 	if (vnode && vnode->mandatory_locked_by != NULL
2830 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2831 		return B_BUSY;
2832 
2833 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2834 		return B_BAD_VALUE;
2835 
2836 	descriptor = alloc_fd();
2837 	if (!descriptor)
2838 		return B_NO_MEMORY;
2839 
2840 	if (vnode)
2841 		descriptor->u.vnode = vnode;
2842 	else
2843 		descriptor->u.mount = mount;
2844 	descriptor->cookie = cookie;
2845 
2846 	switch (type) {
2847 		// vnode types
2848 		case FDTYPE_FILE:
2849 			descriptor->ops = &sFileOps;
2850 			break;
2851 		case FDTYPE_DIR:
2852 			descriptor->ops = &sDirectoryOps;
2853 			break;
2854 		case FDTYPE_ATTR:
2855 			descriptor->ops = &sAttributeOps;
2856 			break;
2857 		case FDTYPE_ATTR_DIR:
2858 			descriptor->ops = &sAttributeDirectoryOps;
2859 			break;
2860 
2861 		// mount types
2862 		case FDTYPE_INDEX_DIR:
2863 			descriptor->ops = &sIndexDirectoryOps;
2864 			break;
2865 		case FDTYPE_QUERY:
2866 			descriptor->ops = &sQueryOps;
2867 			break;
2868 
2869 		default:
2870 			panic("get_new_fd() called with unknown type %d\n", type);
2871 			break;
2872 	}
2873 	descriptor->type = type;
2874 	descriptor->open_mode = openMode;
2875 
2876 	io_context* context = get_current_io_context(kernel);
2877 	fd = new_fd(context, descriptor);
2878 	if (fd < 0) {
2879 		descriptor->ops = NULL;
2880 		put_fd(descriptor);
2881 		return B_NO_MORE_FDS;
2882 	}
2883 
2884 	mutex_lock(&context->io_mutex);
2885 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2886 	mutex_unlock(&context->io_mutex);
2887 
2888 	return fd;
2889 }
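

/*	Sketch of a typical get_new_fd() call as made by the open() family of
	functions (vnode, openMode and kernel are assumed to be set up by the
	caller):

		void* cookie;
		status_t status = FS_CALL(vnode, open, openMode, &cookie);
		if (status == B_OK) {
			int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie,
				openMode, kernel);
			// if fd < 0, the caller must close the cookie and put the
			// vnode again
		}
*/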
2890 
2891 
2892 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2893 	vfs_normalize_path(). See there for more documentation.
2894 */
2895 static status_t
2896 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2897 {
2898 	VNodePutter dirPutter;
2899 	struct vnode* dir = NULL;
2900 	status_t error;
2901 
2902 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2903 		// get dir vnode + leaf name
2904 		struct vnode* nextDir;
2905 		char leaf[B_FILE_NAME_LENGTH];
2906 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2907 		if (error != B_OK)
2908 			return error;
2909 
2910 		dir = nextDir;
2911 		strcpy(path, leaf);
2912 		dirPutter.SetTo(dir);
2913 
2914 		// get file vnode, if we shall resolve links
2915 		bool fileExists = false;
2916 		struct vnode* fileVnode;
2917 		VNodePutter fileVnodePutter;
2918 		if (traverseLink) {
2919 			inc_vnode_ref_count(dir);
2920 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2921 					NULL) == B_OK) {
2922 				fileVnodePutter.SetTo(fileVnode);
2923 				fileExists = true;
2924 			}
2925 		}
2926 
2927 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2928 			// we're done -- construct the path
2929 			bool hasLeaf = true;
2930 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2931 				// special cases "." and ".." -- get the dir, forget the leaf
2932 				inc_vnode_ref_count(dir);
2933 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2934 					&nextDir, NULL);
2935 				if (error != B_OK)
2936 					return error;
2937 				dir = nextDir;
2938 				dirPutter.SetTo(dir);
2939 				hasLeaf = false;
2940 			}
2941 
2942 			// get the directory path
2943 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2944 			if (error != B_OK)
2945 				return error;
2946 
2947 			// append the leaf name
2948 			if (hasLeaf) {
2949 				// insert a directory separator if this is not the file system
2950 				// root
2951 				if ((strcmp(path, "/") != 0
2952 					&& strlcat(path, "/", pathSize) >= pathSize)
2953 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2954 					return B_NAME_TOO_LONG;
2955 				}
2956 			}
2957 
2958 			return B_OK;
2959 		}
2960 
2961 		// read link
2962 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2963 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2964 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2965 			if (error != B_OK)
2966 				return error;
2967 			if (bufferSize < B_PATH_NAME_LENGTH)
2968 				path[bufferSize] = '\0';
2969 		} else
2970 			return B_BAD_VALUE;
2971 	}
2972 
2973 	return B_LINK_LIMIT;
2974 }
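

/*	Example for normalize_path() above, assuming no symlinks are involved:
	"/boot/home//Desktop/../config" is rewritten in place to
	"/boot/home/config". If the leaf is a symlink and \a traverseLink is
	true, it is resolved and the loop starts over, at most B_MAX_SYMLINKS
	times before giving up with B_LINK_LIMIT.
*/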
2975 
2976 
2977 static status_t
2978 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2979 	struct io_context* ioContext)
2980 {
2981 	// Make sure the IO context root is not bypassed.
2982 	if (parent == ioContext->root) {
2983 		*_device = parent->device;
2984 		*_node = parent->id;
2985 		return B_OK;
2986 	}
2987 
2988 	inc_vnode_ref_count(parent);
2989 		// vnode_path_to_vnode() puts the node
2990 
2991 	// ".." is guaranteed not to be clobbered by this call
2992 	struct vnode* vnode;
2993 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2994 		ioContext, &vnode, NULL);
2995 	if (status == B_OK) {
2996 		*_device = vnode->device;
2997 		*_node = vnode->id;
2998 		put_vnode(vnode);
2999 	}
3000 
3001 	return status;
3002 }
3003 
3004 
3005 #ifdef ADD_DEBUGGER_COMMANDS
3006 
3007 
3008 static void
3009 _dump_advisory_locking(advisory_locking* locking)
3010 {
3011 	if (locking == NULL)
3012 		return;
3013 
3014 	kprintf("   lock:        %" B_PRId32, locking->lock);
3015 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3016 
3017 	int32 index = 0;
3018 	LockList::Iterator iterator = locking->locks.GetIterator();
3019 	while (iterator.HasNext()) {
3020 		struct advisory_lock* lock = iterator.Next();
3021 
3022 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3023 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3024 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3025 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3026 	}
3027 }
3028 
3029 
3030 static void
3031 _dump_mount(struct fs_mount* mount)
3032 {
3033 	kprintf("MOUNT: %p\n", mount);
3034 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3035 	kprintf(" device_name:   %s\n", mount->device_name);
3036 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3037 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3038 	kprintf(" partition:     %p\n", mount->partition);
3039 	kprintf(" lock:          %p\n", &mount->lock);
3040 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3041 		mount->owns_file_device ? " owns_file_device" : "");
3042 
3043 	fs_volume* volume = mount->volume;
3044 	while (volume != NULL) {
3045 		kprintf(" volume %p:\n", volume);
3046 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3047 		kprintf("  private_volume:   %p\n", volume->private_volume);
3048 		kprintf("  ops:              %p\n", volume->ops);
3049 		kprintf("  file_system:      %p\n", volume->file_system);
3050 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3051 		volume = volume->super_volume;
3052 	}
3053 
3054 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3055 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3056 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3057 	set_debug_variable("_partition", (addr_t)mount->partition);
3058 }
3059 
3060 
3061 static bool
3062 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3063 	const char* name)
3064 {
3065 	bool insertSlash = buffer[bufferSize] != '\0';
3066 	size_t nameLength = strlen(name);
3067 
3068 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3069 		return false;
3070 
3071 	if (insertSlash)
3072 		buffer[--bufferSize] = '/';
3073 
3074 	bufferSize -= nameLength;
3075 	memcpy(buffer + bufferSize, name, nameLength);
3076 
3077 	return true;
3078 }
3079 
3080 
3081 static bool
3082 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3083 	ino_t nodeID)
3084 {
3085 	if (bufferSize == 0)
3086 		return false;
3087 
3088 	bool insertSlash = buffer[bufferSize] != '\0';
3089 	if (insertSlash)
3090 		buffer[--bufferSize] = '/';
3091 
3092 	size_t size = snprintf(buffer, bufferSize,
3093 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3094 	if (size > bufferSize) {
3095 		if (insertSlash)
3096 			bufferSize++;
3097 		return false;
3098 	}
3099 
3100 	if (size < bufferSize)
3101 		memmove(buffer + bufferSize - size, buffer, size);
3102 
3103 	bufferSize -= size;
3104 	return true;
3105 }
3106 
3107 
3108 static char*
3109 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3110 	bool& _truncated)
3111 {
3112 	// null-terminate the path
3113 	buffer[--bufferSize] = '\0';
3114 
3115 	while (true) {
3116 		while (vnode->covers != NULL)
3117 			vnode = vnode->covers;
3118 
3119 		if (vnode == sRoot) {
3120 			_truncated = bufferSize == 0;
3121 			if (!_truncated)
3122 				buffer[--bufferSize] = '/';
3123 			return buffer + bufferSize;
3124 		}
3125 
3126 		// resolve the name
3127 		ino_t dirID;
3128 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3129 			vnode->id, dirID);
3130 		if (name == NULL) {
3131 			// Failed to resolve the name -- prepend "<dev,node>/".
3132 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3133 				vnode->mount->id, vnode->id);
3134 			return buffer + bufferSize;
3135 		}
3136 
3137 		// prepend the name
3138 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3139 			_truncated = true;
3140 			return buffer + bufferSize;
3141 		}
3142 
3143 		// resolve the directory node
3144 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3145 		if (nextVnode == NULL) {
3146 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3147 				vnode->mount->id, dirID);
3148 			return buffer + bufferSize;
3149 		}
3150 
3151 		vnode = nextVnode;
3152 	}
3153 }
3154 
3155 
3156 static void
3157 _dump_vnode(struct vnode* vnode, bool printPath)
3158 {
3159 	kprintf("VNODE: %p\n", vnode);
3160 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3161 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3162 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3163 	kprintf(" private_node:  %p\n", vnode->private_node);
3164 	kprintf(" mount:         %p\n", vnode->mount);
3165 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3166 	kprintf(" covers:        %p\n", vnode->covers);
3167 	kprintf(" cache:         %p\n", vnode->cache);
3168 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3169 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3170 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3171 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3172 
3173 	_dump_advisory_locking(vnode->advisory_locking);
3174 
3175 	if (printPath) {
3176 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3177 		if (buffer != NULL) {
3178 			bool truncated;
3179 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3180 				B_PATH_NAME_LENGTH, truncated);
3181 			if (path != NULL) {
3182 				kprintf(" path:          ");
3183 				if (truncated)
3184 					kputs("<truncated>/");
3185 				kputs(path);
3186 				kputs("\n");
3187 			} else
3188 				kprintf("Failed to resolve vnode path.\n");
3189 
3190 			debug_free(buffer);
3191 		} else
3192 			kprintf("Failed to allocate memory for constructing the path.\n");
3193 	}
3194 
3195 	set_debug_variable("_node", (addr_t)vnode->private_node);
3196 	set_debug_variable("_mount", (addr_t)vnode->mount);
3197 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3198 	set_debug_variable("_covers", (addr_t)vnode->covers);
3199 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3200 }
3201 
3202 
3203 static int
3204 dump_mount(int argc, char** argv)
3205 {
3206 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3207 		kprintf("usage: %s [id|address]\n", argv[0]);
3208 		return 0;
3209 	}
3210 
3211 	ulong val = parse_expression(argv[1]);
3212 	uint32 id = val;
3213 
3214 	struct fs_mount* mount = sMountsTable->Lookup(id);
3215 	if (mount == NULL) {
3216 		if (IS_USER_ADDRESS(val)) {
3217 			kprintf("fs_mount not found\n");
3218 			return 0;
3219 		}
3220 		mount = (fs_mount*)val;
3221 	}
3222 
3223 	_dump_mount(mount);
3224 	return 0;
3225 }
3226 
3227 
3228 static int
3229 dump_mounts(int argc, char** argv)
3230 {
3231 	if (argc != 1) {
3232 		kprintf("usage: %s\n", argv[0]);
3233 		return 0;
3234 	}
3235 
3236 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3237 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3238 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3239 
3240 	struct fs_mount* mount;
3241 
3242 	MountTable::Iterator iterator(sMountsTable);
3243 	while (iterator.HasNext()) {
3244 		mount = iterator.Next();
3245 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3246 			mount->root_vnode->covers, mount->volume->private_volume,
3247 			mount->volume->file_system_name);
3248 
3249 		fs_volume* volume = mount->volume;
3250 		while (volume->super_volume != NULL) {
3251 			volume = volume->super_volume;
3252 			kprintf("                                     %p %s\n",
3253 				volume->private_volume, volume->file_system_name);
3254 		}
3255 	}
3256 
3257 	return 0;
3258 }
3259 
3260 
3261 static int
3262 dump_vnode(int argc, char** argv)
3263 {
3264 	bool printPath = false;
3265 	int argi = 1;
3266 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3267 		printPath = true;
3268 		argi++;
3269 	}
3270 
3271 	if (argi >= argc || argi + 2 < argc) {
3272 		print_debugger_command_usage(argv[0]);
3273 		return 0;
3274 	}
3275 
3276 	struct vnode* vnode = NULL;
3277 
3278 	if (argi + 1 == argc) {
3279 		vnode = (struct vnode*)parse_expression(argv[argi]);
3280 		if (IS_USER_ADDRESS(vnode)) {
3281 			kprintf("invalid vnode address\n");
3282 			return 0;
3283 		}
3284 		_dump_vnode(vnode, printPath);
3285 		return 0;
3286 	}
3287 
3288 	dev_t device = parse_expression(argv[argi]);
3289 	ino_t id = parse_expression(argv[argi + 1]);
3290 
3291 	VnodeTable::Iterator iterator(sVnodeTable);
3292 	while (iterator.HasNext()) {
3293 		vnode = iterator.Next();
3294 		if (vnode->id != id || vnode->device != device)
3295 			continue;
3296 
3297 		_dump_vnode(vnode, printPath);
3298 	}
3299 
3300 	return 0;
3301 }
3302 
3303 
3304 static int
3305 dump_vnodes(int argc, char** argv)
3306 {
3307 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3308 		kprintf("usage: %s [device]\n", argv[0]);
3309 		return 0;
3310 	}
3311 
3312 	// restrict dumped nodes to the given device
3313 	dev_t device = parse_expression(argv[1]);
3314 
3315 	struct vnode* vnode;
3316 
3317 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3318 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3319 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3320 
3321 	VnodeTable::Iterator iterator(sVnodeTable);
3322 	while (iterator.HasNext()) {
3323 		vnode = iterator.Next();
3324 		if (vnode->device != device)
3325 			continue;
3326 
3327 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3328 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3329 			vnode->private_node, vnode->advisory_locking,
3330 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3331 			vnode->IsUnpublished() ? "u" : "-");
3332 	}
3333 
3334 	return 0;
3335 }
3336 
3337 
3338 static int
3339 dump_vnode_caches(int argc, char** argv)
3340 {
3341 	struct vnode* vnode;
3342 
3343 	if (argc > 2 || (argc == 2 && strcmp(argv[1], "--help") == 0)) {
3344 		kprintf("usage: %s [device]\n", argv[0]);
3345 		return 0;
3346 	}
3347 
3348 	// restrict dumped nodes to a certain device if requested
3349 	dev_t device = -1;
3350 	if (argc > 1)
3351 		device = parse_expression(argv[1]);
3352 
3353 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3354 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3355 
3356 	VnodeTable::Iterator iterator(sVnodeTable);
3357 	while (iterator.HasNext()) {
3358 		vnode = iterator.Next();
3359 		if (vnode->cache == NULL)
3360 			continue;
3361 		if (device != -1 && vnode->device != device)
3362 			continue;
3363 
3364 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3365 			vnode, vnode->device, vnode->id, vnode->cache,
3366 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3367 			vnode->cache->page_count);
3368 	}
3369 
3370 	return 0;
3371 }
3372 
3373 
3374 int
3375 dump_io_context(int argc, char** argv)
3376 {
3377 	if (argc > 2 || (argc == 2 && strcmp(argv[1], "--help") == 0)) {
3378 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3379 		return 0;
3380 	}
3381 
3382 	struct io_context* context = NULL;
3383 
3384 	if (argc > 1) {
3385 		ulong num = parse_expression(argv[1]);
3386 		if (IS_KERNEL_ADDRESS(num))
3387 			context = (struct io_context*)num;
3388 		else {
3389 			Team* team = team_get_team_struct_locked(num);
3390 			if (team == NULL) {
3391 				kprintf("could not find team with ID %lu\n", num);
3392 				return 0;
3393 			}
3394 			context = (struct io_context*)team->io_context;
3395 		}
3396 	} else
3397 		context = get_current_io_context(true);
3398 
3399 	kprintf("I/O CONTEXT: %p\n", context);
3400 	kprintf(" root vnode:\t%p\n", context->root);
3401 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3402 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3403 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3404 
3405 	if (context->num_used_fds) {
3406 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3407 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3408 	}
3409 
3410 	for (uint32 i = 0; i < context->table_size; i++) {
3411 		struct file_descriptor* fd = context->fds[i];
3412 		if (fd == NULL)
3413 			continue;
3414 
3415 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3416 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3417 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3418 			fd->pos, fd->cookie,
3419 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3420 				? "mount" : "vnode",
3421 			fd->u.vnode);
3422 	}
3423 
3424 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3425 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3426 
3427 	set_debug_variable("_cwd", (addr_t)context->cwd);
3428 
3429 	return 0;
3430 }
3431 
3432 
3433 int
3434 dump_vnode_usage(int argc, char** argv)
3435 {
3436 	if (argc != 1) {
3437 		kprintf("usage: %s\n", argv[0]);
3438 		return 0;
3439 	}
3440 
3441 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3442 		sUnusedVnodes, kMaxUnusedVnodes);
3443 
3444 	uint32 count = sVnodeTable->CountElements();
3445 
3446 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3447 		count - sUnusedVnodes);
3448 	return 0;
3449 }
3450 
3451 #endif	// ADD_DEBUGGER_COMMANDS
3452 
3453 
3454 /*!	Clears memory specified by an iovec array.
3455 */
3456 static void
3457 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3458 {
3459 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3460 		size_t length = std::min(vecs[i].iov_len, bytes);
3461 		memset(vecs[i].iov_base, 0, length);
3462 		bytes -= length;
3463 	}
3464 }
3465 
3466 
3467 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3468 	and calls the file system hooks to read/write the request to disk.
3469 */
3470 static status_t
3471 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3472 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3473 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3474 	bool doWrite)
3475 {
3476 	if (fileVecCount == 0) {
3477 		// There are no file vecs at this offset, so we're obviously trying
3478 		// to access the file outside of its bounds
3479 		return B_BAD_VALUE;
3480 	}
3481 
3482 	size_t numBytes = *_numBytes;
3483 	uint32 fileVecIndex;
3484 	size_t vecOffset = *_vecOffset;
3485 	uint32 vecIndex = *_vecIndex;
3486 	status_t status;
3487 	size_t size;
3488 
3489 	if (!doWrite && vecOffset == 0) {
3490 		// now directly read the data from the device
3491 		// the first file_io_vec can be read directly
3492 		// TODO: we could also write directly
3493 
3494 		if (fileVecs[0].length < (off_t)numBytes)
3495 			size = fileVecs[0].length;
3496 		else
3497 			size = numBytes;
3498 
3499 		if (fileVecs[0].offset >= 0) {
3500 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3501 				&vecs[vecIndex], vecCount - vecIndex, &size);
3502 		} else {
3503 			// sparse read
3504 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3505 			status = B_OK;
3506 		}
3507 		if (status != B_OK)
3508 			return status;
3509 
3510 		ASSERT((off_t)size <= fileVecs[0].length);
3511 
3512 		// If the file portion was contiguous, we're already done now
3513 		if (size == numBytes)
3514 			return B_OK;
3515 
3516 		// if we reached the end of the file, we can return as well
3517 		if ((off_t)size != fileVecs[0].length) {
3518 			*_numBytes = size;
3519 			return B_OK;
3520 		}
3521 
3522 		fileVecIndex = 1;
3523 
3524 		// first, find out where we have to continue in our iovecs
3525 		for (; vecIndex < vecCount; vecIndex++) {
3526 			if (size < vecs[vecIndex].iov_len)
3527 				break;
3528 
3529 			size -= vecs[vecIndex].iov_len;
3530 		}
3531 
3532 		vecOffset = size;
3533 	} else {
3534 		fileVecIndex = 0;
3535 		size = 0;
3536 	}
3537 
3538 	// Too bad, let's process the rest of the file_io_vecs
3539 
3540 	size_t totalSize = size;
3541 	size_t bytesLeft = numBytes - size;
3542 
3543 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3544 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3545 		off_t fileOffset = fileVec.offset;
3546 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3547 
3548 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3549 			fileLeft));
3550 
3551 		// process the complete fileVec
3552 		while (fileLeft > 0) {
3553 			iovec tempVecs[MAX_TEMP_IO_VECS];
3554 			uint32 tempCount = 0;
3555 
3556 			// size tracks how much of what is left of the current fileVec
3557 			// (fileLeft) has been assigned to tempVecs
3558 			size = 0;
3559 
3560 			// assign what is left of the current fileVec to the tempVecs
3561 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3562 					&& tempCount < MAX_TEMP_IO_VECS;) {
3563 				// try to satisfy one iovec per iteration (or as much as
3564 				// possible)
3565 
3566 				// bytes left of the current iovec
3567 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3568 				if (vecLeft == 0) {
3569 					vecOffset = 0;
3570 					vecIndex++;
3571 					continue;
3572 				}
3573 
3574 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3575 					vecIndex, vecOffset, size));
3576 
3577 				// actually available bytes
3578 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3579 
3580 				tempVecs[tempCount].iov_base
3581 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3582 				tempVecs[tempCount].iov_len = tempVecSize;
3583 				tempCount++;
3584 
3585 				size += tempVecSize;
3586 				vecOffset += tempVecSize;
3587 			}
3588 
3589 			size_t bytes = size;
3590 
3591 			if (fileOffset == -1) {
3592 				if (doWrite) {
3593 					panic("sparse write attempt: vnode %p", vnode);
3594 					status = B_IO_ERROR;
3595 				} else {
3596 					// sparse read
3597 					zero_iovecs(tempVecs, tempCount, bytes);
3598 					status = B_OK;
3599 				}
3600 			} else if (doWrite) {
3601 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3602 					tempVecs, tempCount, &bytes);
3603 			} else {
3604 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3605 					tempVecs, tempCount, &bytes);
3606 			}
3607 			if (status != B_OK)
3608 				return status;
3609 
3610 			totalSize += bytes;
3611 			bytesLeft -= size;
3612 			if (fileOffset >= 0)
3613 				fileOffset += size;
3614 			fileLeft -= size;
3615 			//dprintf("-> file left = %Lu\n", fileLeft);
3616 
3617 			if (size != bytes || vecIndex >= vecCount) {
3618 				// there are no more bytes or iovecs, let's bail out
3619 				*_numBytes = totalSize;
3620 				return B_OK;
3621 			}
3622 		}
3623 	}
3624 
3625 	*_vecIndex = vecIndex;
3626 	*_vecOffset = vecOffset;
3627 	*_numBytes = totalSize;
3628 	return B_OK;
3629 }
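

/*	Worked example for common_file_io_vec_pages() above (hypothetical
	numbers): a 400 byte request backed by
		fileVecs = { { offset 1000, length 300 }, { offset 5000, length 100 } }
	and a single 400 byte iovec results in one read/write of 300 bytes at
	device offset 1000 and another one of 100 bytes at offset 5000. A file
	vec offset of -1 denotes a sparse region: reads are zeroed out via
	zero_iovecs(), while writes cause a panic.
*/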
3630 
3631 
3632 static bool
3633 is_user_in_group(gid_t gid)
3634 {
3635 	if (gid == getegid())
3636 		return true;
3637 
3638 	gid_t groups[NGROUPS_MAX];
3639 	int groupCount = getgroups(NGROUPS_MAX, groups);
3640 	for (int i = 0; i < groupCount; i++) {
3641 		if (gid == groups[i])
3642 			return true;
3643 	}
3644 
3645 	return false;
3646 }
3647 
3648 
3649 static status_t
3650 free_io_context(io_context* context)
3651 {
3652 	uint32 i;
3653 
3654 	TIOC(FreeIOContext(context));
3655 
3656 	if (context->root)
3657 		put_vnode(context->root);
3658 
3659 	if (context->cwd)
3660 		put_vnode(context->cwd);
3661 
3662 	mutex_lock(&context->io_mutex);
3663 
3664 	for (i = 0; i < context->table_size; i++) {
3665 		if (struct file_descriptor* descriptor = context->fds[i]) {
3666 			close_fd(context, descriptor);
3667 			put_fd(descriptor);
3668 		}
3669 	}
3670 
3671 	mutex_destroy(&context->io_mutex);
3672 
3673 	remove_node_monitors(context);
3674 	free(context->fds);
3675 	free(context);
3676 
3677 	return B_OK;
3678 }
3679 
3680 
3681 static status_t
3682 resize_monitor_table(struct io_context* context, const int newSize)
3683 {
3684 	int	status = B_OK;
3685 
3686 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3687 		return B_BAD_VALUE;
3688 
3689 	mutex_lock(&context->io_mutex);
3690 
3691 	if ((size_t)newSize < context->num_monitors) {
3692 		status = B_BUSY;
3693 		goto out;
3694 	}
3695 	context->max_monitors = newSize;
3696 
3697 out:
3698 	mutex_unlock(&context->io_mutex);
3699 	return status;
3700 }
3701 
3702 
3703 //	#pragma mark - public API for file systems
3704 
3705 
3706 extern "C" status_t
3707 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3708 	fs_vnode_ops* ops)
3709 {
3710 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3711 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3712 
3713 	if (privateNode == NULL)
3714 		return B_BAD_VALUE;
3715 
3716 	int32 tries = BUSY_VNODE_RETRIES;
3717 restart:
3718 	// create the node
3719 	bool nodeCreated;
3720 	struct vnode* vnode;
3721 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3722 		nodeCreated);
3723 	if (status != B_OK)
3724 		return status;
3725 
3726 	WriteLocker nodeLocker(sVnodeLock, true);
3727 		// create_new_vnode_and_lock() has locked for us
3728 
3729 	if (!nodeCreated && vnode->IsBusy()) {
3730 		nodeLocker.Unlock();
3731 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3732 			return B_BUSY;
3733 		goto restart;
3734 	}
3735 
3736 	// file system integrity check:
3737 	// test if the vnode already exists and bail out if this is the case!
3738 	if (!nodeCreated) {
3739 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3740 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3741 			vnode->private_node);
3742 		return B_ERROR;
3743 	}
3744 
3745 	vnode->private_node = privateNode;
3746 	vnode->ops = ops;
3747 	vnode->SetUnpublished(true);
3748 
3749 	TRACE(("returns: %s\n", strerror(status)));
3750 
3751 	return status;
3752 }
3753 
3754 
3755 extern "C" status_t
3756 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3757 	fs_vnode_ops* ops, int type, uint32 flags)
3758 {
3759 	FUNCTION(("publish_vnode()\n"));
3760 
3761 	int32 tries = BUSY_VNODE_RETRIES;
3762 restart:
3763 	WriteLocker locker(sVnodeLock);
3764 
3765 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3766 
3767 	bool nodeCreated = false;
3768 	if (vnode == NULL) {
3769 		if (privateNode == NULL)
3770 			return B_BAD_VALUE;
3771 
3772 		// create the node
3773 		locker.Unlock();
3774 			// create_new_vnode_and_lock() will re-lock for us on success
3775 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3776 			nodeCreated);
3777 		if (status != B_OK)
3778 			return status;
3779 
3780 		locker.SetTo(sVnodeLock, true);
3781 	}
3782 
3783 	if (nodeCreated) {
3784 		vnode->private_node = privateNode;
3785 		vnode->ops = ops;
3786 		vnode->SetUnpublished(true);
3787 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3788 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3789 		// already known, but not published
3790 	} else if (vnode->IsBusy()) {
3791 		locker.Unlock();
3792 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3793 			return B_BUSY;
3794 		goto restart;
3795 	} else
3796 		return B_BAD_VALUE;
3797 
3798 	bool publishSpecialSubNode = false;
3799 
3800 	vnode->SetType(type);
3801 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3802 	publishSpecialSubNode = is_special_node_type(type)
3803 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3804 
3805 	status_t status = B_OK;
3806 
3807 	// create sub vnodes, if necessary
3808 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3809 		locker.Unlock();
3810 
3811 		fs_volume* subVolume = volume;
3812 		if (volume->sub_volume != NULL) {
3813 			while (status == B_OK && subVolume->sub_volume != NULL) {
3814 				subVolume = subVolume->sub_volume;
3815 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3816 					vnode);
3817 			}
3818 		}
3819 
3820 		if (status == B_OK && publishSpecialSubNode)
3821 			status = create_special_sub_node(vnode, flags);
3822 
3823 		if (status != B_OK) {
3824 			// error -- clean up the created sub vnodes
3825 			while (subVolume->super_volume != volume) {
3826 				subVolume = subVolume->super_volume;
3827 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3828 			}
3829 		}
3830 
3831 		if (status == B_OK) {
3832 			ReadLocker vnodesReadLocker(sVnodeLock);
3833 			AutoLocker<Vnode> nodeLocker(vnode);
3834 			vnode->SetBusy(false);
3835 			vnode->SetUnpublished(false);
3836 		} else {
3837 			locker.Lock();
3838 			sVnodeTable->Remove(vnode);
3839 			remove_vnode_from_mount_list(vnode, vnode->mount);
3840 			object_cache_free(sVnodeCache, vnode, 0);
3841 		}
3842 	} else {
3843 		// we still hold the write lock -- mark the node unbusy and published
3844 		vnode->SetBusy(false);
3845 		vnode->SetUnpublished(false);
3846 	}
3847 
3848 	TRACE(("returns: %s\n", strerror(status)));
3849 
3850 	return status;
3851 }
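

// Typical usage from a file system (a minimal sketch; `volume`, `inode` with
// its `id` and `mode` fields, and the ops table `gMyOps` are hypothetical
// placeholders): create the node first, then publish it once it is fully
// initialized:
//
//     status_t error = new_vnode(volume, inode->id, inode, &gMyOps);
//     if (error == B_OK) {
//         error = publish_vnode(volume, inode->id, inode, &gMyOps,
//             inode->mode & S_IFMT, 0);
//     }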
3852 
3853 
3854 extern "C" status_t
3855 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3856 {
3857 	struct vnode* vnode;
3858 
3859 	if (volume == NULL)
3860 		return B_BAD_VALUE;
3861 
3862 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3863 	if (status != B_OK)
3864 		return status;
3865 
3866 	// If this is a layered FS, we need to get the node cookie for the requested
3867 	// layer.
3868 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3869 		fs_vnode resolvedNode;
3870 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3871 			&resolvedNode);
3872 		if (status != B_OK) {
3873 			panic("get_vnode(): Failed to get super node for vnode %p, "
3874 				"volume: %p", vnode, volume);
3875 			put_vnode(vnode);
3876 			return status;
3877 		}
3878 
3879 		if (_privateNode != NULL)
3880 			*_privateNode = resolvedNode.private_node;
3881 	} else if (_privateNode != NULL)
3882 		*_privateNode = vnode->private_node;
3883 
3884 	return B_OK;
3885 }
3886 
3887 
3888 extern "C" status_t
3889 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3890 {
3891 	ReadLocker nodeLocker(sVnodeLock);
3892 
3893 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3894 	if (vnode == NULL)
3895 		return B_BAD_VALUE;
3896 
3897 	inc_vnode_ref_count(vnode);
3898 	return B_OK;
3899 }
3900 
3901 
3902 extern "C" status_t
3903 put_vnode(fs_volume* volume, ino_t vnodeID)
3904 {
3905 	struct vnode* vnode;
3906 
3907 	rw_lock_read_lock(&sVnodeLock);
3908 	vnode = lookup_vnode(volume->id, vnodeID);
3909 	rw_lock_read_unlock(&sVnodeLock);
3910 
3911 	if (vnode == NULL)
3912 		return B_BAD_VALUE;
3913 
3914 	dec_vnode_ref_count(vnode, false, true);
3915 	return B_OK;
3916 }
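

// Every successful get_vnode() must be balanced by a put_vnode() on the same
// (volume, vnodeID) pair, e.g. (sketch):
//
//     void* privateNode;
//     if (get_vnode(volume, vnodeID, &privateNode) == B_OK) {
//         // ... use privateNode ...
//         put_vnode(volume, vnodeID);
//     }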
3917 
3918 
3919 extern "C" status_t
3920 remove_vnode(fs_volume* volume, ino_t vnodeID)
3921 {
3922 	ReadLocker locker(sVnodeLock);
3923 
3924 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3925 	if (vnode == NULL)
3926 		return B_ENTRY_NOT_FOUND;
3927 
3928 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3929 		// this vnode is in use
3930 		return B_BUSY;
3931 	}
3932 
3933 	vnode->Lock();
3934 
3935 	vnode->SetRemoved(true);
3936 	bool removeUnpublished = false;
3937 
3938 	if (vnode->IsUnpublished()) {
3939 		// prepare the vnode for deletion
3940 		removeUnpublished = true;
3941 		vnode->SetBusy(true);
3942 	}
3943 
3944 	vnode->Unlock();
3945 	locker.Unlock();
3946 
3947 	if (removeUnpublished) {
3948 		// If the vnode hasn't been published yet, we delete it here
3949 		atomic_add(&vnode->ref_count, -1);
3950 		free_vnode(vnode, true);
3951 	}
3952 
3953 	return B_OK;
3954 }
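

// A file system typically calls remove_vnode() from its unlink/remove hook
// once the last entry referring to the node is gone, so that the node is
// deleted when its final reference is released (sketch; `inode` and its
// `link_count` are hypothetical):
//
//     if (inode->link_count == 0)
//         remove_vnode(volume, inode->id);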
3955 
3956 
3957 extern "C" status_t
3958 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3959 {
3960 	struct vnode* vnode;
3961 
3962 	rw_lock_read_lock(&sVnodeLock);
3963 
3964 	vnode = lookup_vnode(volume->id, vnodeID);
3965 	if (vnode) {
3966 		AutoLocker<Vnode> nodeLocker(vnode);
3967 		vnode->SetRemoved(false);
3968 	}
3969 
3970 	rw_lock_read_unlock(&sVnodeLock);
3971 	return B_OK;
3972 }
3973 
3974 
3975 extern "C" status_t
3976 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3977 {
3978 	ReadLocker _(sVnodeLock);
3979 
3980 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3981 		if (_removed != NULL)
3982 			*_removed = vnode->IsRemoved();
3983 		return B_OK;
3984 	}
3985 
3986 	return B_BAD_VALUE;
3987 }
3988 
3989 
3990 extern "C" fs_volume*
3991 volume_for_vnode(fs_vnode* _vnode)
3992 {
3993 	if (_vnode == NULL)
3994 		return NULL;
3995 
3996 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3997 	return vnode->mount->volume;
3998 }
3999 
4000 
4001 extern "C" status_t
4002 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4003 	uid_t nodeUserID)
4004 {
4005 	// get node permissions
4006 	int userPermissions = (mode & S_IRWXU) >> 6;
4007 	int groupPermissions = (mode & S_IRWXG) >> 3;
4008 	int otherPermissions = mode & S_IRWXO;
4009 
4010 	// get the node permissions for this uid/gid
4011 	int permissions = 0;
4012 	uid_t uid = geteuid();
4013 
4014 	if (uid == 0) {
4015 		// user is root
4016 		// root always has read/write permission, but at least one of the
4017 		// X bits must be set for execute permission
4018 		permissions = userPermissions | groupPermissions | otherPermissions
4019 			| S_IROTH | S_IWOTH;
4020 		if (S_ISDIR(mode))
4021 			permissions |= S_IXOTH;
4022 	} else if (uid == nodeUserID) {
4023 		// user is node owner
4024 		permissions = userPermissions;
4025 	} else if (is_user_in_group(nodeGroupID)) {
4026 		// user is in owning group
4027 		permissions = groupPermissions;
4028 	} else {
4029 		// user is one of the others
4030 		permissions = otherPermissions;
4031 	}
4032 
4033 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4034 }
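

// Example (sketch, assuming the caller is neither root nor in the owning
// group): for a node with mode 0644 owned by the caller, requesting
// R_OK | W_OK succeeds, while requesting X_OK does not:
//
//     check_access_permissions(R_OK | W_OK, 0644, gid, geteuid());
//         // B_OK
//     check_access_permissions(X_OK, 0644, gid, geteuid());
//         // B_PERMISSION_DENIED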
4035 
4036 
4037 #if 0
4038 extern "C" status_t
4039 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4040 	size_t* _numBytes)
4041 {
4042 	struct file_descriptor* descriptor;
4043 	struct vnode* vnode;
4044 
4045 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4046 	if (descriptor == NULL)
4047 		return B_FILE_ERROR;
4048 
4049 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4050 		count, 0, _numBytes);
4051 
4052 	put_fd(descriptor);
4053 	return status;
4054 }
4055 
4056 
4057 extern "C" status_t
4058 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4059 	size_t* _numBytes)
4060 {
4061 	struct file_descriptor* descriptor;
4062 	struct vnode* vnode;
4063 
4064 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4065 	if (descriptor == NULL)
4066 		return B_FILE_ERROR;
4067 
4068 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4069 		count, 0, _numBytes);
4070 
4071 	put_fd(descriptor);
4072 	return status;
4073 }
4074 #endif
4075 
4076 
4077 extern "C" status_t
4078 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4079 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4080 	size_t* _bytes)
4081 {
4082 	struct file_descriptor* descriptor;
4083 	struct vnode* vnode;
4084 
4085 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4086 	if (descriptor == NULL)
4087 		return B_FILE_ERROR;
4088 
4089 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4090 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4091 		false);
4092 
4093 	put_fd(descriptor);
4094 	return status;
4095 }
4096 
4097 
4098 extern "C" status_t
4099 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4100 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4101 	size_t* _bytes)
4102 {
4103 	struct file_descriptor* descriptor;
4104 	struct vnode* vnode;
4105 
4106 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4107 	if (descriptor == NULL)
4108 		return B_FILE_ERROR;
4109 
4110 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4111 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4112 		true);
4113 
4114 	put_fd(descriptor);
4115 	return status;
4116 }
4117 
4118 
4119 extern "C" status_t
4120 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4121 {
4122 	// lookup mount -- the caller is required to make sure that the mount
4123 	// won't go away
4124 	ReadLocker locker(sMountLock);
4125 	struct fs_mount* mount = find_mount(mountID);
4126 	if (mount == NULL)
4127 		return B_BAD_VALUE;
4128 	locker.Unlock();
4129 
4130 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4131 }
4132 
4133 
4134 extern "C" status_t
4135 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4136 {
4137 	// lookup mount -- the caller is required to make sure that the mount
4138 	// won't go away
4139 	ReadLocker locker(sMountLock);
4140 	struct fs_mount* mount = find_mount(mountID);
4141 	if (mount == NULL)
4142 		return B_BAD_VALUE;
4143 	locker.Unlock();
4144 
4145 	return mount->entry_cache.Add(dirID, name, -1, true);
4146 }
4147 
4148 
4149 extern "C" status_t
4150 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4151 {
4152 	// lookup mount -- the caller is required to make sure that the mount
4153 	// won't go away
4154 	ReadLocker locker(sMountLock);
4155 	struct fs_mount* mount = find_mount(mountID);
4156 	if (mount == NULL)
4157 		return B_BAD_VALUE;
4158 	locker.Unlock();
4159 
4160 	return mount->entry_cache.Remove(dirID, name);
4161 }
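

// Example (sketch): a file system keeping the entry cache in sync from its
// rename hook would remove the old entry and add the new one:
//
//     entry_cache_remove(volume->id, fromDirID, oldName);
//     entry_cache_add(volume->id, toDirID, newName, nodeID);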
4162 
4163 
4164 //	#pragma mark - private VFS API
4165 //	Functions the VFS exports for other parts of the kernel
4166 
4167 
4168 /*! Acquires another reference to the vnode that has to be released
4169 	by calling vfs_put_vnode().
4170 */
4171 void
4172 vfs_acquire_vnode(struct vnode* vnode)
4173 {
4174 	inc_vnode_ref_count(vnode);
4175 }
4176 
4177 
4178 /*! This is currently called from file_cache_create() only.
4179 	It's probably a temporary solution as long as devfs requires that
4180 	fs_read_pages()/fs_write_pages() are called with the standard
4181 	open cookie and not with a device cookie.
4182 	If that's done differently, remove this call; it has no other
4183 	purpose.
4184 */
4185 extern "C" status_t
4186 vfs_get_cookie_from_fd(int fd, void** _cookie)
4187 {
4188 	struct file_descriptor* descriptor;
4189 
4190 	descriptor = get_fd(get_current_io_context(true), fd);
4191 	if (descriptor == NULL)
4192 		return B_FILE_ERROR;
4193 
4194 	*_cookie = descriptor->cookie;
4195 	return B_OK;
4196 }
4197 
4198 
4199 extern "C" status_t
4200 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4201 {
4202 	*vnode = get_vnode_from_fd(fd, kernel);
4203 
4204 	if (*vnode == NULL)
4205 		return B_FILE_ERROR;
4206 
4207 	return B_NO_ERROR;
4208 }
4209 
4210 
4211 extern "C" status_t
4212 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4213 {
4214 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4215 		path, kernel));
4216 
4217 	KPath pathBuffer;
4218 	if (pathBuffer.InitCheck() != B_OK)
4219 		return B_NO_MEMORY;
4220 
4221 	char* buffer = pathBuffer.LockBuffer();
4222 	strlcpy(buffer, path, pathBuffer.BufferSize());
4223 
4224 	struct vnode* vnode;
4225 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4226 	if (status != B_OK)
4227 		return status;
4228 
4229 	*_vnode = vnode;
4230 	return B_OK;
4231 }
4232 
4233 
4234 extern "C" status_t
4235 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4236 {
4237 	struct vnode* vnode = NULL;
4238 
4239 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4240 	if (status != B_OK)
4241 		return status;
4242 
4243 	*_vnode = vnode;
4244 	return B_OK;
4245 }
4246 
4247 
4248 extern "C" status_t
4249 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4250 	const char* name, struct vnode** _vnode)
4251 {
4252 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4253 }
4254 
4255 
4256 extern "C" void
4257 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4258 {
4259 	*_mountID = vnode->device;
4260 	*_vnodeID = vnode->id;
4261 }
4262 
4263 
4264 /*!
4265 	Helper function abstracting the process of "converting" a given
4266 	vnode-pointer to a fs_vnode-pointer.
4267 	Currently only used in bindfs.
4268 */
4269 extern "C" fs_vnode*
4270 vfs_fsnode_for_vnode(struct vnode* vnode)
4271 {
4272 	return vnode;
4273 }
4274 
4275 
4276 /*!
4277 	Calls fs_open() on the given vnode and returns a new
4278 	file descriptor for it
4279 */
4280 int
4281 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4282 {
4283 	return open_vnode(vnode, openMode, kernel);
4284 }
4285 
4286 
4287 /*!	Looks up a vnode with the given mount and vnode ID.
4288 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4289 	to the node.
4290 	It's currently only used by file_cache_create().
4291 */
4292 extern "C" status_t
4293 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4294 {
4295 	rw_lock_read_lock(&sVnodeLock);
4296 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4297 	rw_lock_read_unlock(&sVnodeLock);
4298 
4299 	if (vnode == NULL)
4300 		return B_ERROR;
4301 
4302 	*_vnode = vnode;
4303 	return B_OK;
4304 }
4305 
4306 
4307 extern "C" status_t
4308 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4309 	bool traverseLeafLink, bool kernel, void** _node)
4310 {
4311 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4312 		volume, path, kernel));
4313 
4314 	KPath pathBuffer;
4315 	if (pathBuffer.InitCheck() != B_OK)
4316 		return B_NO_MEMORY;
4317 
4318 	fs_mount* mount;
4319 	status_t status = get_mount(volume->id, &mount);
4320 	if (status != B_OK)
4321 		return status;
4322 
4323 	char* buffer = pathBuffer.LockBuffer();
4324 	strlcpy(buffer, path, pathBuffer.BufferSize());
4325 
4326 	struct vnode* vnode = mount->root_vnode;
4327 
4328 	if (buffer[0] == '/')
4329 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4330 	else {
4331 		inc_vnode_ref_count(vnode);
4332 			// vnode_path_to_vnode() releases a reference to the starting vnode
4333 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4334 			kernel, &vnode, NULL);
4335 	}
4336 
4337 	put_mount(mount);
4338 
4339 	if (status != B_OK)
4340 		return status;
4341 
4342 	if (vnode->device != volume->id) {
4343 		// wrong mount ID - must not gain access on foreign file system nodes
4344 		put_vnode(vnode);
4345 		return B_BAD_VALUE;
4346 	}
4347 
4348 	// Use get_vnode() to resolve the cookie for the right layer.
4349 	status = get_vnode(volume, vnode->id, _node);
4350 	put_vnode(vnode);
4351 
4352 	return status;
4353 }
4354 
4355 
4356 status_t
4357 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4358 	struct stat* stat, bool kernel)
4359 {
4360 	status_t status;
4361 
4362 	if (path != NULL) {
4363 		// path given: get the stat of the node referred to by (fd, path)
4364 		KPath pathBuffer(path);
4365 		if (pathBuffer.InitCheck() != B_OK)
4366 			return B_NO_MEMORY;
4367 
4368 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4369 			traverseLeafLink, stat, kernel);
4370 	} else {
4371 		// no path given: get the FD and use the FD operation
4372 		struct file_descriptor* descriptor
4373 			= get_fd(get_current_io_context(kernel), fd);
4374 		if (descriptor == NULL)
4375 			return B_FILE_ERROR;
4376 
4377 		if (descriptor->ops->fd_read_stat)
4378 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4379 		else
4380 			status = B_UNSUPPORTED;
4381 
4382 		put_fd(descriptor);
4383 	}
4384 
4385 	return status;
4386 }
4387 
4388 
4389 /*!	Finds the full path to the file that contains the module \a moduleName,
4390 	puts it into \a pathBuffer, and returns B_OK for success.
4391 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4392 	\c B_ENTRY_NOT_FOUND if no file could be found.
4393 	\a pathBuffer is clobbered in any case and must not be relied on if this
4394 	function returns unsuccessfully.
4395 	\a basePath and \a pathBuffer must not point to the same space.
4396 */
4397 status_t
4398 vfs_get_module_path(const char* basePath, const char* moduleName,
4399 	char* pathBuffer, size_t bufferSize)
4400 {
4401 	struct vnode* dir;
4402 	struct vnode* file;
4403 	status_t status;
4404 	size_t length;
4405 	char* path;
4406 
4407 	if (bufferSize == 0
4408 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4409 		return B_BUFFER_OVERFLOW;
4410 
4411 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4412 	if (status != B_OK)
4413 		return status;
4414 
4415 	// the path buffer had been clobbered by the above call
4416 	length = strlcpy(pathBuffer, basePath, bufferSize);
4417 	if (pathBuffer[length - 1] != '/')
4418 		pathBuffer[length++] = '/';
4419 
4420 	path = pathBuffer + length;
4421 	bufferSize -= length;
4422 
4423 	while (moduleName) {
4424 		char* nextPath = strchr(moduleName, '/');
4425 		if (nextPath == NULL)
4426 			length = strlen(moduleName);
4427 		else {
4428 			length = nextPath - moduleName;
4429 			nextPath++;
4430 		}
4431 
4432 		if (length + 1 >= bufferSize) {
4433 			status = B_BUFFER_OVERFLOW;
4434 			goto err;
4435 		}
4436 
4437 		memcpy(path, moduleName, length);
4438 		path[length] = '\0';
4439 		moduleName = nextPath;
4440 
4441 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4442 		if (status != B_OK) {
4443 			// vnode_path_to_vnode() has already released the reference to dir
4444 			return status;
4445 		}
4446 
4447 		if (S_ISDIR(file->Type())) {
4448 			// go to the next directory
4449 			path[length] = '/';
4450 			path[length + 1] = '\0';
4451 			path += length + 1;
4452 			bufferSize -= length + 1;
4453 
4454 			dir = file;
4455 		} else if (S_ISREG(file->Type())) {
4456 			// it's a file so it should be what we've searched for
4457 			put_vnode(file);
4458 
4459 			return B_OK;
4460 		} else {
4461 			TRACE(("vfs_get_module_path(): something is strange here: "
4462 				"0x%08" B_PRIx32 "...\n", file->Type()));
4463 			status = B_ERROR;
4464 			dir = file;
4465 			goto err;
4466 		}
4467 	}
4468 
4469 	// if we got here, the moduleName just pointed to a directory, not to
4470 	// a real module - what should we do in this case?
4471 	status = B_ENTRY_NOT_FOUND;
4472 
4473 err:
4474 	put_vnode(dir);
4475 	return status;
4476 }
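

// Example (sketch; the base path and module name are illustrative only):
//
//     char path[B_PATH_NAME_LENGTH];
//     status_t error = vfs_get_module_path("/boot/system/add-ons/kernel",
//         "bus_managers/pci/v1", path, sizeof(path));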
4477 
4478 
4479 /*!	\brief Normalizes a given path.
4480 
4481 	The path must refer to an existing or non-existing entry in an existing
4482 	directory; that is, after chopping off the leaf component, the remaining
4483 	path must refer to an existing directory.
4484 
4485 	The returned path will be canonical in that it will be absolute, will not
4486 	contain any "." or ".." components or duplicate occurrences of '/'s,
4487 	and none of the directory components will be symbolic links.
4488 
4489 	Any two paths referring to the same entry will result in the same
4490 	normalized path (well, that is pretty much the definition of `normalized',
4491 	isn't it :-).
4492 
4493 	\param path The path to be normalized.
4494 	\param buffer The buffer into which the normalized path will be written.
4495 		   May be the same one as \a path.
4496 	\param bufferSize The size of \a buffer.
4497 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4498 	\param kernel \c true, if the IO context of the kernel shall be used,
4499 		   otherwise that of the team this thread belongs to. Only relevant
4500 		   if the path is relative (to get the CWD).
4501 	\return \c B_OK if everything went fine, another error code otherwise.
4502 */
4503 status_t
4504 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4505 	bool traverseLink, bool kernel)
4506 {
4507 	if (!path || !buffer || bufferSize < 1)
4508 		return B_BAD_VALUE;
4509 
4510 	if (path != buffer) {
4511 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4512 			return B_BUFFER_OVERFLOW;
4513 	}
4514 
4515 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4516 }
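

// Example (sketch): assuming no symlinks are involved,
// "/boot/home/../home//Desktop" normalizes to "/boot/home/Desktop":
//
//     char buffer[B_PATH_NAME_LENGTH];
//     status_t error = vfs_normalize_path("/boot/home/../home//Desktop",
//         buffer, sizeof(buffer), true, true);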
4517 
4518 
4519 /*!	\brief Gets the parent of the passed in node.
4520 
4521 	Gets the parent of the passed in node, and correctly resolves covered
4522 	nodes.
4523 */
4524 extern "C" status_t
4525 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4526 {
4527 	return resolve_covered_parent(parent, device, node,
4528 		get_current_io_context(true));
4529 }
4530 
4531 
4532 /*!	\brief Creates a special node in the file system.
4533 
4534 	The caller gets a reference to the newly created node (which is passed
4535 	back through \a _createdVnode) and is responsible for releasing it.
4536 
4537 	\param path The path where to create the entry for the node. Can be \c NULL,
4538 		in which case the node is created without an entry in the root FS -- it
4539 		will automatically be deleted when the last reference has been released.
4540 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4541 		the target file system will just create the node with its standard
4542 		operations. Depending on the type of the node a subnode might be created
4543 		automatically, though.
4544 	\param mode The type and permissions for the node to be created.
4545 	\param flags Flags to be passed to the creating FS.
4546 	\param kernel \c true, if called in the kernel context (relevant only if
4547 		\a path is not \c NULL and not absolute).
4548 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4549 		file system creating the node, with the private data pointer and
4550 		operations for the super node. Can be \c NULL.
4551 	\param _createdVnode Pointer to pre-allocated storage in which to store the
4552 		pointer to the newly created node.
4553 	\return \c B_OK, if everything went fine, another error code otherwise.
4554 */
4555 status_t
4556 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4557 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4558 	struct vnode** _createdVnode)
4559 {
4560 	struct vnode* dirNode;
4561 	char _leaf[B_FILE_NAME_LENGTH];
4562 	char* leaf = NULL;
4563 
4564 	if (path) {
4565 		// We've got a path. Get the dir vnode and the leaf name.
4566 		KPath tmpPathBuffer;
4567 		if (tmpPathBuffer.InitCheck() != B_OK)
4568 			return B_NO_MEMORY;
4569 
4570 		char* tmpPath = tmpPathBuffer.LockBuffer();
4571 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4572 			return B_NAME_TOO_LONG;
4573 
4574 		// get the dir vnode and the leaf name
4575 		leaf = _leaf;
4576 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4577 		if (error != B_OK)
4578 			return error;
4579 	} else {
4580 		// No path. Create the node in the root FS.
4581 		dirNode = sRoot;
4582 		inc_vnode_ref_count(dirNode);
4583 	}
4584 
4585 	VNodePutter _(dirNode);
4586 
4587 	// check support for creating special nodes
4588 	if (!HAS_FS_CALL(dirNode, create_special_node))
4589 		return B_UNSUPPORTED;
4590 
4591 	// create the node
4592 	fs_vnode superVnode;
4593 	ino_t nodeID;
4594 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4595 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4596 	if (status != B_OK)
4597 		return status;
4598 
4599 	// lookup the node
4600 	rw_lock_read_lock(&sVnodeLock);
4601 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4602 	rw_lock_read_unlock(&sVnodeLock);
4603 
4604 	if (*_createdVnode == NULL) {
4605 		panic("vfs_create_special_node(): lookup of node failed");
4606 		return B_ERROR;
4607 	}
4608 
4609 	return B_OK;
4610 }
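

// Example (sketch; the path is illustrative): creating a FIFO with an entry
// in the file system and releasing the reference we get back:
//
//     struct vnode* fifoVnode;
//     status_t error = vfs_create_special_node("/var/run/my_fifo", NULL,
//         S_IFIFO | 0666, 0, true, NULL, &fifoVnode);
//     if (error == B_OK)
//         vfs_put_vnode(fifoVnode);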
4611 
4612 
4613 extern "C" void
4614 vfs_put_vnode(struct vnode* vnode)
4615 {
4616 	put_vnode(vnode);
4617 }
4618 
4619 
4620 extern "C" status_t
4621 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4622 {
4623 	// Get current working directory from io context
4624 	struct io_context* context = get_current_io_context(false);
4625 	status_t status = B_OK;
4626 
4627 	mutex_lock(&context->io_mutex);
4628 
4629 	if (context->cwd != NULL) {
4630 		*_mountID = context->cwd->device;
4631 		*_vnodeID = context->cwd->id;
4632 	} else
4633 		status = B_ERROR;
4634 
4635 	mutex_unlock(&context->io_mutex);
4636 	return status;
4637 }
4638 
4639 
4640 status_t
4641 vfs_unmount(dev_t mountID, uint32 flags)
4642 {
4643 	return fs_unmount(NULL, mountID, flags, true);
4644 }
4645 
4646 
4647 extern "C" status_t
4648 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4649 {
4650 	struct vnode* vnode;
4651 
4652 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4653 	if (status != B_OK)
4654 		return status;
4655 
4656 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4657 	put_vnode(vnode);
4658 	return B_OK;
4659 }
4660 
4661 
4662 extern "C" void
4663 vfs_free_unused_vnodes(int32 level)
4664 {
4665 	vnode_low_resource_handler(NULL,
4666 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4667 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4668 		level);
4669 }
4670 
4671 
4672 extern "C" bool
4673 vfs_can_page(struct vnode* vnode, void* cookie)
4674 {
4675 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4676 
4677 	if (HAS_FS_CALL(vnode, can_page))
4678 		return FS_CALL(vnode, can_page, cookie);
4679 	return false;
4680 }
4681 
4682 
4683 extern "C" status_t
4684 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4685 	const generic_io_vec* vecs, size_t count, uint32 flags,
4686 	generic_size_t* _numBytes)
4687 {
4688 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4689 		vecs, pos));
4690 
4691 #if VFS_PAGES_IO_TRACING
4692 	generic_size_t bytesRequested = *_numBytes;
4693 #endif
4694 
4695 	IORequest request;
4696 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4697 	if (status == B_OK) {
4698 		status = vfs_vnode_io(vnode, cookie, &request);
4699 		if (status == B_OK)
4700 			status = request.Wait();
4701 		*_numBytes = request.TransferredBytes();
4702 	}
4703 
4704 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4705 		status, *_numBytes));
4706 
4707 	return status;
4708 }
4709 
4710 
4711 extern "C" status_t
4712 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4713 	const generic_io_vec* vecs, size_t count, uint32 flags,
4714 	generic_size_t* _numBytes)
4715 {
4716 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4717 		vecs, pos));
4718 
4719 #if VFS_PAGES_IO_TRACING
4720 	generic_size_t bytesRequested = *_numBytes;
4721 #endif
4722 
4723 	IORequest request;
4724 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4725 	if (status == B_OK) {
4726 		status = vfs_vnode_io(vnode, cookie, &request);
4727 		if (status == B_OK)
4728 			status = request.Wait();
4729 		*_numBytes = request.TransferredBytes();
4730 	}
4731 
4732 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4733 		status, *_numBytes));
4734 
4735 	return status;
4736 }
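

// Example (sketch; `vnode`, `cookie` and `buffer` are assumed to be set up
// by the caller): reading one page from the start of a vnode into a buffer
// described by a single generic_io_vec:
//
//     generic_io_vec vec = { (generic_addr_t)buffer, B_PAGE_SIZE };
//     generic_size_t length = B_PAGE_SIZE;
//     status_t error = vfs_read_pages(vnode, cookie, 0, &vec, 1, 0, &length);
//         // on return, `length` holds the number of bytes actually read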
4737 
4738 
4739 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4740 	created if \a allocate is \c true.
4741 	In case it's successful, it will also grab a reference to the cache
4742 	it returns.
4743 */
4744 extern "C" status_t
4745 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4746 {
4747 	if (vnode->cache != NULL) {
4748 		vnode->cache->AcquireRef();
4749 		*_cache = vnode->cache;
4750 		return B_OK;
4751 	}
4752 
4753 	rw_lock_read_lock(&sVnodeLock);
4754 	vnode->Lock();
4755 
4756 	status_t status = B_OK;
4757 
4758 	// The cache could have been created in the meantime
4759 	if (vnode->cache == NULL) {
4760 		if (allocate) {
4761 			// TODO: actually the vnode needs to be busy already here, or
4762 			//	else this won't work...
4763 			bool wasBusy = vnode->IsBusy();
4764 			vnode->SetBusy(true);
4765 
4766 			vnode->Unlock();
4767 			rw_lock_read_unlock(&sVnodeLock);
4768 
4769 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4770 
4771 			rw_lock_read_lock(&sVnodeLock);
4772 			vnode->Lock();
4773 			vnode->SetBusy(wasBusy);
4774 		} else
4775 			status = B_BAD_VALUE;
4776 	}
4777 
4778 	vnode->Unlock();
4779 	rw_lock_read_unlock(&sVnodeLock);
4780 
4781 	if (status == B_OK) {
4782 		vnode->cache->AcquireRef();
4783 		*_cache = vnode->cache;
4784 	}
4785 
4786 	return status;
4787 }
4788 
4789 
4790 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4791 	their own.
4792 	In case it's successful, it will also grab a reference to the cache
4793 	that was passed in.
4794 */
4795 extern "C" status_t
4796 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4797 {
4798 	rw_lock_read_lock(&sVnodeLock);
4799 	vnode->Lock();
4800 
4801 	status_t status = B_OK;
4802 	if (vnode->cache != NULL) {
4803 		status = B_NOT_ALLOWED;
4804 	} else {
4805 		vnode->cache = _cache;
4806 		_cache->AcquireRef();
4807 	}
4808 
4809 	vnode->Unlock();
4810 	rw_lock_read_unlock(&sVnodeLock);
4811 	return status;
4812 }
4813 
4814 
4815 status_t
4816 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4817 	file_io_vec* vecs, size_t* _count)
4818 {
4819 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4820 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4821 
4822 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4823 }
4824 
4825 
4826 status_t
4827 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4828 {
4829 	status_t status = FS_CALL(vnode, read_stat, stat);
4830 
4831 	// fill in the st_dev and st_ino fields
4832 	if (status == B_OK) {
4833 		stat->st_dev = vnode->device;
4834 		stat->st_ino = vnode->id;
4835 		// the rdev field must stay unset for non-special files
4836 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4837 			stat->st_rdev = -1;
4838 	}
4839 
4840 	return status;
4841 }
4842 
4843 
4844 status_t
4845 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4846 {
4847 	struct vnode* vnode;
4848 	status_t status = get_vnode(device, inode, &vnode, true, false);
4849 	if (status != B_OK)
4850 		return status;
4851 
4852 	status = vfs_stat_vnode(vnode, stat);
4853 
4854 	put_vnode(vnode);
4855 	return status;
4856 }
4857 
4858 
4859 status_t
4860 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4861 {
4862 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4863 }
4864 
4865 
4866 status_t
4867 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4868 	bool kernel, char* path, size_t pathLength)
4869 {
4870 	struct vnode* vnode;
4871 	status_t status;
4872 
4873 	// filter invalid leaf names
4874 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4875 		return B_BAD_VALUE;
4876 
4877 	// get the vnode matching the dir's node_ref
4878 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4879 		// special cases "." and "..": we can directly get the vnode of the
4880 		// referenced directory
4881 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4882 		leaf = NULL;
4883 	} else
4884 		status = get_vnode(device, inode, &vnode, true, false);
4885 	if (status != B_OK)
4886 		return status;
4887 
4888 	// get the directory path
4889 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4890 	put_vnode(vnode);
4891 		// we don't need the vnode anymore
4892 	if (status != B_OK)
4893 		return status;
4894 
4895 	// append the leaf name
4896 	if (leaf) {
4897 		// insert a directory separator if this is not the file system root
4898 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4899 				>= pathLength)
4900 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4901 			return B_NAME_TOO_LONG;
4902 		}
4903 	}
4904 
4905 	return B_OK;
4906 }
4907 
4908 
4909 /*!	If the given descriptor locked its vnode, that lock will be released. */
4910 void
4911 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4912 {
4913 	struct vnode* vnode = fd_vnode(descriptor);
4914 
4915 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4916 		vnode->mandatory_locked_by = NULL;
4917 }
4918 
4919 
4920 /*!	Releases any POSIX locks on the file descriptor. */
4921 status_t
4922 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4923 {
4924 	struct vnode* vnode = descriptor->u.vnode;
4925 	if (vnode == NULL)
4926 		return B_OK;
4927 
4928 	if (HAS_FS_CALL(vnode, release_lock))
4929 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4930 
4931 	return release_advisory_lock(vnode, context, NULL, NULL);
4932 }
4933 
4934 
4935 /*!	Closes all file descriptors of the specified I/O context that
4936 	have the O_CLOEXEC flag set.
4937 */
4938 void
4939 vfs_exec_io_context(io_context* context)
4940 {
4941 	uint32 i;
4942 
4943 	for (i = 0; i < context->table_size; i++) {
4944 		mutex_lock(&context->io_mutex);
4945 
4946 		struct file_descriptor* descriptor = context->fds[i];
4947 		bool remove = false;
4948 
4949 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4950 			context->fds[i] = NULL;
4951 			context->num_used_fds--;
4952 
4953 			remove = true;
4954 		}
4955 
4956 		mutex_unlock(&context->io_mutex);
4957 
4958 		if (remove) {
4959 			close_fd(context, descriptor);
4960 			put_fd(descriptor);
4961 		}
4962 	}
4963 }
4964 
4965 
4966 /*! Sets up a new io_context structure, and inherits the properties
4967 	of the parent io_context if one is given.
4968 */
4969 io_context*
4970 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4971 {
4972 	io_context* context = (io_context*)malloc(sizeof(io_context));
4973 	if (context == NULL)
4974 		return NULL;
4975 
4976 	TIOC(NewIOContext(context, parentContext));
4977 
4978 	memset(context, 0, sizeof(io_context));
4979 	context->ref_count = 1;
4980 
4981 	MutexLocker parentLocker;
4982 
4983 	size_t tableSize;
4984 	if (parentContext != NULL) {
4985 		parentLocker.SetTo(parentContext->io_mutex, false);
4986 		tableSize = parentContext->table_size;
4987 	} else
4988 		tableSize = DEFAULT_FD_TABLE_SIZE;
4989 
4990 	// allocate space for FDs and their close-on-exec flag
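	// The single allocation is laid out as three consecutive regions:
	// tableSize file_descriptor pointers, tableSize select_info pointers,
	// and a close-on-exec bitmap with one bit per FD, rounded up to whole
	// bytes.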
4991 	context->fds = (file_descriptor**)malloc(
4992 		sizeof(struct file_descriptor*) * tableSize
4993 		+ sizeof(struct select_info**) * tableSize
4994 		+ (tableSize + 7) / 8);
4995 	if (context->fds == NULL) {
4996 		free(context);
4997 		return NULL;
4998 	}
4999 
5000 	context->select_infos = (select_info**)(context->fds + tableSize);
5001 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5002 
5003 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5004 		+ sizeof(struct select_info**) * tableSize
5005 		+ (tableSize + 7) / 8);
5006 
5007 	mutex_init(&context->io_mutex, "I/O context");
5008 
5009 	// Copy all parent file descriptors
5010 
5011 	if (parentContext != NULL) {
5012 		size_t i;
5013 
5014 		mutex_lock(&sIOContextRootLock);
5015 		context->root = parentContext->root;
5016 		if (context->root)
5017 			inc_vnode_ref_count(context->root);
5018 		mutex_unlock(&sIOContextRootLock);
5019 
5020 		context->cwd = parentContext->cwd;
5021 		if (context->cwd)
5022 			inc_vnode_ref_count(context->cwd);
5023 
5024 		if (parentContext->inherit_fds) {
5025 			for (i = 0; i < tableSize; i++) {
5026 				struct file_descriptor* descriptor = parentContext->fds[i];
5027 
5028 				if (descriptor != NULL
5029 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5030 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5031 					if (closeOnExec && purgeCloseOnExec)
5032 						continue;
5033 
5034 					TFD(InheritFD(context, i, descriptor, parentContext));
5035 
5036 					context->fds[i] = descriptor;
5037 					context->num_used_fds++;
5038 					atomic_add(&descriptor->ref_count, 1);
5039 					atomic_add(&descriptor->open_count, 1);
5040 
5041 					if (closeOnExec)
5042 						fd_set_close_on_exec(context, i, true);
5043 				}
5044 			}
5045 		}
5046 
5047 		parentLocker.Unlock();
5048 	} else {
5049 		context->root = sRoot;
5050 		context->cwd = sRoot;
5051 
5052 		if (context->root)
5053 			inc_vnode_ref_count(context->root);
5054 
5055 		if (context->cwd)
5056 			inc_vnode_ref_count(context->cwd);
5057 	}
5058 
5059 	context->table_size = tableSize;
5060 	context->inherit_fds = parentContext != NULL;
5061 
5062 	list_init(&context->node_monitors);
5063 	context->max_monitors = DEFAULT_NODE_MONITORS;
5064 
5065 	return context;
5066 }
5067 
5068 
5069 void
5070 vfs_get_io_context(io_context* context)
5071 {
5072 	atomic_add(&context->ref_count, 1);
5073 }
5074 
5075 
5076 void
5077 vfs_put_io_context(io_context* context)
5078 {
5079 	if (atomic_add(&context->ref_count, -1) == 1)
5080 		free_io_context(context);
5081 }
5082 
5083 
5084 status_t
5085 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5086 {
5087 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5088 		return B_BAD_VALUE;
5089 
5090 	TIOC(ResizeIOContext(context, newSize));
5091 
5092 	MutexLocker _(context->io_mutex);
5093 
5094 	uint32 oldSize = context->table_size;
5095 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5096 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5097 
5098 	// If the tables shrink, make sure none of the fds being dropped are in use.
5099 	if (newSize < oldSize) {
5100 		for (uint32 i = oldSize; i-- > newSize;) {
5101 			if (context->fds[i])
5102 				return B_BUSY;
5103 		}
5104 	}
5105 
5106 	// store pointers to the old tables
5107 	file_descriptor** oldFDs = context->fds;
5108 	select_info** oldSelectInfos = context->select_infos;
5109 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5110 
5111 	// allocate new tables
5112 	file_descriptor** newFDs = (file_descriptor**)malloc(
5113 		sizeof(struct file_descriptor*) * newSize
5114 		+ sizeof(struct select_info**) * newSize
5115 		+ newCloseOnExitBitmapSize);
5116 	if (newFDs == NULL)
5117 		return B_NO_MEMORY;
5118 
5119 	context->fds = newFDs;
5120 	context->select_infos = (select_info**)(context->fds + newSize);
5121 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5122 	context->table_size = newSize;
5123 
5124 	// copy entries from old tables
5125 	uint32 toCopy = min_c(oldSize, newSize);
5126 
5127 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5128 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5129 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5130 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5131 
5132 	// clear additional entries, if the tables grow
5133 	if (newSize > oldSize) {
5134 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5135 		memset(context->select_infos + oldSize, 0,
5136 			sizeof(void*) * (newSize - oldSize));
5137 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5138 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5139 	}
5140 
5141 	free(oldFDs);
5142 
5143 	return B_OK;
5144 }
5145 
5146 
5147 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5148 
5149 	Given an arbitrary vnode (identified by mount and node ID), the function
5150 	checks whether the vnode is covered by another vnode. If it is, the
5151 	function returns the mount and node ID of the covering vnode. Otherwise
5152 	it simply returns the supplied mount and node ID.
5153 
5154 	In case of error (e.g. the supplied node could not be found) the variables
5155 	for storing the resolved mount and node ID remain untouched and an error
5156 	code is returned.
5157 
5158 	\param mountID The mount ID of the vnode in question.
5159 	\param nodeID The node ID of the vnode in question.
5160 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5161 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5162 	\return
5163 	- \c B_OK, if everything went fine,
5164 	- another error code, if something went wrong.
5165 */
5166 status_t
5167 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5168 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5169 {
5170 	// get the node
5171 	struct vnode* node;
5172 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5173 	if (error != B_OK)
5174 		return error;
5175 
5176 	// resolve the node
5177 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5178 		put_vnode(node);
5179 		node = coveringNode;
5180 	}
5181 
5182 	// set the return values
5183 	*resolvedMountID = node->device;
5184 	*resolvedNodeID = node->id;
5185 
5186 	put_vnode(node);
5187 
5188 	return B_OK;
5189 }
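

// Example (sketch): if (mountID, nodeID) identifies a directory that serves
// as a mount point, the resolved IDs refer to the root of the volume mounted
// there:
//
//     dev_t coveringMountID;
//     ino_t coveringNodeID;
//     status_t error = vfs_resolve_vnode_to_covering_vnode(mountID, nodeID,
//         &coveringMountID, &coveringNodeID);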
5190 
5191 
5192 status_t
5193 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5194 	ino_t* _mountPointNodeID)
5195 {
5196 	ReadLocker nodeLocker(sVnodeLock);
5197 	ReadLocker mountLocker(sMountLock);
5198 
5199 	struct fs_mount* mount = find_mount(mountID);
5200 	if (mount == NULL)
5201 		return B_BAD_VALUE;
5202 
5203 	Vnode* mountPoint = mount->covers_vnode;
5204 
5205 	*_mountPointMountID = mountPoint->device;
5206 	*_mountPointNodeID = mountPoint->id;
5207 
5208 	return B_OK;
5209 }
5210 
5211 
5212 status_t
5213 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5214 	ino_t coveredNodeID)
5215 {
5216 	// get the vnodes
5217 	Vnode* vnode;
5218 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5219 	if (error != B_OK)
5220 		return B_BAD_VALUE;
5221 	VNodePutter vnodePutter(vnode);
5222 
5223 	Vnode* coveredVnode;
5224 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5225 		false);
5226 	if (error != B_OK)
5227 		return B_BAD_VALUE;
5228 	VNodePutter coveredVnodePutter(coveredVnode);
5229 
5230 	// establish the covered/covering links
5231 	WriteLocker locker(sVnodeLock);
5232 
5233 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5234 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5235 		return B_BUSY;
5236 	}
5237 
5238 	vnode->covers = coveredVnode;
5239 	vnode->SetCovering(true);
5240 
5241 	coveredVnode->covered_by = vnode;
5242 	coveredVnode->SetCovered(true);
5243 
5244 	// the vnodes do now reference each other
5245 	inc_vnode_ref_count(vnode);
5246 	inc_vnode_ref_count(coveredVnode);
5247 
5248 	return B_OK;
5249 }
5250 
5251 
5252 int
5253 vfs_getrlimit(int resource, struct rlimit* rlp)
5254 {
5255 	if (!rlp)
5256 		return B_BAD_ADDRESS;
5257 
5258 	switch (resource) {
5259 		case RLIMIT_NOFILE:
5260 		{
5261 			struct io_context* context = get_current_io_context(false);
5262 			MutexLocker _(context->io_mutex);
5263 
5264 			rlp->rlim_cur = context->table_size;
5265 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5266 			return 0;
5267 		}
5268 
5269 		case RLIMIT_NOVMON:
5270 		{
5271 			struct io_context* context = get_current_io_context(false);
5272 			MutexLocker _(context->io_mutex);
5273 
5274 			rlp->rlim_cur = context->max_monitors;
5275 			rlp->rlim_max = MAX_NODE_MONITORS;
5276 			return 0;
5277 		}
5278 
5279 		default:
5280 			return B_BAD_VALUE;
5281 	}
5282 }
5283 
5284 
5285 int
5286 vfs_setrlimit(int resource, const struct rlimit* rlp)
5287 {
5288 	if (!rlp)
5289 		return B_BAD_ADDRESS;
5290 
5291 	switch (resource) {
5292 		case RLIMIT_NOFILE:
5293 			/* TODO: check getuid() */
5294 			if (rlp->rlim_max != RLIM_SAVED_MAX
5295 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5296 				return B_NOT_ALLOWED;
5297 
5298 			return vfs_resize_fd_table(get_current_io_context(false),
5299 				rlp->rlim_cur);
5300 
5301 		case RLIMIT_NOVMON:
5302 			/* TODO: check getuid() */
5303 			if (rlp->rlim_max != RLIM_SAVED_MAX
5304 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5305 				return B_NOT_ALLOWED;
5306 
5307 			return resize_monitor_table(get_current_io_context(false),
5308 				rlp->rlim_cur);
5309 
5310 		default:
5311 			return B_BAD_VALUE;
5312 	}
5313 }
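

// Example (sketch; assumes 2048 does not exceed MAX_FD_TABLE_SIZE): raising
// the current team's FD table limit. Note that rlim_max must be
// MAX_FD_TABLE_SIZE (or RLIM_SAVED_MAX) for the call to be allowed:
//
//     struct rlimit rl = { 2048, MAX_FD_TABLE_SIZE };
//     int result = vfs_setrlimit(RLIMIT_NOFILE, &rl);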
5314 
5315 
5316 status_t
5317 vfs_init(kernel_args* args)
5318 {
5319 	vnode::StaticInit();
5320 
5321 	sVnodeTable = new(std::nothrow) VnodeTable();
5322 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5323 		panic("vfs_init: error creating vnode hash table\n");
5324 
5325 	struct vnode dummy_vnode;
5326 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5327 
5328 	struct fs_mount dummyMount;
5329 	sMountsTable = new(std::nothrow) MountTable();
5330 	if (sMountsTable == NULL
5331 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5332 		panic("vfs_init: error creating mounts hash table\n");
5333 
5334 	sPathNameCache = create_object_cache("vfs path names",
5335 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5336 	if (sPathNameCache == NULL)
5337 		panic("vfs_init: error creating path name object_cache\n");
5338 
5339 	sVnodeCache = create_object_cache("vfs vnodes",
5340 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5341 	if (sVnodeCache == NULL)
5342 		panic("vfs_init: error creating vnode object_cache\n");
5343 
5344 	sFileDescriptorCache = create_object_cache("vfs fds",
5345 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5346 	if (sFileDescriptorCache == NULL)
5347 		panic("vfs_init: error creating file descriptor object_cache\n");
5348 
5349 	node_monitor_init();
5350 
5351 	sRoot = NULL;
5352 
5353 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5354 
5355 	if (block_cache_init() != B_OK)
5356 		return B_ERROR;
5357 
5358 #ifdef ADD_DEBUGGER_COMMANDS
5359 	// add some debugger commands
5360 	add_debugger_command_etc("vnode", &dump_vnode,
5361 		"Print info about the specified vnode",
5362 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5363 		"Prints information about the vnode specified by address <vnode> or\n"
5364 		"<devID>, <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5365 		"constructed and printed. It might not be possible to construct a\n"
5366 		"complete path, though.\n",
5367 		0);
5368 	add_debugger_command("vnodes", &dump_vnodes,
5369 		"list all vnodes (from the specified device)");
5370 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5371 		"list all vnode caches");
5372 	add_debugger_command("mount", &dump_mount,
5373 		"info about the specified fs_mount");
5374 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5375 	add_debugger_command("io_context", &dump_io_context,
5376 		"info about the I/O context");
5377 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5378 		"info about vnode usage");
5379 #endif
5380 
5381 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5382 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5383 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5384 		0);
5385 
5386 	fifo_init();
5387 	file_map_init();
5388 
5389 	return file_cache_init();
5390 }
5391 
5392 
5393 //	#pragma mark - fd_ops implementations
5394 
5395 
5396 /*!
5397 	Calls fs_open() on the given vnode and returns a new
5398 	file descriptor for it
5399 */
5400 static int
5401 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5402 {
5403 	void* cookie;
5404 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5405 	if (status != B_OK)
5406 		return status;
5407 
5408 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5409 	if (fd < 0) {
5410 		FS_CALL(vnode, close, cookie);
5411 		FS_CALL(vnode, free_cookie, cookie);
5412 	}
5413 	return fd;
5414 }
5415 
5416 
5417 /*!
5418 	Creates the entry \a name in \a directory (or opens it if it already
5419 	exists and O_EXCL isn't given), and returns a new file descriptor for it
5420 */
5421 static int
5422 create_vnode(struct vnode* directory, const char* name, int openMode,
5423 	int perms, bool kernel)
5424 {
5425 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5426 	status_t status = B_ERROR;
5427 	struct vnode* vnode;
5428 	void* cookie;
5429 	ino_t newID;
5430 
5431 	// This is somewhat tricky: If the entry already exists, the FS responsible
5432 	// for the directory might not necessarily also be the one responsible for
5433 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5434 	// we can actually never call the create() hook without O_EXCL. Instead we
5435 	// try to look the entry up first. If it already exists, we just open the
5436 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5437 	// introduces a race condition, since someone else might have created the
5438 	// entry in the meantime. We hope the respective FS returns the correct
5439 	// error code, in which case we retry (up to 3 times).
5440 
5441 	for (int i = 0; i < 3 && status != B_OK; i++) {
5442 		// look the node up
5443 		status = lookup_dir_entry(directory, name, &vnode);
5444 		if (status == B_OK) {
5445 			VNodePutter putter(vnode);
5446 
5447 			if ((openMode & O_EXCL) != 0)
5448 				return B_FILE_EXISTS;
5449 
5450 			// If the node is a symlink, we have to follow it, unless
5451 			// O_NOTRAVERSE is set.
5452 			if (S_ISLNK(vnode->Type()) && traverse) {
5453 				putter.Put();
5454 				char clonedName[B_FILE_NAME_LENGTH + 1];
5455 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5456 						>= B_FILE_NAME_LENGTH) {
5457 					return B_NAME_TOO_LONG;
5458 				}
5459 
5460 				inc_vnode_ref_count(directory);
5461 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5462 					kernel, &vnode, NULL);
5463 				if (status != B_OK)
5464 					return status;
5465 
5466 				putter.SetTo(vnode);
5467 			}
5468 
5469 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5470 				return B_LINK_LIMIT;
5471 
5472 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5473 			// on success keep the vnode reference for the FD
5474 			if (fd >= 0)
5475 				putter.Detach();
5476 
5477 			return fd;
5478 		}
5479 
5480 		// it doesn't exist yet -- try to create it
5481 
5482 		if (!HAS_FS_CALL(directory, create))
5483 			return B_READ_ONLY_DEVICE;
5484 
5485 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5486 			&cookie, &newID);
5487 		if (status != B_OK
5488 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5489 			return status;
5490 		}
5491 	}
5492 
5493 	if (status != B_OK)
5494 		return status;
5495 
5496 	// the node has been created successfully
5497 
5498 	rw_lock_read_lock(&sVnodeLock);
5499 	vnode = lookup_vnode(directory->device, newID);
5500 	rw_lock_read_unlock(&sVnodeLock);
5501 
5502 	if (vnode == NULL) {
5503 		panic("vfs: fs_create() returned success but there is no vnode, "
5504 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5505 		return B_BAD_VALUE;
5506 	}
5507 
5508 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5509 	if (fd >= 0)
5510 		return fd;
5511 
5512 	status = fd;
5513 
5514 	// something went wrong, clean up
5515 
5516 	FS_CALL(vnode, close, cookie);
5517 	FS_CALL(vnode, free_cookie, cookie);
5518 	put_vnode(vnode);
5519 
5520 	FS_CALL(directory, unlink, name);
5521 
5522 	return status;
5523 }
5524 
5525 
5526 /*! Calls fs open_dir() on the given vnode and returns a new
5527 	file descriptor for it
5528 */
5529 static int
5530 open_dir_vnode(struct vnode* vnode, bool kernel)
5531 {
5532 	if (!HAS_FS_CALL(vnode, open_dir))
5533 		return B_UNSUPPORTED;
5534 
5535 	void* cookie;
5536 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5537 	if (status != B_OK)
5538 		return status;
5539 
5540 	// directory is opened, create a fd
5541 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5542 	if (status >= 0)
5543 		return status;
5544 
5545 	FS_CALL(vnode, close_dir, cookie);
5546 	FS_CALL(vnode, free_dir_cookie, cookie);
5547 
5548 	return status;
5549 }
5550 
5551 
5552 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5553 	file descriptor for it.
5554 	Used by attr_dir_open() and attr_dir_open_fd().
5555 */
5556 static int
5557 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5558 {
5559 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5560 		return B_UNSUPPORTED;
5561 
5562 	void* cookie;
5563 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5564 	if (status != B_OK)
5565 		return status;
5566 
5567 	// directory is opened, create a fd
5568 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5569 		kernel);
5570 	if (status >= 0)
5571 		return status;
5572 
5573 	FS_CALL(vnode, close_attr_dir, cookie);
5574 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5575 
5576 	return status;
5577 }
5578 
5579 
5580 static int
5581 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5582 	int openMode, int perms, bool kernel)
5583 {
5584 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5585 		"kernel %d\n", name, openMode, perms, kernel));
5586 
5587 	// get directory to put the new file in
5588 	struct vnode* directory;
5589 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5590 	if (status != B_OK)
5591 		return status;
5592 
5593 	status = create_vnode(directory, name, openMode, perms, kernel);
5594 	put_vnode(directory);
5595 
5596 	return status;
5597 }
5598 
5599 
5600 static int
5601 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5602 {
5603 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5604 		openMode, perms, kernel));
5605 
5606 	// get directory to put the new file in
5607 	char name[B_FILE_NAME_LENGTH];
5608 	struct vnode* directory;
5609 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5610 		kernel);
	if (status != B_OK)
5612 		return status;
5613 
5614 	status = create_vnode(directory, name, openMode, perms, kernel);
5615 
5616 	put_vnode(directory);
5617 	return status;
5618 }
5619 
5620 
5621 static int
5622 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5623 	int openMode, bool kernel)
5624 {
5625 	if (name == NULL || *name == '\0')
5626 		return B_BAD_VALUE;
5627 
5628 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5629 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5630 
5631 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5632 
5633 	// get the vnode matching the entry_ref
5634 	struct vnode* vnode;
5635 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5636 		kernel, &vnode);
5637 	if (status != B_OK)
5638 		return status;
5639 
5640 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5641 		put_vnode(vnode);
5642 		return B_LINK_LIMIT;
5643 	}
5644 
5645 	int newFD = open_vnode(vnode, openMode, kernel);
5646 	if (newFD >= 0) {
5647 		// The vnode reference has been transferred to the FD
5648 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5649 			directoryID, vnode->id, name);
5650 	} else
5651 		put_vnode(vnode);
5652 
5653 	return newFD;
5654 }
5655 
5656 
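/*!	Opens the file at the given FD + path combination and returns a new
	file descriptor for it. O_NOTRAVERSE/O_NOFOLLOW suppress traversal of a
	leaf symlink; with O_NOFOLLOW set, opening a symlink fails with
	B_LINK_LIMIT.
*/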
5657 static int
5658 file_open(int fd, char* path, int openMode, bool kernel)
5659 {
5660 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5661 
5662 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5663 		fd, path, openMode, kernel));
5664 
5665 	// get the vnode matching the vnode + path combination
5666 	struct vnode* vnode;
5667 	ino_t parentID;
5668 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5669 		&parentID, kernel);
5670 	if (status != B_OK)
5671 		return status;
5672 
5673 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5674 		put_vnode(vnode);
5675 		return B_LINK_LIMIT;
5676 	}
5677 
5678 	// open the vnode
5679 	int newFD = open_vnode(vnode, openMode, kernel);
5680 	if (newFD >= 0) {
5681 		// The vnode reference has been transferred to the FD
5682 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5683 			vnode->device, parentID, vnode->id, NULL);
5684 	} else
5685 		put_vnode(vnode);
5686 
5687 	return newFD;
5688 }
5689 
5690 
5691 static status_t
5692 file_close(struct file_descriptor* descriptor)
5693 {
5694 	struct vnode* vnode = descriptor->u.vnode;
5695 	status_t status = B_OK;
5696 
5697 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5698 
5699 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5700 		vnode->id);
5701 	if (HAS_FS_CALL(vnode, close)) {
5702 		status = FS_CALL(vnode, close, descriptor->cookie);
5703 	}
5704 
5705 	if (status == B_OK) {
5706 		// remove all outstanding locks for this team
5707 		if (HAS_FS_CALL(vnode, release_lock))
5708 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5709 		else
5710 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5711 	}
5712 	return status;
5713 }
5714 
5715 
5716 static void
5717 file_free_fd(struct file_descriptor* descriptor)
5718 {
5719 	struct vnode* vnode = descriptor->u.vnode;
5720 
5721 	if (vnode != NULL) {
5722 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5723 		put_vnode(vnode);
5724 	}
5725 }
5726 
5727 
5728 static status_t
5729 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5730 	size_t* length)
5731 {
5732 	struct vnode* vnode = descriptor->u.vnode;
5733 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5734 		pos, length, *length));
5735 
5736 	if (S_ISDIR(vnode->Type()))
5737 		return B_IS_A_DIRECTORY;
5738 
5739 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5740 }
5741 
5742 
5743 static status_t
5744 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5745 	size_t* length)
5746 {
5747 	struct vnode* vnode = descriptor->u.vnode;
5748 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5749 		length));
5750 
5751 	if (S_ISDIR(vnode->Type()))
5752 		return B_IS_A_DIRECTORY;
5753 	if (!HAS_FS_CALL(vnode, write))
5754 		return B_READ_ONLY_DEVICE;
5755 
5756 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5757 }
5758 
5759 
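/*!	Implements the seek logic behind lseek(): computes the base offset for
	the given seek type - for SEEK_END the size is taken from stat(), or,
	for devices reporting no size, from the B_GET_GEOMETRY ioctl - and
	returns the new descriptor position.
	A hedged userland sketch (SEEK_DATA/SEEK_HOLE only work where the FS
	supports the FIOSEEKDATA/FIOSEEKHOLE ioctls):

		off_t size = lseek(fd, 0, SEEK_END);	// seek to end, returns size
		off_t data = lseek(fd, 0, SEEK_DATA);	// start of next data run
*/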
5760 static off_t
5761 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5762 {
5763 	struct vnode* vnode = descriptor->u.vnode;
5764 	off_t offset;
5765 	bool isDevice = false;
5766 
5767 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5768 		seekType));
5769 
5770 	// some kinds of files are not seekable
5771 	switch (vnode->Type() & S_IFMT) {
5772 		case S_IFIFO:
5773 		case S_IFSOCK:
5774 			return ESPIPE;
5775 
		// drivers publish block devices as character devices, so check both
5777 		case S_IFBLK:
5778 		case S_IFCHR:
5779 			isDevice = true;
5780 			break;
		// The Open Group Base Specs don't single out any file types besides
		// pipes, FIFOs, and sockets, so seeking all other types is allowed.
5783 		case S_IFREG:
5784 		case S_IFDIR:
5785 		case S_IFLNK:
5786 			break;
5787 	}
5788 
5789 	switch (seekType) {
5790 		case SEEK_SET:
5791 			offset = 0;
5792 			break;
5793 		case SEEK_CUR:
5794 			offset = descriptor->pos;
5795 			break;
5796 		case SEEK_END:
5797 		{
5798 			// stat() the node
5799 			if (!HAS_FS_CALL(vnode, read_stat))
5800 				return B_UNSUPPORTED;
5801 
5802 			struct stat stat;
5803 			status_t status = FS_CALL(vnode, read_stat, &stat);
5804 			if (status != B_OK)
5805 				return status;
5806 
5807 			offset = stat.st_size;
5808 
5809 			if (offset == 0 && isDevice) {
				// stat() on regular drivers doesn't report a size
5811 				device_geometry geometry;
5812 
5813 				if (HAS_FS_CALL(vnode, ioctl)) {
5814 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5815 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5816 					if (status == B_OK)
5817 						offset = (off_t)geometry.bytes_per_sector
5818 							* geometry.sectors_per_track
5819 							* geometry.cylinder_count
5820 							* geometry.head_count;
5821 				}
5822 			}
5823 
5824 			break;
5825 		}
5826 		case SEEK_DATA:
5827 		case SEEK_HOLE:
5828 		{
5829 			status_t status = B_BAD_VALUE;
5830 			if (HAS_FS_CALL(vnode, ioctl)) {
5831 				offset = pos;
5832 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5833 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5834 					&offset, sizeof(offset));
5835 				if (status == B_OK) {
5836 					if (offset > pos)
5837 						offset -= pos;
5838 					break;
5839 				}
5840 			}
5841 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5842 				return status;
5843 
			// fall back to a basic implementation using stat()
5845 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5846 				return B_BAD_VALUE;
5847 
5848 			struct stat stat;
5849 			status = FS_CALL(vnode, read_stat, &stat);
5850 			if (status != B_OK)
5851 				return status;
5852 
5853 			off_t end = stat.st_size;
5854 			if (pos >= end)
5855 				return ENXIO;
5856 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5857 			break;
5858 		}
5859 		default:
5860 			return B_BAD_VALUE;
5861 	}
5862 
5863 	// assumes off_t is 64 bits wide
5864 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5865 		return B_BUFFER_OVERFLOW;
5866 
5867 	pos += offset;
5868 	if (pos < 0)
5869 		return B_BAD_VALUE;
5870 
5871 	return descriptor->pos = pos;
5872 }
5873 
5874 
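/*!	select() support for files: forwarded to the FS select() hook if there
	is one; otherwise the event is reported as ready immediately (except for
	output-only events), since regular files never block.
*/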
5875 static status_t
5876 file_select(struct file_descriptor* descriptor, uint8 event,
5877 	struct selectsync* sync)
5878 {
5879 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5880 
5881 	struct vnode* vnode = descriptor->u.vnode;
5882 
5883 	// If the FS has no select() hook, notify select() now.
5884 	if (!HAS_FS_CALL(vnode, select)) {
5885 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5886 			return notify_select_event(sync, event);
5887 		else
5888 			return B_OK;
5889 	}
5890 
5891 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5892 }
5893 
5894 
5895 static status_t
5896 file_deselect(struct file_descriptor* descriptor, uint8 event,
5897 	struct selectsync* sync)
5898 {
5899 	struct vnode* vnode = descriptor->u.vnode;
5900 
5901 	if (!HAS_FS_CALL(vnode, deselect))
5902 		return B_OK;
5903 
5904 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5905 }
5906 
5907 
5908 static status_t
5909 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5910 	bool kernel)
5911 {
5912 	struct vnode* vnode;
5913 	status_t status;
5914 
5915 	if (name == NULL || *name == '\0')
5916 		return B_BAD_VALUE;
5917 
5918 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5919 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5920 
5921 	status = get_vnode(mountID, parentID, &vnode, true, false);
5922 	if (status != B_OK)
5923 		return status;
5924 
5925 	if (HAS_FS_CALL(vnode, create_dir))
5926 		status = FS_CALL(vnode, create_dir, name, perms);
5927 	else
5928 		status = B_READ_ONLY_DEVICE;
5929 
5930 	put_vnode(vnode);
5931 	return status;
5932 }
5933 
5934 
5935 static status_t
5936 dir_create(int fd, char* path, int perms, bool kernel)
5937 {
5938 	char filename[B_FILE_NAME_LENGTH];
5939 	struct vnode* vnode;
5940 	status_t status;
5941 
5942 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5943 		kernel));
5944 
5945 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
	if (status != B_OK)
5947 		return status;
5948 
5949 	if (HAS_FS_CALL(vnode, create_dir)) {
5950 		status = FS_CALL(vnode, create_dir, filename, perms);
5951 	} else
5952 		status = B_READ_ONLY_DEVICE;
5953 
5954 	put_vnode(vnode);
5955 	return status;
5956 }
5957 
5958 
5959 static int
5960 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5961 {
5962 	FUNCTION(("dir_open_entry_ref()\n"));
5963 
5964 	if (name && name[0] == '\0')
5965 		return B_BAD_VALUE;
5966 
5967 	// get the vnode matching the entry_ref/node_ref
5968 	struct vnode* vnode;
5969 	status_t status;
5970 	if (name) {
5971 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5972 			&vnode);
5973 	} else
5974 		status = get_vnode(mountID, parentID, &vnode, true, false);
5975 	if (status != B_OK)
5976 		return status;
5977 
5978 	int newFD = open_dir_vnode(vnode, kernel);
5979 	if (newFD >= 0) {
5980 		// The vnode reference has been transferred to the FD
5981 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5982 			vnode->id, name);
5983 	} else
5984 		put_vnode(vnode);
5985 
5986 	return newFD;
5987 }
5988 
5989 
5990 static int
5991 dir_open(int fd, char* path, bool kernel)
5992 {
5993 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5994 		kernel));
5995 
5996 	// get the vnode matching the vnode + path combination
5997 	struct vnode* vnode = NULL;
5998 	ino_t parentID;
5999 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6000 		kernel);
6001 	if (status != B_OK)
6002 		return status;
6003 
6004 	// open the dir
6005 	int newFD = open_dir_vnode(vnode, kernel);
6006 	if (newFD >= 0) {
6007 		// The vnode reference has been transferred to the FD
6008 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6009 			parentID, vnode->id, NULL);
6010 	} else
6011 		put_vnode(vnode);
6012 
6013 	return newFD;
6014 }
6015 
6016 
6017 static status_t
6018 dir_close(struct file_descriptor* descriptor)
6019 {
6020 	struct vnode* vnode = descriptor->u.vnode;
6021 
6022 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6023 
6024 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6025 		vnode->id);
6026 	if (HAS_FS_CALL(vnode, close_dir))
6027 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6028 
6029 	return B_OK;
6030 }
6031 
6032 
6033 static void
6034 dir_free_fd(struct file_descriptor* descriptor)
6035 {
6036 	struct vnode* vnode = descriptor->u.vnode;
6037 
6038 	if (vnode != NULL) {
6039 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6040 		put_vnode(vnode);
6041 	}
6042 }
6043 
6044 
6045 static status_t
6046 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6047 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6048 {
6049 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6050 		bufferSize, _count);
6051 }
6052 
6053 
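/*!	Fixes up a dirent as returned by the FS so that it matches the VFS
	view: fills in d_pdev/d_pino from the parent, resolves ".." for covering
	directories, and substitutes the dev/ino of covered vnodes with those of
	the top-most covering vnode.
*/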
6054 static status_t
6055 fix_dirent(struct vnode* parent, struct dirent* entry,
6056 	struct io_context* ioContext)
6057 {
6058 	// set d_pdev and d_pino
6059 	entry->d_pdev = parent->device;
6060 	entry->d_pino = parent->id;
6061 
	// If this is the ".." entry and the directory is covering another vnode,
6063 	// we need to replace d_dev and d_ino with the actual values.
6064 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6065 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6066 			ioContext);
6067 	}
6068 
6069 	// resolve covered vnodes
6070 	ReadLocker _(&sVnodeLock);
6071 
6072 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6073 	if (vnode != NULL && vnode->covered_by != NULL) {
6074 		do {
6075 			vnode = vnode->covered_by;
6076 		} while (vnode->covered_by != NULL);
6077 
6078 		entry->d_dev = vnode->device;
6079 		entry->d_ino = vnode->id;
6080 	}
6081 
6082 	return B_OK;
6083 }
6084 
6085 
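/*!	Reads the next entries from the given directory into the supplied
	buffer and post-processes each of them with fix_dirent(), so that mount
	covers stay invisible to the caller.
*/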
6086 static status_t
6087 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6088 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6089 {
6090 	if (!HAS_FS_CALL(vnode, read_dir))
6091 		return B_UNSUPPORTED;
6092 
6093 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6094 		_count);
6095 	if (error != B_OK)
6096 		return error;
6097 
6098 	// we need to adjust the read dirents
6099 	uint32 count = *_count;
6100 	for (uint32 i = 0; i < count; i++) {
6101 		error = fix_dirent(vnode, buffer, ioContext);
6102 		if (error != B_OK)
6103 			return error;
6104 
6105 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6106 	}
6107 
6108 	return error;
6109 }
6110 
6111 
6112 static status_t
6113 dir_rewind(struct file_descriptor* descriptor)
6114 {
6115 	struct vnode* vnode = descriptor->u.vnode;
6116 
6117 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6118 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6119 	}
6120 
6121 	return B_UNSUPPORTED;
6122 }
6123 
6124 
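/*!	Removes the directory specified by the FD + path combination. The path
	is normalized first so that it does not end in "/" or a trailing "."
	component; attempts to remove "." or ".." fail with B_NOT_ALLOWED.
*/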
6125 static status_t
6126 dir_remove(int fd, char* path, bool kernel)
6127 {
6128 	char name[B_FILE_NAME_LENGTH];
6129 	struct vnode* directory;
6130 	status_t status;
6131 
6132 	if (path != NULL) {
		// we need to make sure our path name doesn't end with "/", ".",
6134 		// or ".."
6135 		char* lastSlash;
6136 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6137 			char* leaf = lastSlash + 1;
6138 			if (!strcmp(leaf, ".."))
6139 				return B_NOT_ALLOWED;
6140 
6141 			// omit multiple slashes
6142 			while (lastSlash > path && lastSlash[-1] == '/')
6143 				lastSlash--;
6144 
			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
				break;
6149 			// "name/" -> "name", or "name/." -> "name"
6150 			lastSlash[0] = '\0';
6151 		}
6152 
6153 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6154 			return B_NOT_ALLOWED;
6155 	}
6156 
6157 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6158 	if (status != B_OK)
6159 		return status;
6160 
6161 	if (HAS_FS_CALL(directory, remove_dir))
6162 		status = FS_CALL(directory, remove_dir, name);
6163 	else
6164 		status = B_READ_ONLY_DEVICE;
6165 
6166 	put_vnode(directory);
6167 	return status;
6168 }
6169 
6170 
6171 static status_t
6172 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6173 	size_t length)
6174 {
6175 	struct vnode* vnode = descriptor->u.vnode;
6176 
6177 	if (HAS_FS_CALL(vnode, ioctl))
6178 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6179 
6180 	return B_DEV_INVALID_IOCTL;
6181 }
6182 
6183 
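/*!	Back end of the fcntl() syscall. Handles descriptor flags
	(F_SETFD/F_GETFD), status flags (F_SETFL/F_GETFL, restricted to O_APPEND
	and O_NONBLOCK), duplication (F_DUPFD/F_DUPFD_CLOEXEC), and advisory
	locking (F_GETLK/F_SETLK/F_SETLKW).
	A hedged userland sketch of the advisory locking path (all values are
	hypothetical):

		struct flock lock = {};
		lock.l_type = F_WRLCK;		// exclusive lock...
		lock.l_whence = SEEK_SET;
		lock.l_start = 0;
		lock.l_len = 0;				// ...covering the whole file
		if (fcntl(fd, F_SETLKW, &lock) == 0) {
			// critical section; release with l_type = F_UNLCK
		}
*/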
6184 static status_t
6185 common_fcntl(int fd, int op, size_t argument, bool kernel)
6186 {
6187 	struct flock flock;
6188 
6189 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6190 		fd, op, argument, kernel ? "kernel" : "user"));
6191 
6192 	struct io_context* context = get_current_io_context(kernel);
6193 
6194 	struct file_descriptor* descriptor = get_fd(context, fd);
6195 	if (descriptor == NULL)
6196 		return B_FILE_ERROR;
6197 
6198 	struct vnode* vnode = fd_vnode(descriptor);
6199 
6200 	status_t status = B_OK;
6201 
6202 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6203 		if (descriptor->type != FDTYPE_FILE)
6204 			status = B_BAD_VALUE;
6205 		else if (kernel)
6206 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6207 		else if (user_memcpy(&flock, (struct flock*)argument,
6208 				sizeof(struct flock)) != B_OK)
6209 			status = B_BAD_ADDRESS;
6210 		if (status != B_OK) {
6211 			put_fd(descriptor);
6212 			return status;
6213 		}
6214 	}
6215 
6216 	switch (op) {
6217 		case F_SETFD:
6218 		{
6219 			// Set file descriptor flags
6220 
			// FD_CLOEXEC is the only flag available at this time
6222 			mutex_lock(&context->io_mutex);
6223 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6224 			mutex_unlock(&context->io_mutex);
6225 
6226 			status = B_OK;
6227 			break;
6228 		}
6229 
6230 		case F_GETFD:
6231 		{
6232 			// Get file descriptor flags
6233 			mutex_lock(&context->io_mutex);
6234 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6235 			mutex_unlock(&context->io_mutex);
6236 			break;
6237 		}
6238 
6239 		case F_SETFL:
6240 			// Set file descriptor open mode
6241 
6242 			// we only accept changes to O_APPEND and O_NONBLOCK
6243 			argument &= O_APPEND | O_NONBLOCK;
6244 			if (descriptor->ops->fd_set_flags != NULL) {
6245 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6246 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6247 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6248 					(int)argument);
6249 			} else
6250 				status = B_UNSUPPORTED;
6251 
6252 			if (status == B_OK) {
6253 				// update this descriptor's open_mode field
6254 				descriptor->open_mode = (descriptor->open_mode
6255 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6256 			}
6257 
6258 			break;
6259 
6260 		case F_GETFL:
6261 			// Get file descriptor open mode
6262 			status = descriptor->open_mode;
6263 			break;
6264 
6265 		case F_DUPFD:
6266 		case F_DUPFD_CLOEXEC:
6267 		{
6268 			status = new_fd_etc(context, descriptor, (int)argument);
6269 			if (status >= 0) {
6270 				mutex_lock(&context->io_mutex);
6271 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6272 				mutex_unlock(&context->io_mutex);
6273 
6274 				atomic_add(&descriptor->ref_count, 1);
6275 			}
6276 			break;
6277 		}
6278 
6279 		case F_GETLK:
6280 			if (vnode != NULL) {
6281 				struct flock normalizedLock;
6282 
6283 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6284 				status = normalize_flock(descriptor, &normalizedLock);
6285 				if (status != B_OK)
6286 					break;
6287 
6288 				if (HAS_FS_CALL(vnode, test_lock)) {
6289 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6290 						&normalizedLock);
6291 				} else
6292 					status = test_advisory_lock(vnode, &normalizedLock);
6293 				if (status == B_OK) {
6294 					if (normalizedLock.l_type == F_UNLCK) {
6295 						// no conflicting lock found, copy back the same struct
6296 						// we were given except change type to F_UNLCK
6297 						flock.l_type = F_UNLCK;
6298 						if (kernel) {
6299 							memcpy((struct flock*)argument, &flock,
6300 								sizeof(struct flock));
6301 						} else {
6302 							status = user_memcpy((struct flock*)argument,
6303 								&flock, sizeof(struct flock));
6304 						}
6305 					} else {
6306 						// a conflicting lock was found, copy back its range and
6307 						// type
6308 						if (normalizedLock.l_len == OFF_MAX)
6309 							normalizedLock.l_len = 0;
6310 
6311 						if (kernel) {
6312 							memcpy((struct flock*)argument,
6313 								&normalizedLock, sizeof(struct flock));
6314 						} else {
6315 							status = user_memcpy((struct flock*)argument,
6316 								&normalizedLock, sizeof(struct flock));
6317 						}
6318 					}
6319 				}
6320 			} else
6321 				status = B_BAD_VALUE;
6322 			break;
6323 
6324 		case F_SETLK:
6325 		case F_SETLKW:
6326 			status = normalize_flock(descriptor, &flock);
6327 			if (status != B_OK)
6328 				break;
6329 
6330 			if (vnode == NULL) {
6331 				status = B_BAD_VALUE;
6332 			} else if (flock.l_type == F_UNLCK) {
6333 				if (HAS_FS_CALL(vnode, release_lock)) {
6334 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6335 						&flock);
6336 				} else {
6337 					status = release_advisory_lock(vnode, context, NULL,
6338 						&flock);
6339 				}
6340 			} else {
6341 				// the open mode must match the lock type
6342 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6343 						&& flock.l_type == F_WRLCK)
6344 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6345 						&& flock.l_type == F_RDLCK))
6346 					status = B_FILE_ERROR;
6347 				else {
6348 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6349 						status = FS_CALL(vnode, acquire_lock,
6350 							descriptor->cookie, &flock, op == F_SETLKW);
6351 					} else {
6352 						status = acquire_advisory_lock(vnode, context, NULL,
6353 							&flock, op == F_SETLKW);
6354 					}
6355 				}
6356 			}
6357 			break;
6358 
6359 		// ToDo: add support for more ops?
6360 
6361 		default:
6362 			status = B_BAD_VALUE;
6363 	}
6364 
6365 	put_fd(descriptor);
6366 	return status;
6367 }
6368 
6369 
6370 static status_t
6371 common_sync(int fd, bool kernel)
6372 {
6373 	struct file_descriptor* descriptor;
6374 	struct vnode* vnode;
6375 	status_t status;
6376 
	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6378 
6379 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6380 	if (descriptor == NULL)
6381 		return B_FILE_ERROR;
6382 
6383 	if (HAS_FS_CALL(vnode, fsync))
6384 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6385 	else
6386 		status = B_UNSUPPORTED;
6387 
6388 	put_fd(descriptor);
6389 	return status;
6390 }
6391 
6392 
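/*!	Exclusive node locking (cf. BNode::Lock()): atomically swaps the
	descriptor into vnode::mandatory_locked_by, failing with B_BUSY if
	another descriptor already holds the node.
*/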
6393 static status_t
6394 common_lock_node(int fd, bool kernel)
6395 {
6396 	struct file_descriptor* descriptor;
6397 	struct vnode* vnode;
6398 
6399 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6400 	if (descriptor == NULL)
6401 		return B_FILE_ERROR;
6402 
6403 	status_t status = B_OK;
6404 
	// We need to set the lock atomically - someone
	// else might set one at the same time
6407 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6408 			(file_descriptor*)NULL) != NULL)
6409 		status = B_BUSY;
6410 
6411 	put_fd(descriptor);
6412 	return status;
6413 }
6414 
6415 
6416 static status_t
6417 common_unlock_node(int fd, bool kernel)
6418 {
6419 	struct file_descriptor* descriptor;
6420 	struct vnode* vnode;
6421 
6422 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6423 	if (descriptor == NULL)
6424 		return B_FILE_ERROR;
6425 
6426 	status_t status = B_OK;
6427 
	// We need to clear the lock atomically - someone
	// else might set or clear one at the same time
6430 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6431 			(file_descriptor*)NULL, descriptor) != descriptor)
6432 		status = B_BAD_VALUE;
6433 
6434 	put_fd(descriptor);
6435 	return status;
6436 }
6437 
6438 
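/*!	Backs posix_fallocate(): preallocates storage for regular files only,
	by means of the FS preallocate() hook where available.
*/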
6439 static status_t
6440 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6441 {
6442 	struct file_descriptor* descriptor;
6443 	struct vnode* vnode;
6444 
	if (offset < 0 || length <= 0)
6446 		return B_BAD_VALUE;
6447 	if (offset > OFF_MAX - length)
6448 		return B_FILE_TOO_LARGE;
6449 
	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;
	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY) {
		put_fd(descriptor);
		return B_FILE_ERROR;
	}

	status_t status = B_OK;
	switch (vnode->Type() & S_IFMT) {
		case S_IFIFO:
		case S_IFSOCK:
			status = ESPIPE;
			break;

		case S_IFBLK:
		case S_IFCHR:
		case S_IFDIR:
		case S_IFLNK:
			status = B_DEVICE_NOT_FOUND;
			break;

		case S_IFREG:
			break;
	}

	if (status == B_OK) {
		if (HAS_FS_CALL(vnode, preallocate)) {
			status = FS_CALL(vnode, preallocate, offset, length);
		} else {
			status = HAS_FS_CALL(vnode, write)
				? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
		}
	}

	put_fd(descriptor);
		// balance the reference acquired by get_fd_and_vnode()
	return status;
6478 }
6479 
6480 
6481 static status_t
6482 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6483 	bool kernel)
6484 {
6485 	struct vnode* vnode;
6486 	status_t status;
6487 
6488 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6489 	if (status != B_OK)
6490 		return status;
6491 
6492 	if (HAS_FS_CALL(vnode, read_symlink)) {
6493 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6494 	} else
6495 		status = B_BAD_VALUE;
6496 
6497 	put_vnode(vnode);
6498 	return status;
6499 }
6500 
6501 
6502 static status_t
6503 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6504 	bool kernel)
6505 {
6506 	// path validity checks have to be in the calling function!
6507 	char name[B_FILE_NAME_LENGTH];
6508 	struct vnode* vnode;
6509 	status_t status;
6510 
6511 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6512 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6513 
6514 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6515 	if (status != B_OK)
6516 		return status;
6517 
6518 	if (HAS_FS_CALL(vnode, create_symlink))
6519 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6520 	else {
6521 		status = HAS_FS_CALL(vnode, write)
6522 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6523 	}
6524 
6525 	put_vnode(vnode);
6526 
6527 	return status;
6528 }
6529 
6530 
6531 static status_t
6532 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6533 	bool traverseLeafLink, bool kernel)
6534 {
6535 	// path validity checks have to be in the calling function!
6536 
6537 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6538 		toPath, kernel));
6539 
6540 	char name[B_FILE_NAME_LENGTH];
6541 	struct vnode* directory;
6542 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6543 		kernel);
6544 	if (status != B_OK)
6545 		return status;
6546 
6547 	struct vnode* vnode;
6548 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6549 		kernel);
6550 	if (status != B_OK)
6551 		goto err;
6552 
6553 	if (directory->mount != vnode->mount) {
6554 		status = B_CROSS_DEVICE_LINK;
6555 		goto err1;
6556 	}
6557 
6558 	if (HAS_FS_CALL(directory, link))
6559 		status = FS_CALL(directory, link, name, vnode);
6560 	else
6561 		status = B_READ_ONLY_DEVICE;
6562 
6563 err1:
6564 	put_vnode(vnode);
6565 err:
6566 	put_vnode(directory);
6567 
6568 	return status;
6569 }
6570 
6571 
6572 static status_t
6573 common_unlink(int fd, char* path, bool kernel)
6574 {
6575 	char filename[B_FILE_NAME_LENGTH];
6576 	struct vnode* vnode;
6577 	status_t status;
6578 
6579 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6580 		kernel));
6581 
6582 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
	if (status != B_OK)
6584 		return status;
6585 
6586 	if (HAS_FS_CALL(vnode, unlink))
6587 		status = FS_CALL(vnode, unlink, filename);
6588 	else
6589 		status = B_READ_ONLY_DEVICE;
6590 
6591 	put_vnode(vnode);
6592 
6593 	return status;
6594 }
6595 
6596 
6597 static status_t
common_access(int fd, char* path, int mode, bool effectiveUserGroup,
	bool kernel)
6599 {
6600 	struct vnode* vnode;
6601 	status_t status;
6602 
6603 	// TODO: honor effectiveUserGroup argument
6604 
6605 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6606 	if (status != B_OK)
6607 		return status;
6608 
6609 	if (HAS_FS_CALL(vnode, access))
6610 		status = FS_CALL(vnode, access, mode);
6611 	else
6612 		status = B_OK;
6613 
6614 	put_vnode(vnode);
6615 
6616 	return status;
6617 }
6618 
6619 
6620 static status_t
6621 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6622 {
6623 	struct vnode* fromVnode;
6624 	struct vnode* toVnode;
6625 	char fromName[B_FILE_NAME_LENGTH];
6626 	char toName[B_FILE_NAME_LENGTH];
6627 	status_t status;
6628 
6629 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6630 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6631 
6632 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6633 	if (status != B_OK)
6634 		return status;
6635 
6636 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6637 	if (status != B_OK)
6638 		goto err1;
6639 
6640 	if (fromVnode->device != toVnode->device) {
6641 		status = B_CROSS_DEVICE_LINK;
6642 		goto err2;
6643 	}
6644 
6645 	if (fromName[0] == '\0' || toName[0] == '\0'
6646 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6647 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6648 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6649 		status = B_BAD_VALUE;
6650 		goto err2;
6651 	}
6652 
6653 	if (HAS_FS_CALL(fromVnode, rename))
6654 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6655 	else
6656 		status = B_READ_ONLY_DEVICE;
6657 
6658 err2:
6659 	put_vnode(toVnode);
6660 err1:
6661 	put_vnode(fromVnode);
6662 
6663 	return status;
6664 }
6665 
6666 
6667 static status_t
6668 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6669 {
6670 	struct vnode* vnode = descriptor->u.vnode;
6671 
6672 	FUNCTION(("common_read_stat: stat %p\n", stat));
6673 
6674 	// TODO: remove this once all file systems properly set them!
6675 	stat->st_crtim.tv_nsec = 0;
6676 	stat->st_ctim.tv_nsec = 0;
6677 	stat->st_mtim.tv_nsec = 0;
6678 	stat->st_atim.tv_nsec = 0;
6679 
6680 	return vfs_stat_vnode(vnode, stat);
6681 }
6682 
6683 
6684 static status_t
6685 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6686 	int statMask)
6687 {
6688 	struct vnode* vnode = descriptor->u.vnode;
6689 
6690 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6691 		vnode, stat, statMask));
6692 
6693 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6694 		&& (statMask & B_STAT_SIZE) != 0) {
6695 		return B_BAD_VALUE;
6696 	}
6697 
6698 	if (!HAS_FS_CALL(vnode, write_stat))
6699 		return B_READ_ONLY_DEVICE;
6700 
6701 	return FS_CALL(vnode, write_stat, stat, statMask);
6702 }
6703 
6704 
6705 static status_t
6706 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6707 	struct stat* stat, bool kernel)
6708 {
	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6710 		stat));
6711 
6712 	struct vnode* vnode;
6713 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6714 		NULL, kernel);
6715 	if (status != B_OK)
6716 		return status;
6717 
6718 	status = vfs_stat_vnode(vnode, stat);
6719 
6720 	put_vnode(vnode);
6721 	return status;
6722 }
6723 
6724 
6725 static status_t
6726 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6727 	const struct stat* stat, int statMask, bool kernel)
6728 {
	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6730 		"kernel %d\n", fd, path, stat, statMask, kernel));
6731 
6732 	struct vnode* vnode;
6733 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6734 		NULL, kernel);
6735 	if (status != B_OK)
6736 		return status;
6737 
6738 	if (HAS_FS_CALL(vnode, write_stat))
6739 		status = FS_CALL(vnode, write_stat, stat, statMask);
6740 	else
6741 		status = B_READ_ONLY_DEVICE;
6742 
6743 	put_vnode(vnode);
6744 
6745 	return status;
6746 }
6747 
6748 
6749 static int
6750 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6751 {
6752 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6753 		kernel));
6754 
6755 	struct vnode* vnode;
6756 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6757 		NULL, kernel);
6758 	if (status != B_OK)
6759 		return status;
6760 
6761 	status = open_attr_dir_vnode(vnode, kernel);
6762 	if (status < 0)
6763 		put_vnode(vnode);
6764 
6765 	return status;
6766 }
6767 
6768 
6769 static status_t
6770 attr_dir_close(struct file_descriptor* descriptor)
6771 {
6772 	struct vnode* vnode = descriptor->u.vnode;
6773 
6774 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6775 
6776 	if (HAS_FS_CALL(vnode, close_attr_dir))
6777 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6778 
6779 	return B_OK;
6780 }
6781 
6782 
6783 static void
6784 attr_dir_free_fd(struct file_descriptor* descriptor)
6785 {
6786 	struct vnode* vnode = descriptor->u.vnode;
6787 
6788 	if (vnode != NULL) {
6789 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6790 		put_vnode(vnode);
6791 	}
6792 }
6793 
6794 
6795 static status_t
6796 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6797 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6798 {
6799 	struct vnode* vnode = descriptor->u.vnode;
6800 
6801 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6802 
6803 	if (HAS_FS_CALL(vnode, read_attr_dir))
6804 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6805 			bufferSize, _count);
6806 
6807 	return B_UNSUPPORTED;
6808 }
6809 
6810 
6811 static status_t
6812 attr_dir_rewind(struct file_descriptor* descriptor)
6813 {
6814 	struct vnode* vnode = descriptor->u.vnode;
6815 
6816 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6817 
6818 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6819 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6820 
6821 	return B_UNSUPPORTED;
6822 }
6823 
6824 
6825 static int
6826 attr_create(int fd, char* path, const char* name, uint32 type,
6827 	int openMode, bool kernel)
6828 {
6829 	if (name == NULL || *name == '\0')
6830 		return B_BAD_VALUE;
6831 
6832 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6833 	struct vnode* vnode;
6834 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6835 		kernel);
6836 	if (status != B_OK)
6837 		return status;
6838 
6839 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6840 		status = B_LINK_LIMIT;
6841 		goto err;
6842 	}
6843 
6844 	if (!HAS_FS_CALL(vnode, create_attr)) {
6845 		status = B_READ_ONLY_DEVICE;
6846 		goto err;
6847 	}
6848 
6849 	void* cookie;
6850 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6851 	if (status != B_OK)
6852 		goto err;
6853 
6854 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6855 	if (fd >= 0)
6856 		return fd;
6857 
6858 	status = fd;
6859 
6860 	FS_CALL(vnode, close_attr, cookie);
6861 	FS_CALL(vnode, free_attr_cookie, cookie);
6862 
6863 	FS_CALL(vnode, remove_attr, name);
6864 
6865 err:
6866 	put_vnode(vnode);
6867 
6868 	return status;
6869 }
6870 
6871 
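/*!	Opens the named attribute of the node at the given FD + path
	combination and returns a new attribute file descriptor for it.
	A hedged userland sketch (BeOS attribute API; the attribute name is
	hypothetical):

		int attrFD = fs_fopen_attr(fd, "MAIL:subject", B_STRING_TYPE,
			O_RDONLY);
		// read it like a file, then fs_close_attr(attrFD)
*/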
6872 static int
6873 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6874 {
6875 	if (name == NULL || *name == '\0')
6876 		return B_BAD_VALUE;
6877 
6878 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6879 	struct vnode* vnode;
6880 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6881 		kernel);
6882 	if (status != B_OK)
6883 		return status;
6884 
6885 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6886 		status = B_LINK_LIMIT;
6887 		goto err;
6888 	}
6889 
6890 	if (!HAS_FS_CALL(vnode, open_attr)) {
6891 		status = B_UNSUPPORTED;
6892 		goto err;
6893 	}
6894 
6895 	void* cookie;
6896 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6897 	if (status != B_OK)
6898 		goto err;
6899 
6900 	// now we only need a file descriptor for this attribute and we're done
6901 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6902 	if (fd >= 0)
6903 		return fd;
6904 
6905 	status = fd;
6906 
6907 	FS_CALL(vnode, close_attr, cookie);
6908 	FS_CALL(vnode, free_attr_cookie, cookie);
6909 
6910 err:
6911 	put_vnode(vnode);
6912 
6913 	return status;
6914 }
6915 
6916 
6917 static status_t
6918 attr_close(struct file_descriptor* descriptor)
6919 {
6920 	struct vnode* vnode = descriptor->u.vnode;
6921 
6922 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6923 
6924 	if (HAS_FS_CALL(vnode, close_attr))
6925 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6926 
6927 	return B_OK;
6928 }
6929 
6930 
6931 static void
6932 attr_free_fd(struct file_descriptor* descriptor)
6933 {
6934 	struct vnode* vnode = descriptor->u.vnode;
6935 
6936 	if (vnode != NULL) {
6937 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6938 		put_vnode(vnode);
6939 	}
6940 }
6941 
6942 
6943 static status_t
6944 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6945 	size_t* length)
6946 {
6947 	struct vnode* vnode = descriptor->u.vnode;
6948 
6949 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6950 		pos, length, *length));
6951 
6952 	if (!HAS_FS_CALL(vnode, read_attr))
6953 		return B_UNSUPPORTED;
6954 
6955 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6956 }
6957 
6958 
6959 static status_t
6960 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6961 	size_t* length)
6962 {
6963 	struct vnode* vnode = descriptor->u.vnode;
6964 
6965 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6966 		length));
6967 
6968 	if (!HAS_FS_CALL(vnode, write_attr))
6969 		return B_UNSUPPORTED;
6970 
6971 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6972 }
6973 
6974 
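/*!	Seek logic for attribute descriptors; mirrors file_seek(), except that
	for SEEK_END the size comes from the attribute's own stat.
*/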
6975 static off_t
6976 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6977 {
6978 	off_t offset;
6979 
6980 	switch (seekType) {
6981 		case SEEK_SET:
6982 			offset = 0;
6983 			break;
6984 		case SEEK_CUR:
6985 			offset = descriptor->pos;
6986 			break;
6987 		case SEEK_END:
6988 		{
6989 			struct vnode* vnode = descriptor->u.vnode;
			if (!HAS_FS_CALL(vnode, read_attr_stat))
6991 				return B_UNSUPPORTED;
6992 
6993 			struct stat stat;
6994 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6995 				&stat);
6996 			if (status != B_OK)
6997 				return status;
6998 
6999 			offset = stat.st_size;
7000 			break;
7001 		}
7002 		default:
7003 			return B_BAD_VALUE;
7004 	}
7005 
7006 	// assumes off_t is 64 bits wide
7007 	if (offset > 0 && LONGLONG_MAX - offset < pos)
7008 		return B_BUFFER_OVERFLOW;
7009 
7010 	pos += offset;
7011 	if (pos < 0)
7012 		return B_BAD_VALUE;
7013 
7014 	return descriptor->pos = pos;
7015 }
7016 
7017 
7018 static status_t
7019 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7020 {
7021 	struct vnode* vnode = descriptor->u.vnode;
7022 
7023 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7024 
7025 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7026 		return B_UNSUPPORTED;
7027 
7028 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7029 }
7030 
7031 
7032 static status_t
7033 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7034 	int statMask)
7035 {
7036 	struct vnode* vnode = descriptor->u.vnode;
7037 
7038 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7039 
7040 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7041 		return B_READ_ONLY_DEVICE;
7042 
7043 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7044 }
7045 
7046 
7047 static status_t
7048 attr_remove(int fd, const char* name, bool kernel)
7049 {
7050 	struct file_descriptor* descriptor;
7051 	struct vnode* vnode;
7052 	status_t status;
7053 
7054 	if (name == NULL || *name == '\0')
7055 		return B_BAD_VALUE;
7056 
7057 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7058 		kernel));
7059 
7060 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7061 	if (descriptor == NULL)
7062 		return B_FILE_ERROR;
7063 
7064 	if (HAS_FS_CALL(vnode, remove_attr))
7065 		status = FS_CALL(vnode, remove_attr, name);
7066 	else
7067 		status = B_READ_ONLY_DEVICE;
7068 
7069 	put_fd(descriptor);
7070 
7071 	return status;
7072 }
7073 
7074 
7075 static status_t
7076 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7077 	bool kernel)
7078 {
7079 	struct file_descriptor* fromDescriptor;
7080 	struct file_descriptor* toDescriptor;
7081 	struct vnode* fromVnode;
7082 	struct vnode* toVnode;
7083 	status_t status;
7084 
7085 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7086 		|| *toName == '\0')
7087 		return B_BAD_VALUE;
7088 
7089 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7090 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7091 
7092 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7093 	if (fromDescriptor == NULL)
7094 		return B_FILE_ERROR;
7095 
7096 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7097 	if (toDescriptor == NULL) {
7098 		status = B_FILE_ERROR;
7099 		goto err;
7100 	}
7101 
7102 	// are the files on the same volume?
7103 	if (fromVnode->device != toVnode->device) {
7104 		status = B_CROSS_DEVICE_LINK;
7105 		goto err1;
7106 	}
7107 
7108 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7109 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7110 	} else
7111 		status = B_READ_ONLY_DEVICE;
7112 
7113 err1:
7114 	put_fd(toDescriptor);
7115 err:
7116 	put_fd(fromDescriptor);
7117 
7118 	return status;
7119 }
7120 
7121 
7122 static int
7123 index_dir_open(dev_t mountID, bool kernel)
7124 {
7125 	struct fs_mount* mount;
7126 	void* cookie;
7127 
7128 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7129 		kernel));
7130 
7131 	status_t status = get_mount(mountID, &mount);
7132 	if (status != B_OK)
7133 		return status;
7134 
7135 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7136 		status = B_UNSUPPORTED;
7137 		goto error;
7138 	}
7139 
7140 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7141 	if (status != B_OK)
7142 		goto error;
7143 
7144 	// get fd for the index directory
7145 	int fd;
7146 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7147 	if (fd >= 0)
7148 		return fd;
7149 
7150 	// something went wrong
7151 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7152 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7153 
7154 	status = fd;
7155 
7156 error:
7157 	put_mount(mount);
7158 	return status;
7159 }
7160 
7161 
7162 static status_t
7163 index_dir_close(struct file_descriptor* descriptor)
7164 {
7165 	struct fs_mount* mount = descriptor->u.mount;
7166 
7167 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7168 
7169 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7170 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7171 
7172 	return B_OK;
7173 }
7174 
7175 
7176 static void
7177 index_dir_free_fd(struct file_descriptor* descriptor)
7178 {
7179 	struct fs_mount* mount = descriptor->u.mount;
7180 
7181 	if (mount != NULL) {
7182 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7183 		put_mount(mount);
7184 	}
7185 }
7186 
7187 
7188 static status_t
7189 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7190 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7191 {
7192 	struct fs_mount* mount = descriptor->u.mount;
7193 
7194 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7195 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7196 			bufferSize, _count);
7197 	}
7198 
7199 	return B_UNSUPPORTED;
7200 }
7201 
7202 
7203 static status_t
7204 index_dir_rewind(struct file_descriptor* descriptor)
7205 {
7206 	struct fs_mount* mount = descriptor->u.mount;
7207 
7208 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7209 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7210 
7211 	return B_UNSUPPORTED;
7212 }
7213 
7214 
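/*!	Creates an attribute index named "name" of the given type on the
	volume.
	A hedged userland sketch (fs_index.h API; the index name is
	hypothetical):

		fs_create_index(device, "MAIL:subject", B_STRING_TYPE, 0);
*/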
7215 static status_t
7216 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7217 	bool kernel)
7218 {
7219 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7220 		mountID, name, kernel));
7221 
7222 	struct fs_mount* mount;
7223 	status_t status = get_mount(mountID, &mount);
7224 	if (status != B_OK)
7225 		return status;
7226 
7227 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7228 		status = B_READ_ONLY_DEVICE;
7229 		goto out;
7230 	}
7231 
7232 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7233 
7234 out:
7235 	put_mount(mount);
7236 	return status;
7237 }
7238 
7239 
7240 #if 0
7241 static status_t
7242 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7243 {
7244 	struct vnode* vnode = descriptor->u.vnode;
7245 
7246 	// ToDo: currently unused!
7247 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7248 	if (!HAS_FS_CALL(vnode, read_index_stat))
7249 		return B_UNSUPPORTED;
7250 
7251 	return B_UNSUPPORTED;
7252 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7253 }
7254 
7255 
7256 static void
7257 index_free_fd(struct file_descriptor* descriptor)
7258 {
7259 	struct vnode* vnode = descriptor->u.vnode;
7260 
7261 	if (vnode != NULL) {
7262 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7263 		put_vnode(vnode);
7264 	}
7265 }
7266 #endif
7267 
7268 
7269 static status_t
7270 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7271 	bool kernel)
7272 {
	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
		"kernel = %d)\n", mountID, name, kernel));
7275 
7276 	struct fs_mount* mount;
7277 	status_t status = get_mount(mountID, &mount);
7278 	if (status != B_OK)
7279 		return status;
7280 
7281 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7282 		status = B_UNSUPPORTED;
7283 		goto out;
7284 	}
7285 
7286 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7287 
7288 out:
7289 	put_mount(mount);
7290 	return status;
7291 }
7292 
7293 
7294 static status_t
7295 index_remove(dev_t mountID, const char* name, bool kernel)
7296 {
7297 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7298 		mountID, name, kernel));
7299 
7300 	struct fs_mount* mount;
7301 	status_t status = get_mount(mountID, &mount);
7302 	if (status != B_OK)
7303 		return status;
7304 
7305 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7306 		status = B_READ_ONLY_DEVICE;
7307 		goto out;
7308 	}
7309 
7310 	status = FS_MOUNT_CALL(mount, remove_index, name);
7311 
7312 out:
7313 	put_mount(mount);
7314 	return status;
7315 }
7316 
7317 
/*!	TODO: the query FS API is still pretty much the same as in R5.
		It would be nice if queries got some more kernel support; for
		example, query parsing should be moved into the kernel.
*/
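// A hedged userland sketch of how a query reaches this code (fs_query.h
// API; the predicate string is hypothetical):
//
//	DIR* query = fs_open_query(device, "name==\"*.cpp\"", 0);
//	while (struct dirent* entry = fs_read_query(query)) {
//		// entry->d_dev/d_ino identify a matching node
//	}
//	fs_close_query(query);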
7323 static int
7324 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7325 	int32 token, bool kernel)
7326 {
7327 	struct fs_mount* mount;
7328 	void* cookie;
7329 
7330 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7331 		device, query, kernel));
7332 
7333 	status_t status = get_mount(device, &mount);
7334 	if (status != B_OK)
7335 		return status;
7336 
7337 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7338 		status = B_UNSUPPORTED;
7339 		goto error;
7340 	}
7341 
7342 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7343 		&cookie);
7344 	if (status != B_OK)
7345 		goto error;
7346 
	// get fd for the query
7348 	int fd;
7349 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7350 	if (fd >= 0)
7351 		return fd;
7352 
7353 	status = fd;
7354 
7355 	// something went wrong
7356 	FS_MOUNT_CALL(mount, close_query, cookie);
7357 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7358 
7359 error:
7360 	put_mount(mount);
7361 	return status;
7362 }
7363 
7364 
7365 static status_t
7366 query_close(struct file_descriptor* descriptor)
7367 {
7368 	struct fs_mount* mount = descriptor->u.mount;
7369 
7370 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7371 
7372 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7373 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7374 
7375 	return B_OK;
7376 }
7377 
7378 
7379 static void
7380 query_free_fd(struct file_descriptor* descriptor)
7381 {
7382 	struct fs_mount* mount = descriptor->u.mount;
7383 
7384 	if (mount != NULL) {
7385 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7386 		put_mount(mount);
7387 	}
7388 }
7389 
7390 
7391 static status_t
7392 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7393 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7394 {
7395 	struct fs_mount* mount = descriptor->u.mount;
7396 
7397 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7398 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7399 			bufferSize, _count);
7400 	}
7401 
7402 	return B_UNSUPPORTED;
7403 }
7404 
7405 
7406 static status_t
7407 query_rewind(struct file_descriptor* descriptor)
7408 {
7409 	struct fs_mount* mount = descriptor->u.mount;
7410 
7411 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7412 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7413 
7414 	return B_UNSUPPORTED;
7415 }
7416 
7417 
7418 //	#pragma mark - General File System functions
7419 
7420 
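/*!	Mounts the file system fsName on the directory at path. If fsName is
	NULL, it is taken from the disk system the DDM recognizes on device.
	Returns the dev_t of the new mount on success, an error code otherwise.
*/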
7421 static dev_t
7422 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7423 	const char* args, bool kernel)
7424 {
7425 	struct ::fs_mount* mount;
7426 	status_t status = B_OK;
7427 	fs_volume* volume = NULL;
7428 	int32 layer = 0;
7429 	Vnode* coveredNode = NULL;
7430 
7431 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7432 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7433 
	// The path is always safe; we just have to make sure that fsName is
	// at least superficially valid - we can't make any assumptions about
	// args, though.
	// A NULL fsName is OK if a device was given and the FS is not virtual.
7437 	// We'll get it from the DDM later.
7438 	if (fsName == NULL) {
7439 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7440 			return B_BAD_VALUE;
7441 	} else if (fsName[0] == '\0')
7442 		return B_BAD_VALUE;
7443 
7444 	RecursiveLocker mountOpLocker(sMountOpLock);
7445 
7446 	// Helper to delete a newly created file device on failure.
7447 	// Not exactly beautiful, but helps to keep the code below cleaner.
7448 	struct FileDeviceDeleter {
7449 		FileDeviceDeleter() : id(-1) {}
7450 		~FileDeviceDeleter()
7451 		{
7452 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7453 		}
7454 
7455 		partition_id id;
7456 	} fileDeviceDeleter;
7457 
7458 	// If the file system is not a "virtual" one, the device argument should
7459 	// point to a real file/device (if given at all).
7460 	// get the partition
7461 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7462 	KPartition* partition = NULL;
7463 	KPath normalizedDevice;
7464 	bool newlyCreatedFileDevice = false;
7465 
7466 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7467 		// normalize the device path
7468 		status = normalizedDevice.SetTo(device, true);
7469 		if (status != B_OK)
7470 			return status;
7471 
7472 		// get a corresponding partition from the DDM
7473 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7474 		if (partition == NULL) {
7475 			// Partition not found: This either means, the user supplied
7476 			// an invalid path, or the path refers to an image file. We try
7477 			// to let the DDM create a file device for the path.
7478 			partition_id deviceID = ddm->CreateFileDevice(
7479 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7480 			if (deviceID >= 0) {
7481 				partition = ddm->RegisterPartition(deviceID);
7482 				if (newlyCreatedFileDevice)
7483 					fileDeviceDeleter.id = deviceID;
7484 			}
7485 		}
7486 
7487 		if (!partition) {
7488 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7489 				normalizedDevice.Path()));
7490 			return B_ENTRY_NOT_FOUND;
7491 		}
7492 
7493 		device = normalizedDevice.Path();
7494 			// correct path to file device
7495 	}
7496 	PartitionRegistrar partitionRegistrar(partition, true);
7497 
7498 	// Write lock the partition's device. For the time being, we keep the lock
	// until we're done mounting -- not nice, but it ensures that no-one
	// interferes.
7501 	// TODO: Just mark the partition busy while mounting!
7502 	KDiskDevice* diskDevice = NULL;
7503 	if (partition) {
7504 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7505 		if (!diskDevice) {
7506 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7507 			return B_ERROR;
7508 		}
7509 	}
7510 
7511 	DeviceWriteLocker writeLocker(diskDevice, true);
7512 		// this takes over the write lock acquired before
7513 
7514 	if (partition != NULL) {
		// make sure that the partition is not busy
7516 		if (partition->IsBusy()) {
7517 			TRACE(("fs_mount(): Partition is busy.\n"));
7518 			return B_BUSY;
7519 		}
7520 
7521 		// if no FS name had been supplied, we get it from the partition
7522 		if (fsName == NULL) {
7523 			KDiskSystem* diskSystem = partition->DiskSystem();
7524 			if (!diskSystem) {
7525 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7526 					"recognize it.\n"));
7527 				return B_BAD_VALUE;
7528 			}
7529 
7530 			if (!diskSystem->IsFileSystem()) {
7531 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7532 					"partitioning system.\n"));
7533 				return B_BAD_VALUE;
7534 			}
7535 
7536 			// The disk system name will not change, and the KDiskSystem
7537 			// object will not go away while the disk device is locked (and
7538 			// the partition has a reference to it), so this is safe.
7539 			fsName = diskSystem->Name();
7540 		}
7541 	}
7542 
7543 	mount = new(std::nothrow) (struct ::fs_mount);
7544 	if (mount == NULL)
7545 		return B_NO_MEMORY;
7546 
7547 	mount->device_name = strdup(device);
7548 		// "device" can be NULL
7549 
7550 	status = mount->entry_cache.Init();
7551 	if (status != B_OK)
7552 		goto err1;
7553 
7554 	// initialize structure
7555 	mount->id = sNextMountID++;
7556 	mount->partition = NULL;
7557 	mount->root_vnode = NULL;
7558 	mount->covers_vnode = NULL;
7559 	mount->unmounting = false;
7560 	mount->owns_file_device = false;
7561 	mount->volume = NULL;
7562 
7563 	// build up the volume(s)
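	// One fs_volume is created per file system layer (see
	// get_file_system_name_for_layer()); the volumes are chained through
	// their sub_volume/super_volume links, and mount->volume ends up
	// pointing at the top-most layer.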
7564 	while (true) {
7565 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7566 		if (layerFSName == NULL) {
7567 			if (layer == 0) {
7568 				status = B_NO_MEMORY;
7569 				goto err1;
7570 			}
7571 
7572 			break;
7573 		}
7574 		MemoryDeleter layerFSNameDeleter(layerFSName);
7575 
7576 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7577 		if (volume == NULL) {
7578 			status = B_NO_MEMORY;
7579 			goto err1;
7580 		}
7581 
7582 		volume->id = mount->id;
7583 		volume->partition = partition != NULL ? partition->ID() : -1;
7584 		volume->layer = layer++;
7585 		volume->private_volume = NULL;
7586 		volume->ops = NULL;
7587 		volume->sub_volume = NULL;
7588 		volume->super_volume = NULL;
7589 		volume->file_system = NULL;
7590 		volume->file_system_name = NULL;
7591 
7592 		volume->file_system_name = get_file_system_name(layerFSName);
7593 		if (volume->file_system_name == NULL) {
7594 			status = B_NO_MEMORY;
7595 			free(volume);
7596 			goto err1;
7597 		}
7598 
7599 		volume->file_system = get_file_system(layerFSName);
7600 		if (volume->file_system == NULL) {
7601 			status = B_DEVICE_NOT_FOUND;
7602 			free(volume->file_system_name);
7603 			free(volume);
7604 			goto err1;
7605 		}
7606 
7607 		if (mount->volume == NULL)
7608 			mount->volume = volume;
7609 		else {
7610 			volume->super_volume = mount->volume;
7611 			mount->volume->sub_volume = volume;
7612 			mount->volume = volume;
7613 		}
7614 	}
7615 
7616 	// insert mount struct into list before we call FS's mount() function
7617 	// so that vnodes can be created for this mount
7618 	rw_lock_write_lock(&sMountLock);
7619 	sMountsTable->Insert(mount);
7620 	rw_lock_write_unlock(&sMountLock);
7621 
7622 	ino_t rootID;
7623 
7624 	if (!sRoot) {
7625 		// we haven't mounted anything yet
7626 		if (strcmp(path, "/") != 0) {
7627 			status = B_ERROR;
7628 			goto err2;
7629 		}
7630 
7631 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7632 			args, &rootID);
7633 		if (status != B_OK || mount->volume->ops == NULL)
7634 			goto err2;
7635 	} else {
7636 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7637 		if (status != B_OK)
7638 			goto err2;
7639 
7640 		mount->covers_vnode = coveredNode;
7641 
7642 		// make sure coveredNode is a directory
7643 		if (!S_ISDIR(coveredNode->Type())) {
7644 			status = B_NOT_A_DIRECTORY;
7645 			goto err3;
7646 		}
7647 
7648 		if (coveredNode->IsCovered()) {
7649 			// this is already a covered vnode
7650 			status = B_BUSY;
7651 			goto err3;
7652 		}
7653 
7654 		// mount it/them
7655 		fs_volume* volume = mount->volume;
7656 		while (volume) {
7657 			status = volume->file_system->mount(volume, device, flags, args,
7658 				&rootID);
7659 			if (status != B_OK || volume->ops == NULL) {
7660 				if (status == B_OK && volume->ops == NULL)
7661 					panic("fs_mount: mount() succeeded but ops is NULL!");
7662 				if (volume->sub_volume)
7663 					goto err4;
7664 				goto err3;
7665 			}
7666 
7667 			volume = volume->super_volume;
7668 		}
7669 
7670 		volume = mount->volume;
7671 		while (volume) {
7672 			if (volume->ops->all_layers_mounted != NULL)
7673 				volume->ops->all_layers_mounted(volume);
7674 			volume = volume->super_volume;
7675 		}
7676 	}
7677 
7678 	// the root node is supposed to be owned by the file system - it must
7679 	// exist at this point
7680 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7681 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7682 		panic("fs_mount: file system does not own its root node!\n");
7683 		status = B_ERROR;
7684 		goto err4;
7685 	}
7686 
7687 	// set up the links between the root vnode and the vnode it covers
7688 	rw_lock_write_lock(&sVnodeLock);
7689 	if (coveredNode != NULL) {
7690 		if (coveredNode->IsCovered()) {
7691 			// the vnode is covered now
7692 			status = B_BUSY;
7693 			rw_lock_write_unlock(&sVnodeLock);
7694 			goto err4;
7695 		}
7696 
7697 		mount->root_vnode->covers = coveredNode;
7698 		mount->root_vnode->SetCovering(true);
7699 
7700 		coveredNode->covered_by = mount->root_vnode;
7701 		coveredNode->SetCovered(true);
7702 	}
7703 	rw_lock_write_unlock(&sVnodeLock);
7704 
7705 	if (!sRoot) {
7706 		sRoot = mount->root_vnode;
7707 		mutex_lock(&sIOContextRootLock);
7708 		get_current_io_context(true)->root = sRoot;
7709 		mutex_unlock(&sIOContextRootLock);
7710 		inc_vnode_ref_count(sRoot);
7711 	}
7712 
7713 	// supply the partition (if any) with the mount cookie and mark it mounted
7714 	if (partition) {
7715 		partition->SetMountCookie(mount->volume->private_volume);
7716 		partition->SetVolumeID(mount->id);
7717 
7718 		// keep a partition reference as long as the partition is mounted
7719 		partitionRegistrar.Detach();
7720 		mount->partition = partition;
7721 		mount->owns_file_device = newlyCreatedFileDevice;
7722 		fileDeviceDeleter.id = -1;
7723 	}
7724 
7725 	notify_mount(mount->id,
7726 		coveredNode != NULL ? coveredNode->device : -1,
7727 		coveredNode ? coveredNode->id : -1);
7728 
7729 	return mount->id;
7730 
7731 err4:
7732 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7733 err3:
7734 	if (coveredNode != NULL)
7735 		put_vnode(coveredNode);
7736 err2:
7737 	rw_lock_write_lock(&sMountLock);
7738 	sMountsTable->Remove(mount);
7739 	rw_lock_write_unlock(&sMountLock);
7740 err1:
7741 	delete mount;
7742 
7743 	return status;
7744 }
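
/* Editor's note on the error paths above: the labels unwind in reverse order
 * of construction -- err4 asks the file system to unmount again, err3 puts
 * the covered vnode, err2 removes the mount from sMountsTable, and err1
 * deletes the mount structure itself.
 */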
7745 
7746 
7747 static status_t
7748 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7749 {
7750 	struct fs_mount* mount;
7751 	status_t err;
7752 
7753 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7754 		mountID, kernel));
7755 
7756 	struct vnode* pathVnode = NULL;
7757 	if (path != NULL) {
7758 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7759 		if (err != B_OK)
7760 			return B_ENTRY_NOT_FOUND;
7761 	}
7762 
7763 	RecursiveLocker mountOpLocker(sMountOpLock);
7764 	ReadLocker mountLocker(sMountLock);
7765 
7766 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7767 	if (mount == NULL) {
7768 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7769 			pathVnode);
7770 	}
7771 
7772 	mountLocker.Unlock();
7773 
7774 	if (path != NULL) {
7775 		put_vnode(pathVnode);
7776 
7777 		if (mount->root_vnode != pathVnode) {
7778 			// the path is not the mount point
7779 			return B_BAD_VALUE;
7780 		}
7781 	}
7782 
7783 	// if the volume is associated with a partition, lock the device of the
7784 	// partition as long as we are unmounting
7785 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7786 	KPartition* partition = mount->partition;
7787 	KDiskDevice* diskDevice = NULL;
7788 	if (partition != NULL) {
7789 		if (partition->Device() == NULL) {
7790 			dprintf("fs_unmount(): There is no device!\n");
7791 			return B_ERROR;
7792 		}
7793 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7794 		if (!diskDevice) {
7795 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7796 			return B_ERROR;
7797 		}
7798 	}
7799 	DeviceWriteLocker writeLocker(diskDevice, true);
7800 
7801 	// make sure that the partition is not busy
7802 	if (partition != NULL) {
7803 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7804 			TRACE(("fs_unmount(): Partition is busy.\n"));
7805 			return B_BUSY;
7806 		}
7807 	}
7808 
7809 	// grab the vnode lock for writing to keep anyone from creating
7810 	// a vnode while we're figuring out if we can continue
7811 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7812 
7813 	bool disconnectedDescriptors = false;
7814 
7815 	while (true) {
7816 		bool busy = false;
7817 
7818 		// cycle through the list of vnodes associated with this mount and
7819 		// make sure none of them is busy or still referenced
7820 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7821 		while (struct vnode* vnode = iterator.Next()) {
7822 			if (vnode->IsBusy()) {
7823 				busy = true;
7824 				break;
7825 			}
7826 
7827 			// check the vnode's ref count -- subtract additional references for
7828 			// covering
7829 			int32 refCount = vnode->ref_count;
7830 			if (vnode->covers != NULL)
7831 				refCount--;
7832 			if (vnode->covered_by != NULL)
7833 				refCount--;
7834 
7835 			if (refCount != 0) {
7836 				// there are still vnodes in use on this mount, so we cannot
7837 				// unmount yet
7838 				busy = true;
7839 				break;
7840 			}
7841 		}
7842 
7843 		if (!busy)
7844 			break;
7845 
7846 		if ((flags & B_FORCE_UNMOUNT) == 0)
7847 			return B_BUSY;
7848 
7849 		if (disconnectedDescriptors) {
7850 			// wait a bit until the last access is finished, and then try again
7851 			vnodesWriteLocker.Unlock();
7852 			snooze(100000);
7853 			// TODO: if there is some kind of bug that prevents the ref counts
7854 			// from getting back to zero, this will fall into an endless loop...
7855 			vnodesWriteLocker.Lock();
7856 			continue;
7857 		}
7858 
7859 		// the file system is still busy - but we're forced to unmount it,
7860 		// so let's disconnect all open file descriptors
7861 
7862 		mount->unmounting = true;
7863 			// prevent new vnodes from being created
7864 
7865 		vnodesWriteLocker.Unlock();
7866 
7867 		disconnect_mount_or_vnode_fds(mount, NULL);
7868 		disconnectedDescriptors = true;
7869 
7870 		vnodesWriteLocker.Lock();
7871 	}
7872 
7873 	// We can safely continue. Mark all of the vnodes busy and put this
7874 	// mount structure into unmounting state. Also undo the vnode
7875 	// covers/covered_by links.
7876 	mount->unmounting = true;
7877 
7878 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7879 	while (struct vnode* vnode = iterator.Next()) {
7880 		// Remove all covers/covered_by links from other mounts' nodes to this
7881 		// vnode and adjust the node ref count accordingly. We will release the
7882 		// references to the external vnodes below.
7883 		if (Vnode* coveredNode = vnode->covers) {
7884 			if (Vnode* coveringNode = vnode->covered_by) {
7885 				// We have both covered and covering vnodes, so just remove us
7886 				// from the chain.
7887 				coveredNode->covered_by = coveringNode;
7888 				coveringNode->covers = coveredNode;
7889 				vnode->ref_count -= 2;
7890 
7891 				vnode->covered_by = NULL;
7892 				vnode->covers = NULL;
7893 				vnode->SetCovering(false);
7894 				vnode->SetCovered(false);
7895 			} else {
7896 				// We only have a covered vnode. Remove its link to us.
7897 				coveredNode->covered_by = NULL;
7898 				coveredNode->SetCovered(false);
7899 				vnode->ref_count--;
7900 
7901 				// If the other node is an external vnode, we keep its link
7902 				// around so we can put the reference later on. Otherwise we
7903 				// get rid of it right now.
7904 				if (coveredNode->mount == mount) {
7905 					vnode->covers = NULL;
7906 					coveredNode->ref_count--;
7907 				}
7908 			}
7909 		} else if (Vnode* coveringNode = vnode->covered_by) {
7910 			// We only have a covering vnode. Remove its link to us.
7911 			coveringNode->covers = NULL;
7912 			coveringNode->SetCovering(false);
7913 			vnode->ref_count--;
7914 
7915 			// If the other node is an external vnode, we keep its link
7916 			// around so we can put the reference later on. Otherwise we
7917 			// get rid of it right now.
7918 			if (coveringNode->mount == mount) {
7919 				vnode->covered_by = NULL;
7920 				coveringNode->ref_count--;
7921 			}
7922 		}
7923 
7924 		vnode->SetBusy(true);
7925 		vnode_to_be_freed(vnode);
7926 	}
7927 
7928 	vnodesWriteLocker.Unlock();
7929 
7930 	// Free all vnodes associated with this mount.
7931 	// They will be removed from the mount list by free_vnode(), so
7932 	// we don't have to do that ourselves.
7933 	while (struct vnode* vnode = mount->vnodes.Head()) {
7934 		// Put the references to external covered/covering vnodes we kept above.
7935 		if (Vnode* coveredNode = vnode->covers)
7936 			put_vnode(coveredNode);
7937 		if (Vnode* coveringNode = vnode->covered_by)
7938 			put_vnode(coveringNode);
7939 
7940 		free_vnode(vnode, false);
7941 	}
7942 
7943 	// remove the mount structure from the hash table
7944 	rw_lock_write_lock(&sMountLock);
7945 	sMountsTable->Remove(mount);
7946 	rw_lock_write_unlock(&sMountLock);
7947 
7948 	mountOpLocker.Unlock();
7949 
7950 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7951 	notify_unmount(mount->id);
7952 
7953 	// dereference the partition and mark it unmounted
7954 	if (partition) {
7955 		partition->SetVolumeID(-1);
7956 		partition->SetMountCookie(NULL);
7957 
7958 		if (mount->owns_file_device)
7959 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7960 		partition->Unregister();
7961 	}
7962 
7963 	delete mount;
7964 	return B_OK;
7965 }
7966 
7967 
7968 static status_t
7969 fs_sync(dev_t device)
7970 {
7971 	struct fs_mount* mount;
7972 	status_t status = get_mount(device, &mount);
7973 	if (status != B_OK)
7974 		return status;
7975 
7976 	struct vnode marker;
7977 	memset(&marker, 0, sizeof(marker));
7978 	marker.SetBusy(true);
7979 	marker.SetRemoved(true);
7980 
7981 	// First, synchronize all file caches
7982 
7983 	while (true) {
7984 		WriteLocker locker(sVnodeLock);
7985 			// Note: That's the easy way, which is probably OK for sync(),
7986 			// since it's a relatively rare call and doesn't need to allow for
7987 			// a lot of concurrency. Using a read lock would be possible, but
7988 			// also more involved, since we would have to lock the individual
7989 			// nodes and take care of the locking order, which we might not
7990 			// want to do while holding fs_mount::lock.
7991 
7992 		// synchronize access to vnode list
7993 		mutex_lock(&mount->lock);
7994 
7995 		struct vnode* vnode;
7996 		if (!marker.IsRemoved()) {
7997 			vnode = mount->vnodes.GetNext(&marker);
7998 			mount->vnodes.Remove(&marker);
7999 			marker.SetRemoved(true);
8000 		} else
8001 			vnode = mount->vnodes.First();
8002 
8003 		while (vnode != NULL && (vnode->cache == NULL
8004 			|| vnode->IsRemoved() || vnode->IsBusy())) {
8005 			// TODO: we could track writes (and writable mapped vnodes)
8006 			//	and have a simple flag that we could test for here
8007 			vnode = mount->vnodes.GetNext(vnode);
8008 		}
8009 
8010 		if (vnode != NULL) {
8011 			// insert marker vnode again
8012 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
8013 			marker.SetRemoved(false);
8014 		}
8015 
8016 		mutex_unlock(&mount->lock);
8017 
8018 		if (vnode == NULL)
8019 			break;
8020 
8021 		vnode = lookup_vnode(mount->id, vnode->id);
8022 		if (vnode == NULL || vnode->IsBusy())
8023 			continue;
8024 
8025 		if (vnode->ref_count == 0) {
8026 			// this vnode has been unused before
8027 			vnode_used(vnode);
8028 		}
8029 		inc_vnode_ref_count(vnode);
8030 
8031 		locker.Unlock();
8032 
8033 		if (vnode->cache != NULL && !vnode->IsRemoved())
8034 			vnode->cache->WriteModified();
8035 
8036 		put_vnode(vnode);
8037 	}
8038 
8039 	// Let the file systems do their synchronizing work
8040 	if (HAS_FS_MOUNT_CALL(mount, sync))
8041 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8042 
8043 	// Finally, flush the underlying device's write cache (if possible).
8044 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8045 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8046 
8047 	put_mount(mount);
8048 	return status;
8049 }
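
/* Editor's note on the marker technique above (illustrative): fs_sync() keeps
 * its position in mount->vnodes across lock drops by parking a stack-allocated
 * "marker" vnode in the list:
 *
 *	mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
 *	// ... drop the locks, write back vnode->cache ...
 *	vnode = mount->vnodes.GetNext(&marker);	// resume behind the marker
 *	mount->vnodes.Remove(&marker);
 *
 * The marker is flagged busy and removed (see the setup at the top of the
 * function) so no other code mistakes it for a real vnode.
 */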
8050 
8051 
8052 static status_t
8053 fs_read_info(dev_t device, struct fs_info* info)
8054 {
8055 	struct fs_mount* mount;
8056 	status_t status = get_mount(device, &mount);
8057 	if (status != B_OK)
8058 		return status;
8059 
8060 	memset(info, 0, sizeof(struct fs_info));
8061 
8062 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8063 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8064 
8065 	// fill in info the file system doesn't (have to) know about
8066 	if (status == B_OK) {
8067 		info->dev = mount->id;
8068 		info->root = mount->root_vnode->id;
8069 
8070 		fs_volume* volume = mount->volume;
8071 		while (volume->super_volume != NULL)
8072 			volume = volume->super_volume;
8073 
8074 		strlcpy(info->fsh_name, volume->file_system_name,
8075 			sizeof(info->fsh_name));
8076 		if (mount->device_name != NULL) {
8077 			strlcpy(info->device_name, mount->device_name,
8078 				sizeof(info->device_name));
8079 		}
8080 	}
8081 
8082 	// even if the call is not supported by the file system, the caller
8083 	// still gets the parts that we filled in ourselves
8084 
8085 	put_mount(mount);
8086 	return status;
8087 }
8088 
8089 
8090 static status_t
8091 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8092 {
8093 	struct fs_mount* mount;
8094 	status_t status = get_mount(device, &mount);
8095 	if (status != B_OK)
8096 		return status;
8097 
8098 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8099 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8100 	else
8101 		status = B_READ_ONLY_DEVICE;
8102 
8103 	put_mount(mount);
8104 	return status;
8105 }
8106 
8107 
8108 static dev_t
8109 fs_next_device(int32* _cookie)
8110 {
8111 	struct fs_mount* mount = NULL;
8112 	dev_t device = *_cookie;
8113 
8114 	rw_lock_read_lock(&sMountLock);
8115 
8116 	// Since device IDs are assigned sequentially, this algorithm
8117 	// works well enough. It makes sure that the device list
8118 	// returned is sorted, and that no device is skipped when an
8119 	// already visited device is unmounted.
8120 
8121 	while (device < sNextMountID) {
8122 		mount = find_mount(device++);
8123 		if (mount != NULL && mount->volume->private_volume != NULL)
8124 			break;
8125 	}
8126 
8127 	*_cookie = device;
8128 
8129 	if (mount != NULL)
8130 		device = mount->id;
8131 	else
8132 		device = B_BAD_VALUE;
8133 
8134 	rw_lock_read_unlock(&sMountLock);
8135 
8136 	return device;
8137 }
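
/* Editor's sketch: the cookie protocol implemented above enumerates all
 * mounted volumes in ascending ID order, e.g. (kernel-side, illustrative):
 *
 *	int32 cookie = 0;
 *	dev_t device;
 *	while ((device = fs_next_device(&cookie)) >= 0) {
 *		// ... inspect 'device' ...
 *	}
 */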
8138 
8139 
8140 ssize_t
8141 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8142 	void *buffer, size_t readBytes)
8143 {
8144 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8145 	if (attrFD < 0)
8146 		return attrFD;
8147 
8148 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8149 
8150 	_kern_close(attrFD);
8151 
8152 	return bytesRead;
8153 }
8154 
8155 
8156 static status_t
8157 get_cwd(char* buffer, size_t size, bool kernel)
8158 {
8159 	// Get current working directory from io context
8160 	struct io_context* context = get_current_io_context(kernel);
8161 	status_t status;
8162 
8163 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8164 
8165 	mutex_lock(&context->io_mutex);
8166 
8167 	struct vnode* vnode = context->cwd;
8168 	if (vnode)
8169 		inc_vnode_ref_count(vnode);
8170 
8171 	mutex_unlock(&context->io_mutex);
8172 
8173 	if (vnode) {
8174 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8175 		put_vnode(vnode);
8176 	} else
8177 		status = B_ERROR;
8178 
8179 	return status;
8180 }
8181 
8182 
8183 static status_t
8184 set_cwd(int fd, char* path, bool kernel)
8185 {
8186 	struct io_context* context;
8187 	struct vnode* vnode = NULL;
8188 	struct vnode* oldDirectory;
8189 	status_t status;
8190 
8191 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8192 
8193 	// Get vnode for passed path, and bail if it failed
8194 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8195 	if (status < 0)
8196 		return status;
8197 
8198 	if (!S_ISDIR(vnode->Type())) {
8199 		// nope, can't cwd to here
8200 		status = B_NOT_A_DIRECTORY;
8201 		goto err;
8202 	}
8203 
8204 	// We need to have the permission to enter the directory, too
8205 	if (HAS_FS_CALL(vnode, access)) {
8206 		status = FS_CALL(vnode, access, X_OK);
8207 		if (status != B_OK)
8208 			goto err;
8209 	}
8210 
8211 	// Get current io context and lock
8212 	context = get_current_io_context(kernel);
8213 	mutex_lock(&context->io_mutex);
8214 
8215 	// save the old current working directory first
8216 	oldDirectory = context->cwd;
8217 	context->cwd = vnode;
8218 
8219 	mutex_unlock(&context->io_mutex);
8220 
8221 	if (oldDirectory)
8222 		put_vnode(oldDirectory);
8223 
8224 	return B_NO_ERROR;
8225 
8226 err:
8227 	put_vnode(vnode);
8228 	return status;
8229 }
8230 
8231 
8232 static status_t
8233 user_copy_name(char* to, const char* from, size_t length)
8234 {
8235 	ssize_t len = user_strlcpy(to, from, length);
8236 	if (len < 0)
8237 		return len;
8238 	if (len >= (ssize_t)length)
8239 		return B_NAME_TOO_LONG;
8240 	return B_OK;
8241 }
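
/* Editor's note: user_copy_name() is the common helper used below for
 * copying NUL-terminated names from userland; unlike a bare user_strlcpy()
 * it turns silent truncation into an error, e.g.:
 *
 *	char name[B_FILE_NAME_LENGTH];
 *	status_t status = user_copy_name(name, userName, sizeof(name));
 *	if (status != B_OK)
 *		return status;	// B_BAD_ADDRESS or B_NAME_TOO_LONG
 */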
8242 
8243 
8244 //	#pragma mark - kernel mirrored syscalls
8245 
8246 
8247 dev_t
8248 _kern_mount(const char* path, const char* device, const char* fsName,
8249 	uint32 flags, const char* args, size_t argsLength)
8250 {
8251 	KPath pathBuffer(path);
8252 	if (pathBuffer.InitCheck() != B_OK)
8253 		return B_NO_MEMORY;
8254 
8255 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8256 }
8257 
8258 
8259 status_t
8260 _kern_unmount(const char* path, uint32 flags)
8261 {
8262 	KPath pathBuffer(path);
8263 	if (pathBuffer.InitCheck() != B_OK)
8264 		return B_NO_MEMORY;
8265 
8266 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8267 }
8268 
8269 
8270 status_t
8271 _kern_read_fs_info(dev_t device, struct fs_info* info)
8272 {
8273 	if (info == NULL)
8274 		return B_BAD_VALUE;
8275 
8276 	return fs_read_info(device, info);
8277 }
8278 
8279 
8280 status_t
8281 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8282 {
8283 	if (info == NULL)
8284 		return B_BAD_VALUE;
8285 
8286 	return fs_write_info(device, info, mask);
8287 }
8288 
8289 
8290 status_t
8291 _kern_sync(void)
8292 {
8293 	// Note: _kern_sync() is also called from _user_sync()
8294 	int32 cookie = 0;
8295 	dev_t device;
8296 	while ((device = next_dev(&cookie)) >= 0) {
8297 		status_t status = fs_sync(device);
8298 		if (status != B_OK && status != B_BAD_VALUE) {
8299 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8300 				strerror(status));
8301 		}
8302 	}
8303 
8304 	return B_OK;
8305 }
8306 
8307 
8308 dev_t
8309 _kern_next_device(int32* _cookie)
8310 {
8311 	return fs_next_device(_cookie);
8312 }
8313 
8314 
8315 status_t
8316 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8317 	size_t infoSize)
8318 {
8319 	if (infoSize != sizeof(fd_info))
8320 		return B_BAD_VALUE;
8321 
8322 	// get the team
8323 	Team* team = Team::Get(teamID);
8324 	if (team == NULL)
8325 		return B_BAD_TEAM_ID;
8326 	BReference<Team> teamReference(team, true);
8327 
8328 	// now that we have a team reference, its I/O context won't go away
8329 	io_context* context = team->io_context;
8330 	MutexLocker contextLocker(context->io_mutex);
8331 
8332 	uint32 slot = *_cookie;
8333 
8334 	struct file_descriptor* descriptor;
8335 	while (slot < context->table_size
8336 		&& (descriptor = context->fds[slot]) == NULL) {
8337 		slot++;
8338 	}
8339 
8340 	if (slot >= context->table_size)
8341 		return B_ENTRY_NOT_FOUND;
8342 
8343 	info->number = slot;
8344 	info->open_mode = descriptor->open_mode;
8345 
8346 	struct vnode* vnode = fd_vnode(descriptor);
8347 	if (vnode != NULL) {
8348 		info->device = vnode->device;
8349 		info->node = vnode->id;
8350 	} else if (descriptor->u.mount != NULL) {
8351 		info->device = descriptor->u.mount->id;
8352 		info->node = -1;
8353 	}
8354 
8355 	*_cookie = slot + 1;
8356 	return B_OK;
8357 }
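
/* Editor's sketch: enumerating a team's open FDs with the cookie protocol
 * above ('team' is a hypothetical team_id):
 *
 *	uint32 cookie = 0;
 *	fd_info info;
 *	while (_kern_get_next_fd_info(team, &cookie, &info, sizeof(info))
 *			== B_OK) {
 *		// info.number is the FD slot; info.device/info.node identify the
 *		// vnode (info.node is -1 for mount FDs)
 *	}
 */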
8358 
8359 
8360 int
8361 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8362 	int perms)
8363 {
8364 	if ((openMode & O_CREAT) != 0) {
8365 		return file_create_entry_ref(device, inode, name, openMode, perms,
8366 			true);
8367 	}
8368 
8369 	return file_open_entry_ref(device, inode, name, openMode, true);
8370 }
8371 
8372 
8373 /*!	\brief Opens a node specified by a FD + path pair.
8374 
8375 	At least one of \a fd and \a path must be specified.
8376 	If only \a fd is given, the function opens the node identified by this
8377 	FD. If only a path is given, this path is opened. If both are given and
8378 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8379 	of the directory (!) identified by \a fd.
8380 
8381 	\param fd The FD. May be < 0.
8382 	\param path The absolute or relative path. May be \c NULL.
8383 	\param openMode The open mode.
8384 	\return A FD referring to the newly opened node, or an error code,
8385 			if an error occurs.
8386 */
8387 int
8388 _kern_open(int fd, const char* path, int openMode, int perms)
8389 {
8390 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8391 	if (pathBuffer.InitCheck() != B_OK)
8392 		return B_NO_MEMORY;
8393 
8394 	if ((openMode & O_CREAT) != 0)
8395 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8396 
8397 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8398 }
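
/* Editor's sketch of the FD + path rules documented above (paths are
 * hypothetical):
 *
 *	// absolute path: the fd argument is ignored
 *	int fd = _kern_open(-1, "/boot/home/Desktop/notes", O_RDWR, 0);
 *
 *	// relative path: reckoned off of the directory FD
 *	int dirFD = _kern_open_dir(-1, "/boot/home");
 *	int fd2 = _kern_open(dirFD, "Desktop/notes", O_RDONLY, 0);
 */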
8399 
8400 
8401 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8402 
8403 	The supplied name may be \c NULL, in which case directory identified
8404 	by \a device and \a inode will be opened. Otherwise \a device and
8405 	\a inode identify the parent directory of the directory to be opened
8406 	and \a name its entry name.
8407 
8408 	\param device If \a name is specified the ID of the device the parent
8409 		   directory of the directory to be opened resides on, otherwise
8410 		   the device of the directory itself.
8411 	\param inode If \a name is specified the node ID of the parent
8412 		   directory of the directory to be opened, otherwise node ID of the
8413 		   directory of the directory to be opened, otherwise the node ID of the
8414 	\param name The entry name of the directory to be opened. If \c NULL,
8415 		   the \a device + \a inode pair identify the node to be opened.
8416 	\return The FD of the newly opened directory or an error code, if
8417 			something went wrong.
8418 */
8419 int
8420 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8421 {
8422 	return dir_open_entry_ref(device, inode, name, true);
8423 }
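
/* Editor's sketch of the two addressing modes documented above (IDs are
 * hypothetical):
 *
 *	// entry_ref: (device, parent directory inode, entry name)
 *	int fd = _kern_open_dir_entry_ref(device, parentID, "config");
 *
 *	// node_ref: name is NULL, (device, inode) identify the directory itself
 *	int fd2 = _kern_open_dir_entry_ref(device, dirID, NULL);
 */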
8424 
8425 
8426 /*!	\brief Opens a directory specified by a FD + path pair.
8427 
8428 	At least one of \a fd and \a path must be specified.
8429 	If only \a fd is given, the function opens the directory identified by this
8430 	FD. If only a path is given, this path is opened. If both are given and
8431 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8432 	of the directory (!) identified by \a fd.
8433 
8434 	\param fd The FD. May be < 0.
8435 	\param path The absolute or relative path. May be \c NULL.
8436 	\return A FD referring to the newly opened directory, or an error code,
8437 			if an error occurs.
8438 */
8439 int
8440 _kern_open_dir(int fd, const char* path)
8441 {
8442 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8443 	if (pathBuffer.InitCheck() != B_OK)
8444 		return B_NO_MEMORY;
8445 
8446 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8447 }
8448 
8449 
8450 status_t
8451 _kern_fcntl(int fd, int op, size_t argument)
8452 {
8453 	return common_fcntl(fd, op, argument, true);
8454 }
8455 
8456 
8457 status_t
8458 _kern_fsync(int fd)
8459 {
8460 	return common_sync(fd, true);
8461 }
8462 
8463 
8464 status_t
8465 _kern_lock_node(int fd)
8466 {
8467 	return common_lock_node(fd, true);
8468 }
8469 
8470 
8471 status_t
8472 _kern_unlock_node(int fd)
8473 {
8474 	return common_unlock_node(fd, true);
8475 }
8476 
8477 
8478 status_t
8479 _kern_preallocate(int fd, off_t offset, off_t length)
8480 {
8481 	return common_preallocate(fd, offset, length, true);
8482 }
8483 
8484 
8485 status_t
8486 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8487 	int perms)
8488 {
8489 	return dir_create_entry_ref(device, inode, name, perms, true);
8490 }
8491 
8492 
8493 /*!	\brief Creates a directory specified by a FD + path pair.
8494 
8495 	\a path must always be specified (it contains the name of the new directory
8496 	at least). If only a path is given, this path identifies the location at
8497 	which the directory shall be created. If both \a fd and \a path are given
8498 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8499 	of the directory (!) identified by \a fd.
8500 
8501 	\param fd The FD. May be < 0.
8502 	\param path The absolute or relative path. Must not be \c NULL.
8503 	\param perms The access permissions the new directory shall have.
8504 	\return \c B_OK, if the directory has been created successfully, another
8505 			error code otherwise.
8506 */
8507 status_t
8508 _kern_create_dir(int fd, const char* path, int perms)
8509 {
8510 	KPath pathBuffer(path, KPath::DEFAULT);
8511 	if (pathBuffer.InitCheck() != B_OK)
8512 		return B_NO_MEMORY;
8513 
8514 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8515 }
8516 
8517 
8518 status_t
8519 _kern_remove_dir(int fd, const char* path)
8520 {
8521 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8522 	if (pathBuffer.InitCheck() != B_OK)
8523 		return B_NO_MEMORY;
8524 
8525 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8526 }
8527 
8528 
8529 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8530 
8531 	At least one of \a fd and \a path must be specified.
8532 	If only \a fd is given, the symlink to be read is the node
8533 	identified by this FD. If only a path is given, this path identifies the
8534 	symlink to be read. If both are given and the path is absolute, \a fd is
8535 	ignored; a relative path is reckoned off of the directory (!) identified
8536 	by \a fd.
8537 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8538 	will still be updated to reflect the required buffer size.
8539 
8540 	\param fd The FD. May be < 0.
8541 	\param path The absolute or relative path. May be \c NULL.
8542 	\param buffer The buffer into which the contents of the symlink shall be
8543 		   written.
8544 	\param _bufferSize A pointer to the size of the supplied buffer.
8545 	\return \c B_OK on success, in which case \a _bufferSize is set to the
		   length of the link, or an appropriate error code otherwise.
8546 */
8547 status_t
8548 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8549 {
8550 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8551 	if (pathBuffer.InitCheck() != B_OK)
8552 		return B_NO_MEMORY;
8553 
8554 	return common_read_link(fd, pathBuffer.LockBuffer(),
8555 		buffer, _bufferSize, true);
8556 }
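
/* Editor's sketch: the \a _bufferSize update documented above enables the
 * usual two-call pattern (path and sizes hypothetical):
 *
 *	char small[4];
 *	size_t size = sizeof(small);
 *	status_t error = _kern_read_link(-1, "/tmp/link", small, &size);
 *	if (error == B_BUFFER_OVERFLOW) {
 *		// 'size' now holds the required length; retry with a larger buffer
 *	}
 */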
8557 
8558 
8559 /*!	\brief Creates a symlink specified by a FD + path pair.
8560 
8561 	\a path must always be specified (it contains the name of the new symlink
8562 	at least). If only a path is given, this path identifies the location at
8563 	which the symlink shall be created. If both \a fd and \a path are given and
8564 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8565 	of the directory (!) identified by \a fd.
8566 
8567 	\param fd The FD. May be < 0.
8568 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the symlink shall point to.
8569 	\param mode The access permissions the new symlink shall have.
8570 	\return \c B_OK, if the symlink has been created successfully, another
8571 			error code otherwise.
8572 */
8573 status_t
8574 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8575 {
8576 	KPath pathBuffer(path);
8577 	if (pathBuffer.InitCheck() != B_OK)
8578 		return B_NO_MEMORY;
8579 
8580 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8581 		toPath, mode, true);
8582 }
8583 
8584 
8585 status_t
8586 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8587 	bool traverseLeafLink)
8588 {
8589 	KPath pathBuffer(path);
8590 	KPath toPathBuffer(toPath);
8591 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8592 		return B_NO_MEMORY;
8593 
8594 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8595 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8596 }
8597 
8598 
8599 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8600 
8601 	\a path must always be specified (it contains at least the name of the entry
8602 	to be deleted). If only a path is given, this path identifies the entry
8603 	directly. If both \a fd and \a path are given and the path is absolute,
8604 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8605 	identified by \a fd.
8606 
8607 	\param fd The FD. May be < 0.
8608 	\param path The absolute or relative path. Must not be \c NULL.
8609 	\return \c B_OK, if the entry has been removed successfully, another
8610 			error code otherwise.
8611 */
8612 status_t
8613 _kern_unlink(int fd, const char* path)
8614 {
8615 	KPath pathBuffer(path);
8616 	if (pathBuffer.InitCheck() != B_OK)
8617 		return B_NO_MEMORY;
8618 
8619 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8620 }
8621 
8622 
8623 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8624 		   by another FD + path pair.
8625 
8626 	\a oldPath and \a newPath must always be specified (they contain at least
8627 	the name of the entry). If only a path is given, this path identifies the
8628 	entry directly. If both a FD and a path are given and the path is absolute,
8629 	the FD is ignored; a relative path is reckoned off of the directory (!)
8630 	identified by the respective FD.
8631 
8632 	\param oldFD The FD of the old location. May be < 0.
8633 	\param oldPath The absolute or relative path of the old location. Must not
8634 		   be \c NULL.
8635 	\param newFD The FD of the new location. May be < 0.
8636 	\param newPath The absolute or relative path of the new location. Must not
8637 		   be \c NULL.
8638 	\return \c B_OK, if the entry has been moved successfully, another
8639 			error code otherwise.
8640 */
8641 status_t
8642 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8643 {
8644 	KPath oldPathBuffer(oldPath);
8645 	KPath newPathBuffer(newPath);
8646 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8647 		return B_NO_MEMORY;
8648 
8649 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8650 		newFD, newPathBuffer.LockBuffer(), true);
8651 }
8652 
8653 
8654 status_t
8655 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8656 {
8657 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8658 	if (pathBuffer.InitCheck() != B_OK)
8659 		return B_NO_MEMORY;
8660 
8661 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8662 		true);
8663 }
8664 
8665 
8666 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8667 
8668 	If only \a fd is given, the stat operation associated with the type
8669 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8670 	given, this path identifies the entry for whose node to retrieve the
8671 	stat data. If both \a fd and \a path are given and the path is absolute,
8672 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8673 	identified by \a fd and specifies the entry whose stat data shall be
8674 	retrieved.
8675 
8676 	\param fd The FD. May be < 0.
8677 	\param path The absolute or relative path. May be \c NULL.
8678 	\param traverseLeafLink If \a path is given, \c true specifies that the
8679 		   function shall not stick to symlinks, but traverse them.
8680 	\param stat The buffer the stat data shall be written into.
8681 	\param statSize The size of the supplied stat buffer.
8682 	\return \c B_OK, if the stat data have been read successfully, another
8683 			error code otherwise.
8684 */
8685 status_t
8686 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8687 	struct stat* stat, size_t statSize)
8688 {
8689 	struct stat completeStat;
8690 	struct stat* originalStat = NULL;
8691 	status_t status;
8692 
8693 	if (statSize > sizeof(struct stat))
8694 		return B_BAD_VALUE;
8695 
8696 	// this supports different stat extensions
8697 	if (statSize < sizeof(struct stat)) {
8698 		originalStat = stat;
8699 		stat = &completeStat;
8700 	}
8701 
8702 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8703 
8704 	if (status == B_OK && originalStat != NULL)
8705 		memcpy(originalStat, stat, statSize);
8706 
8707 	return status;
8708 }
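
/* Editor's note on the statSize mechanism above: a caller compiled against a
 * smaller (older) struct stat passes its smaller size; the kernel fills a
 * complete struct stat and copies only the first statSize bytes back, so the
 * caller's prefix of the structure stays valid. _kern_write_stat() below
 * mirrors this by zero-filling the missing tail before dispatching.
 */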
8709 
8710 
8711 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8712 
8713 	If only \a fd is given, the stat operation associated with the type
8714 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8715 	given, this path identifies the entry for whose node to write the
8716 	stat data. If both \a fd and \a path are given and the path is absolute,
8717 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8718 	identified by \a fd and specifies the entry whose stat data shall be
8719 	written.
8720 
8721 	\param fd The FD. May be < 0.
8722 	\param path The absolute or relative path. May be \c NULL.
8723 	\param traverseLeafLink If \a path is given, \c true specifies that the
8724 		   function shall not stick to symlinks, but traverse them.
8725 	\param stat The buffer containing the stat data to be written.
8726 	\param statSize The size of the supplied stat buffer.
8727 	\param statMask A mask specifying which parts of the stat data shall be
8728 		   written.
8729 	\return \c B_OK, if the stat data have been written successfully,
8730 			another error code otherwise.
8731 */
8732 status_t
8733 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8734 	const struct stat* stat, size_t statSize, int statMask)
8735 {
8736 	struct stat completeStat;
8737 
8738 	if (statSize > sizeof(struct stat))
8739 		return B_BAD_VALUE;
8740 
8741 	// this supports different stat extensions
8742 	if (statSize < sizeof(struct stat)) {
8743 		memset((uint8*)&completeStat + statSize, 0,
8744 			sizeof(struct stat) - statSize);
8745 		memcpy(&completeStat, stat, statSize);
8746 		stat = &completeStat;
8747 	}
8748 
8749 	status_t status;
8750 
8751 	if (path != NULL) {
8752 		// path given: write the stat of the node referred to by (fd, path)
8753 		KPath pathBuffer(path);
8754 		if (pathBuffer.InitCheck() != B_OK)
8755 			return B_NO_MEMORY;
8756 
8757 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8758 			traverseLeafLink, stat, statMask, true);
8759 	} else {
8760 		// no path given: get the FD and use the FD operation
8761 		struct file_descriptor* descriptor
8762 			= get_fd(get_current_io_context(true), fd);
8763 		if (descriptor == NULL)
8764 			return B_FILE_ERROR;
8765 
8766 		if (descriptor->ops->fd_write_stat)
8767 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8768 		else
8769 			status = B_UNSUPPORTED;
8770 
8771 		put_fd(descriptor);
8772 	}
8773 
8774 	return status;
8775 }
8776 
8777 
8778 int
8779 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8780 {
8781 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8782 	if (pathBuffer.InitCheck() != B_OK)
8783 		return B_NO_MEMORY;
8784 
8785 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8786 }
8787 
8788 
8789 int
8790 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8791 	int openMode)
8792 {
8793 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8794 	if (pathBuffer.InitCheck() != B_OK)
8795 		return B_NO_MEMORY;
8796 
8797 	if ((openMode & O_CREAT) != 0) {
8798 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8799 			true);
8800 	}
8801 
8802 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8803 }
8804 
8805 
8806 status_t
8807 _kern_remove_attr(int fd, const char* name)
8808 {
8809 	return attr_remove(fd, name, true);
8810 }
8811 
8812 
8813 status_t
8814 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8815 	const char* toName)
8816 {
8817 	return attr_rename(fromFile, fromName, toFile, toName, true);
8818 }
8819 
8820 
8821 int
8822 _kern_open_index_dir(dev_t device)
8823 {
8824 	return index_dir_open(device, true);
8825 }
8826 
8827 
8828 status_t
8829 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8830 {
8831 	return index_create(device, name, type, flags, true);
8832 }
8833 
8834 
8835 status_t
8836 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8837 {
8838 	return index_name_read_stat(device, name, stat, true);
8839 }
8840 
8841 
8842 status_t
8843 _kern_remove_index(dev_t device, const char* name)
8844 {
8845 	return index_remove(device, name, true);
8846 }
8847 
8848 
8849 status_t
8850 _kern_getcwd(char* buffer, size_t size)
8851 {
8852 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8853 
8854 	// Call vfs to get current working directory
8855 	return get_cwd(buffer, size, true);
8856 }
8857 
8858 
8859 status_t
8860 _kern_setcwd(int fd, const char* path)
8861 {
8862 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8863 	if (pathBuffer.InitCheck() != B_OK)
8864 		return B_NO_MEMORY;
8865 
8866 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8867 }
8868 
8869 
8870 //	#pragma mark - userland syscalls
8871 
8872 
8873 dev_t
8874 _user_mount(const char* userPath, const char* userDevice,
8875 	const char* userFileSystem, uint32 flags, const char* userArgs,
8876 	size_t argsLength)
8877 {
8878 	char fileSystem[B_FILE_NAME_LENGTH];
8879 	KPath path, device;
8880 	char* args = NULL;
8881 	status_t status;
8882 
8883 	if (!IS_USER_ADDRESS(userPath))
8884 		return B_BAD_ADDRESS;
8885 
8886 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8887 		return B_NO_MEMORY;
8888 
8889 	status = user_copy_name(path.LockBuffer(), userPath,
8890 		B_PATH_NAME_LENGTH);
8891 	if (status != B_OK)
8892 		return status;
8893 	path.UnlockBuffer();
8894 
8895 	if (userFileSystem != NULL) {
8896 		if (!IS_USER_ADDRESS(userFileSystem))
8897 			return B_BAD_ADDRESS;
8898 
8899 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8900 		if (status != B_OK)
8901 			return status;
8902 	}
8903 
8904 	if (userDevice != NULL) {
8905 		if (!IS_USER_ADDRESS(userDevice))
8906 			return B_BAD_ADDRESS;
8907 
8908 		status = user_copy_name(device.LockBuffer(), userDevice,
8909 			B_PATH_NAME_LENGTH);
8910 		if (status != B_OK)
8911 			return status;
8912 		device.UnlockBuffer();
8913 	}
8914 
8915 	if (userArgs != NULL && argsLength > 0) {
8916 		if (!IS_USER_ADDRESS(userArgs))
8917 			return B_BAD_ADDRESS;
8918 
8919 		// this is a safety restriction
8920 		if (argsLength >= 65536)
8921 			return B_NAME_TOO_LONG;
8922 
8923 		args = (char*)malloc(argsLength + 1);
8924 		if (args == NULL)
8925 			return B_NO_MEMORY;
8926 
8927 		status = user_copy_name(args, userArgs, argsLength + 1);
8928 		if (status != B_OK) {
8929 			free(args);
8930 			return status;
8931 		}
8932 	}
8933 
8934 	status = fs_mount(path.LockBuffer(),
8935 		userDevice != NULL ? device.Path() : NULL,
8936 		userFileSystem ? fileSystem : NULL, flags, args, false);
8937 
8938 	free(args);
8939 	return status;
8940 }
8941 
8942 
8943 status_t
8944 _user_unmount(const char* userPath, uint32 flags)
8945 {
8946 	if (!IS_USER_ADDRESS(userPath))
8947 		return B_BAD_ADDRESS;
8948 
8949 	KPath pathBuffer;
8950 	if (pathBuffer.InitCheck() != B_OK)
8951 		return B_NO_MEMORY;
8952 
8953 	char* path = pathBuffer.LockBuffer();
8954 
8955 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8956 	if (status != B_OK)
8957 		return status;
8958 
8959 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8960 }
8961 
8962 
8963 status_t
8964 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8965 {
8966 	struct fs_info info;
8967 	status_t status;
8968 
8969 	if (userInfo == NULL)
8970 		return B_BAD_VALUE;
8971 
8972 	if (!IS_USER_ADDRESS(userInfo))
8973 		return B_BAD_ADDRESS;
8974 
8975 	status = fs_read_info(device, &info);
8976 	if (status != B_OK)
8977 		return status;
8978 
8979 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8980 		return B_BAD_ADDRESS;
8981 
8982 	return B_OK;
8983 }
8984 
8985 
8986 status_t
8987 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8988 {
8989 	struct fs_info info;
8990 
8991 	if (userInfo == NULL)
8992 		return B_BAD_VALUE;
8993 
8994 	if (!IS_USER_ADDRESS(userInfo)
8995 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8996 		return B_BAD_ADDRESS;
8997 
8998 	return fs_write_info(device, &info, mask);
8999 }
9000 
9001 
9002 dev_t
9003 _user_next_device(int32* _userCookie)
9004 {
9005 	int32 cookie;
9006 	dev_t device;
9007 
9008 	if (!IS_USER_ADDRESS(_userCookie)
9009 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
9010 		return B_BAD_ADDRESS;
9011 
9012 	device = fs_next_device(&cookie);
9013 
9014 	if (device >= B_OK) {
9015 		// update user cookie
9016 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
9017 			return B_BAD_ADDRESS;
9018 	}
9019 
9020 	return device;
9021 }
9022 
9023 
9024 status_t
9025 _user_sync(void)
9026 {
9027 	return _kern_sync();
9028 }
9029 
9030 
9031 status_t
9032 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
9033 	size_t infoSize)
9034 {
9035 	struct fd_info info;
9036 	uint32 cookie;
9037 
9038 	// only root can do this
9039 	if (geteuid() != 0)
9040 		return B_NOT_ALLOWED;
9041 
9042 	if (infoSize != sizeof(fd_info))
9043 		return B_BAD_VALUE;
9044 
9045 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9046 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9047 		return B_BAD_ADDRESS;
9048 
9049 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9050 	if (status != B_OK)
9051 		return status;
9052 
9053 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9054 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9055 		return B_BAD_ADDRESS;
9056 
9057 	return status;
9058 }
9059 
9060 
9061 status_t
9062 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9063 	char* userPath, size_t pathLength)
9064 {
9065 	if (!IS_USER_ADDRESS(userPath))
9066 		return B_BAD_ADDRESS;
9067 
9068 	KPath path;
9069 	if (path.InitCheck() != B_OK)
9070 		return B_NO_MEMORY;
9071 
9072 	// copy the leaf name onto the stack
9073 	char stackLeaf[B_FILE_NAME_LENGTH];
9074 	if (leaf != NULL) {
9075 		if (!IS_USER_ADDRESS(leaf))
9076 			return B_BAD_ADDRESS;
9077 
9078 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9079 		if (status != B_OK)
9080 			return status;
9081 
9082 		leaf = stackLeaf;
9083 	}
9084 
9085 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9086 		false, path.LockBuffer(), path.BufferSize());
9087 	if (status != B_OK)
9088 		return status;
9089 
9090 	path.UnlockBuffer();
9091 
9092 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9093 	if (length < 0)
9094 		return length;
9095 	if (length >= (int)pathLength)
9096 		return B_BUFFER_OVERFLOW;
9097 
9098 	return B_OK;
9099 }
9100 
9101 
9102 status_t
9103 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9104 {
9105 	if (userPath == NULL || buffer == NULL)
9106 		return B_BAD_VALUE;
9107 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9108 		return B_BAD_ADDRESS;
9109 
9110 	// copy path from userland
9111 	KPath pathBuffer;
9112 	if (pathBuffer.InitCheck() != B_OK)
9113 		return B_NO_MEMORY;
9114 	char* path = pathBuffer.LockBuffer();
9115 
9116 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9117 	if (status != B_OK)
9118 		return status;
9119 
9120 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9121 		false);
9122 	if (error != B_OK)
9123 		return error;
9124 
9125 	// copy back to userland
9126 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9127 	if (len < 0)
9128 		return len;
9129 	if (len >= B_PATH_NAME_LENGTH)
9130 		return B_BUFFER_OVERFLOW;
9131 
9132 	return B_OK;
9133 }
9134 
9135 
9136 int
9137 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9138 	int openMode, int perms)
9139 {
9140 	char name[B_FILE_NAME_LENGTH];
9141 
9142 	if (userName == NULL || device < 0 || inode < 0)
9143 		return B_BAD_VALUE;
9144 	if (!IS_USER_ADDRESS(userName))
9145 		return B_BAD_ADDRESS;
9146 	status_t status = user_copy_name(name, userName, sizeof(name));
9147 	if (status != B_OK)
9148 		return status;
9149 
9150 	if ((openMode & O_CREAT) != 0) {
9151 		return file_create_entry_ref(device, inode, name, openMode, perms,
9152 			false);
9153 	}
9154 
9155 	return file_open_entry_ref(device, inode, name, openMode, false);
9156 }
9157 
9158 
9159 int
9160 _user_open(int fd, const char* userPath, int openMode, int perms)
9161 {
9162 	KPath path;
9163 	if (path.InitCheck() != B_OK)
9164 		return B_NO_MEMORY;
9165 
9166 	char* buffer = path.LockBuffer();
9167 
9168 	if (!IS_USER_ADDRESS(userPath))
9169 		return B_BAD_ADDRESS;
9170 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9171 	if (status != B_OK)
9172 		return status;
9173 
9174 	if ((openMode & O_CREAT) != 0)
9175 		return file_create(fd, buffer, openMode, perms, false);
9176 
9177 	return file_open(fd, buffer, openMode, false);
9178 }
9179 
9180 
9181 int
9182 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9183 {
9184 	if (userName != NULL) {
9185 		char name[B_FILE_NAME_LENGTH];
9186 
9187 		if (!IS_USER_ADDRESS(userName))
9188 			return B_BAD_ADDRESS;
9189 		status_t status = user_copy_name(name, userName, sizeof(name));
9190 		if (status != B_OK)
9191 			return status;
9192 
9193 		return dir_open_entry_ref(device, inode, name, false);
9194 	}
9195 	return dir_open_entry_ref(device, inode, NULL, false);
9196 }
9197 
9198 
9199 int
9200 _user_open_dir(int fd, const char* userPath)
9201 {
9202 	if (userPath == NULL)
9203 		return dir_open(fd, NULL, false);
9204 
9205 	KPath path;
9206 	if (path.InitCheck() != B_OK)
9207 		return B_NO_MEMORY;
9208 
9209 	char* buffer = path.LockBuffer();
9210 
9211 	if (!IS_USER_ADDRESS(userPath))
9212 		return B_BAD_ADDRESS;
9213 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9214 	if (status != B_OK)
9215 		return status;
9216 
9217 	return dir_open(fd, buffer, false);
9218 }
9219 
9220 
9221 /*!	\brief Opens a directory's parent directory and returns the entry name
9222 		   of the former.
9223 
9224 	Aside from also returning the directory's entry name, this method is
9225 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9226 	equivalent if \a userName is \c NULL.
9227 
9228 	If a name buffer is supplied and the name does not fit the buffer, the
9229 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9230 
9231 	\param fd A FD referring to a directory.
9232 	\param userName Buffer the directory's entry name shall be written into.
9233 		   May be \c NULL.
9234 	\param nameLength Size of the name buffer.
9235 	\return The file descriptor of the opened parent directory, if everything
9236 			went fine, an error code otherwise.
9237 */
9238 int
9239 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9240 {
9241 	bool kernel = false;
9242 
9243 	if (userName && !IS_USER_ADDRESS(userName))
9244 		return B_BAD_ADDRESS;
9245 
9246 	// open the parent dir
9247 	int parentFD = dir_open(fd, (char*)"..", kernel);
9248 	if (parentFD < 0)
9249 		return parentFD;
9250 	FDCloser fdCloser(parentFD, kernel);
9251 
9252 	if (userName) {
9253 		// get the vnodes
9254 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9255 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9256 		VNodePutter parentVNodePutter(parentVNode);
9257 		VNodePutter dirVNodePutter(dirVNode);
9258 		if (!parentVNode || !dirVNode)
9259 			return B_FILE_ERROR;
9260 
9261 		// get the vnode name
9262 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9263 		struct dirent* buffer = (struct dirent*)_buffer;
9264 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9265 			sizeof(_buffer), get_current_io_context(false));
9266 		if (status != B_OK)
9267 			return status;
9268 
9269 		// copy the name to the userland buffer
9270 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9271 		if (len < 0)
9272 			return len;
9273 		if (len >= (int)nameLength)
9274 			return B_BUFFER_OVERFLOW;
9275 	}
9276 
9277 	return fdCloser.Detach();
9278 }
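
/* Editor's sketch: as seen from userland (through the syscall stub), this
 * walks one level up while retrieving the current directory's entry name
 * ('dirFD' is hypothetical):
 *
 *	char name[B_FILE_NAME_LENGTH];
 *	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
 *	// on success, parentFD refers to "..", and 'name' holds the entry name
 *	// of the directory dirFD refers to
 */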
9279 
9280 
9281 status_t
9282 _user_fcntl(int fd, int op, size_t argument)
9283 {
9284 	status_t status = common_fcntl(fd, op, argument, false);
9285 	if (op == F_SETLKW)
9286 		syscall_restart_handle_post(status);
9287 
9288 	return status;
9289 }
9290 
9291 
9292 status_t
9293 _user_fsync(int fd)
9294 {
9295 	return common_sync(fd, false);
9296 }
9297 
9298 
9299 status_t
9300 _user_flock(int fd, int operation)
9301 {
9302 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9303 
9304 	// Check if the operation is valid
9305 	switch (operation & ~LOCK_NB) {
9306 		case LOCK_UN:
9307 		case LOCK_SH:
9308 		case LOCK_EX:
9309 			break;
9310 
9311 		default:
9312 			return B_BAD_VALUE;
9313 	}
9314 
9315 	struct file_descriptor* descriptor;
9316 	struct vnode* vnode;
9317 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9318 	if (descriptor == NULL)
9319 		return B_FILE_ERROR;
9320 
9321 	if (descriptor->type != FDTYPE_FILE) {
9322 		put_fd(descriptor);
9323 		return B_BAD_VALUE;
9324 	}
9325 
9326 	struct flock flock;
9327 	flock.l_start = 0;
9328 	flock.l_len = OFF_MAX;
9329 	flock.l_whence = 0;
9330 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9331 
9332 	status_t status;
9333 	if ((operation & LOCK_UN) != 0) {
9334 		if (HAS_FS_CALL(vnode, release_lock))
9335 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9336 		else
9337 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9338 	} else {
9339 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9340 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9341 				(operation & LOCK_NB) == 0);
9342 		} else {
9343 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9344 				(operation & LOCK_NB) == 0);
9345 		}
9346 	}
9347 
9348 	syscall_restart_handle_post(status);
9349 
9350 	put_fd(descriptor);
9351 	return status;
9352 }
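
/* Editor's note: the code above emulates BSD flock() semantics with a single
 * advisory lock spanning the whole file (l_start = 0, l_len = OFF_MAX):
 * LOCK_SH maps to F_RDLCK, LOCK_EX to F_WRLCK, LOCK_UN releases the lock,
 * and LOCK_NB selects the non-blocking acquire path.
 */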
9353 
9354 
9355 status_t
9356 _user_lock_node(int fd)
9357 {
9358 	return common_lock_node(fd, false);
9359 }
9360 
9361 
9362 status_t
9363 _user_unlock_node(int fd)
9364 {
9365 	return common_unlock_node(fd, false);
9366 }
9367 
9368 
9369 status_t
9370 _user_preallocate(int fd, off_t offset, off_t length)
9371 {
9372 	return common_preallocate(fd, offset, length, false);
9373 }
9374 
9375 
9376 status_t
9377 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9378 	int perms)
9379 {
9380 	char name[B_FILE_NAME_LENGTH];
9381 	status_t status;
9382 
9383 	if (!IS_USER_ADDRESS(userName))
9384 		return B_BAD_ADDRESS;
9385 
9386 	status = user_copy_name(name, userName, sizeof(name));
9387 	if (status != B_OK)
9388 		return status;
9389 
9390 	return dir_create_entry_ref(device, inode, name, perms, false);
9391 }
9392 
9393 
9394 status_t
9395 _user_create_dir(int fd, const char* userPath, int perms)
9396 {
9397 	KPath pathBuffer;
9398 	if (pathBuffer.InitCheck() != B_OK)
9399 		return B_NO_MEMORY;
9400 
9401 	char* path = pathBuffer.LockBuffer();
9402 
9403 	if (!IS_USER_ADDRESS(userPath))
9404 		return B_BAD_ADDRESS;
9405 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9406 	if (status != B_OK)
9407 		return status;
9408 
9409 	return dir_create(fd, path, perms, false);
9410 }
9411 
9412 
9413 status_t
9414 _user_remove_dir(int fd, const char* userPath)
9415 {
9416 	KPath pathBuffer;
9417 	if (pathBuffer.InitCheck() != B_OK)
9418 		return B_NO_MEMORY;
9419 
9420 	char* path = pathBuffer.LockBuffer();
9421 
9422 	if (userPath != NULL) {
9423 		if (!IS_USER_ADDRESS(userPath))
9424 			return B_BAD_ADDRESS;
9425 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9426 		if (status != B_OK)
9427 			return status;
9428 	}
9429 
9430 	return dir_remove(fd, userPath ? path : NULL, false);
9431 }
9432 
9433 
9434 status_t
9435 _user_read_link(int fd, const char* userPath, char* userBuffer,
9436 	size_t* userBufferSize)
9437 {
9438 	KPath pathBuffer, linkBuffer;
9439 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9440 		return B_NO_MEMORY;
9441 
9442 	size_t bufferSize;
9443 
9444 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9445 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9446 		return B_BAD_ADDRESS;
9447 
9448 	char* path = pathBuffer.LockBuffer();
9449 	char* buffer = linkBuffer.LockBuffer();
9450 
9451 	if (userPath) {
9452 		if (!IS_USER_ADDRESS(userPath))
9453 			return B_BAD_ADDRESS;
9454 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9455 		if (status != B_OK)
9456 			return status;
9457 
9458 		if (bufferSize > B_PATH_NAME_LENGTH)
9459 			bufferSize = B_PATH_NAME_LENGTH;
9460 	}
9461 
9462 	size_t newBufferSize = bufferSize;
9463 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9464 		&newBufferSize, false);
9465 
9466 	// we also update the bufferSize in case of errors
9467 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9468 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9469 		return B_BAD_ADDRESS;
9470 
9471 	if (status != B_OK)
9472 		return status;
9473 
9474 	bufferSize = min_c(newBufferSize, bufferSize);
9475 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9476 		return B_BAD_ADDRESS;
9477 
9478 	return B_OK;
9479 }
9480 
9481 
9482 status_t
9483 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9484 	int mode)
9485 {
9486 	KPath pathBuffer;
9487 	KPath toPathBuffer;
9488 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9489 		return B_NO_MEMORY;
9490 
9491 	char* path = pathBuffer.LockBuffer();
9492 	char* toPath = toPathBuffer.LockBuffer();
9493 
9494 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9495 		return B_BAD_ADDRESS;
9496 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9497 	if (status != B_OK)
9498 		return status;
9499 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9500 	if (status != B_OK)
9501 		return status;
9502 
9503 	return common_create_symlink(fd, path, toPath, mode, false);
9504 }
9505 
9506 
9507 status_t
9508 _user_create_link(int pathFD, const char* userPath, int toFD,
9509 	const char* userToPath, bool traverseLeafLink)
9510 {
9511 	KPath pathBuffer;
9512 	KPath toPathBuffer;
9513 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9514 		return B_NO_MEMORY;
9515 
9516 	char* path = pathBuffer.LockBuffer();
9517 	char* toPath = toPathBuffer.LockBuffer();
9518 
9519 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9520 		return B_BAD_ADDRESS;
9521 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9522 	if (status != B_OK)
9523 		return status;
9524 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9525 	if (status != B_OK)
9526 		return status;
9527 
9528 	status = check_path(toPath);
9529 	if (status != B_OK)
9530 		return status;
9531 
9532 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9533 		false);
9534 }
9535 
9536 
9537 status_t
9538 _user_unlink(int fd, const char* userPath)
9539 {
9540 	KPath pathBuffer;
9541 	if (pathBuffer.InitCheck() != B_OK)
9542 		return B_NO_MEMORY;
9543 
9544 	char* path = pathBuffer.LockBuffer();
9545 
9546 	if (!IS_USER_ADDRESS(userPath))
9547 		return B_BAD_ADDRESS;
9548 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9549 	if (status != B_OK)
9550 		return status;
9551 
9552 	return common_unlink(fd, path, false);
9553 }
9554 
9555 
9556 status_t
9557 _user_rename(int oldFD, const char* userOldPath, int newFD,
9558 	const char* userNewPath)
9559 {
9560 	KPath oldPathBuffer;
9561 	KPath newPathBuffer;
9562 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9563 		return B_NO_MEMORY;
9564 
9565 	char* oldPath = oldPathBuffer.LockBuffer();
9566 	char* newPath = newPathBuffer.LockBuffer();
9567 
9568 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9569 		return B_BAD_ADDRESS;
9570 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9571 	if (status != B_OK)
9572 		return status;
9573 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9574 	if (status != B_OK)
9575 		return status;
9576 
9577 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9578 }
9579 
9580 
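/*!	Kernel counterpart of the _kern_create_fifo() syscall: resolves
	(fd, path) to the parent directory and asks the underlying file system
	to create a special node of type S_IFIFO there.

	Illustrative userland usage (sketch; the path is hypothetical):
	\code
	#include <stdio.h>
	#include <sys/stat.h>

	if (mkfifo("/boot/home/fifo", 0600) != 0)
		perror("mkfifo");
	\endcode
*/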
9581 status_t
9582 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9583 {
9584 	KPath pathBuffer;
9585 	if (pathBuffer.InitCheck() != B_OK)
9586 		return B_NO_MEMORY;
9587 
9588 	char* path = pathBuffer.LockBuffer();
9589 
9590 	if (!IS_USER_ADDRESS(userPath))
9591 		return B_BAD_ADDRESS;
9592 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9593 	if (status != B_OK)
9594 		return status;
9595 
9596 	// split into directory vnode and filename path
9597 	char filename[B_FILE_NAME_LENGTH];
9598 	struct vnode* dir;
9599 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9600 	if (status != B_OK)
9601 		return status;
9602 
9603 	VNodePutter _(dir);
9604 
9605 	// the underlying FS needs to support creating FIFOs
9606 	if (!HAS_FS_CALL(dir, create_special_node))
9607 		return B_UNSUPPORTED;
9608 
9609 	// create the entry -- the FIFO sub node is set up automatically
9610 	fs_vnode superVnode;
9611 	ino_t nodeID;
9612 	status = FS_CALL(dir, create_special_node, filename, NULL,
9613 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9614 
9615 	// create_special_node() acquired a reference for us that we don't need.
9616 	if (status == B_OK)
9617 		put_vnode(dir->mount->volume, nodeID);
9618 
9619 	return status;
9620 }
9621 
9622 
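/*!	Kernel counterpart of the _kern_create_pipe() syscall: creates an
	unnamed FIFO on the root file system and opens one read-only and one
	write-only FD on it, which are copied back to userland.

	Illustrative userland usage (sketch only): fds[0] is the read end,
	fds[1] the write end.
	\code
	#include <unistd.h>

	int fds[2];
	if (pipe(fds) == 0) {
		char byte = 'x';
		write(fds[1], &byte, 1);
		read(fds[0], &byte, 1);	// reads back 'x'
		close(fds[0]);
		close(fds[1]);
	}
	\endcode
*/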
9623 status_t
9624 _user_create_pipe(int* userFDs)
9625 {
9626 	// rootfs should support creating FIFOs, but let's be sure
9627 	if (!HAS_FS_CALL(sRoot, create_special_node))
9628 		return B_UNSUPPORTED;
9629 
9630 	// create the node -- the FIFO sub node is set up automatically
9631 	fs_vnode superVnode;
9632 	ino_t nodeID;
9633 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9634 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9635 	if (status != B_OK)
9636 		return status;
9637 
9638 	// We've got one reference to the node and need another one.
9639 	struct vnode* vnode;
9640 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9641 	if (status != B_OK) {
9642 		// that should not happen
9643 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9644 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9645 		return status;
9646 	}
9647 
9648 	// Everything looks good so far. Open two FDs, one for reading and one
9649 	// for writing.
9650 	int fds[2];
9651 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9652 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9653 
9654 	FDCloser closer0(fds[0], false);
9655 	FDCloser closer1(fds[1], false);
9656 
9657 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9658 
9659 	// copy FDs to userland
9660 	if (status == B_OK) {
9661 		if (!IS_USER_ADDRESS(userFDs)
9662 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9663 			status = B_BAD_ADDRESS;
9664 		}
9665 	}
9666 
9667 	// keep FDs, if everything went fine
9668 	if (status == B_OK) {
9669 		closer0.Detach();
9670 		closer1.Detach();
9671 	}
9672 
9673 	return status;
9674 }
9675 
9676 
9677 status_t
9678 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9679 {
9680 	KPath pathBuffer;
9681 	if (pathBuffer.InitCheck() != B_OK)
9682 		return B_NO_MEMORY;
9683 
9684 	char* path = pathBuffer.LockBuffer();
9685 
9686 	if (!IS_USER_ADDRESS(userPath))
9687 		return B_BAD_ADDRESS;
9688 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9689 	if (status != B_OK)
9690 		return status;
9691 
9692 	return common_access(fd, path, mode, effectiveUserGroup, false);
9693 }
9694 
9695 
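/*!	Stats either the node referred to by (fd, path) or, when no path is
	given, the file descriptor itself. \a statSize may be smaller than
	sizeof(struct stat); only that many bytes are copied back to userland.

	Illustrative userland usage (sketch; the path is hypothetical):
	\code
	#include <stdio.h>
	#include <sys/stat.h>

	struct stat st;
	if (lstat("/boot/home/file", &st) == 0)
		printf("size: %lld\n", (long long)st.st_size);
	\endcode
*/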
9696 status_t
9697 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9698 	struct stat* userStat, size_t statSize)
9699 {
9700 	struct stat stat = {0};
9701 	status_t status;
9702 
9703 	if (statSize > sizeof(struct stat))
9704 		return B_BAD_VALUE;
9705 
9706 	if (!IS_USER_ADDRESS(userStat))
9707 		return B_BAD_ADDRESS;
9708 
9709 	if (userPath != NULL) {
9710 		// path given: get the stat of the node referred to by (fd, path)
9711 		if (!IS_USER_ADDRESS(userPath))
9712 			return B_BAD_ADDRESS;
9713 
9714 		KPath pathBuffer;
9715 		if (pathBuffer.InitCheck() != B_OK)
9716 			return B_NO_MEMORY;
9717 
9718 		char* path = pathBuffer.LockBuffer();
9719 
9720 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9721 		if (status != B_OK)
9722 			return status;
9723 
9724 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9725 	} else {
9726 		// no path given: get the FD and use the FD operation
9727 		struct file_descriptor* descriptor
9728 			= get_fd(get_current_io_context(false), fd);
9729 		if (descriptor == NULL)
9730 			return B_FILE_ERROR;
9731 
9732 		if (descriptor->ops->fd_read_stat)
9733 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9734 		else
9735 			status = B_UNSUPPORTED;
9736 
9737 		put_fd(descriptor);
9738 	}
9739 
9740 	if (status != B_OK)
9741 		return status;
9742 
9743 	return user_memcpy(userStat, &stat, statSize);
9744 }
9745 
9746 
9747 status_t
9748 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9749 	const struct stat* userStat, size_t statSize, int statMask)
9750 {
9751 	if (statSize > sizeof(struct stat))
9752 		return B_BAD_VALUE;
9753 
9754 	struct stat stat;
9755 
9756 	if (!IS_USER_ADDRESS(userStat)
9757 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9758 		return B_BAD_ADDRESS;
9759 
9760 	// clear additional stat fields
9761 	if (statSize < sizeof(struct stat))
9762 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9763 
9764 	status_t status;
9765 
9766 	if (userPath != NULL) {
9767 		// path given: write the stat of the node referred to by (fd, path)
9768 		if (!IS_USER_ADDRESS(userPath))
9769 			return B_BAD_ADDRESS;
9770 
9771 		KPath pathBuffer;
9772 		if (pathBuffer.InitCheck() != B_OK)
9773 			return B_NO_MEMORY;
9774 
9775 		char* path = pathBuffer.LockBuffer();
9776 
9777 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9778 		if (status != B_OK)
9779 			return status;
9780 
9781 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9782 			statMask, false);
9783 	} else {
9784 		// no path given: get the FD and use the FD operation
9785 		struct file_descriptor* descriptor
9786 			= get_fd(get_current_io_context(false), fd);
9787 		if (descriptor == NULL)
9788 			return B_FILE_ERROR;
9789 
9790 		if (descriptor->ops->fd_write_stat) {
9791 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9792 				statMask);
9793 		} else
9794 			status = B_UNSUPPORTED;
9795 
9796 		put_fd(descriptor);
9797 	}
9798 
9799 	return status;
9800 }
9801 
9802 
9803 int
9804 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9805 {
9806 	KPath pathBuffer;
9807 	if (pathBuffer.InitCheck() != B_OK)
9808 		return B_NO_MEMORY;
9809 
9810 	char* path = pathBuffer.LockBuffer();
9811 
9812 	if (userPath != NULL) {
9813 		if (!IS_USER_ADDRESS(userPath))
9814 			return B_BAD_ADDRESS;
9815 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9816 		if (status != B_OK)
9817 			return status;
9818 	}
9819 
9820 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9821 }
9822 
9823 
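/*!	Kernel counterpart of the _kern_read_attr() syscall: opens the named
	attribute of the node given by \a fd read-only, reads from it at
	\a pos, and closes it again.

	Illustrative userland usage (sketch; the file name is hypothetical,
	while "BEOS:TYPE" is the conventional MIME type attribute):
	\code
	#include <fcntl.h>
	#include <unistd.h>
	#include <fs_attr.h>
	#include <Mime.h>

	int fd = open("/boot/home/file", O_RDONLY);
	char type[256];
	ssize_t bytes = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
		type, sizeof(type));
	close(fd);
	\endcode
*/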
9824 ssize_t
9825 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9826 	size_t readBytes)
9827 {
9828 	char attribute[B_FILE_NAME_LENGTH];
9829 
9830 	if (userAttribute == NULL)
9831 		return B_BAD_VALUE;
9832 	if (!IS_USER_ADDRESS(userAttribute))
9833 		return B_BAD_ADDRESS;
9834 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9835 	if (status != B_OK)
9836 		return status;
9837 
9838 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9839 	if (attr < 0)
9840 		return attr;
9841 
9842 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9843 	_user_close(attr);
9844 
9845 	return bytes;
9846 }
9847 
9848 
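/*!	Kernel counterpart of the _kern_write_attr() syscall; see the comment
	below on the BeOS-compatible truncation semantics.

	Illustrative userland usage (sketch; file and attribute names are
	hypothetical):
	\code
	#include <fcntl.h>
	#include <string.h>
	#include <unistd.h>
	#include <fs_attr.h>
	#include <TypeConstants.h>

	int fd = open("/boot/home/file", O_RDWR);
	const char* comment = "example";
	fs_write_attr(fd, "MY:comment", B_STRING_TYPE, 0, comment,
		strlen(comment) + 1);
	close(fd);
	\endcode
*/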
9849 ssize_t
9850 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9851 	const void* buffer, size_t writeBytes)
9852 {
9853 	char attribute[B_FILE_NAME_LENGTH];
9854 
9855 	if (userAttribute == NULL)
9856 		return B_BAD_VALUE;
9857 	if (!IS_USER_ADDRESS(userAttribute))
9858 		return B_BAD_ADDRESS;
9859 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9860 	if (status != B_OK)
9861 		return status;
9862 
9863 	// Mimic the traditional BeOS semantics: a write at offset 0 truncates
9864 	// the attribute first, while other positions leave it intact.
9865 	int attr = attr_create(fd, NULL, attribute, type,
9866 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9867 	if (attr < 0)
9868 		return attr;
9869 
9870 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9871 	_user_close(attr);
9872 
9873 	return bytes;
9874 }
9875 
9876 
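/*!	Kernel counterpart of the _kern_stat_attr() syscall: opens the
	attribute, reads its stat, and returns its type and size as an
	attr_info.

	Illustrative userland usage (sketch; names are hypothetical):
	\code
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>
	#include <fs_attr.h>

	int fd = open("/boot/home/file", O_RDONLY);
	attr_info info;
	if (fs_stat_attr(fd, "BEOS:TYPE", &info) == 0)
		printf("size: %lld\n", (long long)info.size);
	close(fd);
	\endcode
*/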
9877 status_t
9878 _user_stat_attr(int fd, const char* userAttribute,
9879 	struct attr_info* userAttrInfo)
9880 {
9881 	char attribute[B_FILE_NAME_LENGTH];
9882 
9883 	if (userAttribute == NULL || userAttrInfo == NULL)
9884 		return B_BAD_VALUE;
9885 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9886 		return B_BAD_ADDRESS;
9887 	status_t status = user_copy_name(attribute, userAttribute,
9888 		sizeof(attribute));
9889 	if (status != B_OK)
9890 		return status;
9891 
9892 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9893 	if (attr < 0)
9894 		return attr;
9895 
9896 	struct file_descriptor* descriptor
9897 		= get_fd(get_current_io_context(false), attr);
9898 	if (descriptor == NULL) {
9899 		_user_close(attr);
9900 		return B_FILE_ERROR;
9901 	}
9902 
9903 	struct stat stat;
9904 	if (descriptor->ops->fd_read_stat)
9905 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9906 	else
9907 		status = B_UNSUPPORTED;
9908 
9909 	put_fd(descriptor);
9910 	_user_close(attr);
9911 
9912 	if (status == B_OK) {
9913 		attr_info info;
9914 		info.type = stat.st_type;
9915 		info.size = stat.st_size;
9916 
9917 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9918 			return B_BAD_ADDRESS;
9919 	}
9920 
9921 	return status;
9922 }
9923 
9924 
9925 int
9926 _user_open_attr(int fd, const char* userPath, const char* userName,
9927 	uint32 type, int openMode)
9928 {
9929 	char name[B_FILE_NAME_LENGTH];
9930 
9931 	if (!IS_USER_ADDRESS(userName))
9932 		return B_BAD_ADDRESS;
9933 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9934 	if (status != B_OK)
9935 		return status;
9936 
9937 	KPath pathBuffer;
9938 	if (pathBuffer.InitCheck() != B_OK)
9939 		return B_NO_MEMORY;
9940 
9941 	char* path = pathBuffer.LockBuffer();
9942 
9943 	if (userPath != NULL) {
9944 		if (!IS_USER_ADDRESS(userPath))
9945 			return B_BAD_ADDRESS;
9946 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9947 		if (status != B_OK)
9948 			return status;
9949 	}
9950 
9951 	if ((openMode & O_CREAT) != 0) {
9952 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9953 			false);
9954 	}
9955 
9956 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9957 }
9958 
9959 
9960 status_t
9961 _user_remove_attr(int fd, const char* userName)
9962 {
9963 	char name[B_FILE_NAME_LENGTH];
9964 
9965 	if (!IS_USER_ADDRESS(userName))
9966 		return B_BAD_ADDRESS;
9967 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9968 	if (status != B_OK)
9969 		return status;
9970 
9971 	return attr_remove(fd, name, false);
9972 }
9973 
9974 
9975 status_t
9976 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9977 	const char* userToName)
9978 {
9979 	if (!IS_USER_ADDRESS(userFromName)
9980 		|| !IS_USER_ADDRESS(userToName))
9981 		return B_BAD_ADDRESS;
9982 
9983 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9984 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9985 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9986 		return B_NO_MEMORY;
9987 
9988 	char* fromName = fromNameBuffer.LockBuffer();
9989 	char* toName = toNameBuffer.LockBuffer();
9990 
9991 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9992 	if (status != B_OK)
9993 		return status;
9994 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9995 	if (status != B_OK)
9996 		return status;
9997 
9998 	return attr_rename(fromFile, fromName, toFile, toName, false);
9999 }
10000 
10001 
10002 int
10003 _user_open_index_dir(dev_t device)
10004 {
10005 	return index_dir_open(device, false);
10006 }
10007 
10008 
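/*!	Kernel counterpart of the _kern_create_index() syscall: copies the
	index name from userland and creates an attribute index on the given
	volume.

	Illustrative userland usage (sketch; the indexed attribute name is
	hypothetical):
	\code
	#include <stdio.h>
	#include <fs_index.h>
	#include <fs_info.h>
	#include <TypeConstants.h>

	dev_t device = dev_for_path("/boot/home");
	if (fs_create_index(device, "MY:keyword", B_STRING_TYPE, 0) != 0)
		perror("fs_create_index");
	\endcode
*/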
10009 status_t
10010 _user_create_index(dev_t device, const char* userName, uint32 type,
10011 	uint32 flags)
10012 {
10013 	char name[B_FILE_NAME_LENGTH];
10014 
10015 	if (!IS_USER_ADDRESS(userName))
10016 		return B_BAD_ADDRESS;
10017 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10018 	if (status != B_OK)
10019 		return status;
10020 
10021 	return index_create(device, name, type, flags, false);
10022 }
10023 
10024 
10025 status_t
10026 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
10027 {
10028 	char name[B_FILE_NAME_LENGTH];
10029 	struct stat stat = {0};
10030 	status_t status;
10031 
10032 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
10033 		return B_BAD_ADDRESS;
10034 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10035 	if (status != B_OK)
10036 		return status;
10037 
10038 	status = index_name_read_stat(device, name, &stat, false);
10039 	if (status == B_OK) {
10040 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
10041 			return B_BAD_ADDRESS;
10042 	}
10043 
10044 	return status;
10045 }
10046 
10047 
10048 status_t
10049 _user_remove_index(dev_t device, const char* userName)
10050 {
10051 	char name[B_FILE_NAME_LENGTH];
10052 
10053 	if (!IS_USER_ADDRESS(userName))
10054 		return B_BAD_ADDRESS;
10055 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10056 	if (status != B_OK)
10057 		return status;
10058 
10059 	return index_remove(device, name, false);
10060 }
10061 
10062 
10063 status_t
10064 _user_getcwd(char* userBuffer, size_t size)
10065 {
10066 	if (size == 0)
10067 		return B_BAD_VALUE;
10068 	if (!IS_USER_ADDRESS(userBuffer))
10069 		return B_BAD_ADDRESS;
10070 
10071 	if (size > kMaxPathLength)
10072 		size = kMaxPathLength;
10073 
10074 	KPath pathBuffer(size);
10075 	if (pathBuffer.InitCheck() != B_OK)
10076 		return B_NO_MEMORY;
10077 
10078 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10079 
10080 	char* path = pathBuffer.LockBuffer();
10081 
10082 	status_t status = get_cwd(path, size, false);
10083 	if (status != B_OK)
10084 		return status;
10085 
10086 	// Copy back the result
10087 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10088 		return B_BAD_ADDRESS;
10089 
10090 	return status;
10091 }
10092 
10093 
10094 status_t
10095 _user_setcwd(int fd, const char* userPath)
10096 {
10097 	TRACE(("user_setcwd: path = %p\n", userPath));
10098 
10099 	KPath pathBuffer;
10100 	if (pathBuffer.InitCheck() != B_OK)
10101 		return B_NO_MEMORY;
10102 
10103 	char* path = pathBuffer.LockBuffer();
10104 
10105 	if (userPath != NULL) {
10106 		if (!IS_USER_ADDRESS(userPath))
10107 			return B_BAD_ADDRESS;
10108 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10109 		if (status != B_OK)
10110 			return status;
10111 	}
10112 
10113 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10114 }
10115 
10116 
10117 status_t
10118 _user_change_root(const char* userPath)
10119 {
10120 	// only root is allowed to chroot()
10121 	if (geteuid() != 0)
10122 		return B_NOT_ALLOWED;
10123 
10124 	// alloc path buffer
10125 	KPath pathBuffer;
10126 	if (pathBuffer.InitCheck() != B_OK)
10127 		return B_NO_MEMORY;
10128 
10129 	// copy userland path to kernel
10130 	char* path = pathBuffer.LockBuffer();
10131 	if (userPath == NULL)
10132 		return B_BAD_VALUE;
10133 	if (!IS_USER_ADDRESS(userPath))
10134 		return B_BAD_ADDRESS;
10135 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10136 	if (status != B_OK)
10137 		return status;
10138 
10139 	// get the vnode
10140 	struct vnode* vnode;
10141 	status = path_to_vnode(path, true, &vnode, NULL, false);
10142 	if (status != B_OK)
10143 		return status;
10144 
10145 	// set the new root
10146 	struct io_context* context = get_current_io_context(false);
10147 	mutex_lock(&sIOContextRootLock);
10148 	struct vnode* oldRoot = context->root;
10149 	context->root = vnode;
10150 	mutex_unlock(&sIOContextRootLock);
10151 
10152 	put_vnode(oldRoot);
10153 
10154 	return B_OK;
10155 }
10156 
10157 
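/*!	Kernel counterpart of the _kern_open_query() syscall: copies the query
	string into a kernel buffer (its length is capped as a safety measure)
	and opens a query FD, optionally with live updates delivered to
	\a port / \a token.

	Illustrative userland usage (sketch; the query and path are
	hypothetical):
	\code
	#include <stdio.h>
	#include <dirent.h>
	#include <fs_info.h>
	#include <fs_query.h>

	dev_t device = dev_for_path("/boot/home");
	DIR* query = fs_open_query(device, "name==\"*.cpp\"", 0);
	if (query != NULL) {
		struct dirent* entry;
		while ((entry = fs_read_query(query)) != NULL)
			printf("%s\n", entry->d_name);
		fs_close_query(query);
	}
	\endcode
*/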
10158 int
10159 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10160 	uint32 flags, port_id port, int32 token)
10161 {
10162 	if (device < 0 || userQuery == NULL || queryLength == 0)
10163 		return B_BAD_VALUE;
10164 
10165 	if (!IS_USER_ADDRESS(userQuery))
10166 		return B_BAD_ADDRESS;
10167 
10168 	// this is a safety restriction
10169 	if (queryLength >= 65536)
10170 		return B_NAME_TOO_LONG;
10171 
10172 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10173 	if (!query.IsValid())
10174 		return B_NO_MEMORY;
10175 
10176 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10177 		return B_BAD_ADDRESS;
10178 
10179 	return query_open(device, query, flags, port, token, false);
10180 }
10181 
10182 
10183 #include "vfs_request_io.cpp"
10184