xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 6889394848e2dc9f41ff53b12141d572822ca0c6)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2017, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <KPath.h>
44 #include <lock.h>
45 #include <low_resource_manager.h>
46 #include <syscalls.h>
47 #include <syscall_restart.h>
48 #include <tracing.h>
49 #include <util/atomic.h>
50 #include <util/AutoLock.h>
51 #include <util/DoublyLinkedList.h>
52 #include <vfs.h>
53 #include <vm/vm.h>
54 #include <vm/VMCache.h>
55 
56 #include "EntryCache.h"
57 #include "fifo.h"
58 #include "IORequest.h"
59 #include "unused_vnodes.h"
60 #include "vfs_tracing.h"
61 #include "Vnode.h"
62 #include "../cache/vnode_store.h"
63 
64 
65 //#define TRACE_VFS
66 #ifdef TRACE_VFS
67 #	define TRACE(x) dprintf x
68 #	define FUNCTION(x) dprintf x
69 #else
70 #	define TRACE(x) ;
71 #	define FUNCTION(x) ;
72 #endif
73 
74 #define ADD_DEBUGGER_COMMANDS
75 
76 
77 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
78 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
79 
80 #if KDEBUG
81 #	define FS_CALL(vnode, op, params...) \
82 		( HAS_FS_CALL(vnode, op) ? \
83 			vnode->ops->op(vnode->mount->volume, vnode, params) \
84 			: (panic("FS_CALL op " #op " is NULL"), 0))
85 #	define FS_CALL_NO_PARAMS(vnode, op) \
86 		( HAS_FS_CALL(vnode, op) ? \
87 			vnode->ops->op(vnode->mount->volume, vnode) \
88 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
89 #	define FS_MOUNT_CALL(mount, op, params...) \
90 		( HAS_FS_MOUNT_CALL(mount, op) ? \
91 			mount->volume->ops->op(mount->volume, params) \
92 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
93 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
94 		( HAS_FS_MOUNT_CALL(mount, op) ? \
95 			mount->volume->ops->op(mount->volume) \
96 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
97 #else
98 #	define FS_CALL(vnode, op, params...) \
99 			vnode->ops->op(vnode->mount->volume, vnode, params)
100 #	define FS_CALL_NO_PARAMS(vnode, op) \
101 			vnode->ops->op(vnode->mount->volume, vnode)
102 #	define FS_MOUNT_CALL(mount, op, params...) \
103 			mount->volume->ops->op(mount->volume, params)
104 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
105 			mount->volume->ops->op(mount->volume)
106 #endif
107 
108 
109 const static size_t kMaxPathLength = 65536;
110 	// The absolute maximum path length (for getcwd() - this is not depending
111 	// on PATH_MAX
112 
113 
114 typedef DoublyLinkedList<vnode> VnodeList;
115 
116 /*!	\brief Structure to manage a mounted file system
117 
118 	Note: The root_vnode and root_vnode->covers fields (what others?) are
119 	initialized in fs_mount() and not changed afterwards. That is as soon
120 	as the mount is mounted and it is made sure it won't be unmounted
121 	(e.g. by holding a reference to a vnode of that mount) (read) access
122 	to those fields is always safe, even without additional locking. Morever
123 	while mounted the mount holds a reference to the root_vnode->covers vnode,
124 	and thus making the access path vnode->mount->root_vnode->covers->mount->...
125 	safe if a reference to vnode is held (note that for the root mount
126 	root_vnode->covers is NULL, though).
127 */
128 struct fs_mount {
129 	fs_mount()
130 		:
131 		volume(NULL),
132 		device_name(NULL)
133 	{
134 		recursive_lock_init(&rlock, "mount rlock");
135 	}
136 
137 	~fs_mount()
138 	{
139 		recursive_lock_destroy(&rlock);
140 		free(device_name);
141 
142 		while (volume) {
143 			fs_volume* superVolume = volume->super_volume;
144 
145 			if (volume->file_system != NULL)
146 				put_module(volume->file_system->info.name);
147 
148 			free(volume->file_system_name);
149 			free(volume);
150 			volume = superVolume;
151 		}
152 	}
153 
154 	struct fs_mount* next;
155 	dev_t			id;
156 	fs_volume*		volume;
157 	char*			device_name;
158 	recursive_lock	rlock;	// guards the vnodes list
159 		// TODO: Make this a mutex! It is never used recursively.
160 	struct vnode*	root_vnode;
161 	struct vnode*	covers_vnode;	// immutable
162 	KPartition*		partition;
163 	VnodeList		vnodes;
164 	EntryCache		entry_cache;
165 	bool			unmounting;
166 	bool			owns_file_device;
167 };
168 
169 
170 namespace {
171 
172 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
173 	list_link		link;
174 	team_id			team;
175 	pid_t			session;
176 	off_t			start;
177 	off_t			end;
178 	bool			shared;
179 };
180 
181 typedef DoublyLinkedList<advisory_lock> LockList;
182 
183 } // namespace
184 
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
209 	The holder is allowed to read/write access the sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
217 	The fs_mount() and fs_unmount() hold the lock during their whole operation.
218 	That is locking the lock ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 
224 	The thread trying to lock the lock must not hold sVnodeLock or
225 	sMountMutex.
226 */
227 static recursive_lock sMountOpLock;
228 
229 /*!	\brief Guards sVnodeTable.
230 
231 	The holder is allowed read/write access to sVnodeTable and to
232 	any unbusy vnode in that table, save to the immutable fields (device, id,
233 	private_node, mount) to which only read-only access is allowed.
234 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
235 	well as the busy, removed, unused flags, and the vnode's type can also be
236 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
237 	locked. Write access to covered_by and covers requires to write lock
238 	sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not hold this lock when calling create_sem(), as this might call
242 	vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 namespace {
256 
257 struct vnode_hash_key {
258 	dev_t	device;
259 	ino_t	vnode;
260 };
261 
262 struct VnodeHash {
263 	typedef vnode_hash_key	KeyType;
264 	typedef	struct vnode	ValueType;
265 
266 #define VHASH(mountid, vnodeid) \
267 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
268 
269 	size_t HashKey(KeyType key) const
270 	{
271 		return VHASH(key.device, key.vnode);
272 	}
273 
274 	size_t Hash(ValueType* vnode) const
275 	{
276 		return VHASH(vnode->device, vnode->id);
277 	}
278 
279 #undef VHASH
280 
281 	bool Compare(KeyType key, ValueType* vnode) const
282 	{
283 		return vnode->device == key.device && vnode->id == key.vnode;
284 	}
285 
286 	ValueType*& GetLink(ValueType* value) const
287 	{
288 		return value->next;
289 	}
290 };
291 
292 typedef BOpenHashTable<VnodeHash> VnodeTable;
293 
294 
295 struct MountHash {
296 	typedef dev_t			KeyType;
297 	typedef	struct fs_mount	ValueType;
298 
299 	size_t HashKey(KeyType key) const
300 	{
301 		return key;
302 	}
303 
304 	size_t Hash(ValueType* mount) const
305 	{
306 		return mount->id;
307 	}
308 
309 	bool Compare(KeyType key, ValueType* mount) const
310 	{
311 		return mount->id == key;
312 	}
313 
314 	ValueType*& GetLink(ValueType* value) const
315 	{
316 		return value->next;
317 	}
318 };
319 
320 typedef BOpenHashTable<MountHash> MountTable;
321 
322 } // namespace
323 
324 
325 #define VNODE_HASH_TABLE_SIZE 1024
326 static VnodeTable* sVnodeTable;
327 static struct vnode* sRoot;
328 
329 #define MOUNTS_HASH_TABLE_SIZE 16
330 static MountTable* sMountsTable;
331 static dev_t sNextMountID = 1;
332 
333 #define MAX_TEMP_IO_VECS 8
334 
335 // How long to wait for busy vnodes (10s)
336 #define BUSY_VNODE_RETRIES 2000
337 #define BUSY_VNODE_DELAY 5000
338 
339 mode_t __gUmask = 022;
340 
341 /* function declarations */
342 
343 static void free_unused_vnodes();
344 
345 // file descriptor operation prototypes
346 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
347 	void* buffer, size_t* _bytes);
348 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
349 	const void* buffer, size_t* _bytes);
350 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
351 	int seekType);
352 static void file_free_fd(struct file_descriptor* descriptor);
353 static status_t file_close(struct file_descriptor* descriptor);
354 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
355 	struct selectsync* sync);
356 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
357 	struct selectsync* sync);
358 static status_t dir_read(struct io_context* context,
359 	struct file_descriptor* descriptor, struct dirent* buffer,
360 	size_t bufferSize, uint32* _count);
361 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
362 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
363 static status_t dir_rewind(struct file_descriptor* descriptor);
364 static void dir_free_fd(struct file_descriptor* descriptor);
365 static status_t dir_close(struct file_descriptor* descriptor);
366 static status_t attr_dir_read(struct io_context* context,
367 	struct file_descriptor* descriptor, struct dirent* buffer,
368 	size_t bufferSize, uint32* _count);
369 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
370 static void attr_dir_free_fd(struct file_descriptor* descriptor);
371 static status_t attr_dir_close(struct file_descriptor* descriptor);
372 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
373 	void* buffer, size_t* _bytes);
374 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
375 	const void* buffer, size_t* _bytes);
376 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
377 	int seekType);
378 static void attr_free_fd(struct file_descriptor* descriptor);
379 static status_t attr_close(struct file_descriptor* descriptor);
380 static status_t attr_read_stat(struct file_descriptor* descriptor,
381 	struct stat* statData);
382 static status_t attr_write_stat(struct file_descriptor* descriptor,
383 	const struct stat* stat, int statMask);
384 static status_t index_dir_read(struct io_context* context,
385 	struct file_descriptor* descriptor, struct dirent* buffer,
386 	size_t bufferSize, uint32* _count);
387 static status_t index_dir_rewind(struct file_descriptor* descriptor);
388 static void index_dir_free_fd(struct file_descriptor* descriptor);
389 static status_t index_dir_close(struct file_descriptor* descriptor);
390 static status_t query_read(struct io_context* context,
391 	struct file_descriptor* descriptor, struct dirent* buffer,
392 	size_t bufferSize, uint32* _count);
393 static status_t query_rewind(struct file_descriptor* descriptor);
394 static void query_free_fd(struct file_descriptor* descriptor);
395 static status_t query_close(struct file_descriptor* descriptor);
396 
397 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
398 	void* buffer, size_t length);
399 static status_t common_read_stat(struct file_descriptor* descriptor,
400 	struct stat* statData);
401 static status_t common_write_stat(struct file_descriptor* descriptor,
402 	const struct stat* statData, int statMask);
403 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
404 	struct stat* stat, bool kernel);
405 
406 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
407 	bool traverseLeafLink, int count, bool kernel,
408 	struct vnode** _vnode, ino_t* _parentID);
409 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
410 	size_t bufferSize, bool kernel);
411 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
412 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
413 static void inc_vnode_ref_count(struct vnode* vnode);
414 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
415 	bool reenter);
416 static inline void put_vnode(struct vnode* vnode);
417 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
418 	bool kernel);
419 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
420 
421 
422 static struct fd_ops sFileOps = {
423 	file_read,
424 	file_write,
425 	file_seek,
426 	common_ioctl,
427 	NULL,		// set_flags
428 	file_select,
429 	file_deselect,
430 	NULL,		// read_dir()
431 	NULL,		// rewind_dir()
432 	common_read_stat,
433 	common_write_stat,
434 	file_close,
435 	file_free_fd
436 };
437 
438 static struct fd_ops sDirectoryOps = {
439 	NULL,		// read()
440 	NULL,		// write()
441 	NULL,		// seek()
442 	common_ioctl,
443 	NULL,		// set_flags
444 	NULL,		// select()
445 	NULL,		// deselect()
446 	dir_read,
447 	dir_rewind,
448 	common_read_stat,
449 	common_write_stat,
450 	dir_close,
451 	dir_free_fd
452 };
453 
454 static struct fd_ops sAttributeDirectoryOps = {
455 	NULL,		// read()
456 	NULL,		// write()
457 	NULL,		// seek()
458 	common_ioctl,
459 	NULL,		// set_flags
460 	NULL,		// select()
461 	NULL,		// deselect()
462 	attr_dir_read,
463 	attr_dir_rewind,
464 	common_read_stat,
465 	common_write_stat,
466 	attr_dir_close,
467 	attr_dir_free_fd
468 };
469 
470 static struct fd_ops sAttributeOps = {
471 	attr_read,
472 	attr_write,
473 	attr_seek,
474 	common_ioctl,
475 	NULL,		// set_flags
476 	NULL,		// select()
477 	NULL,		// deselect()
478 	NULL,		// read_dir()
479 	NULL,		// rewind_dir()
480 	attr_read_stat,
481 	attr_write_stat,
482 	attr_close,
483 	attr_free_fd
484 };
485 
486 static struct fd_ops sIndexDirectoryOps = {
487 	NULL,		// read()
488 	NULL,		// write()
489 	NULL,		// seek()
490 	NULL,		// ioctl()
491 	NULL,		// set_flags
492 	NULL,		// select()
493 	NULL,		// deselect()
494 	index_dir_read,
495 	index_dir_rewind,
496 	NULL,		// read_stat()
497 	NULL,		// write_stat()
498 	index_dir_close,
499 	index_dir_free_fd
500 };
501 
502 #if 0
503 static struct fd_ops sIndexOps = {
504 	NULL,		// read()
505 	NULL,		// write()
506 	NULL,		// seek()
507 	NULL,		// ioctl()
508 	NULL,		// set_flags
509 	NULL,		// select()
510 	NULL,		// deselect()
511 	NULL,		// dir_read()
512 	NULL,		// dir_rewind()
513 	index_read_stat,	// read_stat()
514 	NULL,		// write_stat()
515 	NULL,		// dir_close()
516 	NULL		// free_fd()
517 };
518 #endif
519 
520 static struct fd_ops sQueryOps = {
521 	NULL,		// read()
522 	NULL,		// write()
523 	NULL,		// seek()
524 	NULL,		// ioctl()
525 	NULL,		// set_flags
526 	NULL,		// select()
527 	NULL,		// deselect()
528 	query_read,
529 	query_rewind,
530 	NULL,		// read_stat()
531 	NULL,		// write_stat()
532 	query_close,
533 	query_free_fd
534 };
535 
536 
537 namespace {
538 
539 class VNodePutter {
540 public:
541 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
542 
543 	~VNodePutter()
544 	{
545 		Put();
546 	}
547 
548 	void SetTo(struct vnode* vnode)
549 	{
550 		Put();
551 		fVNode = vnode;
552 	}
553 
554 	void Put()
555 	{
556 		if (fVNode) {
557 			put_vnode(fVNode);
558 			fVNode = NULL;
559 		}
560 	}
561 
562 	struct vnode* Detach()
563 	{
564 		struct vnode* vnode = fVNode;
565 		fVNode = NULL;
566 		return vnode;
567 	}
568 
569 private:
570 	struct vnode* fVNode;
571 };
572 
573 
574 class FDCloser {
575 public:
576 	FDCloser() : fFD(-1), fKernel(true) {}
577 
578 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
579 
580 	~FDCloser()
581 	{
582 		Close();
583 	}
584 
585 	void SetTo(int fd, bool kernel)
586 	{
587 		Close();
588 		fFD = fd;
589 		fKernel = kernel;
590 	}
591 
592 	void Close()
593 	{
594 		if (fFD >= 0) {
595 			if (fKernel)
596 				_kern_close(fFD);
597 			else
598 				_user_close(fFD);
599 			fFD = -1;
600 		}
601 	}
602 
603 	int Detach()
604 	{
605 		int fd = fFD;
606 		fFD = -1;
607 		return fd;
608 	}
609 
610 private:
611 	int		fFD;
612 	bool	fKernel;
613 };
614 
615 } // namespace
616 
617 
618 #if VFS_PAGES_IO_TRACING
619 
620 namespace VFSPagesIOTracing {
621 
622 class PagesIOTraceEntry : public AbstractTraceEntry {
623 protected:
624 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
625 		const generic_io_vec* vecs, uint32 count, uint32 flags,
626 		generic_size_t bytesRequested, status_t status,
627 		generic_size_t bytesTransferred)
628 		:
629 		fVnode(vnode),
630 		fMountID(vnode->mount->id),
631 		fNodeID(vnode->id),
632 		fCookie(cookie),
633 		fPos(pos),
634 		fCount(count),
635 		fFlags(flags),
636 		fBytesRequested(bytesRequested),
637 		fStatus(status),
638 		fBytesTransferred(bytesTransferred)
639 	{
640 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
641 			sizeof(generic_io_vec) * count, false);
642 	}
643 
644 	void AddDump(TraceOutput& out, const char* mode)
645 	{
646 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
647 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
648 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
649 			(uint64)fBytesRequested);
650 
651 		if (fVecs != NULL) {
652 			for (uint32 i = 0; i < fCount; i++) {
653 				if (i > 0)
654 					out.Print(", ");
655 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
656 					(uint64)fVecs[i].length);
657 			}
658 		}
659 
660 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
661 			"transferred: %" B_PRIu64, fFlags, fStatus,
662 			(uint64)fBytesTransferred);
663 	}
664 
665 protected:
666 	struct vnode*	fVnode;
667 	dev_t			fMountID;
668 	ino_t			fNodeID;
669 	void*			fCookie;
670 	off_t			fPos;
671 	generic_io_vec*	fVecs;
672 	uint32			fCount;
673 	uint32			fFlags;
674 	generic_size_t	fBytesRequested;
675 	status_t		fStatus;
676 	generic_size_t	fBytesTransferred;
677 };
678 
679 
680 class ReadPages : public PagesIOTraceEntry {
681 public:
682 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
683 		const generic_io_vec* vecs, uint32 count, uint32 flags,
684 		generic_size_t bytesRequested, status_t status,
685 		generic_size_t bytesTransferred)
686 		:
687 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
688 			bytesRequested, status, bytesTransferred)
689 	{
690 		Initialized();
691 	}
692 
693 	virtual void AddDump(TraceOutput& out)
694 	{
695 		PagesIOTraceEntry::AddDump(out, "read");
696 	}
697 };
698 
699 
700 class WritePages : public PagesIOTraceEntry {
701 public:
702 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
703 		const generic_io_vec* vecs, uint32 count, uint32 flags,
704 		generic_size_t bytesRequested, status_t status,
705 		generic_size_t bytesTransferred)
706 		:
707 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
708 			bytesRequested, status, bytesTransferred)
709 	{
710 		Initialized();
711 	}
712 
713 	virtual void AddDump(TraceOutput& out)
714 	{
715 		PagesIOTraceEntry::AddDump(out, "write");
716 	}
717 };
718 
719 }	// namespace VFSPagesIOTracing
720 
721 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
722 #else
723 #	define TPIO(x) ;
724 #endif	// VFS_PAGES_IO_TRACING
725 
726 
727 /*! Finds the mounted device (the fs_mount structure) with the given ID.
728 	Note, you must hold the gMountMutex lock when you call this function.
729 */
730 static struct fs_mount*
731 find_mount(dev_t id)
732 {
733 	ASSERT_LOCKED_MUTEX(&sMountMutex);
734 
735 	return sMountsTable->Lookup(id);
736 }
737 
738 
739 static status_t
740 get_mount(dev_t id, struct fs_mount** _mount)
741 {
742 	struct fs_mount* mount;
743 
744 	ReadLocker nodeLocker(sVnodeLock);
745 	MutexLocker mountLocker(sMountMutex);
746 
747 	mount = find_mount(id);
748 	if (mount == NULL)
749 		return B_BAD_VALUE;
750 
751 	struct vnode* rootNode = mount->root_vnode;
752 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
753 		|| rootNode->ref_count == 0) {
754 		// might have been called during a mount/unmount operation
755 		return B_BUSY;
756 	}
757 
758 	inc_vnode_ref_count(rootNode);
759 	*_mount = mount;
760 	return B_OK;
761 }
762 
763 
764 static void
765 put_mount(struct fs_mount* mount)
766 {
767 	if (mount)
768 		put_vnode(mount->root_vnode);
769 }
770 
771 
772 /*!	Tries to open the specified file system module.
773 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
774 	Returns a pointer to file system module interface, or NULL if it
775 	could not open the module.
776 */
777 static file_system_module_info*
778 get_file_system(const char* fsName)
779 {
780 	char name[B_FILE_NAME_LENGTH];
781 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
782 		// construct module name if we didn't get one
783 		// (we currently support only one API)
784 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
785 		fsName = NULL;
786 	}
787 
788 	file_system_module_info* info;
789 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
790 		return NULL;
791 
792 	return info;
793 }
794 
795 
796 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
797 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
798 	The name is allocated for you, and you have to free() it when you're
799 	done with it.
800 	Returns NULL if the required memory is not available.
801 */
802 static char*
803 get_file_system_name(const char* fsName)
804 {
805 	const size_t length = strlen("file_systems/");
806 
807 	if (strncmp(fsName, "file_systems/", length)) {
808 		// the name already seems to be the module's file name
809 		return strdup(fsName);
810 	}
811 
812 	fsName += length;
813 	const char* end = strchr(fsName, '/');
814 	if (end == NULL) {
815 		// this doesn't seem to be a valid name, but well...
816 		return strdup(fsName);
817 	}
818 
819 	// cut off the trailing /v1
820 
821 	char* name = (char*)malloc(end + 1 - fsName);
822 	if (name == NULL)
823 		return NULL;
824 
825 	strlcpy(name, fsName, end + 1 - fsName);
826 	return name;
827 }
828 
829 
830 /*!	Accepts a list of file system names separated by a colon, one for each
831 	layer and returns the file system name for the specified layer.
832 	The name is allocated for you, and you have to free() it when you're
833 	done with it.
834 	Returns NULL if the required memory is not available or if there is no
835 	name for the specified layer.
836 */
837 static char*
838 get_file_system_name_for_layer(const char* fsNames, int32 layer)
839 {
840 	while (layer >= 0) {
841 		const char* end = strchr(fsNames, ':');
842 		if (end == NULL) {
843 			if (layer == 0)
844 				return strdup(fsNames);
845 			return NULL;
846 		}
847 
848 		if (layer == 0) {
849 			size_t length = end - fsNames + 1;
850 			char* result = (char*)malloc(length);
851 			strlcpy(result, fsNames, length);
852 			return result;
853 		}
854 
855 		fsNames = end + 1;
856 		layer--;
857 	}
858 
859 	return NULL;
860 }
861 
862 
863 static void
864 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
865 {
866 	RecursiveLocker _(mount->rlock);
867 	mount->vnodes.Add(vnode);
868 }
869 
870 
871 static void
872 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
873 {
874 	RecursiveLocker _(mount->rlock);
875 	mount->vnodes.Remove(vnode);
876 }
877 
878 
879 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
880 
881 	The caller must hold the sVnodeLock (read lock at least).
882 
883 	\param mountID the mount ID.
884 	\param vnodeID the node ID.
885 
886 	\return The vnode structure, if it was found in the hash table, \c NULL
887 			otherwise.
888 */
889 static struct vnode*
890 lookup_vnode(dev_t mountID, ino_t vnodeID)
891 {
892 	struct vnode_hash_key key;
893 
894 	key.device = mountID;
895 	key.vnode = vnodeID;
896 
897 	return sVnodeTable->Lookup(key);
898 }
899 
900 
901 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
902 
903 	This will also wait for BUSY_VNODE_DELAY before returning if one should
904 	still wait for the vnode becoming unbusy.
905 
906 	\return \c true if one should retry, \c false if not.
907 */
908 static bool
909 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
910 {
911 	if (--tries < 0) {
912 		// vnode doesn't seem to become unbusy
913 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
914 			" is not becoming unbusy!\n", mountID, vnodeID);
915 		return false;
916 	}
917 	snooze(BUSY_VNODE_DELAY);
918 	return true;
919 }
920 
921 
922 /*!	Creates a new vnode with the given mount and node ID.
923 	If the node already exists, it is returned instead and no new node is
924 	created. In either case -- but not, if an error occurs -- the function write
925 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
926 	error the lock is not held on return.
927 
928 	\param mountID The mount ID.
929 	\param vnodeID The vnode ID.
930 	\param _vnode Will be set to the new vnode on success.
931 	\param _nodeCreated Will be set to \c true when the returned vnode has
932 		been newly created, \c false when it already existed. Will not be
933 		changed on error.
934 	\return \c B_OK, when the vnode was successfully created and inserted or
935 		a node with the given ID was found, \c B_NO_MEMORY or
936 		\c B_ENTRY_NOT_FOUND on error.
937 */
938 static status_t
939 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
940 	bool& _nodeCreated)
941 {
942 	FUNCTION(("create_new_vnode_and_lock()\n"));
943 
944 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
945 	if (vnode == NULL)
946 		return B_NO_MEMORY;
947 
948 	// initialize basic values
949 	memset(vnode, 0, sizeof(struct vnode));
950 	vnode->device = mountID;
951 	vnode->id = vnodeID;
952 	vnode->ref_count = 1;
953 	vnode->SetBusy(true);
954 
955 	// look up the node -- it might have been added by someone else in the
956 	// meantime
957 	rw_lock_write_lock(&sVnodeLock);
958 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
959 	if (existingVnode != NULL) {
960 		free(vnode);
961 		_vnode = existingVnode;
962 		_nodeCreated = false;
963 		return B_OK;
964 	}
965 
966 	// get the mount structure
967 	mutex_lock(&sMountMutex);
968 	vnode->mount = find_mount(mountID);
969 	if (!vnode->mount || vnode->mount->unmounting) {
970 		mutex_unlock(&sMountMutex);
971 		rw_lock_write_unlock(&sVnodeLock);
972 		free(vnode);
973 		return B_ENTRY_NOT_FOUND;
974 	}
975 
976 	// add the vnode to the mount's node list and the hash table
977 	sVnodeTable->Insert(vnode);
978 	add_vnode_to_mount_list(vnode, vnode->mount);
979 
980 	mutex_unlock(&sMountMutex);
981 
982 	_vnode = vnode;
983 	_nodeCreated = true;
984 
985 	// keep the vnode lock locked
986 	return B_OK;
987 }
988 
989 
990 /*!	Frees the vnode and all resources it has acquired, and removes
991 	it from the vnode hash as well as from its mount structure.
992 	Will also make sure that any cache modifications are written back.
993 */
994 static void
995 free_vnode(struct vnode* vnode, bool reenter)
996 {
997 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
998 		vnode);
999 
1000 	// write back any changes in this vnode's cache -- but only
1001 	// if the vnode won't be deleted, in which case the changes
1002 	// will be discarded
1003 
1004 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1005 		FS_CALL_NO_PARAMS(vnode, fsync);
1006 
1007 	// Note: If this vnode has a cache attached, there will still be two
1008 	// references to that cache at this point. The last one belongs to the vnode
1009 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1010 	// cache. Each but the last reference to a cache also includes a reference
1011 	// to the vnode. The file cache, however, released its reference (cf.
1012 	// file_cache_create()), so that this vnode's ref count has the chance to
1013 	// ever drop to 0. Deleting the file cache now, will cause the next to last
1014 	// cache reference to be released, which will also release a (no longer
1015 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1016 	// count, so that it will neither become negative nor 0.
1017 	vnode->ref_count = 2;
1018 
1019 	if (!vnode->IsUnpublished()) {
1020 		if (vnode->IsRemoved())
1021 			FS_CALL(vnode, remove_vnode, reenter);
1022 		else
1023 			FS_CALL(vnode, put_vnode, reenter);
1024 	}
1025 
1026 	// If the vnode has a VMCache attached, make sure that it won't try to get
1027 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1028 	// long as the vnode is busy and in the hash, that won't happen, but as
1029 	// soon as we've removed it from the hash, it could reload the vnode -- with
1030 	// a new cache attached!
1031 	if (vnode->cache != NULL)
1032 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1033 
1034 	// The file system has removed the resources of the vnode now, so we can
1035 	// make it available again (by removing the busy vnode from the hash).
1036 	rw_lock_write_lock(&sVnodeLock);
1037 	sVnodeTable->Remove(vnode);
1038 	rw_lock_write_unlock(&sVnodeLock);
1039 
1040 	// if we have a VMCache attached, remove it
1041 	if (vnode->cache)
1042 		vnode->cache->ReleaseRef();
1043 
1044 	vnode->cache = NULL;
1045 
1046 	remove_vnode_from_mount_list(vnode, vnode->mount);
1047 
1048 	free(vnode);
1049 }
1050 
1051 
1052 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1053 	if the counter dropped to 0.
1054 
1055 	The caller must, of course, own a reference to the vnode to call this
1056 	function.
1057 	The caller must not hold the sVnodeLock or the sMountMutex.
1058 
1059 	\param vnode the vnode.
1060 	\param alwaysFree don't move this vnode into the unused list, but really
1061 		   delete it if possible.
1062 	\param reenter \c true, if this function is called (indirectly) from within
1063 		   a file system. This will be passed to file system hooks only.
1064 	\return \c B_OK, if everything went fine, an error code otherwise.
1065 */
1066 static status_t
1067 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1068 {
1069 	ReadLocker locker(sVnodeLock);
1070 	AutoLocker<Vnode> nodeLocker(vnode);
1071 
1072 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1073 
1074 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1075 
1076 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1077 		vnode->ref_count));
1078 
1079 	if (oldRefCount != 1)
1080 		return B_OK;
1081 
1082 	if (vnode->IsBusy())
1083 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1084 
1085 	bool freeNode = false;
1086 	bool freeUnusedNodes = false;
1087 
1088 	// Just insert the vnode into an unused list if we don't need
1089 	// to delete it
1090 	if (vnode->IsRemoved() || alwaysFree) {
1091 		vnode_to_be_freed(vnode);
1092 		vnode->SetBusy(true);
1093 		freeNode = true;
1094 	} else
1095 		freeUnusedNodes = vnode_unused(vnode);
1096 
1097 	nodeLocker.Unlock();
1098 	locker.Unlock();
1099 
1100 	if (freeNode)
1101 		free_vnode(vnode, reenter);
1102 	else if (freeUnusedNodes)
1103 		free_unused_vnodes();
1104 
1105 	return B_OK;
1106 }
1107 
1108 
1109 /*!	\brief Increments the reference counter of the given vnode.
1110 
1111 	The caller must make sure that the node isn't deleted while this function
1112 	is called. This can be done either:
1113 	- by ensuring that a reference to the node exists and remains in existence,
1114 	  or
1115 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1116 	  or by holding sVnodeLock write locked.
1117 
1118 	In the second case the caller is responsible for dealing with the ref count
1119 	0 -> 1 transition. That is 1. this function must not be invoked when the
1120 	node is busy in the first place and 2. vnode_used() must be called for the
1121 	node.
1122 
1123 	\param vnode the vnode.
1124 */
1125 static void
1126 inc_vnode_ref_count(struct vnode* vnode)
1127 {
1128 	atomic_add(&vnode->ref_count, 1);
1129 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1130 		vnode->ref_count));
1131 }
1132 
1133 
1134 static bool
1135 is_special_node_type(int type)
1136 {
1137 	// at the moment only FIFOs are supported
1138 	return S_ISFIFO(type);
1139 }
1140 
1141 
1142 static status_t
1143 create_special_sub_node(struct vnode* vnode, uint32 flags)
1144 {
1145 	if (S_ISFIFO(vnode->Type()))
1146 		return create_fifo_vnode(vnode->mount->volume, vnode);
1147 
1148 	return B_BAD_VALUE;
1149 }
1150 
1151 
1152 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1153 
1154 	If the node is not yet in memory, it will be loaded.
1155 
1156 	The caller must not hold the sVnodeLock or the sMountMutex.
1157 
1158 	\param mountID the mount ID.
1159 	\param vnodeID the node ID.
1160 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1161 		   retrieved vnode structure shall be written.
1162 	\param reenter \c true, if this function is called (indirectly) from within
1163 		   a file system.
1164 	\return \c B_OK, if everything when fine, an error code otherwise.
1165 */
1166 static status_t
1167 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1168 	int reenter)
1169 {
1170 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1171 		mountID, vnodeID, _vnode));
1172 
1173 	rw_lock_read_lock(&sVnodeLock);
1174 
1175 	int32 tries = BUSY_VNODE_RETRIES;
1176 restart:
1177 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1178 	AutoLocker<Vnode> nodeLocker(vnode);
1179 
1180 	if (vnode && vnode->IsBusy()) {
1181 		nodeLocker.Unlock();
1182 		rw_lock_read_unlock(&sVnodeLock);
1183 		if (!canWait) {
1184 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1185 				mountID, vnodeID);
1186 			return B_BUSY;
1187 		}
1188 		if (!retry_busy_vnode(tries, mountID, vnodeID))
1189 			return B_BUSY;
1190 
1191 		rw_lock_read_lock(&sVnodeLock);
1192 		goto restart;
1193 	}
1194 
1195 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1196 
1197 	status_t status;
1198 
1199 	if (vnode) {
1200 		if (vnode->ref_count == 0) {
1201 			// this vnode has been unused before
1202 			vnode_used(vnode);
1203 		}
1204 		inc_vnode_ref_count(vnode);
1205 
1206 		nodeLocker.Unlock();
1207 		rw_lock_read_unlock(&sVnodeLock);
1208 	} else {
1209 		// we need to create a new vnode and read it in
1210 		rw_lock_read_unlock(&sVnodeLock);
1211 			// unlock -- create_new_vnode_and_lock() write-locks on success
1212 		bool nodeCreated;
1213 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1214 			nodeCreated);
1215 		if (status != B_OK)
1216 			return status;
1217 
1218 		if (!nodeCreated) {
1219 			rw_lock_read_lock(&sVnodeLock);
1220 			rw_lock_write_unlock(&sVnodeLock);
1221 			goto restart;
1222 		}
1223 
1224 		rw_lock_write_unlock(&sVnodeLock);
1225 
1226 		int type;
1227 		uint32 flags;
1228 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1229 			&flags, reenter);
1230 		if (status == B_OK && vnode->private_node == NULL)
1231 			status = B_BAD_VALUE;
1232 
1233 		bool gotNode = status == B_OK;
1234 		bool publishSpecialSubNode = false;
1235 		if (gotNode) {
1236 			vnode->SetType(type);
1237 			publishSpecialSubNode = is_special_node_type(type)
1238 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1239 		}
1240 
1241 		if (gotNode && publishSpecialSubNode)
1242 			status = create_special_sub_node(vnode, flags);
1243 
1244 		if (status != B_OK) {
1245 			if (gotNode)
1246 				FS_CALL(vnode, put_vnode, reenter);
1247 
1248 			rw_lock_write_lock(&sVnodeLock);
1249 			sVnodeTable->Remove(vnode);
1250 			remove_vnode_from_mount_list(vnode, vnode->mount);
1251 			rw_lock_write_unlock(&sVnodeLock);
1252 
1253 			free(vnode);
1254 			return status;
1255 		}
1256 
1257 		rw_lock_read_lock(&sVnodeLock);
1258 		vnode->Lock();
1259 
1260 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1261 		vnode->SetBusy(false);
1262 
1263 		vnode->Unlock();
1264 		rw_lock_read_unlock(&sVnodeLock);
1265 	}
1266 
1267 	TRACE(("get_vnode: returning %p\n", vnode));
1268 
1269 	*_vnode = vnode;
1270 	return B_OK;
1271 }
1272 
1273 
1274 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1275 	if the counter dropped to 0.
1276 
1277 	The caller must, of course, own a reference to the vnode to call this
1278 	function.
1279 	The caller must not hold the sVnodeLock or the sMountMutex.
1280 
1281 	\param vnode the vnode.
1282 */
1283 static inline void
1284 put_vnode(struct vnode* vnode)
1285 {
1286 	dec_vnode_ref_count(vnode, false, false);
1287 }
1288 
1289 
1290 static void
1291 free_unused_vnodes(int32 level)
1292 {
1293 	unused_vnodes_check_started();
1294 
1295 	if (level == B_NO_LOW_RESOURCE) {
1296 		unused_vnodes_check_done();
1297 		return;
1298 	}
1299 
1300 	flush_hot_vnodes();
1301 
1302 	// determine how many nodes to free
1303 	uint32 count = 1;
1304 	{
1305 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1306 
1307 		switch (level) {
1308 			case B_LOW_RESOURCE_NOTE:
1309 				count = sUnusedVnodes / 100;
1310 				break;
1311 			case B_LOW_RESOURCE_WARNING:
1312 				count = sUnusedVnodes / 10;
1313 				break;
1314 			case B_LOW_RESOURCE_CRITICAL:
1315 				count = sUnusedVnodes;
1316 				break;
1317 		}
1318 
1319 		if (count > sUnusedVnodes)
1320 			count = sUnusedVnodes;
1321 	}
1322 
1323 	// Write back the modified pages of some unused vnodes and free them.
1324 
1325 	for (uint32 i = 0; i < count; i++) {
1326 		ReadLocker vnodesReadLocker(sVnodeLock);
1327 
1328 		// get the first node
1329 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1330 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1331 			&sUnusedVnodeList);
1332 		unusedVnodesLocker.Unlock();
1333 
1334 		if (vnode == NULL)
1335 			break;
1336 
1337 		// lock the node
1338 		AutoLocker<Vnode> nodeLocker(vnode);
1339 
1340 		// Check whether the node is still unused -- since we only append to the
1341 		// tail of the unused queue, the vnode should still be at its head.
1342 		// Alternatively we could check its ref count for 0 and its busy flag,
1343 		// but if the node is no longer at the head of the queue, it means it
1344 		// has been touched in the meantime, i.e. it is no longer the least
1345 		// recently used unused vnode and we rather don't free it.
1346 		unusedVnodesLocker.Lock();
1347 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1348 			continue;
1349 		unusedVnodesLocker.Unlock();
1350 
1351 		ASSERT(!vnode->IsBusy());
1352 
1353 		// grab a reference
1354 		inc_vnode_ref_count(vnode);
1355 		vnode_used(vnode);
1356 
1357 		// write back changes and free the node
1358 		nodeLocker.Unlock();
1359 		vnodesReadLocker.Unlock();
1360 
1361 		if (vnode->cache != NULL)
1362 			vnode->cache->WriteModified();
1363 
1364 		dec_vnode_ref_count(vnode, true, false);
1365 			// this should free the vnode when it's still unused
1366 	}
1367 
1368 	unused_vnodes_check_done();
1369 }
1370 
1371 
1372 /*!	Gets the vnode the given vnode is covering.
1373 
1374 	The caller must have \c sVnodeLock read-locked at least.
1375 
1376 	The function returns a reference to the retrieved vnode (if any), the caller
1377 	is responsible to free.
1378 
1379 	\param vnode The vnode whose covered node shall be returned.
1380 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1381 		vnode.
1382 */
1383 static inline Vnode*
1384 get_covered_vnode_locked(Vnode* vnode)
1385 {
1386 	if (Vnode* coveredNode = vnode->covers) {
1387 		while (coveredNode->covers != NULL)
1388 			coveredNode = coveredNode->covers;
1389 
1390 		inc_vnode_ref_count(coveredNode);
1391 		return coveredNode;
1392 	}
1393 
1394 	return NULL;
1395 }
1396 
1397 
1398 /*!	Gets the vnode the given vnode is covering.
1399 
1400 	The caller must not hold \c sVnodeLock. Note that this implies a race
1401 	condition, since the situation can change at any time.
1402 
1403 	The function returns a reference to the retrieved vnode (if any), the caller
1404 	is responsible to free.
1405 
1406 	\param vnode The vnode whose covered node shall be returned.
1407 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1408 		vnode.
1409 */
1410 static inline Vnode*
1411 get_covered_vnode(Vnode* vnode)
1412 {
1413 	if (!vnode->IsCovering())
1414 		return NULL;
1415 
1416 	ReadLocker vnodeReadLocker(sVnodeLock);
1417 	return get_covered_vnode_locked(vnode);
1418 }
1419 
1420 
1421 /*!	Gets the vnode the given vnode is covered by.
1422 
1423 	The caller must have \c sVnodeLock read-locked at least.
1424 
1425 	The function returns a reference to the retrieved vnode (if any), the caller
1426 	is responsible to free.
1427 
1428 	\param vnode The vnode whose covering node shall be returned.
1429 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1430 		any vnode.
1431 */
1432 static Vnode*
1433 get_covering_vnode_locked(Vnode* vnode)
1434 {
1435 	if (Vnode* coveringNode = vnode->covered_by) {
1436 		while (coveringNode->covered_by != NULL)
1437 			coveringNode = coveringNode->covered_by;
1438 
1439 		inc_vnode_ref_count(coveringNode);
1440 		return coveringNode;
1441 	}
1442 
1443 	return NULL;
1444 }
1445 
1446 
1447 /*!	Gets the vnode the given vnode is covered by.
1448 
1449 	The caller must not hold \c sVnodeLock. Note that this implies a race
1450 	condition, since the situation can change at any time.
1451 
1452 	The function returns a reference to the retrieved vnode (if any), the caller
1453 	is responsible to free.
1454 
1455 	\param vnode The vnode whose covering node shall be returned.
1456 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1457 		any vnode.
1458 */
1459 static inline Vnode*
1460 get_covering_vnode(Vnode* vnode)
1461 {
1462 	if (!vnode->IsCovered())
1463 		return NULL;
1464 
1465 	ReadLocker vnodeReadLocker(sVnodeLock);
1466 	return get_covering_vnode_locked(vnode);
1467 }
1468 
1469 
1470 static void
1471 free_unused_vnodes()
1472 {
1473 	free_unused_vnodes(
1474 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1475 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1476 }
1477 
1478 
1479 static void
1480 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1481 {
1482 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1483 
1484 	free_unused_vnodes(level);
1485 }
1486 
1487 
1488 static inline void
1489 put_advisory_locking(struct advisory_locking* locking)
1490 {
1491 	release_sem(locking->lock);
1492 }
1493 
1494 
1495 /*!	Returns the advisory_locking object of the \a vnode in case it
1496 	has one, and locks it.
1497 	You have to call put_advisory_locking() when you're done with
1498 	it.
1499 	Note, you must not have the vnode mutex locked when calling
1500 	this function.
1501 */
1502 static struct advisory_locking*
1503 get_advisory_locking(struct vnode* vnode)
1504 {
1505 	rw_lock_read_lock(&sVnodeLock);
1506 	vnode->Lock();
1507 
1508 	struct advisory_locking* locking = vnode->advisory_locking;
1509 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1510 
1511 	vnode->Unlock();
1512 	rw_lock_read_unlock(&sVnodeLock);
1513 
1514 	if (lock >= 0)
1515 		lock = acquire_sem(lock);
1516 	if (lock < 0) {
1517 		// This means the locking has been deleted in the mean time
1518 		// or had never existed in the first place - otherwise, we
1519 		// would get the lock at some point.
1520 		return NULL;
1521 	}
1522 
1523 	return locking;
1524 }
1525 
1526 
1527 /*!	Creates a locked advisory_locking object, and attaches it to the
1528 	given \a vnode.
1529 	Returns B_OK in case of success - also if the vnode got such an
1530 	object from someone else in the mean time, you'll still get this
1531 	one locked then.
1532 */
1533 static status_t
1534 create_advisory_locking(struct vnode* vnode)
1535 {
1536 	if (vnode == NULL)
1537 		return B_FILE_ERROR;
1538 
1539 	ObjectDeleter<advisory_locking> lockingDeleter;
1540 	struct advisory_locking* locking = NULL;
1541 
1542 	while (get_advisory_locking(vnode) == NULL) {
1543 		// no locking object set on the vnode yet, create one
1544 		if (locking == NULL) {
1545 			locking = new(std::nothrow) advisory_locking;
1546 			if (locking == NULL)
1547 				return B_NO_MEMORY;
1548 			lockingDeleter.SetTo(locking);
1549 
1550 			locking->wait_sem = create_sem(0, "advisory lock");
1551 			if (locking->wait_sem < 0)
1552 				return locking->wait_sem;
1553 
1554 			locking->lock = create_sem(0, "advisory locking");
1555 			if (locking->lock < 0)
1556 				return locking->lock;
1557 		}
1558 
1559 		// set our newly created locking object
1560 		ReadLocker _(sVnodeLock);
1561 		AutoLocker<Vnode> nodeLocker(vnode);
1562 		if (vnode->advisory_locking == NULL) {
1563 			vnode->advisory_locking = locking;
1564 			lockingDeleter.Detach();
1565 			return B_OK;
1566 		}
1567 	}
1568 
1569 	// The vnode already had a locking object. That's just as well.
1570 
1571 	return B_OK;
1572 }
1573 
1574 
1575 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1576 	with the advisory_lock \a lock.
1577 */
1578 static bool
1579 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1580 {
1581 	if (flock == NULL)
1582 		return true;
1583 
1584 	return lock->start <= flock->l_start - 1 + flock->l_len
1585 		&& lock->end >= flock->l_start;
1586 }
1587 
1588 
1589 /*!	Tests whether acquiring a lock would block.
1590 */
1591 static status_t
1592 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1593 {
1594 	flock->l_type = F_UNLCK;
1595 
1596 	struct advisory_locking* locking = get_advisory_locking(vnode);
1597 	if (locking == NULL)
1598 		return B_OK;
1599 
1600 	team_id team = team_get_current_team_id();
1601 
1602 	LockList::Iterator iterator = locking->locks.GetIterator();
1603 	while (iterator.HasNext()) {
1604 		struct advisory_lock* lock = iterator.Next();
1605 
1606 		 if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1607 			// locks do overlap
1608 			if (flock->l_type != F_RDLCK || !lock->shared) {
1609 				// collision
1610 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1611 				flock->l_whence = SEEK_SET;
1612 				flock->l_start = lock->start;
1613 				flock->l_len = lock->end - lock->start + 1;
1614 				flock->l_pid = lock->team;
1615 				break;
1616 			}
1617 		}
1618 	}
1619 
1620 	put_advisory_locking(locking);
1621 	return B_OK;
1622 }
1623 
1624 
1625 /*!	Removes the specified lock, or all locks of the calling team
1626 	if \a flock is NULL.
1627 */
1628 static status_t
1629 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1630 {
1631 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1632 
1633 	struct advisory_locking* locking = get_advisory_locking(vnode);
1634 	if (locking == NULL)
1635 		return B_OK;
1636 
1637 	// TODO: use the thread ID instead??
1638 	team_id team = team_get_current_team_id();
1639 	pid_t session = thread_get_current_thread()->team->session_id;
1640 
1641 	// find matching lock entries
1642 
1643 	LockList::Iterator iterator = locking->locks.GetIterator();
1644 	while (iterator.HasNext()) {
1645 		struct advisory_lock* lock = iterator.Next();
1646 		bool removeLock = false;
1647 
1648 		if (lock->session == session)
1649 			removeLock = true;
1650 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1651 			bool endsBeyond = false;
1652 			bool startsBefore = false;
1653 			if (flock != NULL) {
1654 				startsBefore = lock->start < flock->l_start;
1655 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1656 			}
1657 
1658 			if (!startsBefore && !endsBeyond) {
1659 				// lock is completely contained in flock
1660 				removeLock = true;
1661 			} else if (startsBefore && !endsBeyond) {
1662 				// cut the end of the lock
1663 				lock->end = flock->l_start - 1;
1664 			} else if (!startsBefore && endsBeyond) {
1665 				// cut the start of the lock
1666 				lock->start = flock->l_start + flock->l_len;
1667 			} else {
1668 				// divide the lock into two locks
1669 				struct advisory_lock* secondLock = new advisory_lock;
1670 				if (secondLock == NULL) {
1671 					// TODO: we should probably revert the locks we already
1672 					// changed... (ie. allocate upfront)
1673 					put_advisory_locking(locking);
1674 					return B_NO_MEMORY;
1675 				}
1676 
1677 				lock->end = flock->l_start - 1;
1678 
1679 				secondLock->team = lock->team;
1680 				secondLock->session = lock->session;
1681 				// values must already be normalized when getting here
1682 				secondLock->start = flock->l_start + flock->l_len;
1683 				secondLock->end = lock->end;
1684 				secondLock->shared = lock->shared;
1685 
1686 				locking->locks.Add(secondLock);
1687 			}
1688 		}
1689 
1690 		if (removeLock) {
1691 			// this lock is no longer used
1692 			iterator.Remove();
1693 			free(lock);
1694 		}
1695 	}
1696 
1697 	bool removeLocking = locking->locks.IsEmpty();
1698 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1699 
1700 	put_advisory_locking(locking);
1701 
1702 	if (removeLocking) {
1703 		// We can remove the whole advisory locking structure; it's no
1704 		// longer used
1705 		locking = get_advisory_locking(vnode);
1706 		if (locking != NULL) {
1707 			ReadLocker locker(sVnodeLock);
1708 			AutoLocker<Vnode> nodeLocker(vnode);
1709 
1710 			// the locking could have been changed in the mean time
1711 			if (locking->locks.IsEmpty()) {
1712 				vnode->advisory_locking = NULL;
1713 				nodeLocker.Unlock();
1714 				locker.Unlock();
1715 
1716 				// we've detached the locking from the vnode, so we can
1717 				// safely delete it
1718 				delete locking;
1719 			} else {
1720 				// the locking is in use again
1721 				nodeLocker.Unlock();
1722 				locker.Unlock();
1723 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1724 			}
1725 		}
1726 	}
1727 
1728 	return B_OK;
1729 }
1730 
1731 
1732 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1733 	will wait for the lock to become available, if there are any collisions
1734 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1735 
1736 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1737 	BSD flock() semantics are used, that is, all children can unlock the file
1738 	in question (we even allow parents to remove the lock, though, but that
1739 	seems to be in line to what the BSD's are doing).
1740 */
1741 static status_t
1742 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1743 	bool wait)
1744 {
1745 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1746 		vnode, flock, wait ? "yes" : "no"));
1747 
1748 	bool shared = flock->l_type == F_RDLCK;
1749 	status_t status = B_OK;
1750 
1751 	// TODO: do deadlock detection!
1752 
1753 	struct advisory_locking* locking;
1754 
1755 	while (true) {
1756 		// if this vnode has an advisory_locking structure attached,
1757 		// lock that one and search for any colliding file lock
1758 		status = create_advisory_locking(vnode);
1759 		if (status != B_OK)
1760 			return status;
1761 
1762 		locking = vnode->advisory_locking;
1763 		team_id team = team_get_current_team_id();
1764 		sem_id waitForLock = -1;
1765 
1766 		// test for collisions
1767 		LockList::Iterator iterator = locking->locks.GetIterator();
1768 		while (iterator.HasNext()) {
1769 			struct advisory_lock* lock = iterator.Next();
1770 
1771 			// TODO: locks from the same team might be joinable!
1772 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1773 				// locks do overlap
1774 				if (!shared || !lock->shared) {
1775 					// we need to wait
1776 					waitForLock = locking->wait_sem;
1777 					break;
1778 				}
1779 			}
1780 		}
1781 
1782 		if (waitForLock < 0)
1783 			break;
1784 
1785 		// We need to wait. Do that or fail now, if we've been asked not to.
1786 
1787 		if (!wait) {
1788 			put_advisory_locking(locking);
1789 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1790 		}
1791 
1792 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1793 			B_CAN_INTERRUPT, 0);
1794 		if (status != B_OK && status != B_BAD_SEM_ID)
1795 			return status;
1796 
1797 		// We have been notified, but we need to re-lock the locking object. So
1798 		// go another round...
1799 	}
1800 
1801 	// install new lock
1802 
1803 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1804 		sizeof(struct advisory_lock));
1805 	if (lock == NULL) {
1806 		put_advisory_locking(locking);
1807 		return B_NO_MEMORY;
1808 	}
1809 
1810 	lock->team = team_get_current_team_id();
1811 	lock->session = session;
1812 	// values must already be normalized when getting here
1813 	lock->start = flock->l_start;
1814 	lock->end = flock->l_start - 1 + flock->l_len;
1815 	lock->shared = shared;
1816 
1817 	locking->locks.Add(lock);
1818 	put_advisory_locking(locking);
1819 
1820 	return status;
1821 }
1822 
1823 
1824 /*!	Normalizes the \a flock structure to make it easier to compare the
1825 	structure with others. The l_start and l_len fields are set to absolute
1826 	values according to the l_whence field.
1827 */
1828 static status_t
1829 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1830 {
1831 	switch (flock->l_whence) {
1832 		case SEEK_SET:
1833 			break;
1834 		case SEEK_CUR:
1835 			flock->l_start += descriptor->pos;
1836 			break;
1837 		case SEEK_END:
1838 		{
1839 			struct vnode* vnode = descriptor->u.vnode;
1840 			struct stat stat;
1841 			status_t status;
1842 
1843 			if (!HAS_FS_CALL(vnode, read_stat))
1844 				return B_UNSUPPORTED;
1845 
1846 			status = FS_CALL(vnode, read_stat, &stat);
1847 			if (status != B_OK)
1848 				return status;
1849 
1850 			flock->l_start += stat.st_size;
1851 			break;
1852 		}
1853 		default:
1854 			return B_BAD_VALUE;
1855 	}
1856 
1857 	if (flock->l_start < 0)
1858 		flock->l_start = 0;
1859 	if (flock->l_len == 0)
1860 		flock->l_len = OFF_MAX;
1861 
1862 	// don't let the offset and length overflow
1863 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1864 		flock->l_len = OFF_MAX - flock->l_start;
1865 
1866 	if (flock->l_len < 0) {
1867 		// a negative length reverses the region
1868 		flock->l_start += flock->l_len;
1869 		flock->l_len = -flock->l_len;
1870 	}
1871 
1872 	return B_OK;
1873 }
1874 
1875 
1876 static void
1877 replace_vnode_if_disconnected(struct fs_mount* mount,
1878 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1879 	struct vnode* fallBack, bool lockRootLock)
1880 {
1881 	struct vnode* givenVnode = vnode;
1882 	bool vnodeReplaced = false;
1883 
1884 	ReadLocker vnodeReadLocker(sVnodeLock);
1885 
1886 	if (lockRootLock)
1887 		mutex_lock(&sIOContextRootLock);
1888 
1889 	while (vnode != NULL && vnode->mount == mount
1890 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1891 		if (vnode->covers != NULL) {
1892 			// redirect the vnode to the covered vnode
1893 			vnode = vnode->covers;
1894 		} else
1895 			vnode = fallBack;
1896 
1897 		vnodeReplaced = true;
1898 	}
1899 
1900 	// If we've replaced the node, grab a reference for the new one.
1901 	if (vnodeReplaced && vnode != NULL)
1902 		inc_vnode_ref_count(vnode);
1903 
1904 	if (lockRootLock)
1905 		mutex_unlock(&sIOContextRootLock);
1906 
1907 	vnodeReadLocker.Unlock();
1908 
1909 	if (vnodeReplaced)
1910 		put_vnode(givenVnode);
1911 }
1912 
1913 
1914 /*!	Disconnects all file descriptors that are associated with the
1915 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1916 	\a mount object.
1917 
1918 	Note, after you've called this function, there might still be ongoing
1919 	accesses - they won't be interrupted if they already happened before.
1920 	However, any subsequent access will fail.
1921 
1922 	This is not a cheap function and should be used with care and rarely.
1923 	TODO: there is currently no means to stop a blocking read/write!
1924 */
1925 static void
1926 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1927 	struct vnode* vnodeToDisconnect)
1928 {
1929 	// iterate over all teams and peek into their file descriptors
1930 	TeamListIterator teamIterator;
1931 	while (Team* team = teamIterator.Next()) {
1932 		BReference<Team> teamReference(team, true);
1933 
1934 		// lock the I/O context
1935 		io_context* context = team->io_context;
1936 		MutexLocker contextLocker(context->io_mutex);
1937 
1938 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1939 			sRoot, true);
1940 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1941 			sRoot, false);
1942 
1943 		for (uint32 i = 0; i < context->table_size; i++) {
1944 			if (struct file_descriptor* descriptor = context->fds[i]) {
1945 				inc_fd_ref_count(descriptor);
1946 
1947 				// if this descriptor points at this mount, we
1948 				// need to disconnect it to be able to unmount
1949 				struct vnode* vnode = fd_vnode(descriptor);
1950 				if (vnodeToDisconnect != NULL) {
1951 					if (vnode == vnodeToDisconnect)
1952 						disconnect_fd(descriptor);
1953 				} else if ((vnode != NULL && vnode->mount == mount)
1954 					|| (vnode == NULL && descriptor->u.mount == mount))
1955 					disconnect_fd(descriptor);
1956 
1957 				put_fd(descriptor);
1958 			}
1959 		}
1960 	}
1961 }
1962 
1963 
1964 /*!	\brief Gets the root node of the current IO context.
1965 	If \a kernel is \c true, the kernel IO context will be used.
1966 	The caller obtains a reference to the returned node.
1967 */
1968 struct vnode*
1969 get_root_vnode(bool kernel)
1970 {
1971 	if (!kernel) {
1972 		// Get current working directory from io context
1973 		struct io_context* context = get_current_io_context(kernel);
1974 
1975 		mutex_lock(&sIOContextRootLock);
1976 
1977 		struct vnode* root = context->root;
1978 		if (root != NULL)
1979 			inc_vnode_ref_count(root);
1980 
1981 		mutex_unlock(&sIOContextRootLock);
1982 
1983 		if (root != NULL)
1984 			return root;
1985 
1986 		// That should never happen.
1987 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1988 			"have a root\n", team_get_current_team_id());
1989 	}
1990 
1991 	inc_vnode_ref_count(sRoot);
1992 	return sRoot;
1993 }
1994 
1995 
1996 /*!	\brief Gets the directory path and leaf name for a given path.
1997 
1998 	The supplied \a path is transformed to refer to the directory part of
1999 	the entry identified by the original path, and into the buffer \a filename
2000 	the leaf name of the original entry is written.
2001 	Neither the returned path nor the leaf name can be expected to be
2002 	canonical.
2003 
2004 	\param path The path to be analyzed. Must be able to store at least one
2005 		   additional character.
2006 	\param filename The buffer into which the leaf name will be written.
2007 		   Must be of size B_FILE_NAME_LENGTH at least.
2008 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2009 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2010 		   if the given path name is empty.
2011 */
2012 static status_t
2013 get_dir_path_and_leaf(char* path, char* filename)
2014 {
2015 	if (*path == '\0')
2016 		return B_ENTRY_NOT_FOUND;
2017 
2018 	char* last = strrchr(path, '/');
2019 		// '/' are not allowed in file names!
2020 
2021 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2022 
2023 	if (last == NULL) {
2024 		// this path is single segment with no '/' in it
2025 		// ex. "foo"
2026 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2027 			return B_NAME_TOO_LONG;
2028 
2029 		strcpy(path, ".");
2030 	} else {
2031 		last++;
2032 		if (last[0] == '\0') {
2033 			// special case: the path ends in one or more '/' - remove them
2034 			while (*--last == '/' && last != path);
2035 			last[1] = '\0';
2036 
2037 			if (last == path && last[0] == '/') {
2038 				// This path points to the root of the file system
2039 				strcpy(filename, ".");
2040 				return B_OK;
2041 			}
2042 			for (; last != path && *(last - 1) != '/'; last--);
2043 				// rewind to the start of the leaf before the '/'
2044 		}
2045 
2046 		// normal leaf: replace the leaf portion of the path with a '.'
2047 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2048 			return B_NAME_TOO_LONG;
2049 
2050 		last[0] = '.';
2051 		last[1] = '\0';
2052 	}
2053 	return B_OK;
2054 }
2055 
2056 
2057 static status_t
2058 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2059 	bool traverse, bool kernel, struct vnode** _vnode)
2060 {
2061 	char clonedName[B_FILE_NAME_LENGTH + 1];
2062 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2063 		return B_NAME_TOO_LONG;
2064 
2065 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2066 	struct vnode* directory;
2067 
2068 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2069 	if (status < 0)
2070 		return status;
2071 
2072 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2073 		_vnode, NULL);
2074 }
2075 
2076 
2077 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2078 	and returns the respective vnode.
2079 	On success a reference to the vnode is acquired for the caller.
2080 */
2081 static status_t
2082 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2083 {
2084 	ino_t id;
2085 	bool missing;
2086 
2087 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2088 		return missing ? B_ENTRY_NOT_FOUND
2089 			: get_vnode(dir->device, id, _vnode, true, false);
2090 	}
2091 
2092 	status_t status = FS_CALL(dir, lookup, name, &id);
2093 	if (status != B_OK)
2094 		return status;
2095 
2096 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2097 	// have a reference and just need to look the node up.
2098 	rw_lock_read_lock(&sVnodeLock);
2099 	*_vnode = lookup_vnode(dir->device, id);
2100 	rw_lock_read_unlock(&sVnodeLock);
2101 
2102 	if (*_vnode == NULL) {
2103 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2104 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2105 		return B_ENTRY_NOT_FOUND;
2106 	}
2107 
2108 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2109 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2110 //		(*_vnode)->mount->id, (*_vnode)->id);
2111 
2112 	return B_OK;
2113 }
2114 
2115 
2116 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2117 	\a path must not be NULL.
2118 	If it returns successfully, \a path contains the name of the last path
2119 	component. This function clobbers the buffer pointed to by \a path only
2120 	if it does contain more than one component.
2121 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2122 	it is successful or not!
2123 */
2124 static status_t
2125 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2126 	int count, struct io_context* ioContext, struct vnode** _vnode,
2127 	ino_t* _parentID)
2128 {
2129 	status_t status = B_OK;
2130 	ino_t lastParentID = vnode->id;
2131 
2132 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2133 
2134 	if (path == NULL) {
2135 		put_vnode(vnode);
2136 		return B_BAD_VALUE;
2137 	}
2138 
2139 	if (*path == '\0') {
2140 		put_vnode(vnode);
2141 		return B_ENTRY_NOT_FOUND;
2142 	}
2143 
2144 	while (true) {
2145 		struct vnode* nextVnode;
2146 		char* nextPath;
2147 
2148 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2149 			path));
2150 
2151 		// done?
2152 		if (path[0] == '\0')
2153 			break;
2154 
2155 		// walk to find the next path component ("path" will point to a single
2156 		// path component), and filter out multiple slashes
2157 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2158 				nextPath++);
2159 
2160 		if (*nextPath == '/') {
2161 			*nextPath = '\0';
2162 			do
2163 				nextPath++;
2164 			while (*nextPath == '/');
2165 		}
2166 
2167 		// See if the '..' is at a covering vnode move to the covered
2168 		// vnode so we pass the '..' path to the underlying filesystem.
2169 		// Also prevent breaking the root of the IO context.
2170 		if (strcmp("..", path) == 0) {
2171 			if (vnode == ioContext->root) {
2172 				// Attempted prison break! Keep it contained.
2173 				path = nextPath;
2174 				continue;
2175 			}
2176 
2177 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2178 				nextVnode = coveredVnode;
2179 				put_vnode(vnode);
2180 				vnode = nextVnode;
2181 			}
2182 		}
2183 
2184 		// check if vnode is really a directory
2185 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2186 			status = B_NOT_A_DIRECTORY;
2187 
2188 		// Check if we have the right to search the current directory vnode.
2189 		// If a file system doesn't have the access() function, we assume that
2190 		// searching a directory is always allowed
2191 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2192 			status = FS_CALL(vnode, access, X_OK);
2193 
2194 		// Tell the filesystem to get the vnode of this path component (if we
2195 		// got the permission from the call above)
2196 		if (status == B_OK)
2197 			status = lookup_dir_entry(vnode, path, &nextVnode);
2198 
2199 		if (status != B_OK) {
2200 			put_vnode(vnode);
2201 			return status;
2202 		}
2203 
2204 		// If the new node is a symbolic link, resolve it (if we've been told
2205 		// to do it)
2206 		if (S_ISLNK(nextVnode->Type())
2207 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2208 			size_t bufferSize;
2209 			char* buffer;
2210 
2211 			TRACE(("traverse link\n"));
2212 
2213 			// it's not exactly nice style using goto in this way, but hey,
2214 			// it works :-/
2215 			if (count + 1 > B_MAX_SYMLINKS) {
2216 				status = B_LINK_LIMIT;
2217 				goto resolve_link_error;
2218 			}
2219 
2220 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2221 			if (buffer == NULL) {
2222 				status = B_NO_MEMORY;
2223 				goto resolve_link_error;
2224 			}
2225 
2226 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2227 				bufferSize--;
2228 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2229 				// null-terminate
2230 				if (status >= 0)
2231 					buffer[bufferSize] = '\0';
2232 			} else
2233 				status = B_BAD_VALUE;
2234 
2235 			if (status != B_OK) {
2236 				free(buffer);
2237 
2238 		resolve_link_error:
2239 				put_vnode(vnode);
2240 				put_vnode(nextVnode);
2241 
2242 				return status;
2243 			}
2244 			put_vnode(nextVnode);
2245 
2246 			// Check if we start from the root directory or the current
2247 			// directory ("vnode" still points to that one).
2248 			// Cut off all leading slashes if it's the root directory
2249 			path = buffer;
2250 			bool absoluteSymlink = false;
2251 			if (path[0] == '/') {
2252 				// we don't need the old directory anymore
2253 				put_vnode(vnode);
2254 
2255 				while (*++path == '/')
2256 					;
2257 
2258 				mutex_lock(&sIOContextRootLock);
2259 				vnode = ioContext->root;
2260 				inc_vnode_ref_count(vnode);
2261 				mutex_unlock(&sIOContextRootLock);
2262 
2263 				absoluteSymlink = true;
2264 			}
2265 
2266 			inc_vnode_ref_count(vnode);
2267 				// balance the next recursion - we will decrement the
2268 				// ref_count of the vnode, no matter if we succeeded or not
2269 
2270 			if (absoluteSymlink && *path == '\0') {
2271 				// symlink was just "/"
2272 				nextVnode = vnode;
2273 			} else {
2274 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2275 					ioContext, &nextVnode, &lastParentID);
2276 			}
2277 
2278 			free(buffer);
2279 
2280 			if (status != B_OK) {
2281 				put_vnode(vnode);
2282 				return status;
2283 			}
2284 		} else
2285 			lastParentID = vnode->id;
2286 
2287 		// decrease the ref count on the old dir we just looked up into
2288 		put_vnode(vnode);
2289 
2290 		path = nextPath;
2291 		vnode = nextVnode;
2292 
2293 		// see if we hit a covered node
2294 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2295 			put_vnode(vnode);
2296 			vnode = coveringNode;
2297 		}
2298 	}
2299 
2300 	*_vnode = vnode;
2301 	if (_parentID)
2302 		*_parentID = lastParentID;
2303 
2304 	return B_OK;
2305 }
2306 
2307 
2308 static status_t
2309 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2310 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2311 {
2312 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2313 		get_current_io_context(kernel), _vnode, _parentID);
2314 }
2315 
2316 
2317 static status_t
2318 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2319 	ino_t* _parentID, bool kernel)
2320 {
2321 	struct vnode* start = NULL;
2322 
2323 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2324 
2325 	if (!path)
2326 		return B_BAD_VALUE;
2327 
2328 	if (*path == '\0')
2329 		return B_ENTRY_NOT_FOUND;
2330 
2331 	// figure out if we need to start at root or at cwd
2332 	if (*path == '/') {
2333 		if (sRoot == NULL) {
2334 			// we're a bit early, aren't we?
2335 			return B_ERROR;
2336 		}
2337 
2338 		while (*++path == '/')
2339 			;
2340 		start = get_root_vnode(kernel);
2341 
2342 		if (*path == '\0') {
2343 			*_vnode = start;
2344 			return B_OK;
2345 		}
2346 
2347 	} else {
2348 		struct io_context* context = get_current_io_context(kernel);
2349 
2350 		mutex_lock(&context->io_mutex);
2351 		start = context->cwd;
2352 		if (start != NULL)
2353 			inc_vnode_ref_count(start);
2354 		mutex_unlock(&context->io_mutex);
2355 
2356 		if (start == NULL)
2357 			return B_ERROR;
2358 	}
2359 
2360 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2361 		_parentID);
2362 }
2363 
2364 
2365 /*! Returns the vnode in the next to last segment of the path, and returns
2366 	the last portion in filename.
2367 	The path buffer must be able to store at least one additional character.
2368 */
2369 static status_t
2370 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2371 	bool kernel)
2372 {
2373 	status_t status = get_dir_path_and_leaf(path, filename);
2374 	if (status != B_OK)
2375 		return status;
2376 
2377 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2378 }
2379 
2380 
2381 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2382 		   to by a FD + path pair.
2383 
2384 	\a path must be given in either case. \a fd might be omitted, in which
2385 	case \a path is either an absolute path or one relative to the current
2386 	directory. If both a supplied and \a path is relative it is reckoned off
2387 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2388 	ignored.
2389 
2390 	The caller has the responsibility to call put_vnode() on the returned
2391 	directory vnode.
2392 
2393 	\param fd The FD. May be < 0.
2394 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2395 	       is modified by this function. It must have at least room for a
2396 	       string one character longer than the path it contains.
2397 	\param _vnode A pointer to a variable the directory vnode shall be written
2398 		   into.
2399 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2400 		   the leaf name of the specified entry will be written.
2401 	\param kernel \c true, if invoked from inside the kernel, \c false if
2402 		   invoked from userland.
2403 	\return \c B_OK, if everything went fine, another error code otherwise.
2404 */
2405 static status_t
2406 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2407 	char* filename, bool kernel)
2408 {
2409 	if (!path)
2410 		return B_BAD_VALUE;
2411 	if (*path == '\0')
2412 		return B_ENTRY_NOT_FOUND;
2413 	if (fd < 0)
2414 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2415 
2416 	status_t status = get_dir_path_and_leaf(path, filename);
2417 	if (status != B_OK)
2418 		return status;
2419 
2420 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2421 }
2422 
2423 
2424 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2425 		   to by a vnode + path pair.
2426 
2427 	\a path must be given in either case. \a vnode might be omitted, in which
2428 	case \a path is either an absolute path or one relative to the current
2429 	directory. If both a supplied and \a path is relative it is reckoned off
2430 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2431 	ignored.
2432 
2433 	The caller has the responsibility to call put_vnode() on the returned
2434 	directory vnode.
2435 
2436 	\param vnode The vnode. May be \c NULL.
2437 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2438 	       is modified by this function. It must have at least room for a
2439 	       string one character longer than the path it contains.
2440 	\param _vnode A pointer to a variable the directory vnode shall be written
2441 		   into.
2442 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2443 		   the leaf name of the specified entry will be written.
2444 	\param kernel \c true, if invoked from inside the kernel, \c false if
2445 		   invoked from userland.
2446 	\return \c B_OK, if everything went fine, another error code otherwise.
2447 */
2448 static status_t
2449 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2450 	struct vnode** _vnode, char* filename, bool kernel)
2451 {
2452 	if (!path)
2453 		return B_BAD_VALUE;
2454 	if (*path == '\0')
2455 		return B_ENTRY_NOT_FOUND;
2456 	if (vnode == NULL || path[0] == '/')
2457 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2458 
2459 	status_t status = get_dir_path_and_leaf(path, filename);
2460 	if (status != B_OK)
2461 		return status;
2462 
2463 	inc_vnode_ref_count(vnode);
2464 		// vnode_path_to_vnode() always decrements the ref count
2465 
2466 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2467 }
2468 
2469 
2470 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2471 */
2472 static status_t
2473 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2474 	size_t bufferSize, struct io_context* ioContext)
2475 {
2476 	if (bufferSize < sizeof(struct dirent))
2477 		return B_BAD_VALUE;
2478 
2479 	// See if the vnode is covering another vnode and move to the covered
2480 	// vnode so we get the underlying file system
2481 	VNodePutter vnodePutter;
2482 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2483 		vnode = coveredVnode;
2484 		vnodePutter.SetTo(vnode);
2485 	}
2486 
2487 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2488 		// The FS supports getting the name of a vnode.
2489 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2490 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2491 			return B_OK;
2492 	}
2493 
2494 	// The FS doesn't support getting the name of a vnode. So we search the
2495 	// parent directory for the vnode, if the caller let us.
2496 
2497 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2498 		return B_UNSUPPORTED;
2499 
2500 	void* cookie;
2501 
2502 	status_t status = FS_CALL(parent, open_dir, &cookie);
2503 	if (status >= B_OK) {
2504 		while (true) {
2505 			uint32 num = 1;
2506 			// We use the FS hook directly instead of dir_read(), since we don't
2507 			// want the entries to be fixed. We have already resolved vnode to
2508 			// the covered node.
2509 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2510 				&num);
2511 			if (status != B_OK)
2512 				break;
2513 			if (num == 0) {
2514 				status = B_ENTRY_NOT_FOUND;
2515 				break;
2516 			}
2517 
2518 			if (vnode->id == buffer->d_ino) {
2519 				// found correct entry!
2520 				break;
2521 			}
2522 		}
2523 
2524 		FS_CALL(parent, close_dir, cookie);
2525 		FS_CALL(parent, free_dir_cookie, cookie);
2526 	}
2527 	return status;
2528 }
2529 
2530 
2531 static status_t
2532 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2533 	size_t nameSize, bool kernel)
2534 {
2535 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2536 	struct dirent* dirent = (struct dirent*)buffer;
2537 
2538 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2539 		get_current_io_context(kernel));
2540 	if (status != B_OK)
2541 		return status;
2542 
2543 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2544 		return B_BUFFER_OVERFLOW;
2545 
2546 	return B_OK;
2547 }
2548 
2549 
2550 /*!	Gets the full path to a given directory vnode.
2551 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2552 	file system doesn't support this call, it will fall back to iterating
2553 	through the parent directory to get the name of the child.
2554 
2555 	To protect against circular loops, it supports a maximum tree depth
2556 	of 256 levels.
2557 
2558 	Note that the path may not be correct the time this function returns!
2559 	It doesn't use any locking to prevent returning the correct path, as
2560 	paths aren't safe anyway: the path to a file can change at any time.
2561 
2562 	It might be a good idea, though, to check if the returned path exists
2563 	in the calling function (it's not done here because of efficiency)
2564 */
2565 static status_t
2566 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2567 	bool kernel)
2568 {
2569 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2570 
2571 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2572 		return B_BAD_VALUE;
2573 
2574 	if (!S_ISDIR(vnode->Type()))
2575 		return B_NOT_A_DIRECTORY;
2576 
2577 	char* path = buffer;
2578 	int32 insert = bufferSize;
2579 	int32 maxLevel = 256;
2580 	int32 length;
2581 	status_t status = B_OK;
2582 	struct io_context* ioContext = get_current_io_context(kernel);
2583 
2584 	// we don't use get_vnode() here because this call is more
2585 	// efficient and does all we need from get_vnode()
2586 	inc_vnode_ref_count(vnode);
2587 
2588 	path[--insert] = '\0';
2589 		// the path is filled right to left
2590 
2591 	while (true) {
2592 		// If the node is the context's root, bail out. Otherwise resolve mount
2593 		// points.
2594 		if (vnode == ioContext->root)
2595 			break;
2596 
2597 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2598 			put_vnode(vnode);
2599 			vnode = coveredVnode;
2600 		}
2601 
2602 		// lookup the parent vnode
2603 		struct vnode* parentVnode;
2604 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2605 		if (status != B_OK)
2606 			goto out;
2607 
2608 		if (parentVnode == vnode) {
2609 			// The caller apparently got their hands on a node outside of their
2610 			// context's root. Now we've hit the global root.
2611 			put_vnode(parentVnode);
2612 			break;
2613 		}
2614 
2615 		// get the node's name
2616 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2617 			// also used for fs_read_dir()
2618 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2619 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2620 			sizeof(nameBuffer), ioContext);
2621 
2622 		// release the current vnode, we only need its parent from now on
2623 		put_vnode(vnode);
2624 		vnode = parentVnode;
2625 
2626 		if (status != B_OK)
2627 			goto out;
2628 
2629 		// TODO: add an explicit check for loops in about 10 levels to do
2630 		// real loop detection
2631 
2632 		// don't go deeper as 'maxLevel' to prevent circular loops
2633 		if (maxLevel-- < 0) {
2634 			status = B_LINK_LIMIT;
2635 			goto out;
2636 		}
2637 
2638 		// add the name in front of the current path
2639 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2640 		length = strlen(name);
2641 		insert -= length;
2642 		if (insert <= 0) {
2643 			status = B_RESULT_NOT_REPRESENTABLE;
2644 			goto out;
2645 		}
2646 		memcpy(path + insert, name, length);
2647 		path[--insert] = '/';
2648 	}
2649 
2650 	// the root dir will result in an empty path: fix it
2651 	if (path[insert] == '\0')
2652 		path[--insert] = '/';
2653 
2654 	TRACE(("  path is: %s\n", path + insert));
2655 
2656 	// move the path to the start of the buffer
2657 	length = bufferSize - insert;
2658 	memmove(buffer, path + insert, length);
2659 
2660 out:
2661 	put_vnode(vnode);
2662 	return status;
2663 }
2664 
2665 
2666 /*!	Checks the length of every path component, and adds a '.'
2667 	if the path ends in a slash.
2668 	The given path buffer must be able to store at least one
2669 	additional character.
2670 */
2671 static status_t
2672 check_path(char* to)
2673 {
2674 	int32 length = 0;
2675 
2676 	// check length of every path component
2677 
2678 	while (*to) {
2679 		char* begin;
2680 		if (*to == '/')
2681 			to++, length++;
2682 
2683 		begin = to;
2684 		while (*to != '/' && *to)
2685 			to++, length++;
2686 
2687 		if (to - begin > B_FILE_NAME_LENGTH)
2688 			return B_NAME_TOO_LONG;
2689 	}
2690 
2691 	if (length == 0)
2692 		return B_ENTRY_NOT_FOUND;
2693 
2694 	// complete path if there is a slash at the end
2695 
2696 	if (*(to - 1) == '/') {
2697 		if (length > B_PATH_NAME_LENGTH - 2)
2698 			return B_NAME_TOO_LONG;
2699 
2700 		to[0] = '.';
2701 		to[1] = '\0';
2702 	}
2703 
2704 	return B_OK;
2705 }
2706 
2707 
2708 static struct file_descriptor*
2709 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2710 {
2711 	struct file_descriptor* descriptor
2712 		= get_fd(get_current_io_context(kernel), fd);
2713 	if (descriptor == NULL)
2714 		return NULL;
2715 
2716 	struct vnode* vnode = fd_vnode(descriptor);
2717 	if (vnode == NULL) {
2718 		put_fd(descriptor);
2719 		return NULL;
2720 	}
2721 
2722 	// ToDo: when we can close a file descriptor at any point, investigate
2723 	//	if this is still valid to do (accessing the vnode without ref_count
2724 	//	or locking)
2725 	*_vnode = vnode;
2726 	return descriptor;
2727 }
2728 
2729 
2730 static struct vnode*
2731 get_vnode_from_fd(int fd, bool kernel)
2732 {
2733 	struct file_descriptor* descriptor;
2734 	struct vnode* vnode;
2735 
2736 	descriptor = get_fd(get_current_io_context(kernel), fd);
2737 	if (descriptor == NULL)
2738 		return NULL;
2739 
2740 	vnode = fd_vnode(descriptor);
2741 	if (vnode != NULL)
2742 		inc_vnode_ref_count(vnode);
2743 
2744 	put_fd(descriptor);
2745 	return vnode;
2746 }
2747 
2748 
2749 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2750 	only the path will be considered. In this case, the \a path must not be
2751 	NULL.
2752 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2753 	and should be NULL for files.
2754 */
2755 static status_t
2756 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2757 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2758 {
2759 	if (fd < 0 && !path)
2760 		return B_BAD_VALUE;
2761 
2762 	if (path != NULL && *path == '\0')
2763 		return B_ENTRY_NOT_FOUND;
2764 
2765 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2766 		// no FD or absolute path
2767 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2768 	}
2769 
2770 	// FD only, or FD + relative path
2771 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2772 	if (vnode == NULL)
2773 		return B_FILE_ERROR;
2774 
2775 	if (path != NULL) {
2776 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2777 			_vnode, _parentID);
2778 	}
2779 
2780 	// there is no relative path to take into account
2781 
2782 	*_vnode = vnode;
2783 	if (_parentID)
2784 		*_parentID = -1;
2785 
2786 	return B_OK;
2787 }
2788 
2789 
2790 static int
2791 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2792 	void* cookie, int openMode, bool kernel)
2793 {
2794 	struct file_descriptor* descriptor;
2795 	int fd;
2796 
2797 	// If the vnode is locked, we don't allow creating a new file/directory
2798 	// file_descriptor for it
2799 	if (vnode && vnode->mandatory_locked_by != NULL
2800 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2801 		return B_BUSY;
2802 
2803 	descriptor = alloc_fd();
2804 	if (!descriptor)
2805 		return B_NO_MEMORY;
2806 
2807 	if (vnode)
2808 		descriptor->u.vnode = vnode;
2809 	else
2810 		descriptor->u.mount = mount;
2811 	descriptor->cookie = cookie;
2812 
2813 	switch (type) {
2814 		// vnode types
2815 		case FDTYPE_FILE:
2816 			descriptor->ops = &sFileOps;
2817 			break;
2818 		case FDTYPE_DIR:
2819 			descriptor->ops = &sDirectoryOps;
2820 			break;
2821 		case FDTYPE_ATTR:
2822 			descriptor->ops = &sAttributeOps;
2823 			break;
2824 		case FDTYPE_ATTR_DIR:
2825 			descriptor->ops = &sAttributeDirectoryOps;
2826 			break;
2827 
2828 		// mount types
2829 		case FDTYPE_INDEX_DIR:
2830 			descriptor->ops = &sIndexDirectoryOps;
2831 			break;
2832 		case FDTYPE_QUERY:
2833 			descriptor->ops = &sQueryOps;
2834 			break;
2835 
2836 		default:
2837 			panic("get_new_fd() called with unknown type %d\n", type);
2838 			break;
2839 	}
2840 	descriptor->type = type;
2841 	descriptor->open_mode = openMode;
2842 
2843 	io_context* context = get_current_io_context(kernel);
2844 	fd = new_fd(context, descriptor);
2845 	if (fd < 0) {
2846 		free(descriptor);
2847 		return B_NO_MORE_FDS;
2848 	}
2849 
2850 	mutex_lock(&context->io_mutex);
2851 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2852 	mutex_unlock(&context->io_mutex);
2853 
2854 	return fd;
2855 }
2856 
2857 
2858 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2859 	vfs_normalize_path(). See there for more documentation.
2860 */
2861 static status_t
2862 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2863 {
2864 	VNodePutter dirPutter;
2865 	struct vnode* dir = NULL;
2866 	status_t error;
2867 
2868 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2869 		// get dir vnode + leaf name
2870 		struct vnode* nextDir;
2871 		char leaf[B_FILE_NAME_LENGTH];
2872 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2873 		if (error != B_OK)
2874 			return error;
2875 
2876 		dir = nextDir;
2877 		strcpy(path, leaf);
2878 		dirPutter.SetTo(dir);
2879 
2880 		// get file vnode, if we shall resolve links
2881 		bool fileExists = false;
2882 		struct vnode* fileVnode;
2883 		VNodePutter fileVnodePutter;
2884 		if (traverseLink) {
2885 			inc_vnode_ref_count(dir);
2886 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2887 					NULL) == B_OK) {
2888 				fileVnodePutter.SetTo(fileVnode);
2889 				fileExists = true;
2890 			}
2891 		}
2892 
2893 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2894 			// we're done -- construct the path
2895 			bool hasLeaf = true;
2896 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2897 				// special cases "." and ".." -- get the dir, forget the leaf
2898 				inc_vnode_ref_count(dir);
2899 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2900 					&nextDir, NULL);
2901 				if (error != B_OK)
2902 					return error;
2903 				dir = nextDir;
2904 				dirPutter.SetTo(dir);
2905 				hasLeaf = false;
2906 			}
2907 
2908 			// get the directory path
2909 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2910 			if (error != B_OK)
2911 				return error;
2912 
2913 			// append the leaf name
2914 			if (hasLeaf) {
2915 				// insert a directory separator if this is not the file system
2916 				// root
2917 				if ((strcmp(path, "/") != 0
2918 					&& strlcat(path, "/", pathSize) >= pathSize)
2919 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2920 					return B_NAME_TOO_LONG;
2921 				}
2922 			}
2923 
2924 			return B_OK;
2925 		}
2926 
2927 		// read link
2928 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2929 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2930 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2931 			if (error != B_OK)
2932 				return error;
2933 			path[bufferSize] = '\0';
2934 		} else
2935 			return B_BAD_VALUE;
2936 	}
2937 
2938 	return B_LINK_LIMIT;
2939 }
2940 
2941 
2942 static status_t
2943 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2944 	struct io_context* ioContext)
2945 {
2946 	// Make sure the IO context root is not bypassed.
2947 	if (parent == ioContext->root) {
2948 		*_device = parent->device;
2949 		*_node = parent->id;
2950 		return B_OK;
2951 	}
2952 
2953 	inc_vnode_ref_count(parent);
2954 		// vnode_path_to_vnode() puts the node
2955 
2956 	// ".." is guaranteed not to be clobbered by this call
2957 	struct vnode* vnode;
2958 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2959 		ioContext, &vnode, NULL);
2960 	if (status == B_OK) {
2961 		*_device = vnode->device;
2962 		*_node = vnode->id;
2963 		put_vnode(vnode);
2964 	}
2965 
2966 	return status;
2967 }
2968 
2969 
2970 #ifdef ADD_DEBUGGER_COMMANDS
2971 
2972 
2973 static void
2974 _dump_advisory_locking(advisory_locking* locking)
2975 {
2976 	if (locking == NULL)
2977 		return;
2978 
2979 	kprintf("   lock:        %" B_PRId32, locking->lock);
2980 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2981 
2982 	int32 index = 0;
2983 	LockList::Iterator iterator = locking->locks.GetIterator();
2984 	while (iterator.HasNext()) {
2985 		struct advisory_lock* lock = iterator.Next();
2986 
2987 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2988 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2989 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2990 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2991 	}
2992 }
2993 
2994 
2995 static void
2996 _dump_mount(struct fs_mount* mount)
2997 {
2998 	kprintf("MOUNT: %p\n", mount);
2999 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3000 	kprintf(" device_name:   %s\n", mount->device_name);
3001 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3002 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3003 	kprintf(" partition:     %p\n", mount->partition);
3004 	kprintf(" lock:          %p\n", &mount->rlock);
3005 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3006 		mount->owns_file_device ? " owns_file_device" : "");
3007 
3008 	fs_volume* volume = mount->volume;
3009 	while (volume != NULL) {
3010 		kprintf(" volume %p:\n", volume);
3011 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3012 		kprintf("  private_volume:   %p\n", volume->private_volume);
3013 		kprintf("  ops:              %p\n", volume->ops);
3014 		kprintf("  file_system:      %p\n", volume->file_system);
3015 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3016 		volume = volume->super_volume;
3017 	}
3018 
3019 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3020 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3021 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3022 	set_debug_variable("_partition", (addr_t)mount->partition);
3023 }
3024 
3025 
3026 static bool
3027 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3028 	const char* name)
3029 {
3030 	bool insertSlash = buffer[bufferSize] != '\0';
3031 	size_t nameLength = strlen(name);
3032 
3033 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3034 		return false;
3035 
3036 	if (insertSlash)
3037 		buffer[--bufferSize] = '/';
3038 
3039 	bufferSize -= nameLength;
3040 	memcpy(buffer + bufferSize, name, nameLength);
3041 
3042 	return true;
3043 }
3044 
3045 
3046 static bool
3047 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3048 	ino_t nodeID)
3049 {
3050 	if (bufferSize == 0)
3051 		return false;
3052 
3053 	bool insertSlash = buffer[bufferSize] != '\0';
3054 	if (insertSlash)
3055 		buffer[--bufferSize] = '/';
3056 
3057 	size_t size = snprintf(buffer, bufferSize,
3058 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3059 	if (size > bufferSize) {
3060 		if (insertSlash)
3061 			bufferSize++;
3062 		return false;
3063 	}
3064 
3065 	if (size < bufferSize)
3066 		memmove(buffer + bufferSize - size, buffer, size);
3067 
3068 	bufferSize -= size;
3069 	return true;
3070 }
3071 
3072 
3073 static char*
3074 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3075 	bool& _truncated)
3076 {
3077 	// null-terminate the path
3078 	buffer[--bufferSize] = '\0';
3079 
3080 	while (true) {
3081 		while (vnode->covers != NULL)
3082 			vnode = vnode->covers;
3083 
3084 		if (vnode == sRoot) {
3085 			_truncated = bufferSize == 0;
3086 			if (!_truncated)
3087 				buffer[--bufferSize] = '/';
3088 			return buffer + bufferSize;
3089 		}
3090 
3091 		// resolve the name
3092 		ino_t dirID;
3093 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3094 			vnode->id, dirID);
3095 		if (name == NULL) {
3096 			// Failed to resolve the name -- prepend "<dev,node>/".
3097 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3098 				vnode->mount->id, vnode->id);
3099 			return buffer + bufferSize;
3100 		}
3101 
3102 		// prepend the name
3103 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3104 			_truncated = true;
3105 			return buffer + bufferSize;
3106 		}
3107 
3108 		// resolve the directory node
3109 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3110 		if (nextVnode == NULL) {
3111 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3112 				vnode->mount->id, dirID);
3113 			return buffer + bufferSize;
3114 		}
3115 
3116 		vnode = nextVnode;
3117 	}
3118 }
3119 
3120 
3121 static void
3122 _dump_vnode(struct vnode* vnode, bool printPath)
3123 {
3124 	kprintf("VNODE: %p\n", vnode);
3125 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3126 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3127 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3128 	kprintf(" private_node:  %p\n", vnode->private_node);
3129 	kprintf(" mount:         %p\n", vnode->mount);
3130 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3131 	kprintf(" covers:        %p\n", vnode->covers);
3132 	kprintf(" cache:         %p\n", vnode->cache);
3133 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3134 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3135 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3136 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3137 
3138 	_dump_advisory_locking(vnode->advisory_locking);
3139 
3140 	if (printPath) {
3141 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3142 		if (buffer != NULL) {
3143 			bool truncated;
3144 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3145 				B_PATH_NAME_LENGTH, truncated);
3146 			if (path != NULL) {
3147 				kprintf(" path:          ");
3148 				if (truncated)
3149 					kputs("<truncated>/");
3150 				kputs(path);
3151 				kputs("\n");
3152 			} else
3153 				kprintf("Failed to resolve vnode path.\n");
3154 
3155 			debug_free(buffer);
3156 		} else
3157 			kprintf("Failed to allocate memory for constructing the path.\n");
3158 	}
3159 
3160 	set_debug_variable("_node", (addr_t)vnode->private_node);
3161 	set_debug_variable("_mount", (addr_t)vnode->mount);
3162 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3163 	set_debug_variable("_covers", (addr_t)vnode->covers);
3164 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3165 }
3166 
3167 
3168 static int
3169 dump_mount(int argc, char** argv)
3170 {
3171 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3172 		kprintf("usage: %s [id|address]\n", argv[0]);
3173 		return 0;
3174 	}
3175 
3176 	ulong val = parse_expression(argv[1]);
3177 	uint32 id = val;
3178 
3179 	struct fs_mount* mount = sMountsTable->Lookup(id);
3180 	if (mount == NULL) {
3181 		if (IS_USER_ADDRESS(id)) {
3182 			kprintf("fs_mount not found\n");
3183 			return 0;
3184 		}
3185 		mount = (fs_mount*)val;
3186 	}
3187 
3188 	_dump_mount(mount);
3189 	return 0;
3190 }
3191 
3192 
3193 static int
3194 dump_mounts(int argc, char** argv)
3195 {
3196 	if (argc != 1) {
3197 		kprintf("usage: %s\n", argv[0]);
3198 		return 0;
3199 	}
3200 
3201 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3202 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3203 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3204 
3205 	struct fs_mount* mount;
3206 
3207 	MountTable::Iterator iterator(sMountsTable);
3208 	while (iterator.HasNext()) {
3209 		mount = iterator.Next();
3210 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3211 			mount->root_vnode->covers, mount->volume->private_volume,
3212 			mount->volume->file_system_name);
3213 
3214 		fs_volume* volume = mount->volume;
3215 		while (volume->super_volume != NULL) {
3216 			volume = volume->super_volume;
3217 			kprintf("                                     %p %s\n",
3218 				volume->private_volume, volume->file_system_name);
3219 		}
3220 	}
3221 
3222 	return 0;
3223 }
3224 
3225 
3226 static int
3227 dump_vnode(int argc, char** argv)
3228 {
3229 	bool printPath = false;
3230 	int argi = 1;
3231 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3232 		printPath = true;
3233 		argi++;
3234 	}
3235 
3236 	if (argi >= argc || argi + 2 < argc) {
3237 		print_debugger_command_usage(argv[0]);
3238 		return 0;
3239 	}
3240 
3241 	struct vnode* vnode = NULL;
3242 
3243 	if (argi + 1 == argc) {
3244 		vnode = (struct vnode*)parse_expression(argv[argi]);
3245 		if (IS_USER_ADDRESS(vnode)) {
3246 			kprintf("invalid vnode address\n");
3247 			return 0;
3248 		}
3249 		_dump_vnode(vnode, printPath);
3250 		return 0;
3251 	}
3252 
3253 	dev_t device = parse_expression(argv[argi]);
3254 	ino_t id = parse_expression(argv[argi + 1]);
3255 
3256 	VnodeTable::Iterator iterator(sVnodeTable);
3257 	while (iterator.HasNext()) {
3258 		vnode = iterator.Next();
3259 		if (vnode->id != id || vnode->device != device)
3260 			continue;
3261 
3262 		_dump_vnode(vnode, printPath);
3263 	}
3264 
3265 	return 0;
3266 }
3267 
3268 
3269 static int
3270 dump_vnodes(int argc, char** argv)
3271 {
3272 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3273 		kprintf("usage: %s [device]\n", argv[0]);
3274 		return 0;
3275 	}
3276 
3277 	// restrict dumped nodes to a certain device if requested
3278 	dev_t device = parse_expression(argv[1]);
3279 
3280 	struct vnode* vnode;
3281 
3282 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3283 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3284 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3285 
3286 	VnodeTable::Iterator iterator(sVnodeTable);
3287 	while (iterator.HasNext()) {
3288 		vnode = iterator.Next();
3289 		if (vnode->device != device)
3290 			continue;
3291 
3292 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3293 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3294 			vnode->private_node, vnode->advisory_locking,
3295 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3296 			vnode->IsUnpublished() ? "u" : "-");
3297 	}
3298 
3299 	return 0;
3300 }
3301 
3302 
3303 static int
3304 dump_vnode_caches(int argc, char** argv)
3305 {
3306 	struct vnode* vnode;
3307 
3308 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3309 		kprintf("usage: %s [device]\n", argv[0]);
3310 		return 0;
3311 	}
3312 
3313 	// restrict dumped nodes to a certain device if requested
3314 	dev_t device = -1;
3315 	if (argc > 1)
3316 		device = parse_expression(argv[1]);
3317 
3318 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3319 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3320 
3321 	VnodeTable::Iterator iterator(sVnodeTable);
3322 	while (iterator.HasNext()) {
3323 		vnode = iterator.Next();
3324 		if (vnode->cache == NULL)
3325 			continue;
3326 		if (device != -1 && vnode->device != device)
3327 			continue;
3328 
3329 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3330 			vnode, vnode->device, vnode->id, vnode->cache,
3331 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3332 			vnode->cache->page_count);
3333 	}
3334 
3335 	return 0;
3336 }
3337 
3338 
3339 int
3340 dump_io_context(int argc, char** argv)
3341 {
3342 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3343 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3344 		return 0;
3345 	}
3346 
3347 	struct io_context* context = NULL;
3348 
3349 	if (argc > 1) {
3350 		ulong num = parse_expression(argv[1]);
3351 		if (IS_KERNEL_ADDRESS(num))
3352 			context = (struct io_context*)num;
3353 		else {
3354 			Team* team = team_get_team_struct_locked(num);
3355 			if (team == NULL) {
3356 				kprintf("could not find team with ID %lu\n", num);
3357 				return 0;
3358 			}
3359 			context = (struct io_context*)team->io_context;
3360 		}
3361 	} else
3362 		context = get_current_io_context(true);
3363 
3364 	kprintf("I/O CONTEXT: %p\n", context);
3365 	kprintf(" root vnode:\t%p\n", context->root);
3366 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3367 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3368 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3369 
3370 	if (context->num_used_fds) {
3371 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3372 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3373 	}
3374 
3375 	for (uint32 i = 0; i < context->table_size; i++) {
3376 		struct file_descriptor* fd = context->fds[i];
3377 		if (fd == NULL)
3378 			continue;
3379 
3380 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3381 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3382 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3383 			fd->pos, fd->cookie,
3384 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3385 				? "mount" : "vnode",
3386 			fd->u.vnode);
3387 	}
3388 
3389 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3390 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3391 
3392 	set_debug_variable("_cwd", (addr_t)context->cwd);
3393 
3394 	return 0;
3395 }
3396 
3397 
3398 int
3399 dump_vnode_usage(int argc, char** argv)
3400 {
3401 	if (argc != 1) {
3402 		kprintf("usage: %s\n", argv[0]);
3403 		return 0;
3404 	}
3405 
3406 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3407 		sUnusedVnodes, kMaxUnusedVnodes);
3408 
3409 	uint32 count = sVnodeTable->CountElements();
3410 
3411 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3412 		count - sUnusedVnodes);
3413 	return 0;
3414 }
3415 
3416 #endif	// ADD_DEBUGGER_COMMANDS
3417 
3418 
3419 /*!	Clears memory specified by an iovec array.
3420 */
3421 static void
3422 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3423 {
3424 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3425 		size_t length = std::min(vecs[i].iov_len, bytes);
3426 		memset(vecs[i].iov_base, 0, length);
3427 		bytes -= length;
3428 	}
3429 }
3430 
3431 
3432 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3433 	and calls the file system hooks to read/write the request to disk.
3434 */
3435 static status_t
3436 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3437 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3438 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3439 	bool doWrite)
3440 {
3441 	if (fileVecCount == 0) {
3442 		// There are no file vecs at this offset, so we're obviously trying
3443 		// to access the file outside of its bounds
3444 		return B_BAD_VALUE;
3445 	}
3446 
3447 	size_t numBytes = *_numBytes;
3448 	uint32 fileVecIndex;
3449 	size_t vecOffset = *_vecOffset;
3450 	uint32 vecIndex = *_vecIndex;
3451 	status_t status;
3452 	size_t size;
3453 
3454 	if (!doWrite && vecOffset == 0) {
3455 		// now directly read the data from the device
3456 		// the first file_io_vec can be read directly
3457 
3458 		if (fileVecs[0].length < (off_t)numBytes)
3459 			size = fileVecs[0].length;
3460 		else
3461 			size = numBytes;
3462 
3463 		if (fileVecs[0].offset >= 0) {
3464 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3465 				&vecs[vecIndex], vecCount - vecIndex, &size);
3466 		} else {
3467 			// sparse read
3468 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3469 			status = B_OK;
3470 		}
3471 		if (status != B_OK)
3472 			return status;
3473 
3474 		// TODO: this is a work-around for buggy device drivers!
3475 		//	When our own drivers honour the length, we can:
3476 		//	a) also use this direct I/O for writes (otherwise, it would
3477 		//	   overwrite precious data)
3478 		//	b) panic if the term below is true (at least for writes)
3479 		if ((off_t)size > fileVecs[0].length) {
3480 			//dprintf("warning: device driver %p doesn't respect total length "
3481 			//	"in read_pages() call!\n", ref->device);
3482 			size = fileVecs[0].length;
3483 		}
3484 
3485 		ASSERT((off_t)size <= fileVecs[0].length);
3486 
3487 		// If the file portion was contiguous, we're already done now
3488 		if (size == numBytes)
3489 			return B_OK;
3490 
3491 		// if we reached the end of the file, we can return as well
3492 		if ((off_t)size != fileVecs[0].length) {
3493 			*_numBytes = size;
3494 			return B_OK;
3495 		}
3496 
3497 		fileVecIndex = 1;
3498 
3499 		// first, find out where we have to continue in our iovecs
3500 		for (; vecIndex < vecCount; vecIndex++) {
3501 			if (size < vecs[vecIndex].iov_len)
3502 				break;
3503 
3504 			size -= vecs[vecIndex].iov_len;
3505 		}
3506 
3507 		vecOffset = size;
3508 	} else {
3509 		fileVecIndex = 0;
3510 		size = 0;
3511 	}
3512 
3513 	// Too bad, let's process the rest of the file_io_vecs
3514 
3515 	size_t totalSize = size;
3516 	size_t bytesLeft = numBytes - size;
3517 
3518 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3519 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3520 		off_t fileOffset = fileVec.offset;
3521 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3522 
3523 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3524 			fileLeft));
3525 
3526 		// process the complete fileVec
3527 		while (fileLeft > 0) {
3528 			iovec tempVecs[MAX_TEMP_IO_VECS];
3529 			uint32 tempCount = 0;
3530 
3531 			// size tracks how much of what is left of the current fileVec
3532 			// (fileLeft) has been assigned to tempVecs
3533 			size = 0;
3534 
3535 			// assign what is left of the current fileVec to the tempVecs
3536 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3537 					&& tempCount < MAX_TEMP_IO_VECS;) {
3538 				// try to satisfy one iovec per iteration (or as much as
3539 				// possible)
3540 
3541 				// bytes left of the current iovec
3542 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3543 				if (vecLeft == 0) {
3544 					vecOffset = 0;
3545 					vecIndex++;
3546 					continue;
3547 				}
3548 
3549 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3550 					vecIndex, vecOffset, size));
3551 
3552 				// actually available bytes
3553 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3554 
3555 				tempVecs[tempCount].iov_base
3556 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3557 				tempVecs[tempCount].iov_len = tempVecSize;
3558 				tempCount++;
3559 
3560 				size += tempVecSize;
3561 				vecOffset += tempVecSize;
3562 			}
3563 
3564 			size_t bytes = size;
3565 
3566 			if (fileOffset == -1) {
3567 				if (doWrite) {
3568 					panic("sparse write attempt: vnode %p", vnode);
3569 					status = B_IO_ERROR;
3570 				} else {
3571 					// sparse read
3572 					zero_iovecs(tempVecs, tempCount, bytes);
3573 					status = B_OK;
3574 				}
3575 			} else if (doWrite) {
3576 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3577 					tempVecs, tempCount, &bytes);
3578 			} else {
3579 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3580 					tempVecs, tempCount, &bytes);
3581 			}
3582 			if (status != B_OK)
3583 				return status;
3584 
3585 			totalSize += bytes;
3586 			bytesLeft -= size;
3587 			if (fileOffset >= 0)
3588 				fileOffset += size;
3589 			fileLeft -= size;
3590 			//dprintf("-> file left = %Lu\n", fileLeft);
3591 
3592 			if (size != bytes || vecIndex >= vecCount) {
3593 				// there are no more bytes or iovecs, let's bail out
3594 				*_numBytes = totalSize;
3595 				return B_OK;
3596 			}
3597 		}
3598 	}
3599 
3600 	*_vecIndex = vecIndex;
3601 	*_vecOffset = vecOffset;
3602 	*_numBytes = totalSize;
3603 	return B_OK;
3604 }
3605 
3606 
3607 static bool
3608 is_user_in_group(gid_t gid)
3609 {
3610 	if (gid == getegid())
3611 		return true;
3612 
3613 	gid_t groups[NGROUPS_MAX];
3614 	int groupCount = getgroups(NGROUPS_MAX, groups);
3615 	for (int i = 0; i < groupCount; i++) {
3616 		if (gid == groups[i])
3617 			return true;
3618 	}
3619 
3620 	return false;
3621 }
3622 
3623 
3624 static status_t
3625 free_io_context(io_context* context)
3626 {
3627 	uint32 i;
3628 
3629 	TIOC(FreeIOContext(context));
3630 
3631 	if (context->root)
3632 		put_vnode(context->root);
3633 
3634 	if (context->cwd)
3635 		put_vnode(context->cwd);
3636 
3637 	mutex_lock(&context->io_mutex);
3638 
3639 	for (i = 0; i < context->table_size; i++) {
3640 		if (struct file_descriptor* descriptor = context->fds[i]) {
3641 			close_fd(descriptor);
3642 			put_fd(descriptor);
3643 		}
3644 	}
3645 
3646 	mutex_destroy(&context->io_mutex);
3647 
3648 	remove_node_monitors(context);
3649 	free(context->fds);
3650 	free(context);
3651 
3652 	return B_OK;
3653 }
3654 
3655 
3656 static status_t
3657 resize_monitor_table(struct io_context* context, const int newSize)
3658 {
3659 	int	status = B_OK;
3660 
3661 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3662 		return B_BAD_VALUE;
3663 
3664 	mutex_lock(&context->io_mutex);
3665 
3666 	if ((size_t)newSize < context->num_monitors) {
3667 		status = B_BUSY;
3668 		goto out;
3669 	}
3670 	context->max_monitors = newSize;
3671 
3672 out:
3673 	mutex_unlock(&context->io_mutex);
3674 	return status;
3675 }
3676 
3677 
3678 //	#pragma mark - public API for file systems
3679 
3680 
3681 extern "C" status_t
3682 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3683 	fs_vnode_ops* ops)
3684 {
3685 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3686 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3687 
3688 	if (privateNode == NULL)
3689 		return B_BAD_VALUE;
3690 
3691 	int32 tries = BUSY_VNODE_RETRIES;
3692 restart:
3693 	// create the node
3694 	bool nodeCreated;
3695 	struct vnode* vnode;
3696 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3697 		nodeCreated);
3698 	if (status != B_OK)
3699 		return status;
3700 
3701 	WriteLocker nodeLocker(sVnodeLock, true);
3702 		// create_new_vnode_and_lock() has locked for us
3703 
3704 	if (!nodeCreated && vnode->IsBusy()) {
3705 		nodeLocker.Unlock();
3706 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3707 			return B_BUSY;
3708 		goto restart;
3709 	}
3710 
3711 	// file system integrity check:
3712 	// test if the vnode already exists and bail out if this is the case!
3713 	if (!nodeCreated) {
3714 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3715 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3716 			vnode->private_node);
3717 		return B_ERROR;
3718 	}
3719 
3720 	vnode->private_node = privateNode;
3721 	vnode->ops = ops;
3722 	vnode->SetUnpublished(true);
3723 
3724 	TRACE(("returns: %s\n", strerror(status)));
3725 
3726 	return status;
3727 }
3728 
3729 
3730 extern "C" status_t
3731 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3732 	fs_vnode_ops* ops, int type, uint32 flags)
3733 {
3734 	FUNCTION(("publish_vnode()\n"));
3735 
3736 	int32 tries = BUSY_VNODE_RETRIES;
3737 restart:
3738 	WriteLocker locker(sVnodeLock);
3739 
3740 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3741 
3742 	bool nodeCreated = false;
3743 	if (vnode == NULL) {
3744 		if (privateNode == NULL)
3745 			return B_BAD_VALUE;
3746 
3747 		// create the node
3748 		locker.Unlock();
3749 			// create_new_vnode_and_lock() will re-lock for us on success
3750 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3751 			nodeCreated);
3752 		if (status != B_OK)
3753 			return status;
3754 
3755 		locker.SetTo(sVnodeLock, true);
3756 	}
3757 
3758 	if (nodeCreated) {
3759 		vnode->private_node = privateNode;
3760 		vnode->ops = ops;
3761 		vnode->SetUnpublished(true);
3762 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3763 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3764 		// already known, but not published
3765 	} else if (vnode->IsBusy()) {
3766 		locker.Unlock();
3767 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3768 			return B_BUSY;
3769 		goto restart;
3770 	} else
3771 		return B_BAD_VALUE;
3772 
3773 	bool publishSpecialSubNode = false;
3774 
3775 	vnode->SetType(type);
3776 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3777 	publishSpecialSubNode = is_special_node_type(type)
3778 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3779 
3780 	status_t status = B_OK;
3781 
3782 	// create sub vnodes, if necessary
3783 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3784 		locker.Unlock();
3785 
3786 		fs_volume* subVolume = volume;
3787 		if (volume->sub_volume != NULL) {
3788 			while (status == B_OK && subVolume->sub_volume != NULL) {
3789 				subVolume = subVolume->sub_volume;
3790 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3791 					vnode);
3792 			}
3793 		}
3794 
3795 		if (status == B_OK && publishSpecialSubNode)
3796 			status = create_special_sub_node(vnode, flags);
3797 
3798 		if (status != B_OK) {
3799 			// error -- clean up the created sub vnodes
3800 			while (subVolume->super_volume != volume) {
3801 				subVolume = subVolume->super_volume;
3802 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3803 			}
3804 		}
3805 
3806 		if (status == B_OK) {
3807 			ReadLocker vnodesReadLocker(sVnodeLock);
3808 			AutoLocker<Vnode> nodeLocker(vnode);
3809 			vnode->SetBusy(false);
3810 			vnode->SetUnpublished(false);
3811 		} else {
3812 			locker.Lock();
3813 			sVnodeTable->Remove(vnode);
3814 			remove_vnode_from_mount_list(vnode, vnode->mount);
3815 			free(vnode);
3816 		}
3817 	} else {
3818 		// we still hold the write lock -- mark the node unbusy and published
3819 		vnode->SetBusy(false);
3820 		vnode->SetUnpublished(false);
3821 	}
3822 
3823 	TRACE(("returns: %s\n", strerror(status)));
3824 
3825 	return status;
3826 }
3827 
3828 
3829 extern "C" status_t
3830 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3831 {
3832 	struct vnode* vnode;
3833 
3834 	if (volume == NULL)
3835 		return B_BAD_VALUE;
3836 
3837 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3838 	if (status != B_OK)
3839 		return status;
3840 
3841 	// If this is a layered FS, we need to get the node cookie for the requested
3842 	// layer.
3843 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3844 		fs_vnode resolvedNode;
3845 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3846 			&resolvedNode);
3847 		if (status != B_OK) {
3848 			panic("get_vnode(): Failed to get super node for vnode %p, "
3849 				"volume: %p", vnode, volume);
3850 			put_vnode(vnode);
3851 			return status;
3852 		}
3853 
3854 		if (_privateNode != NULL)
3855 			*_privateNode = resolvedNode.private_node;
3856 	} else if (_privateNode != NULL)
3857 		*_privateNode = vnode->private_node;
3858 
3859 	return B_OK;
3860 }
3861 
3862 
3863 extern "C" status_t
3864 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3865 {
3866 	struct vnode* vnode;
3867 
3868 	rw_lock_read_lock(&sVnodeLock);
3869 	vnode = lookup_vnode(volume->id, vnodeID);
3870 	rw_lock_read_unlock(&sVnodeLock);
3871 
3872 	if (vnode == NULL)
3873 		return B_BAD_VALUE;
3874 
3875 	inc_vnode_ref_count(vnode);
3876 	return B_OK;
3877 }
3878 
3879 
3880 extern "C" status_t
3881 put_vnode(fs_volume* volume, ino_t vnodeID)
3882 {
3883 	struct vnode* vnode;
3884 
3885 	rw_lock_read_lock(&sVnodeLock);
3886 	vnode = lookup_vnode(volume->id, vnodeID);
3887 	rw_lock_read_unlock(&sVnodeLock);
3888 
3889 	if (vnode == NULL)
3890 		return B_BAD_VALUE;
3891 
3892 	dec_vnode_ref_count(vnode, false, true);
3893 	return B_OK;
3894 }
3895 
3896 
3897 extern "C" status_t
3898 remove_vnode(fs_volume* volume, ino_t vnodeID)
3899 {
3900 	ReadLocker locker(sVnodeLock);
3901 
3902 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3903 	if (vnode == NULL)
3904 		return B_ENTRY_NOT_FOUND;
3905 
3906 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3907 		// this vnode is in use
3908 		return B_BUSY;
3909 	}
3910 
3911 	vnode->Lock();
3912 
3913 	vnode->SetRemoved(true);
3914 	bool removeUnpublished = false;
3915 
3916 	if (vnode->IsUnpublished()) {
3917 		// prepare the vnode for deletion
3918 		removeUnpublished = true;
3919 		vnode->SetBusy(true);
3920 	}
3921 
3922 	vnode->Unlock();
3923 	locker.Unlock();
3924 
3925 	if (removeUnpublished) {
3926 		// If the vnode hasn't been published yet, we delete it here
3927 		atomic_add(&vnode->ref_count, -1);
3928 		free_vnode(vnode, true);
3929 	}
3930 
3931 	return B_OK;
3932 }
3933 
3934 
3935 extern "C" status_t
3936 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3937 {
3938 	struct vnode* vnode;
3939 
3940 	rw_lock_read_lock(&sVnodeLock);
3941 
3942 	vnode = lookup_vnode(volume->id, vnodeID);
3943 	if (vnode) {
3944 		AutoLocker<Vnode> nodeLocker(vnode);
3945 		vnode->SetRemoved(false);
3946 	}
3947 
3948 	rw_lock_read_unlock(&sVnodeLock);
3949 	return B_OK;
3950 }
3951 
3952 
3953 extern "C" status_t
3954 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3955 {
3956 	ReadLocker _(sVnodeLock);
3957 
3958 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3959 		if (_removed != NULL)
3960 			*_removed = vnode->IsRemoved();
3961 		return B_OK;
3962 	}
3963 
3964 	return B_BAD_VALUE;
3965 }
3966 
3967 
3968 extern "C" fs_volume*
3969 volume_for_vnode(fs_vnode* _vnode)
3970 {
3971 	if (_vnode == NULL)
3972 		return NULL;
3973 
3974 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3975 	return vnode->mount->volume;
3976 }
3977 
3978 
3979 extern "C" status_t
3980 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3981 	uid_t nodeUserID)
3982 {
3983 	// get node permissions
3984 	int userPermissions = (mode & S_IRWXU) >> 6;
3985 	int groupPermissions = (mode & S_IRWXG) >> 3;
3986 	int otherPermissions = mode & S_IRWXO;
3987 
3988 	// get the node permissions for this uid/gid
3989 	int permissions = 0;
3990 	uid_t uid = geteuid();
3991 
3992 	if (uid == 0) {
3993 		// user is root
3994 		// root has always read/write permission, but at least one of the
3995 		// X bits must be set for execute permission
3996 		permissions = userPermissions | groupPermissions | otherPermissions
3997 			| S_IROTH | S_IWOTH;
3998 		if (S_ISDIR(mode))
3999 			permissions |= S_IXOTH;
4000 	} else if (uid == nodeUserID) {
4001 		// user is node owner
4002 		permissions = userPermissions;
4003 	} else if (is_user_in_group(nodeGroupID)) {
4004 		// user is in owning group
4005 		permissions = groupPermissions;
4006 	} else {
4007 		// user is one of the others
4008 		permissions = otherPermissions;
4009 	}
4010 
4011 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4012 }
4013 
4014 
4015 #if 0
4016 extern "C" status_t
4017 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4018 	size_t* _numBytes)
4019 {
4020 	struct file_descriptor* descriptor;
4021 	struct vnode* vnode;
4022 
4023 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4024 	if (descriptor == NULL)
4025 		return B_FILE_ERROR;
4026 
4027 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4028 		count, 0, _numBytes);
4029 
4030 	put_fd(descriptor);
4031 	return status;
4032 }
4033 
4034 
4035 extern "C" status_t
4036 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4037 	size_t* _numBytes)
4038 {
4039 	struct file_descriptor* descriptor;
4040 	struct vnode* vnode;
4041 
4042 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4043 	if (descriptor == NULL)
4044 		return B_FILE_ERROR;
4045 
4046 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4047 		count, 0, _numBytes);
4048 
4049 	put_fd(descriptor);
4050 	return status;
4051 }
4052 #endif
4053 
4054 
4055 extern "C" status_t
4056 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4057 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4058 	size_t* _bytes)
4059 {
4060 	struct file_descriptor* descriptor;
4061 	struct vnode* vnode;
4062 
4063 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4064 	if (descriptor == NULL)
4065 		return B_FILE_ERROR;
4066 
4067 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4068 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4069 		false);
4070 
4071 	put_fd(descriptor);
4072 	return status;
4073 }
4074 
4075 
4076 extern "C" status_t
4077 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4078 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4079 	size_t* _bytes)
4080 {
4081 	struct file_descriptor* descriptor;
4082 	struct vnode* vnode;
4083 
4084 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4085 	if (descriptor == NULL)
4086 		return B_FILE_ERROR;
4087 
4088 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4089 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4090 		true);
4091 
4092 	put_fd(descriptor);
4093 	return status;
4094 }
4095 
4096 
4097 extern "C" status_t
4098 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4099 {
4100 	// lookup mount -- the caller is required to make sure that the mount
4101 	// won't go away
4102 	MutexLocker locker(sMountMutex);
4103 	struct fs_mount* mount = find_mount(mountID);
4104 	if (mount == NULL)
4105 		return B_BAD_VALUE;
4106 	locker.Unlock();
4107 
4108 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4109 }
4110 
4111 
4112 extern "C" status_t
4113 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4114 {
4115 	// lookup mount -- the caller is required to make sure that the mount
4116 	// won't go away
4117 	MutexLocker locker(sMountMutex);
4118 	struct fs_mount* mount = find_mount(mountID);
4119 	if (mount == NULL)
4120 		return B_BAD_VALUE;
4121 	locker.Unlock();
4122 
4123 	return mount->entry_cache.Add(dirID, name, -1, true);
4124 }
4125 
4126 
4127 extern "C" status_t
4128 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4129 {
4130 	// lookup mount -- the caller is required to make sure that the mount
4131 	// won't go away
4132 	MutexLocker locker(sMountMutex);
4133 	struct fs_mount* mount = find_mount(mountID);
4134 	if (mount == NULL)
4135 		return B_BAD_VALUE;
4136 	locker.Unlock();
4137 
4138 	return mount->entry_cache.Remove(dirID, name);
4139 }
4140 
4141 
4142 //	#pragma mark - private VFS API
4143 //	Functions the VFS exports for other parts of the kernel
4144 
4145 
4146 /*! Acquires another reference to the vnode that has to be released
4147 	by calling vfs_put_vnode().
4148 */
4149 void
4150 vfs_acquire_vnode(struct vnode* vnode)
4151 {
4152 	inc_vnode_ref_count(vnode);
4153 }
4154 
4155 
4156 /*! This is currently called from file_cache_create() only.
4157 	It's probably a temporary solution as long as devfs requires that
4158 	fs_read_pages()/fs_write_pages() are called with the standard
4159 	open cookie and not with a device cookie.
4160 	If that's done differently, remove this call; it has no other
4161 	purpose.
4162 */
4163 extern "C" status_t
4164 vfs_get_cookie_from_fd(int fd, void** _cookie)
4165 {
4166 	struct file_descriptor* descriptor;
4167 
4168 	descriptor = get_fd(get_current_io_context(true), fd);
4169 	if (descriptor == NULL)
4170 		return B_FILE_ERROR;
4171 
4172 	*_cookie = descriptor->cookie;
4173 	return B_OK;
4174 }
4175 
4176 
4177 extern "C" status_t
4178 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4179 {
4180 	*vnode = get_vnode_from_fd(fd, kernel);
4181 
4182 	if (*vnode == NULL)
4183 		return B_FILE_ERROR;
4184 
4185 	return B_NO_ERROR;
4186 }
4187 
4188 
4189 extern "C" status_t
4190 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4191 {
4192 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4193 		path, kernel));
4194 
4195 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4196 	if (pathBuffer.InitCheck() != B_OK)
4197 		return B_NO_MEMORY;
4198 
4199 	char* buffer = pathBuffer.LockBuffer();
4200 	strlcpy(buffer, path, pathBuffer.BufferSize());
4201 
4202 	struct vnode* vnode;
4203 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4204 	if (status != B_OK)
4205 		return status;
4206 
4207 	*_vnode = vnode;
4208 	return B_OK;
4209 }
4210 
4211 
4212 extern "C" status_t
4213 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4214 {
4215 	struct vnode* vnode = NULL;
4216 
4217 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4218 	if (status != B_OK)
4219 		return status;
4220 
4221 	*_vnode = vnode;
4222 	return B_OK;
4223 }
4224 
4225 
4226 extern "C" status_t
4227 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4228 	const char* name, struct vnode** _vnode)
4229 {
4230 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4231 }
4232 
4233 
4234 extern "C" void
4235 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4236 {
4237 	*_mountID = vnode->device;
4238 	*_vnodeID = vnode->id;
4239 }
4240 
4241 
4242 /*!
4243 	Helper function abstracting the process of "converting" a given
4244 	vnode-pointer to a fs_vnode-pointer.
4245 	Currently only used in bindfs.
4246 */
4247 extern "C" fs_vnode*
4248 vfs_fsnode_for_vnode(struct vnode* vnode)
4249 {
4250 	return vnode;
4251 }
4252 
4253 
4254 /*!
4255 	Calls fs_open() on the given vnode and returns a new
4256 	file descriptor for it
4257 */
4258 int
4259 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4260 {
4261 	return open_vnode(vnode, openMode, kernel);
4262 }
4263 
4264 
4265 /*!	Looks up a vnode with the given mount and vnode ID.
4266 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4267 	to the node.
4268 	It's currently only be used by file_cache_create().
4269 */
4270 extern "C" status_t
4271 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4272 {
4273 	rw_lock_read_lock(&sVnodeLock);
4274 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4275 	rw_lock_read_unlock(&sVnodeLock);
4276 
4277 	if (vnode == NULL)
4278 		return B_ERROR;
4279 
4280 	*_vnode = vnode;
4281 	return B_OK;
4282 }
4283 
4284 
4285 extern "C" status_t
4286 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4287 	bool traverseLeafLink, bool kernel, void** _node)
4288 {
4289 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4290 		volume, path, kernel));
4291 
4292 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4293 	if (pathBuffer.InitCheck() != B_OK)
4294 		return B_NO_MEMORY;
4295 
4296 	fs_mount* mount;
4297 	status_t status = get_mount(volume->id, &mount);
4298 	if (status != B_OK)
4299 		return status;
4300 
4301 	char* buffer = pathBuffer.LockBuffer();
4302 	strlcpy(buffer, path, pathBuffer.BufferSize());
4303 
4304 	struct vnode* vnode = mount->root_vnode;
4305 
4306 	if (buffer[0] == '/')
4307 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4308 	else {
4309 		inc_vnode_ref_count(vnode);
4310 			// vnode_path_to_vnode() releases a reference to the starting vnode
4311 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4312 			kernel, &vnode, NULL);
4313 	}
4314 
4315 	put_mount(mount);
4316 
4317 	if (status != B_OK)
4318 		return status;
4319 
4320 	if (vnode->device != volume->id) {
4321 		// wrong mount ID - must not gain access on foreign file system nodes
4322 		put_vnode(vnode);
4323 		return B_BAD_VALUE;
4324 	}
4325 
4326 	// Use get_vnode() to resolve the cookie for the right layer.
4327 	status = get_vnode(volume, vnode->id, _node);
4328 	put_vnode(vnode);
4329 
4330 	return status;
4331 }
4332 
4333 
4334 status_t
4335 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4336 	struct stat* stat, bool kernel)
4337 {
4338 	status_t status;
4339 
4340 	if (path != NULL) {
4341 		// path given: get the stat of the node referred to by (fd, path)
4342 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
4343 		if (pathBuffer.InitCheck() != B_OK)
4344 			return B_NO_MEMORY;
4345 
4346 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4347 			traverseLeafLink, stat, kernel);
4348 	} else {
4349 		// no path given: get the FD and use the FD operation
4350 		struct file_descriptor* descriptor
4351 			= get_fd(get_current_io_context(kernel), fd);
4352 		if (descriptor == NULL)
4353 			return B_FILE_ERROR;
4354 
4355 		if (descriptor->ops->fd_read_stat)
4356 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4357 		else
4358 			status = B_UNSUPPORTED;
4359 
4360 		put_fd(descriptor);
4361 	}
4362 
4363 	return status;
4364 }
4365 
4366 
4367 /*!	Finds the full path to the file that contains the module \a moduleName,
4368 	puts it into \a pathBuffer, and returns B_OK for success.
4369 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4370 	\c B_ENTRY_NOT_FOUNT if no file could be found.
4371 	\a pathBuffer is clobbered in any case and must not be relied on if this
4372 	functions returns unsuccessfully.
4373 	\a basePath and \a pathBuffer must not point to the same space.
4374 */
4375 status_t
4376 vfs_get_module_path(const char* basePath, const char* moduleName,
4377 	char* pathBuffer, size_t bufferSize)
4378 {
4379 	struct vnode* dir;
4380 	struct vnode* file;
4381 	status_t status;
4382 	size_t length;
4383 	char* path;
4384 
4385 	if (bufferSize == 0
4386 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4387 		return B_BUFFER_OVERFLOW;
4388 
4389 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4390 	if (status != B_OK)
4391 		return status;
4392 
4393 	// the path buffer had been clobbered by the above call
4394 	length = strlcpy(pathBuffer, basePath, bufferSize);
4395 	if (pathBuffer[length - 1] != '/')
4396 		pathBuffer[length++] = '/';
4397 
4398 	path = pathBuffer + length;
4399 	bufferSize -= length;
4400 
4401 	while (moduleName) {
4402 		char* nextPath = strchr(moduleName, '/');
4403 		if (nextPath == NULL)
4404 			length = strlen(moduleName);
4405 		else {
4406 			length = nextPath - moduleName;
4407 			nextPath++;
4408 		}
4409 
4410 		if (length + 1 >= bufferSize) {
4411 			status = B_BUFFER_OVERFLOW;
4412 			goto err;
4413 		}
4414 
4415 		memcpy(path, moduleName, length);
4416 		path[length] = '\0';
4417 		moduleName = nextPath;
4418 
4419 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4420 		if (status != B_OK) {
4421 			// vnode_path_to_vnode() has already released the reference to dir
4422 			return status;
4423 		}
4424 
4425 		if (S_ISDIR(file->Type())) {
4426 			// goto the next directory
4427 			path[length] = '/';
4428 			path[length + 1] = '\0';
4429 			path += length + 1;
4430 			bufferSize -= length + 1;
4431 
4432 			dir = file;
4433 		} else if (S_ISREG(file->Type())) {
4434 			// it's a file so it should be what we've searched for
4435 			put_vnode(file);
4436 
4437 			return B_OK;
4438 		} else {
4439 			TRACE(("vfs_get_module_path(): something is strange here: "
4440 				"0x%08" B_PRIx32 "...\n", file->Type()));
4441 			status = B_ERROR;
4442 			dir = file;
4443 			goto err;
4444 		}
4445 	}
4446 
4447 	// if we got here, the moduleName just pointed to a directory, not to
4448 	// a real module - what should we do in this case?
4449 	status = B_ENTRY_NOT_FOUND;
4450 
4451 err:
4452 	put_vnode(dir);
4453 	return status;
4454 }
4455 
4456 
4457 /*!	\brief Normalizes a given path.
4458 
4459 	The path must refer to an existing or non-existing entry in an existing
4460 	directory, that is chopping off the leaf component the remaining path must
4461 	refer to an existing directory.
4462 
4463 	The returned will be canonical in that it will be absolute, will not
4464 	contain any "." or ".." components or duplicate occurrences of '/'s,
4465 	and none of the directory components will by symbolic links.
4466 
4467 	Any two paths referring to the same entry, will result in the same
4468 	normalized path (well, that is pretty much the definition of `normalized',
4469 	isn't it :-).
4470 
4471 	\param path The path to be normalized.
4472 	\param buffer The buffer into which the normalized path will be written.
4473 		   May be the same one as \a path.
4474 	\param bufferSize The size of \a buffer.
4475 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4476 	\param kernel \c true, if the IO context of the kernel shall be used,
4477 		   otherwise that of the team this thread belongs to. Only relevant,
4478 		   if the path is relative (to get the CWD).
4479 	\return \c B_OK if everything went fine, another error code otherwise.
4480 */
4481 status_t
4482 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4483 	bool traverseLink, bool kernel)
4484 {
4485 	if (!path || !buffer || bufferSize < 1)
4486 		return B_BAD_VALUE;
4487 
4488 	if (path != buffer) {
4489 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4490 			return B_BUFFER_OVERFLOW;
4491 	}
4492 
4493 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4494 }
4495 
4496 
4497 /*!	\brief Gets the parent of the passed in node.
4498 
4499 	Gets the parent of the passed in node, and correctly resolves covered
4500 	nodes.
4501 */
4502 extern "C" status_t
4503 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4504 {
4505 	return resolve_covered_parent(parent, device, node,
4506 		get_current_io_context(true));
4507 }
4508 
4509 
4510 /*!	\brief Creates a special node in the file system.
4511 
4512 	The caller gets a reference to the newly created node (which is passed
4513 	back through \a _createdVnode) and is responsible for releasing it.
4514 
4515 	\param path The path where to create the entry for the node. Can be \c NULL,
4516 		in which case the node is created without an entry in the root FS -- it
4517 		will automatically be deleted when the last reference has been released.
4518 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4519 		the target file system will just create the node with its standard
4520 		operations. Depending on the type of the node a subnode might be created
4521 		automatically, though.
4522 	\param mode The type and permissions for the node to be created.
4523 	\param flags Flags to be passed to the creating FS.
4524 	\param kernel \c true, if called in the kernel context (relevant only if
4525 		\a path is not \c NULL and not absolute).
4526 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4527 		file system creating the node, with the private data pointer and
4528 		operations for the super node. Can be \c NULL.
4529 	\param _createVnode Pointer to pre-allocated storage where to store the
4530 		pointer to the newly created node.
4531 	\return \c B_OK, if everything went fine, another error code otherwise.
4532 */
4533 status_t
4534 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4535 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4536 	struct vnode** _createdVnode)
4537 {
4538 	struct vnode* dirNode;
4539 	char _leaf[B_FILE_NAME_LENGTH];
4540 	char* leaf = NULL;
4541 
4542 	if (path) {
4543 		// We've got a path. Get the dir vnode and the leaf name.
4544 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4545 		if (tmpPathBuffer.InitCheck() != B_OK)
4546 			return B_NO_MEMORY;
4547 
4548 		char* tmpPath = tmpPathBuffer.LockBuffer();
4549 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4550 			return B_NAME_TOO_LONG;
4551 
4552 		// get the dir vnode and the leaf name
4553 		leaf = _leaf;
4554 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4555 		if (error != B_OK)
4556 			return error;
4557 	} else {
4558 		// No path. Create the node in the root FS.
4559 		dirNode = sRoot;
4560 		inc_vnode_ref_count(dirNode);
4561 	}
4562 
4563 	VNodePutter _(dirNode);
4564 
4565 	// check support for creating special nodes
4566 	if (!HAS_FS_CALL(dirNode, create_special_node))
4567 		return B_UNSUPPORTED;
4568 
4569 	// create the node
4570 	fs_vnode superVnode;
4571 	ino_t nodeID;
4572 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4573 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4574 	if (status != B_OK)
4575 		return status;
4576 
4577 	// lookup the node
4578 	rw_lock_read_lock(&sVnodeLock);
4579 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4580 	rw_lock_read_unlock(&sVnodeLock);
4581 
4582 	if (*_createdVnode == NULL) {
4583 		panic("vfs_create_special_node(): lookup of node failed");
4584 		return B_ERROR;
4585 	}
4586 
4587 	return B_OK;
4588 }
4589 
4590 
4591 extern "C" void
4592 vfs_put_vnode(struct vnode* vnode)
4593 {
4594 	put_vnode(vnode);
4595 }
4596 
4597 
4598 extern "C" status_t
4599 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4600 {
4601 	// Get current working directory from io context
4602 	struct io_context* context = get_current_io_context(false);
4603 	status_t status = B_OK;
4604 
4605 	mutex_lock(&context->io_mutex);
4606 
4607 	if (context->cwd != NULL) {
4608 		*_mountID = context->cwd->device;
4609 		*_vnodeID = context->cwd->id;
4610 	} else
4611 		status = B_ERROR;
4612 
4613 	mutex_unlock(&context->io_mutex);
4614 	return status;
4615 }
4616 
4617 
4618 status_t
4619 vfs_unmount(dev_t mountID, uint32 flags)
4620 {
4621 	return fs_unmount(NULL, mountID, flags, true);
4622 }
4623 
4624 
4625 extern "C" status_t
4626 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4627 {
4628 	struct vnode* vnode;
4629 
4630 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4631 	if (status != B_OK)
4632 		return status;
4633 
4634 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4635 	put_vnode(vnode);
4636 	return B_OK;
4637 }
4638 
4639 
4640 extern "C" void
4641 vfs_free_unused_vnodes(int32 level)
4642 {
4643 	vnode_low_resource_handler(NULL,
4644 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4645 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4646 		level);
4647 }
4648 
4649 
4650 extern "C" bool
4651 vfs_can_page(struct vnode* vnode, void* cookie)
4652 {
4653 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4654 
4655 	if (HAS_FS_CALL(vnode, can_page))
4656 		return FS_CALL(vnode, can_page, cookie);
4657 	return false;
4658 }
4659 
4660 
4661 extern "C" status_t
4662 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4663 	const generic_io_vec* vecs, size_t count, uint32 flags,
4664 	generic_size_t* _numBytes)
4665 {
4666 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4667 		vecs, pos));
4668 
4669 #if VFS_PAGES_IO_TRACING
4670 	generic_size_t bytesRequested = *_numBytes;
4671 #endif
4672 
4673 	IORequest request;
4674 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4675 	if (status == B_OK) {
4676 		status = vfs_vnode_io(vnode, cookie, &request);
4677 		if (status == B_OK)
4678 			status = request.Wait();
4679 		*_numBytes = request.TransferredBytes();
4680 	}
4681 
4682 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4683 		status, *_numBytes));
4684 
4685 	return status;
4686 }
4687 
4688 
4689 extern "C" status_t
4690 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4691 	const generic_io_vec* vecs, size_t count, uint32 flags,
4692 	generic_size_t* _numBytes)
4693 {
4694 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4695 		vecs, pos));
4696 
4697 #if VFS_PAGES_IO_TRACING
4698 	generic_size_t bytesRequested = *_numBytes;
4699 #endif
4700 
4701 	IORequest request;
4702 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4703 	if (status == B_OK) {
4704 		status = vfs_vnode_io(vnode, cookie, &request);
4705 		if (status == B_OK)
4706 			status = request.Wait();
4707 		*_numBytes = request.TransferredBytes();
4708 	}
4709 
4710 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4711 		status, *_numBytes));
4712 
4713 	return status;
4714 }
4715 
4716 
4717 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4718 	created if \a allocate is \c true.
4719 	In case it's successful, it will also grab a reference to the cache
4720 	it returns.
4721 */
4722 extern "C" status_t
4723 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4724 {
4725 	if (vnode->cache != NULL) {
4726 		vnode->cache->AcquireRef();
4727 		*_cache = vnode->cache;
4728 		return B_OK;
4729 	}
4730 
4731 	rw_lock_read_lock(&sVnodeLock);
4732 	vnode->Lock();
4733 
4734 	status_t status = B_OK;
4735 
4736 	// The cache could have been created in the meantime
4737 	if (vnode->cache == NULL) {
4738 		if (allocate) {
4739 			// TODO: actually the vnode needs to be busy already here, or
4740 			//	else this won't work...
4741 			bool wasBusy = vnode->IsBusy();
4742 			vnode->SetBusy(true);
4743 
4744 			vnode->Unlock();
4745 			rw_lock_read_unlock(&sVnodeLock);
4746 
4747 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4748 
4749 			rw_lock_read_lock(&sVnodeLock);
4750 			vnode->Lock();
4751 			vnode->SetBusy(wasBusy);
4752 		} else
4753 			status = B_BAD_VALUE;
4754 	}
4755 
4756 	vnode->Unlock();
4757 	rw_lock_read_unlock(&sVnodeLock);
4758 
4759 	if (status == B_OK) {
4760 		vnode->cache->AcquireRef();
4761 		*_cache = vnode->cache;
4762 	}
4763 
4764 	return status;
4765 }
4766 
4767 
4768 status_t
4769 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4770 	file_io_vec* vecs, size_t* _count)
4771 {
4772 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4773 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4774 
4775 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4776 }
4777 
4778 
4779 status_t
4780 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4781 {
4782 	status_t status = FS_CALL(vnode, read_stat, stat);
4783 
4784 	// fill in the st_dev and st_ino fields
4785 	if (status == B_OK) {
4786 		stat->st_dev = vnode->device;
4787 		stat->st_ino = vnode->id;
4788 		// the rdev field must stay unset for non-special files
4789 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4790 			stat->st_rdev = -1;
4791 	}
4792 
4793 	return status;
4794 }
4795 
4796 
4797 status_t
4798 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4799 {
4800 	struct vnode* vnode;
4801 	status_t status = get_vnode(device, inode, &vnode, true, false);
4802 	if (status != B_OK)
4803 		return status;
4804 
4805 	status = vfs_stat_vnode(vnode, stat);
4806 
4807 	put_vnode(vnode);
4808 	return status;
4809 }
4810 
4811 
4812 status_t
4813 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4814 {
4815 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4816 }
4817 
4818 
4819 status_t
4820 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4821 	bool kernel, char* path, size_t pathLength)
4822 {
4823 	struct vnode* vnode;
4824 	status_t status;
4825 
4826 	// filter invalid leaf names
4827 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4828 		return B_BAD_VALUE;
4829 
4830 	// get the vnode matching the dir's node_ref
4831 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4832 		// special cases "." and "..": we can directly get the vnode of the
4833 		// referenced directory
4834 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4835 		leaf = NULL;
4836 	} else
4837 		status = get_vnode(device, inode, &vnode, true, false);
4838 	if (status != B_OK)
4839 		return status;
4840 
4841 	// get the directory path
4842 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4843 	put_vnode(vnode);
4844 		// we don't need the vnode anymore
4845 	if (status != B_OK)
4846 		return status;
4847 
4848 	// append the leaf name
4849 	if (leaf) {
4850 		// insert a directory separator if this is not the file system root
4851 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4852 				>= pathLength)
4853 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4854 			return B_NAME_TOO_LONG;
4855 		}
4856 	}
4857 
4858 	return B_OK;
4859 }
4860 
4861 
4862 /*!	If the given descriptor locked its vnode, that lock will be released. */
4863 void
4864 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4865 {
4866 	struct vnode* vnode = fd_vnode(descriptor);
4867 
4868 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4869 		vnode->mandatory_locked_by = NULL;
4870 }
4871 
4872 
4873 /*!	Closes all file descriptors of the specified I/O context that
4874 	have the O_CLOEXEC flag set.
4875 */
4876 void
4877 vfs_exec_io_context(io_context* context)
4878 {
4879 	uint32 i;
4880 
4881 	for (i = 0; i < context->table_size; i++) {
4882 		mutex_lock(&context->io_mutex);
4883 
4884 		struct file_descriptor* descriptor = context->fds[i];
4885 		bool remove = false;
4886 
4887 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4888 			context->fds[i] = NULL;
4889 			context->num_used_fds--;
4890 
4891 			remove = true;
4892 		}
4893 
4894 		mutex_unlock(&context->io_mutex);
4895 
4896 		if (remove) {
4897 			close_fd(descriptor);
4898 			put_fd(descriptor);
4899 		}
4900 	}
4901 }
4902 
4903 
4904 /*! Sets up a new io_control structure, and inherits the properties
4905 	of the parent io_control if it is given.
4906 */
4907 io_context*
4908 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4909 {
4910 	io_context* context = (io_context*)malloc(sizeof(io_context));
4911 	if (context == NULL)
4912 		return NULL;
4913 
4914 	TIOC(NewIOContext(context, parentContext));
4915 
4916 	memset(context, 0, sizeof(io_context));
4917 	context->ref_count = 1;
4918 
4919 	MutexLocker parentLocker;
4920 
4921 	size_t tableSize;
4922 	if (parentContext != NULL) {
4923 		parentLocker.SetTo(parentContext->io_mutex, false);
4924 		tableSize = parentContext->table_size;
4925 	} else
4926 		tableSize = DEFAULT_FD_TABLE_SIZE;
4927 
4928 	// allocate space for FDs and their close-on-exec flag
4929 	context->fds = (file_descriptor**)malloc(
4930 		sizeof(struct file_descriptor*) * tableSize
4931 		+ sizeof(struct select_sync*) * tableSize
4932 		+ (tableSize + 7) / 8);
4933 	if (context->fds == NULL) {
4934 		free(context);
4935 		return NULL;
4936 	}
4937 
4938 	context->select_infos = (select_info**)(context->fds + tableSize);
4939 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4940 
4941 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4942 		+ sizeof(struct select_sync*) * tableSize
4943 		+ (tableSize + 7) / 8);
4944 
4945 	mutex_init(&context->io_mutex, "I/O context");
4946 
4947 	// Copy all parent file descriptors
4948 
4949 	if (parentContext != NULL) {
4950 		size_t i;
4951 
4952 		mutex_lock(&sIOContextRootLock);
4953 		context->root = parentContext->root;
4954 		if (context->root)
4955 			inc_vnode_ref_count(context->root);
4956 		mutex_unlock(&sIOContextRootLock);
4957 
4958 		context->cwd = parentContext->cwd;
4959 		if (context->cwd)
4960 			inc_vnode_ref_count(context->cwd);
4961 
4962 		if (parentContext->inherit_fds) {
4963 			for (i = 0; i < tableSize; i++) {
4964 				struct file_descriptor* descriptor = parentContext->fds[i];
4965 
4966 				if (descriptor != NULL
4967 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4968 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4969 					if (closeOnExec && purgeCloseOnExec)
4970 						continue;
4971 
4972 					TFD(InheritFD(context, i, descriptor, parentContext));
4973 
4974 					context->fds[i] = descriptor;
4975 					context->num_used_fds++;
4976 					atomic_add(&descriptor->ref_count, 1);
4977 					atomic_add(&descriptor->open_count, 1);
4978 
4979 					if (closeOnExec)
4980 						fd_set_close_on_exec(context, i, true);
4981 				}
4982 			}
4983 		}
4984 
4985 		parentLocker.Unlock();
4986 	} else {
4987 		context->root = sRoot;
4988 		context->cwd = sRoot;
4989 
4990 		if (context->root)
4991 			inc_vnode_ref_count(context->root);
4992 
4993 		if (context->cwd)
4994 			inc_vnode_ref_count(context->cwd);
4995 	}
4996 
4997 	context->table_size = tableSize;
4998 	context->inherit_fds = parentContext != NULL;
4999 
5000 	list_init(&context->node_monitors);
5001 	context->max_monitors = DEFAULT_NODE_MONITORS;
5002 
5003 	return context;
5004 }
5005 
5006 
5007 void
5008 vfs_get_io_context(io_context* context)
5009 {
5010 	atomic_add(&context->ref_count, 1);
5011 }
5012 
5013 
5014 void
5015 vfs_put_io_context(io_context* context)
5016 {
5017 	if (atomic_add(&context->ref_count, -1) == 1)
5018 		free_io_context(context);
5019 }
5020 
5021 
5022 status_t
5023 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5024 {
5025 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5026 		return B_BAD_VALUE;
5027 
5028 	TIOC(ResizeIOContext(context, newSize));
5029 
5030 	MutexLocker _(context->io_mutex);
5031 
5032 	uint32 oldSize = context->table_size;
5033 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5034 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5035 
5036 	// If the tables shrink, make sure none of the fds being dropped are in use.
5037 	if (newSize < oldSize) {
5038 		for (uint32 i = oldSize; i-- > newSize;) {
5039 			if (context->fds[i])
5040 				return B_BUSY;
5041 		}
5042 	}
5043 
5044 	// store pointers to the old tables
5045 	file_descriptor** oldFDs = context->fds;
5046 	select_info** oldSelectInfos = context->select_infos;
5047 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5048 
5049 	// allocate new tables
5050 	file_descriptor** newFDs = (file_descriptor**)malloc(
5051 		sizeof(struct file_descriptor*) * newSize
5052 		+ sizeof(struct select_sync*) * newSize
5053 		+ newCloseOnExitBitmapSize);
5054 	if (newFDs == NULL)
5055 		return B_NO_MEMORY;
5056 
5057 	context->fds = newFDs;
5058 	context->select_infos = (select_info**)(context->fds + newSize);
5059 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5060 	context->table_size = newSize;
5061 
5062 	// copy entries from old tables
5063 	uint32 toCopy = min_c(oldSize, newSize);
5064 
5065 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5066 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5067 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5068 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5069 
5070 	// clear additional entries, if the tables grow
5071 	if (newSize > oldSize) {
5072 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5073 		memset(context->select_infos + oldSize, 0,
5074 			sizeof(void*) * (newSize - oldSize));
5075 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5076 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5077 	}
5078 
5079 	free(oldFDs);
5080 
5081 	return B_OK;
5082 }
5083 
5084 
5085 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5086 
5087 	Given an arbitrary vnode (identified by mount and node ID), the function
5088 	checks, whether the vnode is covered by another vnode. If it is, the
5089 	function returns the mount and node ID of the covering vnode. Otherwise
5090 	it simply returns the supplied mount and node ID.
5091 
5092 	In case of error (e.g. the supplied node could not be found) the variables
5093 	for storing the resolved mount and node ID remain untouched and an error
5094 	code is returned.
5095 
5096 	\param mountID The mount ID of the vnode in question.
5097 	\param nodeID The node ID of the vnode in question.
5098 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5099 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5100 	\return
5101 	- \c B_OK, if everything went fine,
5102 	- another error code, if something went wrong.
5103 */
5104 status_t
5105 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5106 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5107 {
5108 	// get the node
5109 	struct vnode* node;
5110 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5111 	if (error != B_OK)
5112 		return error;
5113 
5114 	// resolve the node
5115 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5116 		put_vnode(node);
5117 		node = coveringNode;
5118 	}
5119 
5120 	// set the return values
5121 	*resolvedMountID = node->device;
5122 	*resolvedNodeID = node->id;
5123 
5124 	put_vnode(node);
5125 
5126 	return B_OK;
5127 }
5128 
5129 
5130 status_t
5131 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5132 	ino_t* _mountPointNodeID)
5133 {
5134 	ReadLocker nodeLocker(sVnodeLock);
5135 	MutexLocker mountLocker(sMountMutex);
5136 
5137 	struct fs_mount* mount = find_mount(mountID);
5138 	if (mount == NULL)
5139 		return B_BAD_VALUE;
5140 
5141 	Vnode* mountPoint = mount->covers_vnode;
5142 
5143 	*_mountPointMountID = mountPoint->device;
5144 	*_mountPointNodeID = mountPoint->id;
5145 
5146 	return B_OK;
5147 }
5148 
5149 
5150 status_t
5151 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5152 	ino_t coveredNodeID)
5153 {
5154 	// get the vnodes
5155 	Vnode* vnode;
5156 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5157 	if (error != B_OK)
5158 		return B_BAD_VALUE;
5159 	VNodePutter vnodePutter(vnode);
5160 
5161 	Vnode* coveredVnode;
5162 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5163 		false);
5164 	if (error != B_OK)
5165 		return B_BAD_VALUE;
5166 	VNodePutter coveredVnodePutter(coveredVnode);
5167 
5168 	// establish the covered/covering links
5169 	WriteLocker locker(sVnodeLock);
5170 
5171 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5172 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5173 		return B_BUSY;
5174 	}
5175 
5176 	vnode->covers = coveredVnode;
5177 	vnode->SetCovering(true);
5178 
5179 	coveredVnode->covered_by = vnode;
5180 	coveredVnode->SetCovered(true);
5181 
5182 	// the vnodes do now reference each other
5183 	inc_vnode_ref_count(vnode);
5184 	inc_vnode_ref_count(coveredVnode);
5185 
5186 	return B_OK;
5187 }
5188 
5189 
5190 int
5191 vfs_getrlimit(int resource, struct rlimit* rlp)
5192 {
5193 	if (!rlp)
5194 		return B_BAD_ADDRESS;
5195 
5196 	switch (resource) {
5197 		case RLIMIT_NOFILE:
5198 		{
5199 			struct io_context* context = get_current_io_context(false);
5200 			MutexLocker _(context->io_mutex);
5201 
5202 			rlp->rlim_cur = context->table_size;
5203 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5204 			return 0;
5205 		}
5206 
5207 		case RLIMIT_NOVMON:
5208 		{
5209 			struct io_context* context = get_current_io_context(false);
5210 			MutexLocker _(context->io_mutex);
5211 
5212 			rlp->rlim_cur = context->max_monitors;
5213 			rlp->rlim_max = MAX_NODE_MONITORS;
5214 			return 0;
5215 		}
5216 
5217 		default:
5218 			return B_BAD_VALUE;
5219 	}
5220 }
5221 
5222 
5223 int
5224 vfs_setrlimit(int resource, const struct rlimit* rlp)
5225 {
5226 	if (!rlp)
5227 		return B_BAD_ADDRESS;
5228 
5229 	switch (resource) {
5230 		case RLIMIT_NOFILE:
5231 			/* TODO: check getuid() */
5232 			if (rlp->rlim_max != RLIM_SAVED_MAX
5233 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5234 				return B_NOT_ALLOWED;
5235 
5236 			return vfs_resize_fd_table(get_current_io_context(false),
5237 				rlp->rlim_cur);
5238 
5239 		case RLIMIT_NOVMON:
5240 			/* TODO: check getuid() */
5241 			if (rlp->rlim_max != RLIM_SAVED_MAX
5242 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5243 				return B_NOT_ALLOWED;
5244 
5245 			return resize_monitor_table(get_current_io_context(false),
5246 				rlp->rlim_cur);
5247 
5248 		default:
5249 			return B_BAD_VALUE;
5250 	}
5251 }
5252 
5253 
5254 status_t
5255 vfs_init(kernel_args* args)
5256 {
5257 	vnode::StaticInit();
5258 
5259 	sVnodeTable = new(std::nothrow) VnodeTable();
5260 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5261 		panic("vfs_init: error creating vnode hash table\n");
5262 
5263 	struct vnode dummy_vnode;
5264 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5265 
5266 	struct fs_mount dummyMount;
5267 	sMountsTable = new(std::nothrow) MountTable();
5268 	if (sMountsTable == NULL
5269 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5270 		panic("vfs_init: error creating mounts hash table\n");
5271 
5272 	node_monitor_init();
5273 
5274 	sRoot = NULL;
5275 
5276 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5277 
5278 	if (block_cache_init() != B_OK)
5279 		return B_ERROR;
5280 
5281 #ifdef ADD_DEBUGGER_COMMANDS
5282 	// add some debugger commands
5283 	add_debugger_command_etc("vnode", &dump_vnode,
5284 		"Print info about the specified vnode",
5285 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5286 		"Prints information about the vnode specified by address <vnode> or\n"
5287 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5288 		"constructed and printed. It might not be possible to construct a\n"
5289 		"complete path, though.\n",
5290 		0);
5291 	add_debugger_command("vnodes", &dump_vnodes,
5292 		"list all vnodes (from the specified device)");
5293 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5294 		"list all vnode caches");
5295 	add_debugger_command("mount", &dump_mount,
5296 		"info about the specified fs_mount");
5297 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5298 	add_debugger_command("io_context", &dump_io_context,
5299 		"info about the I/O context");
5300 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5301 		"info about vnode usage");
5302 #endif
5303 
5304 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5305 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5306 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5307 		0);
5308 
5309 	fifo_init();
5310 	file_map_init();
5311 
5312 	return file_cache_init();
5313 }
5314 
5315 
5316 //	#pragma mark - fd_ops implementations
5317 
5318 
5319 /*!
5320 	Calls fs_open() on the given vnode and returns a new
5321 	file descriptor for it
5322 */
5323 static int
5324 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5325 {
5326 	void* cookie;
5327 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5328 	if (status != B_OK)
5329 		return status;
5330 
5331 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5332 	if (fd < 0) {
5333 		FS_CALL(vnode, close, cookie);
5334 		FS_CALL(vnode, free_cookie, cookie);
5335 	}
5336 	return fd;
5337 }
5338 
5339 
5340 /*!
5341 	Calls fs_open() on the given vnode and returns a new
5342 	file descriptor for it
5343 */
5344 static int
5345 create_vnode(struct vnode* directory, const char* name, int openMode,
5346 	int perms, bool kernel)
5347 {
5348 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5349 	status_t status = B_ERROR;
5350 	struct vnode* vnode;
5351 	void* cookie;
5352 	ino_t newID;
5353 
5354 	// This is somewhat tricky: If the entry already exists, the FS responsible
5355 	// for the directory might not necessarily also be the one responsible for
5356 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5357 	// we can actually never call the create() hook without O_EXCL. Instead we
5358 	// try to look the entry up first. If it already exists, we just open the
5359 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5360 	// introduces a race condition, since someone else might have created the
5361 	// entry in the meantime. We hope the respective FS returns the correct
5362 	// error code and retry (up to 3 times) again.
5363 
5364 	for (int i = 0; i < 3 && status != B_OK; i++) {
5365 		// look the node up
5366 		status = lookup_dir_entry(directory, name, &vnode);
5367 		if (status == B_OK) {
5368 			VNodePutter putter(vnode);
5369 
5370 			if ((openMode & O_EXCL) != 0)
5371 				return B_FILE_EXISTS;
5372 
5373 			// If the node is a symlink, we have to follow it, unless
5374 			// O_NOTRAVERSE is set.
5375 			if (S_ISLNK(vnode->Type()) && traverse) {
5376 				putter.Put();
5377 				char clonedName[B_FILE_NAME_LENGTH + 1];
5378 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5379 						>= B_FILE_NAME_LENGTH) {
5380 					return B_NAME_TOO_LONG;
5381 				}
5382 
5383 				inc_vnode_ref_count(directory);
5384 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5385 					kernel, &vnode, NULL);
5386 				if (status != B_OK)
5387 					return status;
5388 
5389 				putter.SetTo(vnode);
5390 			}
5391 
5392 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5393 				return B_LINK_LIMIT;
5394 
5395 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5396 			// on success keep the vnode reference for the FD
5397 			if (fd >= 0)
5398 				putter.Detach();
5399 
5400 			return fd;
5401 		}
5402 
5403 		// it doesn't exist yet -- try to create it
5404 
5405 		if (!HAS_FS_CALL(directory, create))
5406 			return B_READ_ONLY_DEVICE;
5407 
5408 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5409 			&cookie, &newID);
5410 		if (status != B_OK
5411 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5412 			return status;
5413 		}
5414 	}
5415 
5416 	if (status != B_OK)
5417 		return status;
5418 
5419 	// the node has been created successfully
5420 
5421 	rw_lock_read_lock(&sVnodeLock);
5422 	vnode = lookup_vnode(directory->device, newID);
5423 	rw_lock_read_unlock(&sVnodeLock);
5424 
5425 	if (vnode == NULL) {
5426 		panic("vfs: fs_create() returned success but there is no vnode, "
5427 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5428 		return B_BAD_VALUE;
5429 	}
5430 
5431 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5432 	if (fd >= 0)
5433 		return fd;
5434 
5435 	status = fd;
5436 
5437 	// something went wrong, clean up
5438 
5439 	FS_CALL(vnode, close, cookie);
5440 	FS_CALL(vnode, free_cookie, cookie);
5441 	put_vnode(vnode);
5442 
5443 	FS_CALL(directory, unlink, name);
5444 
5445 	return status;
5446 }
5447 
5448 
5449 /*! Calls fs open_dir() on the given vnode and returns a new
5450 	file descriptor for it
5451 */
5452 static int
5453 open_dir_vnode(struct vnode* vnode, bool kernel)
5454 {
5455 	void* cookie;
5456 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5457 	if (status != B_OK)
5458 		return status;
5459 
5460 	// directory is opened, create a fd
5461 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5462 	if (status >= 0)
5463 		return status;
5464 
5465 	FS_CALL(vnode, close_dir, cookie);
5466 	FS_CALL(vnode, free_dir_cookie, cookie);
5467 
5468 	return status;
5469 }
5470 
5471 
5472 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5473 	file descriptor for it.
5474 	Used by attr_dir_open(), and attr_dir_open_fd().
5475 */
5476 static int
5477 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5478 {
5479 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5480 		return B_UNSUPPORTED;
5481 
5482 	void* cookie;
5483 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5484 	if (status != B_OK)
5485 		return status;
5486 
5487 	// directory is opened, create a fd
5488 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5489 		kernel);
5490 	if (status >= 0)
5491 		return status;
5492 
5493 	FS_CALL(vnode, close_attr_dir, cookie);
5494 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5495 
5496 	return status;
5497 }
5498 
5499 
5500 static int
5501 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5502 	int openMode, int perms, bool kernel)
5503 {
5504 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5505 		"kernel %d\n", name, openMode, perms, kernel));
5506 
5507 	// get directory to put the new file in
5508 	struct vnode* directory;
5509 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5510 	if (status != B_OK)
5511 		return status;
5512 
5513 	status = create_vnode(directory, name, openMode, perms, kernel);
5514 	put_vnode(directory);
5515 
5516 	return status;
5517 }
5518 
5519 
5520 static int
5521 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5522 {
5523 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5524 		openMode, perms, kernel));
5525 
5526 	// get directory to put the new file in
5527 	char name[B_FILE_NAME_LENGTH];
5528 	struct vnode* directory;
5529 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5530 		kernel);
5531 	if (status < 0)
5532 		return status;
5533 
5534 	status = create_vnode(directory, name, openMode, perms, kernel);
5535 
5536 	put_vnode(directory);
5537 	return status;
5538 }
5539 
5540 
5541 static int
5542 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5543 	int openMode, bool kernel)
5544 {
5545 	if (name == NULL || *name == '\0')
5546 		return B_BAD_VALUE;
5547 
5548 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5549 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5550 
5551 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5552 
5553 	// get the vnode matching the entry_ref
5554 	struct vnode* vnode;
5555 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5556 		kernel, &vnode);
5557 	if (status != B_OK)
5558 		return status;
5559 
5560 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5561 		put_vnode(vnode);
5562 		return B_LINK_LIMIT;
5563 	}
5564 
5565 	int newFD = open_vnode(vnode, openMode, kernel);
5566 	if (newFD >= 0) {
5567 		// The vnode reference has been transferred to the FD
5568 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5569 			directoryID, vnode->id, name);
5570 	} else
5571 		put_vnode(vnode);
5572 
5573 	return newFD;
5574 }
5575 
5576 
5577 static int
5578 file_open(int fd, char* path, int openMode, bool kernel)
5579 {
5580 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5581 
5582 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5583 		fd, path, openMode, kernel));
5584 
5585 	// get the vnode matching the vnode + path combination
5586 	struct vnode* vnode;
5587 	ino_t parentID;
5588 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5589 		&parentID, kernel);
5590 	if (status != B_OK)
5591 		return status;
5592 
5593 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5594 		put_vnode(vnode);
5595 		return B_LINK_LIMIT;
5596 	}
5597 
5598 	// open the vnode
5599 	int newFD = open_vnode(vnode, openMode, kernel);
5600 	if (newFD >= 0) {
5601 		// The vnode reference has been transferred to the FD
5602 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5603 			vnode->device, parentID, vnode->id, NULL);
5604 	} else
5605 		put_vnode(vnode);
5606 
5607 	return newFD;
5608 }
5609 
5610 
5611 static status_t
5612 file_close(struct file_descriptor* descriptor)
5613 {
5614 	struct vnode* vnode = descriptor->u.vnode;
5615 	status_t status = B_OK;
5616 
5617 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5618 
5619 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5620 		vnode->id);
5621 	if (HAS_FS_CALL(vnode, close)) {
5622 		status = FS_CALL(vnode, close, descriptor->cookie);
5623 	}
5624 
5625 	if (status == B_OK) {
5626 		// remove all outstanding locks for this team
5627 		if (HAS_FS_CALL(vnode, release_lock))
5628 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5629 		else
5630 			status = release_advisory_lock(vnode, NULL);
5631 	}
5632 	return status;
5633 }
5634 
5635 
5636 static void
5637 file_free_fd(struct file_descriptor* descriptor)
5638 {
5639 	struct vnode* vnode = descriptor->u.vnode;
5640 
5641 	if (vnode != NULL) {
5642 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5643 		put_vnode(vnode);
5644 	}
5645 }
5646 
5647 
5648 static status_t
5649 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5650 	size_t* length)
5651 {
5652 	struct vnode* vnode = descriptor->u.vnode;
5653 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5654 		pos, length, *length));
5655 
5656 	if (S_ISDIR(vnode->Type()))
5657 		return B_IS_A_DIRECTORY;
5658 
5659 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5660 }
5661 
5662 
5663 static status_t
5664 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5665 	size_t* length)
5666 {
5667 	struct vnode* vnode = descriptor->u.vnode;
5668 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5669 		length));
5670 
5671 	if (S_ISDIR(vnode->Type()))
5672 		return B_IS_A_DIRECTORY;
5673 	if (!HAS_FS_CALL(vnode, write))
5674 		return B_READ_ONLY_DEVICE;
5675 
5676 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5677 }
5678 
5679 
5680 static off_t
5681 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5682 {
5683 	struct vnode* vnode = descriptor->u.vnode;
5684 	off_t offset;
5685 	bool isDevice = false;
5686 
5687 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5688 		seekType));
5689 
5690 	// some kinds of files are not seekable
5691 	switch (vnode->Type() & S_IFMT) {
5692 		case S_IFIFO:
5693 		case S_IFSOCK:
5694 			return ESPIPE;
5695 
5696 		// drivers publish block devices as chr, so pick both
5697 		case S_IFBLK:
5698 		case S_IFCHR:
5699 			isDevice = true;
5700 			break;
5701 		// The Open Group Base Specs don't mention any file types besides pipes,
5702 		// fifos, and sockets specially, so we allow seeking them.
5703 		case S_IFREG:
5704 		case S_IFDIR:
5705 		case S_IFLNK:
5706 			break;
5707 	}
5708 
5709 	switch (seekType) {
5710 		case SEEK_SET:
5711 			offset = 0;
5712 			break;
5713 		case SEEK_CUR:
5714 			offset = descriptor->pos;
5715 			break;
5716 		case SEEK_END:
5717 		{
5718 			// stat() the node
5719 			if (!HAS_FS_CALL(vnode, read_stat))
5720 				return B_UNSUPPORTED;
5721 
5722 			struct stat stat;
5723 			status_t status = FS_CALL(vnode, read_stat, &stat);
5724 			if (status != B_OK)
5725 				return status;
5726 
5727 			offset = stat.st_size;
5728 
5729 			if (offset == 0 && isDevice) {
5730 				// stat() on regular drivers doesn't report size
5731 				device_geometry geometry;
5732 
5733 				if (HAS_FS_CALL(vnode, ioctl)) {
5734 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5735 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5736 					if (status == B_OK)
5737 						offset = (off_t)geometry.bytes_per_sector
5738 							* geometry.sectors_per_track
5739 							* geometry.cylinder_count
5740 							* geometry.head_count;
5741 				}
5742 			}
5743 
5744 			break;
5745 		}
5746 		default:
5747 			return B_BAD_VALUE;
5748 	}
5749 
5750 	// assumes off_t is 64 bits wide
5751 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5752 		return B_BUFFER_OVERFLOW;
5753 
5754 	pos += offset;
5755 	if (pos < 0)
5756 		return B_BAD_VALUE;
5757 
5758 	return descriptor->pos = pos;
5759 }
5760 
5761 
5762 static status_t
5763 file_select(struct file_descriptor* descriptor, uint8 event,
5764 	struct selectsync* sync)
5765 {
5766 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5767 
5768 	struct vnode* vnode = descriptor->u.vnode;
5769 
5770 	// If the FS has no select() hook, notify select() now.
5771 	if (!HAS_FS_CALL(vnode, select))
5772 		return notify_select_event(sync, event);
5773 
5774 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5775 }
5776 
5777 
5778 static status_t
5779 file_deselect(struct file_descriptor* descriptor, uint8 event,
5780 	struct selectsync* sync)
5781 {
5782 	struct vnode* vnode = descriptor->u.vnode;
5783 
5784 	if (!HAS_FS_CALL(vnode, deselect))
5785 		return B_OK;
5786 
5787 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5788 }
5789 
5790 
5791 static status_t
5792 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5793 	bool kernel)
5794 {
5795 	struct vnode* vnode;
5796 	status_t status;
5797 
5798 	if (name == NULL || *name == '\0')
5799 		return B_BAD_VALUE;
5800 
5801 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5802 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5803 
5804 	status = get_vnode(mountID, parentID, &vnode, true, false);
5805 	if (status != B_OK)
5806 		return status;
5807 
5808 	if (HAS_FS_CALL(vnode, create_dir))
5809 		status = FS_CALL(vnode, create_dir, name, perms);
5810 	else
5811 		status = B_READ_ONLY_DEVICE;
5812 
5813 	put_vnode(vnode);
5814 	return status;
5815 }
5816 
5817 
5818 static status_t
5819 dir_create(int fd, char* path, int perms, bool kernel)
5820 {
5821 	char filename[B_FILE_NAME_LENGTH];
5822 	struct vnode* vnode;
5823 	status_t status;
5824 
5825 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5826 		kernel));
5827 
5828 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5829 	if (status < 0)
5830 		return status;
5831 
5832 	if (HAS_FS_CALL(vnode, create_dir)) {
5833 		status = FS_CALL(vnode, create_dir, filename, perms);
5834 	} else
5835 		status = B_READ_ONLY_DEVICE;
5836 
5837 	put_vnode(vnode);
5838 	return status;
5839 }
5840 
5841 
5842 static int
5843 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5844 {
5845 	FUNCTION(("dir_open_entry_ref()\n"));
5846 
5847 	if (name && name[0] == '\0')
5848 		return B_BAD_VALUE;
5849 
5850 	// get the vnode matching the entry_ref/node_ref
5851 	struct vnode* vnode;
5852 	status_t status;
5853 	if (name) {
5854 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5855 			&vnode);
5856 	} else
5857 		status = get_vnode(mountID, parentID, &vnode, true, false);
5858 	if (status != B_OK)
5859 		return status;
5860 
5861 	int newFD = open_dir_vnode(vnode, kernel);
5862 	if (newFD >= 0) {
5863 		// The vnode reference has been transferred to the FD
5864 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5865 			vnode->id, name);
5866 	} else
5867 		put_vnode(vnode);
5868 
5869 	return newFD;
5870 }
5871 
5872 
5873 static int
5874 dir_open(int fd, char* path, bool kernel)
5875 {
5876 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5877 		kernel));
5878 
5879 	// get the vnode matching the vnode + path combination
5880 	struct vnode* vnode = NULL;
5881 	ino_t parentID;
5882 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5883 		kernel);
5884 	if (status != B_OK)
5885 		return status;
5886 
5887 	// open the dir
5888 	int newFD = open_dir_vnode(vnode, kernel);
5889 	if (newFD >= 0) {
5890 		// The vnode reference has been transferred to the FD
5891 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5892 			parentID, vnode->id, NULL);
5893 	} else
5894 		put_vnode(vnode);
5895 
5896 	return newFD;
5897 }
5898 
5899 
5900 static status_t
5901 dir_close(struct file_descriptor* descriptor)
5902 {
5903 	struct vnode* vnode = descriptor->u.vnode;
5904 
5905 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5906 
5907 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5908 		vnode->id);
5909 	if (HAS_FS_CALL(vnode, close_dir))
5910 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5911 
5912 	return B_OK;
5913 }
5914 
5915 
5916 static void
5917 dir_free_fd(struct file_descriptor* descriptor)
5918 {
5919 	struct vnode* vnode = descriptor->u.vnode;
5920 
5921 	if (vnode != NULL) {
5922 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5923 		put_vnode(vnode);
5924 	}
5925 }
5926 
5927 
5928 static status_t
5929 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5930 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5931 {
5932 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5933 		bufferSize, _count);
5934 }
5935 
5936 
5937 static status_t
5938 fix_dirent(struct vnode* parent, struct dirent* entry,
5939 	struct io_context* ioContext)
5940 {
5941 	// set d_pdev and d_pino
5942 	entry->d_pdev = parent->device;
5943 	entry->d_pino = parent->id;
5944 
5945 	// If this is the ".." entry and the directory covering another vnode,
5946 	// we need to replace d_dev and d_ino with the actual values.
5947 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5948 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
5949 			ioContext);
5950 	}
5951 
5952 	// resolve covered vnodes
5953 	ReadLocker _(&sVnodeLock);
5954 
5955 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5956 	if (vnode != NULL && vnode->covered_by != NULL) {
5957 		do {
5958 			vnode = vnode->covered_by;
5959 		} while (vnode->covered_by != NULL);
5960 
5961 		entry->d_dev = vnode->device;
5962 		entry->d_ino = vnode->id;
5963 	}
5964 
5965 	return B_OK;
5966 }
5967 
5968 
5969 static status_t
5970 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5971 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5972 {
5973 	if (!HAS_FS_CALL(vnode, read_dir))
5974 		return B_UNSUPPORTED;
5975 
5976 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5977 		_count);
5978 	if (error != B_OK)
5979 		return error;
5980 
5981 	// we need to adjust the read dirents
5982 	uint32 count = *_count;
5983 	for (uint32 i = 0; i < count; i++) {
5984 		error = fix_dirent(vnode, buffer, ioContext);
5985 		if (error != B_OK)
5986 			return error;
5987 
5988 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5989 	}
5990 
5991 	return error;
5992 }
5993 
5994 
5995 static status_t
5996 dir_rewind(struct file_descriptor* descriptor)
5997 {
5998 	struct vnode* vnode = descriptor->u.vnode;
5999 
6000 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6001 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6002 	}
6003 
6004 	return B_UNSUPPORTED;
6005 }
6006 
6007 
6008 static status_t
6009 dir_remove(int fd, char* path, bool kernel)
6010 {
6011 	char name[B_FILE_NAME_LENGTH];
6012 	struct vnode* directory;
6013 	status_t status;
6014 
6015 	if (path != NULL) {
6016 		// we need to make sure our path name doesn't stop with "/", ".",
6017 		// or ".."
6018 		char* lastSlash;
6019 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6020 			char* leaf = lastSlash + 1;
6021 			if (!strcmp(leaf, ".."))
6022 				return B_NOT_ALLOWED;
6023 
6024 			// omit multiple slashes
6025 			while (lastSlash > path && lastSlash[-1] == '/')
6026 				lastSlash--;
6027 
6028 			if (leaf[0]
6029 				&& strcmp(leaf, ".")) {
6030 				break;
6031 			}
6032 			// "name/" -> "name", or "name/." -> "name"
6033 			lastSlash[0] = '\0';
6034 		}
6035 
6036 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6037 			return B_NOT_ALLOWED;
6038 	}
6039 
6040 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6041 	if (status != B_OK)
6042 		return status;
6043 
6044 	if (HAS_FS_CALL(directory, remove_dir))
6045 		status = FS_CALL(directory, remove_dir, name);
6046 	else
6047 		status = B_READ_ONLY_DEVICE;
6048 
6049 	put_vnode(directory);
6050 	return status;
6051 }
6052 
6053 
6054 static status_t
6055 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6056 	size_t length)
6057 {
6058 	struct vnode* vnode = descriptor->u.vnode;
6059 
6060 	if (HAS_FS_CALL(vnode, ioctl))
6061 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6062 
6063 	return B_DEV_INVALID_IOCTL;
6064 }
6065 
6066 
6067 static status_t
6068 common_fcntl(int fd, int op, size_t argument, bool kernel)
6069 {
6070 	struct flock flock;
6071 
6072 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6073 		fd, op, argument, kernel ? "kernel" : "user"));
6074 
6075 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
6076 		fd);
6077 	if (descriptor == NULL)
6078 		return B_FILE_ERROR;
6079 
6080 	struct vnode* vnode = fd_vnode(descriptor);
6081 
6082 	status_t status = B_OK;
6083 
6084 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6085 		if (descriptor->type != FDTYPE_FILE)
6086 			status = B_BAD_VALUE;
6087 		else if (user_memcpy(&flock, (struct flock*)argument,
6088 				sizeof(struct flock)) != B_OK)
6089 			status = B_BAD_ADDRESS;
6090 
6091 		if (status != B_OK) {
6092 			put_fd(descriptor);
6093 			return status;
6094 		}
6095 	}
6096 
6097 	switch (op) {
6098 		case F_SETFD:
6099 		{
6100 			struct io_context* context = get_current_io_context(kernel);
6101 			// Set file descriptor flags
6102 
6103 			// O_CLOEXEC is the only flag available at this time
6104 			mutex_lock(&context->io_mutex);
6105 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6106 			mutex_unlock(&context->io_mutex);
6107 
6108 			status = B_OK;
6109 			break;
6110 		}
6111 
6112 		case F_GETFD:
6113 		{
6114 			struct io_context* context = get_current_io_context(kernel);
6115 
6116 			// Get file descriptor flags
6117 			mutex_lock(&context->io_mutex);
6118 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6119 			mutex_unlock(&context->io_mutex);
6120 			break;
6121 		}
6122 
6123 		case F_SETFL:
6124 			// Set file descriptor open mode
6125 
6126 			// we only accept changes to O_APPEND and O_NONBLOCK
6127 			argument &= O_APPEND | O_NONBLOCK;
6128 			if (descriptor->ops->fd_set_flags != NULL) {
6129 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6130 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6131 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6132 					(int)argument);
6133 			} else
6134 				status = B_UNSUPPORTED;
6135 
6136 			if (status == B_OK) {
6137 				// update this descriptor's open_mode field
6138 				descriptor->open_mode = (descriptor->open_mode
6139 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6140 			}
6141 
6142 			break;
6143 
6144 		case F_GETFL:
6145 			// Get file descriptor open mode
6146 			status = descriptor->open_mode;
6147 			break;
6148 
6149 		case F_DUPFD:
6150 		{
6151 			struct io_context* context = get_current_io_context(kernel);
6152 
6153 			status = new_fd_etc(context, descriptor, (int)argument);
6154 			if (status >= 0) {
6155 				mutex_lock(&context->io_mutex);
6156 				fd_set_close_on_exec(context, fd, false);
6157 				mutex_unlock(&context->io_mutex);
6158 
6159 				atomic_add(&descriptor->ref_count, 1);
6160 			}
6161 			break;
6162 		}
6163 
6164 		case F_GETLK:
6165 			if (vnode != NULL) {
6166 				struct flock normalizedLock;
6167 
6168 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6169 				status = normalize_flock(descriptor, &normalizedLock);
6170 				if (status != B_OK)
6171 					break;
6172 
6173 				if (HAS_FS_CALL(vnode, test_lock)) {
6174 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6175 						&normalizedLock);
6176 				} else
6177 					status = test_advisory_lock(vnode, &normalizedLock);
6178 				if (status == B_OK) {
6179 					if (normalizedLock.l_type == F_UNLCK) {
6180 						// no conflicting lock found, copy back the same struct
6181 						// we were given except change type to F_UNLCK
6182 						flock.l_type = F_UNLCK;
6183 						status = user_memcpy((struct flock*)argument, &flock,
6184 							sizeof(struct flock));
6185 					} else {
6186 						// a conflicting lock was found, copy back its range and
6187 						// type
6188 						if (normalizedLock.l_len == OFF_MAX)
6189 							normalizedLock.l_len = 0;
6190 
6191 						status = user_memcpy((struct flock*)argument,
6192 							&normalizedLock, sizeof(struct flock));
6193 					}
6194 				}
6195 			} else
6196 				status = B_BAD_VALUE;
6197 			break;
6198 
6199 		case F_SETLK:
6200 		case F_SETLKW:
6201 			status = normalize_flock(descriptor, &flock);
6202 			if (status != B_OK)
6203 				break;
6204 
6205 			if (vnode == NULL) {
6206 				status = B_BAD_VALUE;
6207 			} else if (flock.l_type == F_UNLCK) {
6208 				if (HAS_FS_CALL(vnode, release_lock)) {
6209 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6210 						&flock);
6211 				} else
6212 					status = release_advisory_lock(vnode, &flock);
6213 			} else {
6214 				// the open mode must match the lock type
6215 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6216 						&& flock.l_type == F_WRLCK)
6217 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6218 						&& flock.l_type == F_RDLCK))
6219 					status = B_FILE_ERROR;
6220 				else {
6221 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6222 						status = FS_CALL(vnode, acquire_lock,
6223 							descriptor->cookie, &flock, op == F_SETLKW);
6224 					} else {
6225 						status = acquire_advisory_lock(vnode, -1,
6226 							&flock, op == F_SETLKW);
6227 					}
6228 				}
6229 			}
6230 			break;
6231 
6232 		// ToDo: add support for more ops?
6233 
6234 		default:
6235 			status = B_BAD_VALUE;
6236 	}
6237 
6238 	put_fd(descriptor);
6239 	return status;
6240 }
6241 
6242 
6243 static status_t
6244 common_sync(int fd, bool kernel)
6245 {
6246 	struct file_descriptor* descriptor;
6247 	struct vnode* vnode;
6248 	status_t status;
6249 
6250 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
6251 
6252 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6253 	if (descriptor == NULL)
6254 		return B_FILE_ERROR;
6255 
6256 	if (HAS_FS_CALL(vnode, fsync))
6257 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6258 	else
6259 		status = B_UNSUPPORTED;
6260 
6261 	put_fd(descriptor);
6262 	return status;
6263 }
6264 
6265 
6266 static status_t
6267 common_lock_node(int fd, bool kernel)
6268 {
6269 	struct file_descriptor* descriptor;
6270 	struct vnode* vnode;
6271 
6272 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6273 	if (descriptor == NULL)
6274 		return B_FILE_ERROR;
6275 
6276 	status_t status = B_OK;
6277 
6278 	// We need to set the locking atomically - someone
6279 	// else might set one at the same time
6280 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6281 			(file_descriptor*)NULL) != NULL)
6282 		status = B_BUSY;
6283 
6284 	put_fd(descriptor);
6285 	return status;
6286 }
6287 
6288 
6289 static status_t
6290 common_unlock_node(int fd, bool kernel)
6291 {
6292 	struct file_descriptor* descriptor;
6293 	struct vnode* vnode;
6294 
6295 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6296 	if (descriptor == NULL)
6297 		return B_FILE_ERROR;
6298 
6299 	status_t status = B_OK;
6300 
6301 	// We need to set the locking atomically - someone
6302 	// else might set one at the same time
6303 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6304 			(file_descriptor*)NULL, descriptor) != descriptor)
6305 		status = B_BAD_VALUE;
6306 
6307 	put_fd(descriptor);
6308 	return status;
6309 }
6310 
6311 
6312 static status_t
6313 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6314 	bool kernel)
6315 {
6316 	struct vnode* vnode;
6317 	status_t status;
6318 
6319 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6320 	if (status != B_OK)
6321 		return status;
6322 
6323 	if (HAS_FS_CALL(vnode, read_symlink)) {
6324 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6325 	} else
6326 		status = B_BAD_VALUE;
6327 
6328 	put_vnode(vnode);
6329 	return status;
6330 }
6331 
6332 
6333 static status_t
6334 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6335 	bool kernel)
6336 {
6337 	// path validity checks have to be in the calling function!
6338 	char name[B_FILE_NAME_LENGTH];
6339 	struct vnode* vnode;
6340 	status_t status;
6341 
6342 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6343 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6344 
6345 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6346 	if (status != B_OK)
6347 		return status;
6348 
6349 	if (HAS_FS_CALL(vnode, create_symlink))
6350 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6351 	else {
6352 		status = HAS_FS_CALL(vnode, write)
6353 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6354 	}
6355 
6356 	put_vnode(vnode);
6357 
6358 	return status;
6359 }
6360 
6361 
6362 static status_t
6363 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6364 	bool traverseLeafLink, bool kernel)
6365 {
6366 	// path validity checks have to be in the calling function!
6367 
6368 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6369 		toPath, kernel));
6370 
6371 	char name[B_FILE_NAME_LENGTH];
6372 	struct vnode* directory;
6373 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6374 		kernel);
6375 	if (status != B_OK)
6376 		return status;
6377 
6378 	struct vnode* vnode;
6379 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6380 		kernel);
6381 	if (status != B_OK)
6382 		goto err;
6383 
6384 	if (directory->mount != vnode->mount) {
6385 		status = B_CROSS_DEVICE_LINK;
6386 		goto err1;
6387 	}
6388 
6389 	if (HAS_FS_CALL(directory, link))
6390 		status = FS_CALL(directory, link, name, vnode);
6391 	else
6392 		status = B_READ_ONLY_DEVICE;
6393 
6394 err1:
6395 	put_vnode(vnode);
6396 err:
6397 	put_vnode(directory);
6398 
6399 	return status;
6400 }
6401 
6402 
6403 static status_t
6404 common_unlink(int fd, char* path, bool kernel)
6405 {
6406 	char filename[B_FILE_NAME_LENGTH];
6407 	struct vnode* vnode;
6408 	status_t status;
6409 
6410 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6411 		kernel));
6412 
6413 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6414 	if (status < 0)
6415 		return status;
6416 
6417 	if (HAS_FS_CALL(vnode, unlink))
6418 		status = FS_CALL(vnode, unlink, filename);
6419 	else
6420 		status = B_READ_ONLY_DEVICE;
6421 
6422 	put_vnode(vnode);
6423 
6424 	return status;
6425 }
6426 
6427 
6428 static status_t
6429 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6430 {
6431 	struct vnode* vnode;
6432 	status_t status;
6433 
6434 	// TODO: honor effectiveUserGroup argument
6435 
6436 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6437 	if (status != B_OK)
6438 		return status;
6439 
6440 	if (HAS_FS_CALL(vnode, access))
6441 		status = FS_CALL(vnode, access, mode);
6442 	else
6443 		status = B_OK;
6444 
6445 	put_vnode(vnode);
6446 
6447 	return status;
6448 }
6449 
6450 
6451 static status_t
6452 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6453 {
6454 	struct vnode* fromVnode;
6455 	struct vnode* toVnode;
6456 	char fromName[B_FILE_NAME_LENGTH];
6457 	char toName[B_FILE_NAME_LENGTH];
6458 	status_t status;
6459 
6460 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6461 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6462 
6463 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6464 	if (status != B_OK)
6465 		return status;
6466 
6467 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6468 	if (status != B_OK)
6469 		goto err1;
6470 
6471 	if (fromVnode->device != toVnode->device) {
6472 		status = B_CROSS_DEVICE_LINK;
6473 		goto err2;
6474 	}
6475 
6476 	if (fromName[0] == '\0' || toName[0] == '\0'
6477 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6478 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6479 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6480 		status = B_BAD_VALUE;
6481 		goto err2;
6482 	}
6483 
6484 	if (HAS_FS_CALL(fromVnode, rename))
6485 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6486 	else
6487 		status = B_READ_ONLY_DEVICE;
6488 
6489 err2:
6490 	put_vnode(toVnode);
6491 err1:
6492 	put_vnode(fromVnode);
6493 
6494 	return status;
6495 }
6496 
6497 
6498 static status_t
6499 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6500 {
6501 	struct vnode* vnode = descriptor->u.vnode;
6502 
6503 	FUNCTION(("common_read_stat: stat %p\n", stat));
6504 
6505 	// TODO: remove this once all file systems properly set them!
6506 	stat->st_crtim.tv_nsec = 0;
6507 	stat->st_ctim.tv_nsec = 0;
6508 	stat->st_mtim.tv_nsec = 0;
6509 	stat->st_atim.tv_nsec = 0;
6510 
6511 	return vfs_stat_vnode(vnode, stat);
6512 }
6513 
6514 
6515 static status_t
6516 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6517 	int statMask)
6518 {
6519 	struct vnode* vnode = descriptor->u.vnode;
6520 
6521 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6522 		vnode, stat, statMask));
6523 
6524 	if (!HAS_FS_CALL(vnode, write_stat))
6525 		return B_READ_ONLY_DEVICE;
6526 
6527 	return FS_CALL(vnode, write_stat, stat, statMask);
6528 }
6529 
6530 
6531 static status_t
6532 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6533 	struct stat* stat, bool kernel)
6534 {
6535 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6536 		stat));
6537 
6538 	struct vnode* vnode;
6539 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6540 		NULL, kernel);
6541 	if (status != B_OK)
6542 		return status;
6543 
6544 	status = vfs_stat_vnode(vnode, stat);
6545 
6546 	put_vnode(vnode);
6547 	return status;
6548 }
6549 
6550 
6551 static status_t
6552 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6553 	const struct stat* stat, int statMask, bool kernel)
6554 {
6555 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6556 		"kernel %d\n", fd, path, stat, statMask, kernel));
6557 
6558 	struct vnode* vnode;
6559 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6560 		NULL, kernel);
6561 	if (status != B_OK)
6562 		return status;
6563 
6564 	if (HAS_FS_CALL(vnode, write_stat))
6565 		status = FS_CALL(vnode, write_stat, stat, statMask);
6566 	else
6567 		status = B_READ_ONLY_DEVICE;
6568 
6569 	put_vnode(vnode);
6570 
6571 	return status;
6572 }
6573 
6574 
6575 static int
6576 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6577 {
6578 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6579 		kernel));
6580 
6581 	struct vnode* vnode;
6582 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6583 		NULL, kernel);
6584 	if (status != B_OK)
6585 		return status;
6586 
6587 	status = open_attr_dir_vnode(vnode, kernel);
6588 	if (status < 0)
6589 		put_vnode(vnode);
6590 
6591 	return status;
6592 }
6593 
6594 
6595 static status_t
6596 attr_dir_close(struct file_descriptor* descriptor)
6597 {
6598 	struct vnode* vnode = descriptor->u.vnode;
6599 
6600 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6601 
6602 	if (HAS_FS_CALL(vnode, close_attr_dir))
6603 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6604 
6605 	return B_OK;
6606 }
6607 
6608 
6609 static void
6610 attr_dir_free_fd(struct file_descriptor* descriptor)
6611 {
6612 	struct vnode* vnode = descriptor->u.vnode;
6613 
6614 	if (vnode != NULL) {
6615 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6616 		put_vnode(vnode);
6617 	}
6618 }
6619 
6620 
6621 static status_t
6622 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6623 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6624 {
6625 	struct vnode* vnode = descriptor->u.vnode;
6626 
6627 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6628 
6629 	if (HAS_FS_CALL(vnode, read_attr_dir))
6630 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6631 			bufferSize, _count);
6632 
6633 	return B_UNSUPPORTED;
6634 }
6635 
6636 
6637 static status_t
6638 attr_dir_rewind(struct file_descriptor* descriptor)
6639 {
6640 	struct vnode* vnode = descriptor->u.vnode;
6641 
6642 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6643 
6644 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6645 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6646 
6647 	return B_UNSUPPORTED;
6648 }
6649 
6650 
6651 static int
6652 attr_create(int fd, char* path, const char* name, uint32 type,
6653 	int openMode, bool kernel)
6654 {
6655 	if (name == NULL || *name == '\0')
6656 		return B_BAD_VALUE;
6657 
6658 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6659 	struct vnode* vnode;
6660 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6661 		kernel);
6662 	if (status != B_OK)
6663 		return status;
6664 
6665 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6666 		status = B_LINK_LIMIT;
6667 		goto err;
6668 	}
6669 
6670 	if (!HAS_FS_CALL(vnode, create_attr)) {
6671 		status = B_READ_ONLY_DEVICE;
6672 		goto err;
6673 	}
6674 
6675 	void* cookie;
6676 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6677 	if (status != B_OK)
6678 		goto err;
6679 
6680 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6681 	if (fd >= 0)
6682 		return fd;
6683 
6684 	status = fd;
6685 
6686 	FS_CALL(vnode, close_attr, cookie);
6687 	FS_CALL(vnode, free_attr_cookie, cookie);
6688 
6689 	FS_CALL(vnode, remove_attr, name);
6690 
6691 err:
6692 	put_vnode(vnode);
6693 
6694 	return status;
6695 }
6696 
6697 
6698 static int
6699 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6700 {
6701 	if (name == NULL || *name == '\0')
6702 		return B_BAD_VALUE;
6703 
6704 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6705 	struct vnode* vnode;
6706 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6707 		kernel);
6708 	if (status != B_OK)
6709 		return status;
6710 
6711 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6712 		status = B_LINK_LIMIT;
6713 		goto err;
6714 	}
6715 
6716 	if (!HAS_FS_CALL(vnode, open_attr)) {
6717 		status = B_UNSUPPORTED;
6718 		goto err;
6719 	}
6720 
6721 	void* cookie;
6722 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6723 	if (status != B_OK)
6724 		goto err;
6725 
6726 	// now we only need a file descriptor for this attribute and we're done
6727 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6728 	if (fd >= 0)
6729 		return fd;
6730 
6731 	status = fd;
6732 
6733 	FS_CALL(vnode, close_attr, cookie);
6734 	FS_CALL(vnode, free_attr_cookie, cookie);
6735 
6736 err:
6737 	put_vnode(vnode);
6738 
6739 	return status;
6740 }
6741 
6742 
6743 static status_t
6744 attr_close(struct file_descriptor* descriptor)
6745 {
6746 	struct vnode* vnode = descriptor->u.vnode;
6747 
6748 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6749 
6750 	if (HAS_FS_CALL(vnode, close_attr))
6751 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6752 
6753 	return B_OK;
6754 }
6755 
6756 
6757 static void
6758 attr_free_fd(struct file_descriptor* descriptor)
6759 {
6760 	struct vnode* vnode = descriptor->u.vnode;
6761 
6762 	if (vnode != NULL) {
6763 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6764 		put_vnode(vnode);
6765 	}
6766 }
6767 
6768 
6769 static status_t
6770 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6771 	size_t* length)
6772 {
6773 	struct vnode* vnode = descriptor->u.vnode;
6774 
6775 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6776 		pos, length, *length));
6777 
6778 	if (!HAS_FS_CALL(vnode, read_attr))
6779 		return B_UNSUPPORTED;
6780 
6781 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6782 }
6783 
6784 
6785 static status_t
6786 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6787 	size_t* length)
6788 {
6789 	struct vnode* vnode = descriptor->u.vnode;
6790 
6791 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6792 		length));
6793 
6794 	if (!HAS_FS_CALL(vnode, write_attr))
6795 		return B_UNSUPPORTED;
6796 
6797 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6798 }
6799 
6800 
6801 static off_t
6802 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6803 {
6804 	off_t offset;
6805 
6806 	switch (seekType) {
6807 		case SEEK_SET:
6808 			offset = 0;
6809 			break;
6810 		case SEEK_CUR:
6811 			offset = descriptor->pos;
6812 			break;
6813 		case SEEK_END:
6814 		{
6815 			struct vnode* vnode = descriptor->u.vnode;
6816 			if (!HAS_FS_CALL(vnode, read_stat))
6817 				return B_UNSUPPORTED;
6818 
6819 			struct stat stat;
6820 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6821 				&stat);
6822 			if (status != B_OK)
6823 				return status;
6824 
6825 			offset = stat.st_size;
6826 			break;
6827 		}
6828 		default:
6829 			return B_BAD_VALUE;
6830 	}
6831 
6832 	// assumes off_t is 64 bits wide
6833 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6834 		return B_BUFFER_OVERFLOW;
6835 
6836 	pos += offset;
6837 	if (pos < 0)
6838 		return B_BAD_VALUE;
6839 
6840 	return descriptor->pos = pos;
6841 }
6842 
6843 
6844 static status_t
6845 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6846 {
6847 	struct vnode* vnode = descriptor->u.vnode;
6848 
6849 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6850 
6851 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6852 		return B_UNSUPPORTED;
6853 
6854 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6855 }
6856 
6857 
6858 static status_t
6859 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6860 	int statMask)
6861 {
6862 	struct vnode* vnode = descriptor->u.vnode;
6863 
6864 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6865 
6866 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6867 		return B_READ_ONLY_DEVICE;
6868 
6869 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6870 }
6871 
6872 
6873 static status_t
6874 attr_remove(int fd, const char* name, bool kernel)
6875 {
6876 	struct file_descriptor* descriptor;
6877 	struct vnode* vnode;
6878 	status_t status;
6879 
6880 	if (name == NULL || *name == '\0')
6881 		return B_BAD_VALUE;
6882 
6883 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6884 		kernel));
6885 
6886 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6887 	if (descriptor == NULL)
6888 		return B_FILE_ERROR;
6889 
6890 	if (HAS_FS_CALL(vnode, remove_attr))
6891 		status = FS_CALL(vnode, remove_attr, name);
6892 	else
6893 		status = B_READ_ONLY_DEVICE;
6894 
6895 	put_fd(descriptor);
6896 
6897 	return status;
6898 }
6899 
6900 
6901 static status_t
6902 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6903 	bool kernel)
6904 {
6905 	struct file_descriptor* fromDescriptor;
6906 	struct file_descriptor* toDescriptor;
6907 	struct vnode* fromVnode;
6908 	struct vnode* toVnode;
6909 	status_t status;
6910 
6911 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6912 		|| *toName == '\0')
6913 		return B_BAD_VALUE;
6914 
6915 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6916 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6917 
6918 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6919 	if (fromDescriptor == NULL)
6920 		return B_FILE_ERROR;
6921 
6922 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6923 	if (toDescriptor == NULL) {
6924 		status = B_FILE_ERROR;
6925 		goto err;
6926 	}
6927 
6928 	// are the files on the same volume?
6929 	if (fromVnode->device != toVnode->device) {
6930 		status = B_CROSS_DEVICE_LINK;
6931 		goto err1;
6932 	}
6933 
6934 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6935 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6936 	} else
6937 		status = B_READ_ONLY_DEVICE;
6938 
6939 err1:
6940 	put_fd(toDescriptor);
6941 err:
6942 	put_fd(fromDescriptor);
6943 
6944 	return status;
6945 }
6946 
6947 
6948 static int
6949 index_dir_open(dev_t mountID, bool kernel)
6950 {
6951 	struct fs_mount* mount;
6952 	void* cookie;
6953 
6954 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6955 		kernel));
6956 
6957 	status_t status = get_mount(mountID, &mount);
6958 	if (status != B_OK)
6959 		return status;
6960 
6961 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6962 		status = B_UNSUPPORTED;
6963 		goto error;
6964 	}
6965 
6966 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6967 	if (status != B_OK)
6968 		goto error;
6969 
6970 	// get fd for the index directory
6971 	int fd;
6972 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6973 	if (fd >= 0)
6974 		return fd;
6975 
6976 	// something went wrong
6977 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6978 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6979 
6980 	status = fd;
6981 
6982 error:
6983 	put_mount(mount);
6984 	return status;
6985 }
6986 
6987 
6988 static status_t
6989 index_dir_close(struct file_descriptor* descriptor)
6990 {
6991 	struct fs_mount* mount = descriptor->u.mount;
6992 
6993 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6994 
6995 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6996 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6997 
6998 	return B_OK;
6999 }
7000 
7001 
7002 static void
7003 index_dir_free_fd(struct file_descriptor* descriptor)
7004 {
7005 	struct fs_mount* mount = descriptor->u.mount;
7006 
7007 	if (mount != NULL) {
7008 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7009 		put_mount(mount);
7010 	}
7011 }
7012 
7013 
7014 static status_t
7015 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7016 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7017 {
7018 	struct fs_mount* mount = descriptor->u.mount;
7019 
7020 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7021 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7022 			bufferSize, _count);
7023 	}
7024 
7025 	return B_UNSUPPORTED;
7026 }
7027 
7028 
7029 static status_t
7030 index_dir_rewind(struct file_descriptor* descriptor)
7031 {
7032 	struct fs_mount* mount = descriptor->u.mount;
7033 
7034 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7035 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7036 
7037 	return B_UNSUPPORTED;
7038 }
7039 
7040 
7041 static status_t
7042 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7043 	bool kernel)
7044 {
7045 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7046 		mountID, name, kernel));
7047 
7048 	struct fs_mount* mount;
7049 	status_t status = get_mount(mountID, &mount);
7050 	if (status != B_OK)
7051 		return status;
7052 
7053 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7054 		status = B_READ_ONLY_DEVICE;
7055 		goto out;
7056 	}
7057 
7058 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7059 
7060 out:
7061 	put_mount(mount);
7062 	return status;
7063 }
7064 
7065 
7066 #if 0
7067 static status_t
7068 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7069 {
7070 	struct vnode* vnode = descriptor->u.vnode;
7071 
7072 	// ToDo: currently unused!
7073 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7074 	if (!HAS_FS_CALL(vnode, read_index_stat))
7075 		return B_UNSUPPORTED;
7076 
7077 	return B_UNSUPPORTED;
7078 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7079 }
7080 
7081 
7082 static void
7083 index_free_fd(struct file_descriptor* descriptor)
7084 {
7085 	struct vnode* vnode = descriptor->u.vnode;
7086 
7087 	if (vnode != NULL) {
7088 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7089 		put_vnode(vnode);
7090 	}
7091 }
7092 #endif
7093 
7094 
7095 static status_t
7096 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7097 	bool kernel)
7098 {
7099 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7100 		mountID, name, kernel));
7101 
7102 	struct fs_mount* mount;
7103 	status_t status = get_mount(mountID, &mount);
7104 	if (status != B_OK)
7105 		return status;
7106 
7107 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7108 		status = B_UNSUPPORTED;
7109 		goto out;
7110 	}
7111 
7112 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7113 
7114 out:
7115 	put_mount(mount);
7116 	return status;
7117 }
7118 
7119 
7120 static status_t
7121 index_remove(dev_t mountID, const char* name, bool kernel)
7122 {
7123 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7124 		mountID, name, kernel));
7125 
7126 	struct fs_mount* mount;
7127 	status_t status = get_mount(mountID, &mount);
7128 	if (status != B_OK)
7129 		return status;
7130 
7131 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7132 		status = B_READ_ONLY_DEVICE;
7133 		goto out;
7134 	}
7135 
7136 	status = FS_MOUNT_CALL(mount, remove_index, name);
7137 
7138 out:
7139 	put_mount(mount);
7140 	return status;
7141 }
7142 
7143 
7144 /*!	TODO: the query FS API is still the pretty much the same as in R5.
7145 		It would be nice if the FS would find some more kernel support
7146 		for them.
7147 		For example, query parsing should be moved into the kernel.
7148 */
7149 static int
7150 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7151 	int32 token, bool kernel)
7152 {
7153 	struct fs_mount* mount;
7154 	void* cookie;
7155 
7156 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7157 		device, query, kernel));
7158 
7159 	status_t status = get_mount(device, &mount);
7160 	if (status != B_OK)
7161 		return status;
7162 
7163 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7164 		status = B_UNSUPPORTED;
7165 		goto error;
7166 	}
7167 
7168 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7169 		&cookie);
7170 	if (status != B_OK)
7171 		goto error;
7172 
7173 	// get fd for the index directory
7174 	int fd;
7175 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7176 	if (fd >= 0)
7177 		return fd;
7178 
7179 	status = fd;
7180 
7181 	// something went wrong
7182 	FS_MOUNT_CALL(mount, close_query, cookie);
7183 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7184 
7185 error:
7186 	put_mount(mount);
7187 	return status;
7188 }
7189 
7190 
7191 static status_t
7192 query_close(struct file_descriptor* descriptor)
7193 {
7194 	struct fs_mount* mount = descriptor->u.mount;
7195 
7196 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7197 
7198 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7199 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7200 
7201 	return B_OK;
7202 }
7203 
7204 
7205 static void
7206 query_free_fd(struct file_descriptor* descriptor)
7207 {
7208 	struct fs_mount* mount = descriptor->u.mount;
7209 
7210 	if (mount != NULL) {
7211 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7212 		put_mount(mount);
7213 	}
7214 }
7215 
7216 
7217 static status_t
7218 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7219 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7220 {
7221 	struct fs_mount* mount = descriptor->u.mount;
7222 
7223 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7224 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7225 			bufferSize, _count);
7226 	}
7227 
7228 	return B_UNSUPPORTED;
7229 }
7230 
7231 
7232 static status_t
7233 query_rewind(struct file_descriptor* descriptor)
7234 {
7235 	struct fs_mount* mount = descriptor->u.mount;
7236 
7237 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7238 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7239 
7240 	return B_UNSUPPORTED;
7241 }
7242 
7243 
7244 //	#pragma mark - General File System functions
7245 
7246 
7247 static dev_t
7248 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7249 	const char* args, bool kernel)
7250 {
7251 	struct ::fs_mount* mount;
7252 	status_t status = B_OK;
7253 	fs_volume* volume = NULL;
7254 	int32 layer = 0;
7255 	Vnode* coveredNode = NULL;
7256 
7257 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7258 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7259 
7260 	// The path is always safe, we just have to make sure that fsName is
7261 	// almost valid - we can't make any assumptions about args, though.
7262 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
7263 	// We'll get it from the DDM later.
7264 	if (fsName == NULL) {
7265 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7266 			return B_BAD_VALUE;
7267 	} else if (fsName[0] == '\0')
7268 		return B_BAD_VALUE;
7269 
7270 	RecursiveLocker mountOpLocker(sMountOpLock);
7271 
7272 	// Helper to delete a newly created file device on failure.
7273 	// Not exactly beautiful, but helps to keep the code below cleaner.
7274 	struct FileDeviceDeleter {
7275 		FileDeviceDeleter() : id(-1) {}
7276 		~FileDeviceDeleter()
7277 		{
7278 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7279 		}
7280 
7281 		partition_id id;
7282 	} fileDeviceDeleter;
7283 
7284 	// If the file system is not a "virtual" one, the device argument should
7285 	// point to a real file/device (if given at all).
7286 	// get the partition
7287 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7288 	KPartition* partition = NULL;
7289 	KPath normalizedDevice;
7290 	bool newlyCreatedFileDevice = false;
7291 
7292 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7293 		// normalize the device path
7294 		status = normalizedDevice.SetTo(device, true);
7295 		if (status != B_OK)
7296 			return status;
7297 
7298 		// get a corresponding partition from the DDM
7299 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7300 		if (partition == NULL) {
7301 			// Partition not found: This either means, the user supplied
7302 			// an invalid path, or the path refers to an image file. We try
7303 			// to let the DDM create a file device for the path.
7304 			partition_id deviceID = ddm->CreateFileDevice(
7305 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7306 			if (deviceID >= 0) {
7307 				partition = ddm->RegisterPartition(deviceID);
7308 				if (newlyCreatedFileDevice)
7309 					fileDeviceDeleter.id = deviceID;
7310 			}
7311 		}
7312 
7313 		if (!partition) {
7314 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7315 				normalizedDevice.Path()));
7316 			return B_ENTRY_NOT_FOUND;
7317 		}
7318 
7319 		device = normalizedDevice.Path();
7320 			// correct path to file device
7321 	}
7322 	PartitionRegistrar partitionRegistrar(partition, true);
7323 
7324 	// Write lock the partition's device. For the time being, we keep the lock
7325 	// until we're done mounting -- not nice, but ensure, that no-one is
7326 	// interfering.
7327 	// TODO: Just mark the partition busy while mounting!
7328 	KDiskDevice* diskDevice = NULL;
7329 	if (partition) {
7330 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7331 		if (!diskDevice) {
7332 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7333 			return B_ERROR;
7334 		}
7335 	}
7336 
7337 	DeviceWriteLocker writeLocker(diskDevice, true);
7338 		// this takes over the write lock acquired before
7339 
7340 	if (partition != NULL) {
7341 		// make sure, that the partition is not busy
7342 		if (partition->IsBusy()) {
7343 			TRACE(("fs_mount(): Partition is busy.\n"));
7344 			return B_BUSY;
7345 		}
7346 
7347 		// if no FS name had been supplied, we get it from the partition
7348 		if (fsName == NULL) {
7349 			KDiskSystem* diskSystem = partition->DiskSystem();
7350 			if (!diskSystem) {
7351 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7352 					"recognize it.\n"));
7353 				return B_BAD_VALUE;
7354 			}
7355 
7356 			if (!diskSystem->IsFileSystem()) {
7357 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7358 					"partitioning system.\n"));
7359 				return B_BAD_VALUE;
7360 			}
7361 
7362 			// The disk system name will not change, and the KDiskSystem
7363 			// object will not go away while the disk device is locked (and
7364 			// the partition has a reference to it), so this is safe.
7365 			fsName = diskSystem->Name();
7366 		}
7367 	}
7368 
7369 	mount = new(std::nothrow) (struct ::fs_mount);
7370 	if (mount == NULL)
7371 		return B_NO_MEMORY;
7372 
7373 	mount->device_name = strdup(device);
7374 		// "device" can be NULL
7375 
7376 	status = mount->entry_cache.Init();
7377 	if (status != B_OK)
7378 		goto err1;
7379 
7380 	// initialize structure
7381 	mount->id = sNextMountID++;
7382 	mount->partition = NULL;
7383 	mount->root_vnode = NULL;
7384 	mount->covers_vnode = NULL;
7385 	mount->unmounting = false;
7386 	mount->owns_file_device = false;
7387 	mount->volume = NULL;
7388 
7389 	// build up the volume(s)
7390 	while (true) {
7391 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7392 		if (layerFSName == NULL) {
7393 			if (layer == 0) {
7394 				status = B_NO_MEMORY;
7395 				goto err1;
7396 			}
7397 
7398 			break;
7399 		}
7400 		MemoryDeleter layerFSNameDeleter(layerFSName);
7401 
7402 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7403 		if (volume == NULL) {
7404 			status = B_NO_MEMORY;
7405 			goto err1;
7406 		}
7407 
7408 		volume->id = mount->id;
7409 		volume->partition = partition != NULL ? partition->ID() : -1;
7410 		volume->layer = layer++;
7411 		volume->private_volume = NULL;
7412 		volume->ops = NULL;
7413 		volume->sub_volume = NULL;
7414 		volume->super_volume = NULL;
7415 		volume->file_system = NULL;
7416 		volume->file_system_name = NULL;
7417 
7418 		volume->file_system_name = get_file_system_name(layerFSName);
7419 		if (volume->file_system_name == NULL) {
7420 			status = B_NO_MEMORY;
7421 			free(volume);
7422 			goto err1;
7423 		}
7424 
7425 		volume->file_system = get_file_system(layerFSName);
7426 		if (volume->file_system == NULL) {
7427 			status = B_DEVICE_NOT_FOUND;
7428 			free(volume->file_system_name);
7429 			free(volume);
7430 			goto err1;
7431 		}
7432 
7433 		if (mount->volume == NULL)
7434 			mount->volume = volume;
7435 		else {
7436 			volume->super_volume = mount->volume;
7437 			mount->volume->sub_volume = volume;
7438 			mount->volume = volume;
7439 		}
7440 	}
7441 
7442 	// insert mount struct into list before we call FS's mount() function
7443 	// so that vnodes can be created for this mount
7444 	mutex_lock(&sMountMutex);
7445 	sMountsTable->Insert(mount);
7446 	mutex_unlock(&sMountMutex);
7447 
7448 	ino_t rootID;
7449 
7450 	if (!sRoot) {
7451 		// we haven't mounted anything yet
7452 		if (strcmp(path, "/") != 0) {
7453 			status = B_ERROR;
7454 			goto err2;
7455 		}
7456 
7457 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7458 			args, &rootID);
7459 		if (status != 0)
7460 			goto err2;
7461 	} else {
7462 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7463 		if (status != B_OK)
7464 			goto err2;
7465 
7466 		mount->covers_vnode = coveredNode;
7467 
7468 		// make sure covered_vnode is a directory
7469 		if (!S_ISDIR(coveredNode->Type())) {
7470 			status = B_NOT_A_DIRECTORY;
7471 			goto err3;
7472 		}
7473 
7474 		if (coveredNode->IsCovered()) {
7475 			// this is already a covered vnode
7476 			status = B_BUSY;
7477 			goto err3;
7478 		}
7479 
7480 		// mount it/them
7481 		fs_volume* volume = mount->volume;
7482 		while (volume) {
7483 			status = volume->file_system->mount(volume, device, flags, args,
7484 				&rootID);
7485 			if (status != B_OK) {
7486 				if (volume->sub_volume)
7487 					goto err4;
7488 				goto err3;
7489 			}
7490 
7491 			volume = volume->super_volume;
7492 		}
7493 
7494 		volume = mount->volume;
7495 		while (volume) {
7496 			if (volume->ops->all_layers_mounted != NULL)
7497 				volume->ops->all_layers_mounted(volume);
7498 			volume = volume->super_volume;
7499 		}
7500 	}
7501 
7502 	// the root node is supposed to be owned by the file system - it must
7503 	// exist at this point
7504 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7505 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7506 		panic("fs_mount: file system does not own its root node!\n");
7507 		status = B_ERROR;
7508 		goto err4;
7509 	}
7510 
7511 	// set up the links between the root vnode and the vnode it covers
7512 	rw_lock_write_lock(&sVnodeLock);
7513 	if (coveredNode != NULL) {
7514 		if (coveredNode->IsCovered()) {
7515 			// the vnode is covered now
7516 			status = B_BUSY;
7517 			rw_lock_write_unlock(&sVnodeLock);
7518 			goto err4;
7519 		}
7520 
7521 		mount->root_vnode->covers = coveredNode;
7522 		mount->root_vnode->SetCovering(true);
7523 
7524 		coveredNode->covered_by = mount->root_vnode;
7525 		coveredNode->SetCovered(true);
7526 	}
7527 	rw_lock_write_unlock(&sVnodeLock);
7528 
7529 	if (!sRoot) {
7530 		sRoot = mount->root_vnode;
7531 		mutex_lock(&sIOContextRootLock);
7532 		get_current_io_context(true)->root = sRoot;
7533 		mutex_unlock(&sIOContextRootLock);
7534 		inc_vnode_ref_count(sRoot);
7535 	}
7536 
7537 	// supply the partition (if any) with the mount cookie and mark it mounted
7538 	if (partition) {
7539 		partition->SetMountCookie(mount->volume->private_volume);
7540 		partition->SetVolumeID(mount->id);
7541 
7542 		// keep a partition reference as long as the partition is mounted
7543 		partitionRegistrar.Detach();
7544 		mount->partition = partition;
7545 		mount->owns_file_device = newlyCreatedFileDevice;
7546 		fileDeviceDeleter.id = -1;
7547 	}
7548 
7549 	notify_mount(mount->id,
7550 		coveredNode != NULL ? coveredNode->device : -1,
7551 		coveredNode ? coveredNode->id : -1);
7552 
7553 	return mount->id;
7554 
7555 err4:
7556 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7557 err3:
7558 	if (coveredNode != NULL)
7559 		put_vnode(coveredNode);
7560 err2:
7561 	mutex_lock(&sMountMutex);
7562 	sMountsTable->Remove(mount);
7563 	mutex_unlock(&sMountMutex);
7564 err1:
7565 	delete mount;
7566 
7567 	return status;
7568 }
7569 
7570 
7571 static status_t
7572 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7573 {
7574 	struct fs_mount* mount;
7575 	status_t err;
7576 
7577 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7578 		mountID, kernel));
7579 
7580 	struct vnode* pathVnode = NULL;
7581 	if (path != NULL) {
7582 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7583 		if (err != B_OK)
7584 			return B_ENTRY_NOT_FOUND;
7585 	}
7586 
7587 	RecursiveLocker mountOpLocker(sMountOpLock);
7588 
7589 	// this lock is not strictly necessary, but here in case of KDEBUG
7590 	// to keep the ASSERT in find_mount() working.
7591 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7592 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7593 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7594 	if (mount == NULL) {
7595 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7596 			pathVnode);
7597 	}
7598 
7599 	if (path != NULL) {
7600 		put_vnode(pathVnode);
7601 
7602 		if (mount->root_vnode != pathVnode) {
7603 			// not mountpoint
7604 			return B_BAD_VALUE;
7605 		}
7606 	}
7607 
7608 	// if the volume is associated with a partition, lock the device of the
7609 	// partition as long as we are unmounting
7610 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7611 	KPartition* partition = mount->partition;
7612 	KDiskDevice* diskDevice = NULL;
7613 	if (partition != NULL) {
7614 		if (partition->Device() == NULL) {
7615 			dprintf("fs_unmount(): There is no device!\n");
7616 			return B_ERROR;
7617 		}
7618 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7619 		if (!diskDevice) {
7620 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7621 			return B_ERROR;
7622 		}
7623 	}
7624 	DeviceWriteLocker writeLocker(diskDevice, true);
7625 
7626 	// make sure, that the partition is not busy
7627 	if (partition != NULL) {
7628 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7629 			TRACE(("fs_unmount(): Partition is busy.\n"));
7630 			return B_BUSY;
7631 		}
7632 	}
7633 
7634 	// grab the vnode master mutex to keep someone from creating
7635 	// a vnode while we're figuring out if we can continue
7636 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7637 
7638 	bool disconnectedDescriptors = false;
7639 
7640 	while (true) {
7641 		bool busy = false;
7642 
7643 		// cycle through the list of vnodes associated with this mount and
7644 		// make sure all of them are not busy or have refs on them
7645 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7646 		while (struct vnode* vnode = iterator.Next()) {
7647 			if (vnode->IsBusy()) {
7648 				busy = true;
7649 				break;
7650 			}
7651 
7652 			// check the vnode's ref count -- subtract additional references for
7653 			// covering
7654 			int32 refCount = vnode->ref_count;
7655 			if (vnode->covers != NULL)
7656 				refCount--;
7657 			if (vnode->covered_by != NULL)
7658 				refCount--;
7659 
7660 			if (refCount != 0) {
7661 				// there are still vnodes in use on this mount, so we cannot
7662 				// unmount yet
7663 				busy = true;
7664 				break;
7665 			}
7666 		}
7667 
7668 		if (!busy)
7669 			break;
7670 
7671 		if ((flags & B_FORCE_UNMOUNT) == 0)
7672 			return B_BUSY;
7673 
7674 		if (disconnectedDescriptors) {
7675 			// wait a bit until the last access is finished, and then try again
7676 			vnodesWriteLocker.Unlock();
7677 			snooze(100000);
7678 			// TODO: if there is some kind of bug that prevents the ref counts
7679 			// from getting back to zero, this will fall into an endless loop...
7680 			vnodesWriteLocker.Lock();
7681 			continue;
7682 		}
7683 
7684 		// the file system is still busy - but we're forced to unmount it,
7685 		// so let's disconnect all open file descriptors
7686 
7687 		mount->unmounting = true;
7688 			// prevent new vnodes from being created
7689 
7690 		vnodesWriteLocker.Unlock();
7691 
7692 		disconnect_mount_or_vnode_fds(mount, NULL);
7693 		disconnectedDescriptors = true;
7694 
7695 		vnodesWriteLocker.Lock();
7696 	}
7697 
7698 	// We can safely continue. Mark all of the vnodes busy and this mount
7699 	// structure in unmounting state. Also undo the vnode covers/covered_by
7700 	// links.
7701 	mount->unmounting = true;
7702 
7703 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7704 	while (struct vnode* vnode = iterator.Next()) {
7705 		// Remove all covers/covered_by links from other mounts' nodes to this
7706 		// vnode and adjust the node ref count accordingly. We will release the
7707 		// references to the external vnodes below.
7708 		if (Vnode* coveredNode = vnode->covers) {
7709 			if (Vnode* coveringNode = vnode->covered_by) {
7710 				// We have both covered and covering vnodes, so just remove us
7711 				// from the chain.
7712 				coveredNode->covered_by = coveringNode;
7713 				coveringNode->covers = coveredNode;
7714 				vnode->ref_count -= 2;
7715 
7716 				vnode->covered_by = NULL;
7717 				vnode->covers = NULL;
7718 				vnode->SetCovering(false);
7719 				vnode->SetCovered(false);
7720 			} else {
7721 				// We only have a covered vnode. Remove its link to us.
7722 				coveredNode->covered_by = NULL;
7723 				coveredNode->SetCovered(false);
7724 				vnode->ref_count--;
7725 
7726 				// If the other node is an external vnode, we keep its link
7727 				// link around so we can put the reference later on. Otherwise
7728 				// we get rid of it right now.
7729 				if (coveredNode->mount == mount) {
7730 					vnode->covers = NULL;
7731 					coveredNode->ref_count--;
7732 				}
7733 			}
7734 		} else if (Vnode* coveringNode = vnode->covered_by) {
7735 			// We only have a covering vnode. Remove its link to us.
7736 			coveringNode->covers = NULL;
7737 			coveringNode->SetCovering(false);
7738 			vnode->ref_count--;
7739 
7740 			// If the other node is an external vnode, we keep its link
7741 			// link around so we can put the reference later on. Otherwise
7742 			// we get rid of it right now.
7743 			if (coveringNode->mount == mount) {
7744 				vnode->covered_by = NULL;
7745 				coveringNode->ref_count--;
7746 			}
7747 		}
7748 
7749 		vnode->SetBusy(true);
7750 		vnode_to_be_freed(vnode);
7751 	}
7752 
7753 	vnodesWriteLocker.Unlock();
7754 
7755 	// Free all vnodes associated with this mount.
7756 	// They will be removed from the mount list by free_vnode(), so
7757 	// we don't have to do this.
7758 	while (struct vnode* vnode = mount->vnodes.Head()) {
7759 		// Put the references to external covered/covering vnodes we kept above.
7760 		if (Vnode* coveredNode = vnode->covers)
7761 			put_vnode(coveredNode);
7762 		if (Vnode* coveringNode = vnode->covered_by)
7763 			put_vnode(coveringNode);
7764 
7765 		free_vnode(vnode, false);
7766 	}
7767 
7768 	// remove the mount structure from the hash table
7769 	mutex_lock(&sMountMutex);
7770 	sMountsTable->Remove(mount);
7771 	mutex_unlock(&sMountMutex);
7772 
7773 	mountOpLocker.Unlock();
7774 
7775 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7776 	notify_unmount(mount->id);
7777 
7778 	// dereference the partition and mark it unmounted
7779 	if (partition) {
7780 		partition->SetVolumeID(-1);
7781 		partition->SetMountCookie(NULL);
7782 
7783 		if (mount->owns_file_device)
7784 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7785 		partition->Unregister();
7786 	}
7787 
7788 	delete mount;
7789 	return B_OK;
7790 }
7791 
7792 
7793 static status_t
7794 fs_sync(dev_t device)
7795 {
7796 	struct fs_mount* mount;
7797 	status_t status = get_mount(device, &mount);
7798 	if (status != B_OK)
7799 		return status;
7800 
7801 	struct vnode marker;
7802 	memset(&marker, 0, sizeof(marker));
7803 	marker.SetBusy(true);
7804 	marker.SetRemoved(true);
7805 
7806 	// First, synchronize all file caches
7807 
7808 	while (true) {
7809 		WriteLocker locker(sVnodeLock);
7810 			// Note: That's the easy way. Which is probably OK for sync(),
7811 			// since it's a relatively rare call and doesn't need to allow for
7812 			// a lot of concurrency. Using a read lock would be possible, but
7813 			// also more involved, since we had to lock the individual nodes
7814 			// and take care of the locking order, which we might not want to
7815 			// do while holding fs_mount::rlock.
7816 
7817 		// synchronize access to vnode list
7818 		recursive_lock_lock(&mount->rlock);
7819 
7820 		struct vnode* vnode;
7821 		if (!marker.IsRemoved()) {
7822 			vnode = mount->vnodes.GetNext(&marker);
7823 			mount->vnodes.Remove(&marker);
7824 			marker.SetRemoved(true);
7825 		} else
7826 			vnode = mount->vnodes.First();
7827 
7828 		while (vnode != NULL && (vnode->cache == NULL
7829 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7830 			// TODO: we could track writes (and writable mapped vnodes)
7831 			//	and have a simple flag that we could test for here
7832 			vnode = mount->vnodes.GetNext(vnode);
7833 		}
7834 
7835 		if (vnode != NULL) {
7836 			// insert marker vnode again
7837 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7838 			marker.SetRemoved(false);
7839 		}
7840 
7841 		recursive_lock_unlock(&mount->rlock);
7842 
7843 		if (vnode == NULL)
7844 			break;
7845 
7846 		vnode = lookup_vnode(mount->id, vnode->id);
7847 		if (vnode == NULL || vnode->IsBusy())
7848 			continue;
7849 
7850 		if (vnode->ref_count == 0) {
7851 			// this vnode has been unused before
7852 			vnode_used(vnode);
7853 		}
7854 		inc_vnode_ref_count(vnode);
7855 
7856 		locker.Unlock();
7857 
7858 		if (vnode->cache != NULL && !vnode->IsRemoved())
7859 			vnode->cache->WriteModified();
7860 
7861 		put_vnode(vnode);
7862 	}
7863 
7864 	// And then, let the file systems do their synchronizing work
7865 
7866 	if (HAS_FS_MOUNT_CALL(mount, sync))
7867 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7868 
7869 	put_mount(mount);
7870 	return status;
7871 }
7872 
7873 
7874 static status_t
7875 fs_read_info(dev_t device, struct fs_info* info)
7876 {
7877 	struct fs_mount* mount;
7878 	status_t status = get_mount(device, &mount);
7879 	if (status != B_OK)
7880 		return status;
7881 
7882 	memset(info, 0, sizeof(struct fs_info));
7883 
7884 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7885 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7886 
7887 	// fill in info the file system doesn't (have to) know about
7888 	if (status == B_OK) {
7889 		info->dev = mount->id;
7890 		info->root = mount->root_vnode->id;
7891 
7892 		fs_volume* volume = mount->volume;
7893 		while (volume->super_volume != NULL)
7894 			volume = volume->super_volume;
7895 
7896 		strlcpy(info->fsh_name, volume->file_system_name,
7897 			sizeof(info->fsh_name));
7898 		if (mount->device_name != NULL) {
7899 			strlcpy(info->device_name, mount->device_name,
7900 				sizeof(info->device_name));
7901 		}
7902 	}
7903 
7904 	// if the call is not supported by the file system, there are still
7905 	// the parts that we filled out ourselves
7906 
7907 	put_mount(mount);
7908 	return status;
7909 }
7910 
7911 
7912 static status_t
7913 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7914 {
7915 	struct fs_mount* mount;
7916 	status_t status = get_mount(device, &mount);
7917 	if (status != B_OK)
7918 		return status;
7919 
7920 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7921 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7922 	else
7923 		status = B_READ_ONLY_DEVICE;
7924 
7925 	put_mount(mount);
7926 	return status;
7927 }
7928 
7929 
7930 static dev_t
7931 fs_next_device(int32* _cookie)
7932 {
7933 	struct fs_mount* mount = NULL;
7934 	dev_t device = *_cookie;
7935 
7936 	mutex_lock(&sMountMutex);
7937 
7938 	// Since device IDs are assigned sequentially, this algorithm
7939 	// does work good enough. It makes sure that the device list
7940 	// returned is sorted, and that no device is skipped when an
7941 	// already visited device got unmounted.
7942 
7943 	while (device < sNextMountID) {
7944 		mount = find_mount(device++);
7945 		if (mount != NULL && mount->volume->private_volume != NULL)
7946 			break;
7947 	}
7948 
7949 	*_cookie = device;
7950 
7951 	if (mount != NULL)
7952 		device = mount->id;
7953 	else
7954 		device = B_BAD_VALUE;
7955 
7956 	mutex_unlock(&sMountMutex);
7957 
7958 	return device;
7959 }
7960 
7961 
7962 ssize_t
7963 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7964 	void *buffer, size_t readBytes)
7965 {
7966 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7967 	if (attrFD < 0)
7968 		return attrFD;
7969 
7970 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7971 
7972 	_kern_close(attrFD);
7973 
7974 	return bytesRead;
7975 }
7976 
7977 
7978 static status_t
7979 get_cwd(char* buffer, size_t size, bool kernel)
7980 {
7981 	// Get current working directory from io context
7982 	struct io_context* context = get_current_io_context(kernel);
7983 	status_t status;
7984 
7985 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7986 
7987 	mutex_lock(&context->io_mutex);
7988 
7989 	struct vnode* vnode = context->cwd;
7990 	if (vnode)
7991 		inc_vnode_ref_count(vnode);
7992 
7993 	mutex_unlock(&context->io_mutex);
7994 
7995 	if (vnode) {
7996 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7997 		put_vnode(vnode);
7998 	} else
7999 		status = B_ERROR;
8000 
8001 	return status;
8002 }
8003 
8004 
8005 static status_t
8006 set_cwd(int fd, char* path, bool kernel)
8007 {
8008 	struct io_context* context;
8009 	struct vnode* vnode = NULL;
8010 	struct vnode* oldDirectory;
8011 	status_t status;
8012 
8013 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8014 
8015 	// Get vnode for passed path, and bail if it failed
8016 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8017 	if (status < 0)
8018 		return status;
8019 
8020 	if (!S_ISDIR(vnode->Type())) {
8021 		// nope, can't cwd to here
8022 		status = B_NOT_A_DIRECTORY;
8023 		goto err;
8024 	}
8025 
8026 	// We need to have the permission to enter the directory, too
8027 	if (HAS_FS_CALL(vnode, access)) {
8028 		status = FS_CALL(vnode, access, X_OK);
8029 		if (status != B_OK)
8030 			goto err;
8031 	}
8032 
8033 	// Get current io context and lock
8034 	context = get_current_io_context(kernel);
8035 	mutex_lock(&context->io_mutex);
8036 
8037 	// save the old current working directory first
8038 	oldDirectory = context->cwd;
8039 	context->cwd = vnode;
8040 
8041 	mutex_unlock(&context->io_mutex);
8042 
8043 	if (oldDirectory)
8044 		put_vnode(oldDirectory);
8045 
8046 	return B_NO_ERROR;
8047 
8048 err:
8049 	put_vnode(vnode);
8050 	return status;
8051 }
8052 
8053 
8054 //	#pragma mark - kernel mirrored syscalls
8055 
8056 
8057 dev_t
8058 _kern_mount(const char* path, const char* device, const char* fsName,
8059 	uint32 flags, const char* args, size_t argsLength)
8060 {
8061 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8062 	if (pathBuffer.InitCheck() != B_OK)
8063 		return B_NO_MEMORY;
8064 
8065 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8066 }
8067 
8068 
8069 status_t
8070 _kern_unmount(const char* path, uint32 flags)
8071 {
8072 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8073 	if (pathBuffer.InitCheck() != B_OK)
8074 		return B_NO_MEMORY;
8075 
8076 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8077 }
8078 
8079 
8080 status_t
8081 _kern_read_fs_info(dev_t device, struct fs_info* info)
8082 {
8083 	if (info == NULL)
8084 		return B_BAD_VALUE;
8085 
8086 	return fs_read_info(device, info);
8087 }
8088 
8089 
8090 status_t
8091 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8092 {
8093 	if (info == NULL)
8094 		return B_BAD_VALUE;
8095 
8096 	return fs_write_info(device, info, mask);
8097 }
8098 
8099 
8100 status_t
8101 _kern_sync(void)
8102 {
8103 	// Note: _kern_sync() is also called from _user_sync()
8104 	int32 cookie = 0;
8105 	dev_t device;
8106 	while ((device = next_dev(&cookie)) >= 0) {
8107 		status_t status = fs_sync(device);
8108 		if (status != B_OK && status != B_BAD_VALUE) {
8109 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8110 				strerror(status));
8111 		}
8112 	}
8113 
8114 	return B_OK;
8115 }
8116 
8117 
8118 dev_t
8119 _kern_next_device(int32* _cookie)
8120 {
8121 	return fs_next_device(_cookie);
8122 }
8123 
8124 
8125 status_t
8126 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8127 	size_t infoSize)
8128 {
8129 	if (infoSize != sizeof(fd_info))
8130 		return B_BAD_VALUE;
8131 
8132 	// get the team
8133 	Team* team = Team::Get(teamID);
8134 	if (team == NULL)
8135 		return B_BAD_TEAM_ID;
8136 	BReference<Team> teamReference(team, true);
8137 
8138 	// now that we have a team reference, its I/O context won't go away
8139 	io_context* context = team->io_context;
8140 	MutexLocker contextLocker(context->io_mutex);
8141 
8142 	uint32 slot = *_cookie;
8143 
8144 	struct file_descriptor* descriptor;
8145 	while (slot < context->table_size
8146 		&& (descriptor = context->fds[slot]) == NULL) {
8147 		slot++;
8148 	}
8149 
8150 	if (slot >= context->table_size)
8151 		return B_ENTRY_NOT_FOUND;
8152 
8153 	info->number = slot;
8154 	info->open_mode = descriptor->open_mode;
8155 
8156 	struct vnode* vnode = fd_vnode(descriptor);
8157 	if (vnode != NULL) {
8158 		info->device = vnode->device;
8159 		info->node = vnode->id;
8160 	} else if (descriptor->u.mount != NULL) {
8161 		info->device = descriptor->u.mount->id;
8162 		info->node = -1;
8163 	}
8164 
8165 	*_cookie = slot + 1;
8166 	return B_OK;
8167 }
8168 
8169 
8170 int
8171 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8172 	int perms)
8173 {
8174 	if ((openMode & O_CREAT) != 0) {
8175 		return file_create_entry_ref(device, inode, name, openMode, perms,
8176 			true);
8177 	}
8178 
8179 	return file_open_entry_ref(device, inode, name, openMode, true);
8180 }
8181 
8182 
8183 /*!	\brief Opens a node specified by a FD + path pair.
8184 
8185 	At least one of \a fd and \a path must be specified.
8186 	If only \a fd is given, the function opens the node identified by this
8187 	FD. If only a path is given, this path is opened. If both are given and
8188 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8189 	of the directory (!) identified by \a fd.
8190 
8191 	\param fd The FD. May be < 0.
8192 	\param path The absolute or relative path. May be \c NULL.
8193 	\param openMode The open mode.
8194 	\return A FD referring to the newly opened node, or an error code,
8195 			if an error occurs.
8196 */
8197 int
8198 _kern_open(int fd, const char* path, int openMode, int perms)
8199 {
8200 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8201 	if (pathBuffer.InitCheck() != B_OK)
8202 		return B_NO_MEMORY;
8203 
8204 	if ((openMode & O_CREAT) != 0)
8205 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8206 
8207 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8208 }
8209 
8210 
8211 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8212 
8213 	The supplied name may be \c NULL, in which case directory identified
8214 	by \a device and \a inode will be opened. Otherwise \a device and
8215 	\a inode identify the parent directory of the directory to be opened
8216 	and \a name its entry name.
8217 
8218 	\param device If \a name is specified the ID of the device the parent
8219 		   directory of the directory to be opened resides on, otherwise
8220 		   the device of the directory itself.
8221 	\param inode If \a name is specified the node ID of the parent
8222 		   directory of the directory to be opened, otherwise node ID of the
8223 		   directory itself.
8224 	\param name The entry name of the directory to be opened. If \c NULL,
8225 		   the \a device + \a inode pair identify the node to be opened.
8226 	\return The FD of the newly opened directory or an error code, if
8227 			something went wrong.
8228 */
8229 int
8230 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8231 {
8232 	return dir_open_entry_ref(device, inode, name, true);
8233 }
8234 
8235 
8236 /*!	\brief Opens a directory specified by a FD + path pair.
8237 
8238 	At least one of \a fd and \a path must be specified.
8239 	If only \a fd is given, the function opens the directory identified by this
8240 	FD. If only a path is given, this path is opened. If both are given and
8241 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8242 	of the directory (!) identified by \a fd.
8243 
8244 	\param fd The FD. May be < 0.
8245 	\param path The absolute or relative path. May be \c NULL.
8246 	\return A FD referring to the newly opened directory, or an error code,
8247 			if an error occurs.
8248 */
8249 int
8250 _kern_open_dir(int fd, const char* path)
8251 {
8252 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8253 	if (pathBuffer.InitCheck() != B_OK)
8254 		return B_NO_MEMORY;
8255 
8256 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8257 }
8258 
8259 
8260 status_t
8261 _kern_fcntl(int fd, int op, size_t argument)
8262 {
8263 	return common_fcntl(fd, op, argument, true);
8264 }
8265 
8266 
8267 status_t
8268 _kern_fsync(int fd)
8269 {
8270 	return common_sync(fd, true);
8271 }
8272 
8273 
8274 status_t
8275 _kern_lock_node(int fd)
8276 {
8277 	return common_lock_node(fd, true);
8278 }
8279 
8280 
8281 status_t
8282 _kern_unlock_node(int fd)
8283 {
8284 	return common_unlock_node(fd, true);
8285 }
8286 
8287 
8288 status_t
8289 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8290 	int perms)
8291 {
8292 	return dir_create_entry_ref(device, inode, name, perms, true);
8293 }
8294 
8295 
8296 /*!	\brief Creates a directory specified by a FD + path pair.
8297 
8298 	\a path must always be specified (it contains the name of the new directory
8299 	at least). If only a path is given, this path identifies the location at
8300 	which the directory shall be created. If both \a fd and \a path are given
8301 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8302 	of the directory (!) identified by \a fd.
8303 
8304 	\param fd The FD. May be < 0.
8305 	\param path The absolute or relative path. Must not be \c NULL.
8306 	\param perms The access permissions the new directory shall have.
8307 	\return \c B_OK, if the directory has been created successfully, another
8308 			error code otherwise.
8309 */
8310 status_t
8311 _kern_create_dir(int fd, const char* path, int perms)
8312 {
8313 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8314 	if (pathBuffer.InitCheck() != B_OK)
8315 		return B_NO_MEMORY;
8316 
8317 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8318 }
8319 
8320 
8321 status_t
8322 _kern_remove_dir(int fd, const char* path)
8323 {
8324 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8325 	if (pathBuffer.InitCheck() != B_OK)
8326 		return B_NO_MEMORY;
8327 
8328 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8329 }
8330 
8331 
8332 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8333 
8334 	At least one of \a fd and \a path must be specified.
8335 	If only \a fd is given, the function the symlink to be read is the node
8336 	identified by this FD. If only a path is given, this path identifies the
8337 	symlink to be read. If both are given and the path is absolute, \a fd is
8338 	ignored; a relative path is reckoned off of the directory (!) identified
8339 	by \a fd.
8340 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8341 	will still be updated to reflect the required buffer size.
8342 
8343 	\param fd The FD. May be < 0.
8344 	\param path The absolute or relative path. May be \c NULL.
8345 	\param buffer The buffer into which the contents of the symlink shall be
8346 		   written.
8347 	\param _bufferSize A pointer to the size of the supplied buffer.
8348 	\return The length of the link on success or an appropriate error code
8349 */
8350 status_t
8351 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8352 {
8353 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8354 	if (pathBuffer.InitCheck() != B_OK)
8355 		return B_NO_MEMORY;
8356 
8357 	return common_read_link(fd, pathBuffer.LockBuffer(),
8358 		buffer, _bufferSize, true);
8359 }
8360 
8361 
8362 /*!	\brief Creates a symlink specified by a FD + path pair.
8363 
8364 	\a path must always be specified (it contains the name of the new symlink
8365 	at least). If only a path is given, this path identifies the location at
8366 	which the symlink shall be created. If both \a fd and \a path are given and
8367 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8368 	of the directory (!) identified by \a fd.
8369 
8370 	\param fd The FD. May be < 0.
8371 	\param toPath The absolute or relative path. Must not be \c NULL.
8372 	\param mode The access permissions the new symlink shall have.
8373 	\return \c B_OK, if the symlink has been created successfully, another
8374 			error code otherwise.
8375 */
8376 status_t
8377 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8378 {
8379 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8380 	if (pathBuffer.InitCheck() != B_OK)
8381 		return B_NO_MEMORY;
8382 
8383 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8384 		toPath, mode, true);
8385 }
8386 
8387 
8388 status_t
8389 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8390 	bool traverseLeafLink)
8391 {
8392 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8393 	KPath toPathBuffer(toPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8394 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8395 		return B_NO_MEMORY;
8396 
8397 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8398 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8399 }
8400 
8401 
8402 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8403 
8404 	\a path must always be specified (it contains at least the name of the entry
8405 	to be deleted). If only a path is given, this path identifies the entry
8406 	directly. If both \a fd and \a path are given and the path is absolute,
8407 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8408 	identified by \a fd.
8409 
8410 	\param fd The FD. May be < 0.
8411 	\param path The absolute or relative path. Must not be \c NULL.
8412 	\return \c B_OK, if the entry has been removed successfully, another
8413 			error code otherwise.
8414 */
8415 status_t
8416 _kern_unlink(int fd, const char* path)
8417 {
8418 	KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8419 	if (pathBuffer.InitCheck() != B_OK)
8420 		return B_NO_MEMORY;
8421 
8422 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8423 }
8424 
8425 
8426 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
8427 		   by another FD + path pair.
8428 
8429 	\a oldPath and \a newPath must always be specified (they contain at least
8430 	the name of the entry). If only a path is given, this path identifies the
8431 	entry directly. If both a FD and a path are given and the path is absolute,
8432 	the FD is ignored; a relative path is reckoned off of the directory (!)
8433 	identified by the respective FD.
8434 
8435 	\param oldFD The FD of the old location. May be < 0.
8436 	\param oldPath The absolute or relative path of the old location. Must not
8437 		   be \c NULL.
8438 	\param newFD The FD of the new location. May be < 0.
8439 	\param newPath The absolute or relative path of the new location. Must not
8440 		   be \c NULL.
8441 	\return \c B_OK, if the entry has been moved successfully, another
8442 			error code otherwise.
8443 */
8444 status_t
8445 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8446 {
8447 	KPath oldPathBuffer(oldPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8448 	KPath newPathBuffer(newPath, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8449 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8450 		return B_NO_MEMORY;
8451 
8452 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8453 		newFD, newPathBuffer.LockBuffer(), true);
8454 }
8455 
8456 
8457 status_t
8458 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8459 {
8460 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8461 	if (pathBuffer.InitCheck() != B_OK)
8462 		return B_NO_MEMORY;
8463 
8464 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8465 		true);
8466 }
8467 
8468 
8469 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8470 
8471 	If only \a fd is given, the stat operation associated with the type
8472 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8473 	given, this path identifies the entry for whose node to retrieve the
8474 	stat data. If both \a fd and \a path are given and the path is absolute,
8475 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8476 	identified by \a fd and specifies the entry whose stat data shall be
8477 	retrieved.
8478 
8479 	\param fd The FD. May be < 0.
8480 	\param path The absolute or relative path. Must not be \c NULL.
8481 	\param traverseLeafLink If \a path is given, \c true specifies that the
8482 		   function shall not stick to symlinks, but traverse them.
8483 	\param stat The buffer the stat data shall be written into.
8484 	\param statSize The size of the supplied stat buffer.
8485 	\return \c B_OK, if the the stat data have been read successfully, another
8486 			error code otherwise.
8487 */
8488 status_t
8489 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8490 	struct stat* stat, size_t statSize)
8491 {
8492 	struct stat completeStat;
8493 	struct stat* originalStat = NULL;
8494 	status_t status;
8495 
8496 	if (statSize > sizeof(struct stat))
8497 		return B_BAD_VALUE;
8498 
8499 	// this supports different stat extensions
8500 	if (statSize < sizeof(struct stat)) {
8501 		originalStat = stat;
8502 		stat = &completeStat;
8503 	}
8504 
8505 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8506 
8507 	if (status == B_OK && originalStat != NULL)
8508 		memcpy(originalStat, stat, statSize);
8509 
8510 	return status;
8511 }
8512 
8513 
8514 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8515 
8516 	If only \a fd is given, the stat operation associated with the type
8517 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8518 	given, this path identifies the entry for whose node to write the
8519 	stat data. If both \a fd and \a path are given and the path is absolute,
8520 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8521 	identified by \a fd and specifies the entry whose stat data shall be
8522 	written.
8523 
8524 	\param fd The FD. May be < 0.
8525 	\param path The absolute or relative path. May be \c NULL.
8526 	\param traverseLeafLink If \a path is given, \c true specifies that the
8527 		   function shall not stick to symlinks, but traverse them.
8528 	\param stat The buffer containing the stat data to be written.
8529 	\param statSize The size of the supplied stat buffer.
8530 	\param statMask A mask specifying which parts of the stat data shall be
8531 		   written.
8532 	\return \c B_OK, if the the stat data have been written successfully,
8533 			another error code otherwise.
8534 */
8535 status_t
8536 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8537 	const struct stat* stat, size_t statSize, int statMask)
8538 {
8539 	struct stat completeStat;
8540 
8541 	if (statSize > sizeof(struct stat))
8542 		return B_BAD_VALUE;
8543 
8544 	// this supports different stat extensions
8545 	if (statSize < sizeof(struct stat)) {
8546 		memset((uint8*)&completeStat + statSize, 0,
8547 			sizeof(struct stat) - statSize);
8548 		memcpy(&completeStat, stat, statSize);
8549 		stat = &completeStat;
8550 	}
8551 
8552 	status_t status;
8553 
8554 	if (path != NULL) {
8555 		// path given: write the stat of the node referred to by (fd, path)
8556 		KPath pathBuffer(path, KPath::DEFAULT, B_PATH_NAME_LENGTH + 1);
8557 		if (pathBuffer.InitCheck() != B_OK)
8558 			return B_NO_MEMORY;
8559 
8560 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8561 			traverseLeafLink, stat, statMask, true);
8562 	} else {
8563 		// no path given: get the FD and use the FD operation
8564 		struct file_descriptor* descriptor
8565 			= get_fd(get_current_io_context(true), fd);
8566 		if (descriptor == NULL)
8567 			return B_FILE_ERROR;
8568 
8569 		if (descriptor->ops->fd_write_stat)
8570 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8571 		else
8572 			status = B_UNSUPPORTED;
8573 
8574 		put_fd(descriptor);
8575 	}
8576 
8577 	return status;
8578 }
8579 
8580 
8581 int
8582 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8583 {
8584 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8585 	if (pathBuffer.InitCheck() != B_OK)
8586 		return B_NO_MEMORY;
8587 
8588 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8589 }
8590 
8591 
8592 int
8593 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8594 	int openMode)
8595 {
8596 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8597 	if (pathBuffer.InitCheck() != B_OK)
8598 		return B_NO_MEMORY;
8599 
8600 	if ((openMode & O_CREAT) != 0) {
8601 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8602 			true);
8603 	}
8604 
8605 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8606 }
8607 
8608 
8609 status_t
8610 _kern_remove_attr(int fd, const char* name)
8611 {
8612 	return attr_remove(fd, name, true);
8613 }
8614 
8615 
8616 status_t
8617 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8618 	const char* toName)
8619 {
8620 	return attr_rename(fromFile, fromName, toFile, toName, true);
8621 }
8622 
8623 
8624 int
8625 _kern_open_index_dir(dev_t device)
8626 {
8627 	return index_dir_open(device, true);
8628 }
8629 
8630 
8631 status_t
8632 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8633 {
8634 	return index_create(device, name, type, flags, true);
8635 }
8636 
8637 
8638 status_t
8639 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8640 {
8641 	return index_name_read_stat(device, name, stat, true);
8642 }
8643 
8644 
8645 status_t
8646 _kern_remove_index(dev_t device, const char* name)
8647 {
8648 	return index_remove(device, name, true);
8649 }
8650 
8651 
8652 status_t
8653 _kern_getcwd(char* buffer, size_t size)
8654 {
8655 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8656 
8657 	// Call vfs to get current working directory
8658 	return get_cwd(buffer, size, true);
8659 }
8660 
8661 
8662 status_t
8663 _kern_setcwd(int fd, const char* path)
8664 {
8665 	KPath pathBuffer(path, KPath::LAZY_ALLOC, B_PATH_NAME_LENGTH + 1);
8666 	if (pathBuffer.InitCheck() != B_OK)
8667 		return B_NO_MEMORY;
8668 
8669 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8670 }
8671 
8672 
8673 //	#pragma mark - userland syscalls
8674 
8675 
8676 dev_t
8677 _user_mount(const char* userPath, const char* userDevice,
8678 	const char* userFileSystem, uint32 flags, const char* userArgs,
8679 	size_t argsLength)
8680 {
8681 	char fileSystem[B_FILE_NAME_LENGTH];
8682 	KPath path, device;
8683 	char* args = NULL;
8684 	status_t status;
8685 
8686 	if (!IS_USER_ADDRESS(userPath)
8687 		|| !IS_USER_ADDRESS(userFileSystem)
8688 		|| !IS_USER_ADDRESS(userDevice))
8689 		return B_BAD_ADDRESS;
8690 
8691 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8692 		return B_NO_MEMORY;
8693 
8694 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8695 		return B_BAD_ADDRESS;
8696 
8697 	if (userFileSystem != NULL
8698 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8699 		return B_BAD_ADDRESS;
8700 
8701 	if (userDevice != NULL
8702 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8703 			< B_OK)
8704 		return B_BAD_ADDRESS;
8705 
8706 	if (userArgs != NULL && argsLength > 0) {
8707 		// this is a safety restriction
8708 		if (argsLength >= 65536)
8709 			return B_NAME_TOO_LONG;
8710 
8711 		args = (char*)malloc(argsLength + 1);
8712 		if (args == NULL)
8713 			return B_NO_MEMORY;
8714 
8715 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8716 			free(args);
8717 			return B_BAD_ADDRESS;
8718 		}
8719 	}
8720 	path.UnlockBuffer();
8721 	device.UnlockBuffer();
8722 
8723 	status = fs_mount(path.LockBuffer(),
8724 		userDevice != NULL ? device.Path() : NULL,
8725 		userFileSystem ? fileSystem : NULL, flags, args, false);
8726 
8727 	free(args);
8728 	return status;
8729 }
8730 
8731 
8732 status_t
8733 _user_unmount(const char* userPath, uint32 flags)
8734 {
8735 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8736 	if (pathBuffer.InitCheck() != B_OK)
8737 		return B_NO_MEMORY;
8738 
8739 	char* path = pathBuffer.LockBuffer();
8740 
8741 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8742 		return B_BAD_ADDRESS;
8743 
8744 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8745 }
8746 
8747 
8748 status_t
8749 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8750 {
8751 	struct fs_info info;
8752 	status_t status;
8753 
8754 	if (userInfo == NULL)
8755 		return B_BAD_VALUE;
8756 
8757 	if (!IS_USER_ADDRESS(userInfo))
8758 		return B_BAD_ADDRESS;
8759 
8760 	status = fs_read_info(device, &info);
8761 	if (status != B_OK)
8762 		return status;
8763 
8764 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8765 		return B_BAD_ADDRESS;
8766 
8767 	return B_OK;
8768 }
8769 
8770 
8771 status_t
8772 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8773 {
8774 	struct fs_info info;
8775 
8776 	if (userInfo == NULL)
8777 		return B_BAD_VALUE;
8778 
8779 	if (!IS_USER_ADDRESS(userInfo)
8780 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8781 		return B_BAD_ADDRESS;
8782 
8783 	return fs_write_info(device, &info, mask);
8784 }
8785 
8786 
8787 dev_t
8788 _user_next_device(int32* _userCookie)
8789 {
8790 	int32 cookie;
8791 	dev_t device;
8792 
8793 	if (!IS_USER_ADDRESS(_userCookie)
8794 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8795 		return B_BAD_ADDRESS;
8796 
8797 	device = fs_next_device(&cookie);
8798 
8799 	if (device >= B_OK) {
8800 		// update user cookie
8801 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8802 			return B_BAD_ADDRESS;
8803 	}
8804 
8805 	return device;
8806 }
8807 
8808 
8809 status_t
8810 _user_sync(void)
8811 {
8812 	return _kern_sync();
8813 }
8814 
8815 
8816 status_t
8817 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8818 	size_t infoSize)
8819 {
8820 	struct fd_info info;
8821 	uint32 cookie;
8822 
8823 	// only root can do this (or should root's group be enough?)
8824 	if (geteuid() != 0)
8825 		return B_NOT_ALLOWED;
8826 
8827 	if (infoSize != sizeof(fd_info))
8828 		return B_BAD_VALUE;
8829 
8830 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8831 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8832 		return B_BAD_ADDRESS;
8833 
8834 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8835 	if (status != B_OK)
8836 		return status;
8837 
8838 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8839 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8840 		return B_BAD_ADDRESS;
8841 
8842 	return status;
8843 }
8844 
8845 
8846 status_t
8847 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8848 	char* userPath, size_t pathLength)
8849 {
8850 	if (!IS_USER_ADDRESS(userPath))
8851 		return B_BAD_ADDRESS;
8852 
8853 	KPath path(B_PATH_NAME_LENGTH + 1);
8854 	if (path.InitCheck() != B_OK)
8855 		return B_NO_MEMORY;
8856 
8857 	// copy the leaf name onto the stack
8858 	char stackLeaf[B_FILE_NAME_LENGTH];
8859 	if (leaf != NULL) {
8860 		if (!IS_USER_ADDRESS(leaf))
8861 			return B_BAD_ADDRESS;
8862 
8863 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8864 		if (length < 0)
8865 			return length;
8866 		if (length >= B_FILE_NAME_LENGTH)
8867 			return B_NAME_TOO_LONG;
8868 
8869 		leaf = stackLeaf;
8870 	}
8871 
8872 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8873 		false, path.LockBuffer(), path.BufferSize());
8874 	if (status != B_OK)
8875 		return status;
8876 
8877 	path.UnlockBuffer();
8878 
8879 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8880 	if (length < 0)
8881 		return length;
8882 	if (length >= (int)pathLength)
8883 		return B_BUFFER_OVERFLOW;
8884 
8885 	return B_OK;
8886 }
8887 
8888 
8889 status_t
8890 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8891 {
8892 	if (userPath == NULL || buffer == NULL)
8893 		return B_BAD_VALUE;
8894 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8895 		return B_BAD_ADDRESS;
8896 
8897 	// copy path from userland
8898 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8899 	if (pathBuffer.InitCheck() != B_OK)
8900 		return B_NO_MEMORY;
8901 	char* path = pathBuffer.LockBuffer();
8902 
8903 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8904 		return B_BAD_ADDRESS;
8905 
8906 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8907 		false);
8908 	if (error != B_OK)
8909 		return error;
8910 
8911 	// copy back to userland
8912 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8913 	if (len < 0)
8914 		return len;
8915 	if (len >= B_PATH_NAME_LENGTH)
8916 		return B_BUFFER_OVERFLOW;
8917 
8918 	return B_OK;
8919 }
8920 
8921 
8922 int
8923 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8924 	int openMode, int perms)
8925 {
8926 	char name[B_FILE_NAME_LENGTH];
8927 
8928 	if (userName == NULL || device < 0 || inode < 0)
8929 		return B_BAD_VALUE;
8930 	if (!IS_USER_ADDRESS(userName)
8931 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8932 		return B_BAD_ADDRESS;
8933 
8934 	if ((openMode & O_CREAT) != 0) {
8935 		return file_create_entry_ref(device, inode, name, openMode, perms,
8936 			false);
8937 	}
8938 
8939 	return file_open_entry_ref(device, inode, name, openMode, false);
8940 }
8941 
8942 
8943 int
8944 _user_open(int fd, const char* userPath, int openMode, int perms)
8945 {
8946 	KPath path(B_PATH_NAME_LENGTH + 1);
8947 	if (path.InitCheck() != B_OK)
8948 		return B_NO_MEMORY;
8949 
8950 	char* buffer = path.LockBuffer();
8951 
8952 	if (!IS_USER_ADDRESS(userPath)
8953 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8954 		return B_BAD_ADDRESS;
8955 
8956 	if ((openMode & O_CREAT) != 0)
8957 		return file_create(fd, buffer, openMode, perms, false);
8958 
8959 	return file_open(fd, buffer, openMode, false);
8960 }
8961 
8962 
8963 int
8964 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8965 {
8966 	if (userName != NULL) {
8967 		char name[B_FILE_NAME_LENGTH];
8968 
8969 		if (!IS_USER_ADDRESS(userName)
8970 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8971 			return B_BAD_ADDRESS;
8972 
8973 		return dir_open_entry_ref(device, inode, name, false);
8974 	}
8975 	return dir_open_entry_ref(device, inode, NULL, false);
8976 }
8977 
8978 
8979 int
8980 _user_open_dir(int fd, const char* userPath)
8981 {
8982 	if (userPath == NULL)
8983 		return dir_open(fd, NULL, false);
8984 
8985 	KPath path(B_PATH_NAME_LENGTH + 1);
8986 	if (path.InitCheck() != B_OK)
8987 		return B_NO_MEMORY;
8988 
8989 	char* buffer = path.LockBuffer();
8990 
8991 	if (!IS_USER_ADDRESS(userPath)
8992 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8993 		return B_BAD_ADDRESS;
8994 
8995 	return dir_open(fd, buffer, false);
8996 }
8997 
8998 
8999 /*!	\brief Opens a directory's parent directory and returns the entry name
9000 		   of the former.
9001 
9002 	Aside from that it returns the directory's entry name, this method is
9003 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9004 	equivalent, if \a userName is \c NULL.
9005 
9006 	If a name buffer is supplied and the name does not fit the buffer, the
9007 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9008 
9009 	\param fd A FD referring to a directory.
9010 	\param userName Buffer the directory's entry name shall be written into.
9011 		   May be \c NULL.
9012 	\param nameLength Size of the name buffer.
9013 	\return The file descriptor of the opened parent directory, if everything
9014 			went fine, an error code otherwise.
9015 */
9016 int
9017 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9018 {
9019 	bool kernel = false;
9020 
9021 	if (userName && !IS_USER_ADDRESS(userName))
9022 		return B_BAD_ADDRESS;
9023 
9024 	// open the parent dir
9025 	int parentFD = dir_open(fd, (char*)"..", kernel);
9026 	if (parentFD < 0)
9027 		return parentFD;
9028 	FDCloser fdCloser(parentFD, kernel);
9029 
9030 	if (userName) {
9031 		// get the vnodes
9032 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9033 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9034 		VNodePutter parentVNodePutter(parentVNode);
9035 		VNodePutter dirVNodePutter(dirVNode);
9036 		if (!parentVNode || !dirVNode)
9037 			return B_FILE_ERROR;
9038 
9039 		// get the vnode name
9040 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9041 		struct dirent* buffer = (struct dirent*)_buffer;
9042 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9043 			sizeof(_buffer), get_current_io_context(false));
9044 		if (status != B_OK)
9045 			return status;
9046 
9047 		// copy the name to the userland buffer
9048 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9049 		if (len < 0)
9050 			return len;
9051 		if (len >= (int)nameLength)
9052 			return B_BUFFER_OVERFLOW;
9053 	}
9054 
9055 	return fdCloser.Detach();
9056 }
9057 
9058 
9059 status_t
9060 _user_fcntl(int fd, int op, size_t argument)
9061 {
9062 	status_t status = common_fcntl(fd, op, argument, false);
9063 	if (op == F_SETLKW)
9064 		syscall_restart_handle_post(status);
9065 
9066 	return status;
9067 }
9068 
9069 
9070 status_t
9071 _user_fsync(int fd)
9072 {
9073 	return common_sync(fd, false);
9074 }
9075 
9076 
9077 status_t
9078 _user_flock(int fd, int operation)
9079 {
9080 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9081 
9082 	// Check if the operation is valid
9083 	switch (operation & ~LOCK_NB) {
9084 		case LOCK_UN:
9085 		case LOCK_SH:
9086 		case LOCK_EX:
9087 			break;
9088 
9089 		default:
9090 			return B_BAD_VALUE;
9091 	}
9092 
9093 	struct file_descriptor* descriptor;
9094 	struct vnode* vnode;
9095 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9096 	if (descriptor == NULL)
9097 		return B_FILE_ERROR;
9098 
9099 	if (descriptor->type != FDTYPE_FILE) {
9100 		put_fd(descriptor);
9101 		return B_BAD_VALUE;
9102 	}
9103 
9104 	struct flock flock;
9105 	flock.l_start = 0;
9106 	flock.l_len = OFF_MAX;
9107 	flock.l_whence = 0;
9108 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9109 
9110 	status_t status;
9111 	if ((operation & LOCK_UN) != 0)
9112 		status = release_advisory_lock(vnode, &flock);
9113 	else {
9114 		status = acquire_advisory_lock(vnode,
9115 			thread_get_current_thread()->team->session_id, &flock,
9116 			(operation & LOCK_NB) == 0);
9117 	}
9118 
9119 	syscall_restart_handle_post(status);
9120 
9121 	put_fd(descriptor);
9122 	return status;
9123 }
9124 
9125 
9126 status_t
9127 _user_lock_node(int fd)
9128 {
9129 	return common_lock_node(fd, false);
9130 }
9131 
9132 
9133 status_t
9134 _user_unlock_node(int fd)
9135 {
9136 	return common_unlock_node(fd, false);
9137 }
9138 
9139 
9140 status_t
9141 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9142 	int perms)
9143 {
9144 	char name[B_FILE_NAME_LENGTH];
9145 	status_t status;
9146 
9147 	if (!IS_USER_ADDRESS(userName))
9148 		return B_BAD_ADDRESS;
9149 
9150 	status = user_strlcpy(name, userName, sizeof(name));
9151 	if (status < 0)
9152 		return status;
9153 
9154 	return dir_create_entry_ref(device, inode, name, perms, false);
9155 }
9156 
9157 
9158 status_t
9159 _user_create_dir(int fd, const char* userPath, int perms)
9160 {
9161 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9162 	if (pathBuffer.InitCheck() != B_OK)
9163 		return B_NO_MEMORY;
9164 
9165 	char* path = pathBuffer.LockBuffer();
9166 
9167 	if (!IS_USER_ADDRESS(userPath)
9168 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9169 		return B_BAD_ADDRESS;
9170 
9171 	return dir_create(fd, path, perms, false);
9172 }
9173 
9174 
9175 status_t
9176 _user_remove_dir(int fd, const char* userPath)
9177 {
9178 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9179 	if (pathBuffer.InitCheck() != B_OK)
9180 		return B_NO_MEMORY;
9181 
9182 	char* path = pathBuffer.LockBuffer();
9183 
9184 	if (userPath != NULL) {
9185 		if (!IS_USER_ADDRESS(userPath)
9186 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9187 			return B_BAD_ADDRESS;
9188 	}
9189 
9190 	return dir_remove(fd, userPath ? path : NULL, false);
9191 }
9192 
9193 
9194 status_t
9195 _user_read_link(int fd, const char* userPath, char* userBuffer,
9196 	size_t* userBufferSize)
9197 {
9198 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9199 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9200 		return B_NO_MEMORY;
9201 
9202 	size_t bufferSize;
9203 
9204 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9205 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9206 		return B_BAD_ADDRESS;
9207 
9208 	char* path = pathBuffer.LockBuffer();
9209 	char* buffer = linkBuffer.LockBuffer();
9210 
9211 	if (userPath) {
9212 		if (!IS_USER_ADDRESS(userPath)
9213 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9214 			return B_BAD_ADDRESS;
9215 
9216 		if (bufferSize > B_PATH_NAME_LENGTH)
9217 			bufferSize = B_PATH_NAME_LENGTH;
9218 	}
9219 
9220 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9221 		&bufferSize, false);
9222 
9223 	// we also update the bufferSize in case of errors
9224 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9225 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9226 		return B_BAD_ADDRESS;
9227 
9228 	if (status != B_OK)
9229 		return status;
9230 
9231 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9232 		return B_BAD_ADDRESS;
9233 
9234 	return B_OK;
9235 }
9236 
9237 
9238 status_t
9239 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9240 	int mode)
9241 {
9242 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9243 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9244 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9245 		return B_NO_MEMORY;
9246 
9247 	char* path = pathBuffer.LockBuffer();
9248 	char* toPath = toPathBuffer.LockBuffer();
9249 
9250 	if (!IS_USER_ADDRESS(userPath)
9251 		|| !IS_USER_ADDRESS(userToPath)
9252 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9253 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9254 		return B_BAD_ADDRESS;
9255 
9256 	return common_create_symlink(fd, path, toPath, mode, false);
9257 }
9258 
9259 
9260 status_t
9261 _user_create_link(int pathFD, const char* userPath, int toFD,
9262 	const char* userToPath, bool traverseLeafLink)
9263 {
9264 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9265 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9266 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9267 		return B_NO_MEMORY;
9268 
9269 	char* path = pathBuffer.LockBuffer();
9270 	char* toPath = toPathBuffer.LockBuffer();
9271 
9272 	if (!IS_USER_ADDRESS(userPath)
9273 		|| !IS_USER_ADDRESS(userToPath)
9274 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9275 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9276 		return B_BAD_ADDRESS;
9277 
9278 	status_t status = check_path(toPath);
9279 	if (status != B_OK)
9280 		return status;
9281 
9282 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9283 		false);
9284 }
9285 
9286 
9287 status_t
9288 _user_unlink(int fd, const char* userPath)
9289 {
9290 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9291 	if (pathBuffer.InitCheck() != B_OK)
9292 		return B_NO_MEMORY;
9293 
9294 	char* path = pathBuffer.LockBuffer();
9295 
9296 	if (!IS_USER_ADDRESS(userPath)
9297 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9298 		return B_BAD_ADDRESS;
9299 
9300 	return common_unlink(fd, path, false);
9301 }
9302 
9303 
9304 status_t
9305 _user_rename(int oldFD, const char* userOldPath, int newFD,
9306 	const char* userNewPath)
9307 {
9308 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9309 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9310 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9311 		return B_NO_MEMORY;
9312 
9313 	char* oldPath = oldPathBuffer.LockBuffer();
9314 	char* newPath = newPathBuffer.LockBuffer();
9315 
9316 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9317 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9318 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9319 		return B_BAD_ADDRESS;
9320 
9321 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9322 }
9323 
9324 
9325 status_t
9326 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9327 {
9328 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9329 	if (pathBuffer.InitCheck() != B_OK)
9330 		return B_NO_MEMORY;
9331 
9332 	char* path = pathBuffer.LockBuffer();
9333 
9334 	if (!IS_USER_ADDRESS(userPath)
9335 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9336 		return B_BAD_ADDRESS;
9337 	}
9338 
9339 	// split into directory vnode and filename path
9340 	char filename[B_FILE_NAME_LENGTH];
9341 	struct vnode* dir;
9342 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9343 	if (status != B_OK)
9344 		return status;
9345 
9346 	VNodePutter _(dir);
9347 
9348 	// the underlying FS needs to support creating FIFOs
9349 	if (!HAS_FS_CALL(dir, create_special_node))
9350 		return B_UNSUPPORTED;
9351 
9352 	// create the entry	-- the FIFO sub node is set up automatically
9353 	fs_vnode superVnode;
9354 	ino_t nodeID;
9355 	status = FS_CALL(dir, create_special_node, filename, NULL,
9356 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9357 
9358 	// create_special_node() acquired a reference for us that we don't need.
9359 	if (status == B_OK)
9360 		put_vnode(dir->mount->volume, nodeID);
9361 
9362 	return status;
9363 }
9364 
9365 
9366 status_t
9367 _user_create_pipe(int* userFDs)
9368 {
9369 	// rootfs should support creating FIFOs, but let's be sure
9370 	if (!HAS_FS_CALL(sRoot, create_special_node))
9371 		return B_UNSUPPORTED;
9372 
9373 	// create the node	-- the FIFO sub node is set up automatically
9374 	fs_vnode superVnode;
9375 	ino_t nodeID;
9376 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9377 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9378 	if (status != B_OK)
9379 		return status;
9380 
9381 	// We've got one reference to the node and need another one.
9382 	struct vnode* vnode;
9383 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9384 	if (status != B_OK) {
9385 		// that should not happen
9386 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9387 			"%" B_PRIdINO ")\n", sRoot->mount->id, sRoot->id);
9388 		return status;
9389 	}
9390 
9391 	// Everything looks good so far. Open two FDs for reading respectively
9392 	// writing.
9393 	int fds[2];
9394 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9395 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9396 
9397 	FDCloser closer0(fds[0], false);
9398 	FDCloser closer1(fds[1], false);
9399 
9400 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9401 
9402 	// copy FDs to userland
9403 	if (status == B_OK) {
9404 		if (!IS_USER_ADDRESS(userFDs)
9405 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9406 			status = B_BAD_ADDRESS;
9407 		}
9408 	}
9409 
9410 	// keep FDs, if everything went fine
9411 	if (status == B_OK) {
9412 		closer0.Detach();
9413 		closer1.Detach();
9414 	}
9415 
9416 	return status;
9417 }
9418 
9419 
9420 status_t
9421 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9422 {
9423 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9424 	if (pathBuffer.InitCheck() != B_OK)
9425 		return B_NO_MEMORY;
9426 
9427 	char* path = pathBuffer.LockBuffer();
9428 
9429 	if (!IS_USER_ADDRESS(userPath)
9430 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9431 		return B_BAD_ADDRESS;
9432 
9433 	return common_access(fd, path, mode, effectiveUserGroup, false);
9434 }
9435 
9436 
9437 status_t
9438 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9439 	struct stat* userStat, size_t statSize)
9440 {
9441 	struct stat stat;
9442 	status_t status;
9443 
9444 	if (statSize > sizeof(struct stat))
9445 		return B_BAD_VALUE;
9446 
9447 	if (!IS_USER_ADDRESS(userStat))
9448 		return B_BAD_ADDRESS;
9449 
9450 	if (userPath != NULL) {
9451 		// path given: get the stat of the node referred to by (fd, path)
9452 		if (!IS_USER_ADDRESS(userPath))
9453 			return B_BAD_ADDRESS;
9454 
9455 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9456 		if (pathBuffer.InitCheck() != B_OK)
9457 			return B_NO_MEMORY;
9458 
9459 		char* path = pathBuffer.LockBuffer();
9460 
9461 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9462 		if (length < B_OK)
9463 			return length;
9464 		if (length >= B_PATH_NAME_LENGTH)
9465 			return B_NAME_TOO_LONG;
9466 
9467 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9468 	} else {
9469 		// no path given: get the FD and use the FD operation
9470 		struct file_descriptor* descriptor
9471 			= get_fd(get_current_io_context(false), fd);
9472 		if (descriptor == NULL)
9473 			return B_FILE_ERROR;
9474 
9475 		if (descriptor->ops->fd_read_stat)
9476 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9477 		else
9478 			status = B_UNSUPPORTED;
9479 
9480 		put_fd(descriptor);
9481 	}
9482 
9483 	if (status != B_OK)
9484 		return status;
9485 
9486 	return user_memcpy(userStat, &stat, statSize);
9487 }
9488 
9489 
9490 status_t
9491 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9492 	const struct stat* userStat, size_t statSize, int statMask)
9493 {
9494 	if (statSize > sizeof(struct stat))
9495 		return B_BAD_VALUE;
9496 
9497 	struct stat stat;
9498 
9499 	if (!IS_USER_ADDRESS(userStat)
9500 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9501 		return B_BAD_ADDRESS;
9502 
9503 	// clear additional stat fields
9504 	if (statSize < sizeof(struct stat))
9505 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9506 
9507 	status_t status;
9508 
9509 	if (userPath != NULL) {
9510 		// path given: write the stat of the node referred to by (fd, path)
9511 		if (!IS_USER_ADDRESS(userPath))
9512 			return B_BAD_ADDRESS;
9513 
9514 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9515 		if (pathBuffer.InitCheck() != B_OK)
9516 			return B_NO_MEMORY;
9517 
9518 		char* path = pathBuffer.LockBuffer();
9519 
9520 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9521 		if (length < B_OK)
9522 			return length;
9523 		if (length >= B_PATH_NAME_LENGTH)
9524 			return B_NAME_TOO_LONG;
9525 
9526 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9527 			statMask, false);
9528 	} else {
9529 		// no path given: get the FD and use the FD operation
9530 		struct file_descriptor* descriptor
9531 			= get_fd(get_current_io_context(false), fd);
9532 		if (descriptor == NULL)
9533 			return B_FILE_ERROR;
9534 
9535 		if (descriptor->ops->fd_write_stat) {
9536 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9537 				statMask);
9538 		} else
9539 			status = B_UNSUPPORTED;
9540 
9541 		put_fd(descriptor);
9542 	}
9543 
9544 	return status;
9545 }
9546 
9547 
9548 int
9549 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9550 {
9551 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9552 	if (pathBuffer.InitCheck() != B_OK)
9553 		return B_NO_MEMORY;
9554 
9555 	char* path = pathBuffer.LockBuffer();
9556 
9557 	if (userPath != NULL) {
9558 		if (!IS_USER_ADDRESS(userPath)
9559 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9560 			return B_BAD_ADDRESS;
9561 	}
9562 
9563 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9564 }
9565 
9566 
9567 ssize_t
9568 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9569 	size_t readBytes)
9570 {
9571 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9572 	if (attr < 0)
9573 		return attr;
9574 
9575 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9576 	_user_close(attr);
9577 
9578 	return bytes;
9579 }
9580 
9581 
9582 ssize_t
9583 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9584 	const void* buffer, size_t writeBytes)
9585 {
9586 	// Try to support the BeOS typical truncation as well as the position
9587 	// argument
9588 	int attr = attr_create(fd, NULL, attribute, type,
9589 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9590 	if (attr < 0)
9591 		return attr;
9592 
9593 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9594 	_user_close(attr);
9595 
9596 	return bytes;
9597 }
9598 
9599 
9600 status_t
9601 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9602 {
9603 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9604 	if (attr < 0)
9605 		return attr;
9606 
9607 	struct file_descriptor* descriptor
9608 		= get_fd(get_current_io_context(false), attr);
9609 	if (descriptor == NULL) {
9610 		_user_close(attr);
9611 		return B_FILE_ERROR;
9612 	}
9613 
9614 	struct stat stat;
9615 	status_t status;
9616 	if (descriptor->ops->fd_read_stat)
9617 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9618 	else
9619 		status = B_UNSUPPORTED;
9620 
9621 	put_fd(descriptor);
9622 	_user_close(attr);
9623 
9624 	if (status == B_OK) {
9625 		attr_info info;
9626 		info.type = stat.st_type;
9627 		info.size = stat.st_size;
9628 
9629 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9630 			return B_BAD_ADDRESS;
9631 	}
9632 
9633 	return status;
9634 }
9635 
9636 
9637 int
9638 _user_open_attr(int fd, const char* userPath, const char* userName,
9639 	uint32 type, int openMode)
9640 {
9641 	char name[B_FILE_NAME_LENGTH];
9642 
9643 	if (!IS_USER_ADDRESS(userName)
9644 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9645 		return B_BAD_ADDRESS;
9646 
9647 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9648 	if (pathBuffer.InitCheck() != B_OK)
9649 		return B_NO_MEMORY;
9650 
9651 	char* path = pathBuffer.LockBuffer();
9652 
9653 	if (userPath != NULL) {
9654 		if (!IS_USER_ADDRESS(userPath)
9655 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9656 			return B_BAD_ADDRESS;
9657 	}
9658 
9659 	if ((openMode & O_CREAT) != 0) {
9660 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9661 			false);
9662 	}
9663 
9664 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9665 }
9666 
9667 
9668 status_t
9669 _user_remove_attr(int fd, const char* userName)
9670 {
9671 	char name[B_FILE_NAME_LENGTH];
9672 
9673 	if (!IS_USER_ADDRESS(userName)
9674 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9675 		return B_BAD_ADDRESS;
9676 
9677 	return attr_remove(fd, name, false);
9678 }
9679 
9680 
9681 status_t
9682 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9683 	const char* userToName)
9684 {
9685 	if (!IS_USER_ADDRESS(userFromName)
9686 		|| !IS_USER_ADDRESS(userToName))
9687 		return B_BAD_ADDRESS;
9688 
9689 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9690 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9691 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9692 		return B_NO_MEMORY;
9693 
9694 	char* fromName = fromNameBuffer.LockBuffer();
9695 	char* toName = toNameBuffer.LockBuffer();
9696 
9697 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9698 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9699 		return B_BAD_ADDRESS;
9700 
9701 	return attr_rename(fromFile, fromName, toFile, toName, false);
9702 }
9703 
9704 
9705 int
9706 _user_open_index_dir(dev_t device)
9707 {
9708 	return index_dir_open(device, false);
9709 }
9710 
9711 
9712 status_t
9713 _user_create_index(dev_t device, const char* userName, uint32 type,
9714 	uint32 flags)
9715 {
9716 	char name[B_FILE_NAME_LENGTH];
9717 
9718 	if (!IS_USER_ADDRESS(userName)
9719 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9720 		return B_BAD_ADDRESS;
9721 
9722 	return index_create(device, name, type, flags, false);
9723 }
9724 
9725 
9726 status_t
9727 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9728 {
9729 	char name[B_FILE_NAME_LENGTH];
9730 	struct stat stat;
9731 	status_t status;
9732 
9733 	if (!IS_USER_ADDRESS(userName)
9734 		|| !IS_USER_ADDRESS(userStat)
9735 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9736 		return B_BAD_ADDRESS;
9737 
9738 	status = index_name_read_stat(device, name, &stat, false);
9739 	if (status == B_OK) {
9740 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9741 			return B_BAD_ADDRESS;
9742 	}
9743 
9744 	return status;
9745 }
9746 
9747 
9748 status_t
9749 _user_remove_index(dev_t device, const char* userName)
9750 {
9751 	char name[B_FILE_NAME_LENGTH];
9752 
9753 	if (!IS_USER_ADDRESS(userName)
9754 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9755 		return B_BAD_ADDRESS;
9756 
9757 	return index_remove(device, name, false);
9758 }
9759 
9760 
9761 status_t
9762 _user_getcwd(char* userBuffer, size_t size)
9763 {
9764 	if (size == 0)
9765 		return B_BAD_VALUE;
9766 	if (!IS_USER_ADDRESS(userBuffer))
9767 		return B_BAD_ADDRESS;
9768 
9769 	if (size > kMaxPathLength)
9770 		size = kMaxPathLength;
9771 
9772 	KPath pathBuffer(size);
9773 	if (pathBuffer.InitCheck() != B_OK)
9774 		return B_NO_MEMORY;
9775 
9776 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9777 
9778 	char* path = pathBuffer.LockBuffer();
9779 
9780 	status_t status = get_cwd(path, size, false);
9781 	if (status != B_OK)
9782 		return status;
9783 
9784 	// Copy back the result
9785 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9786 		return B_BAD_ADDRESS;
9787 
9788 	return status;
9789 }
9790 
9791 
9792 status_t
9793 _user_setcwd(int fd, const char* userPath)
9794 {
9795 	TRACE(("user_setcwd: path = %p\n", userPath));
9796 
9797 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9798 	if (pathBuffer.InitCheck() != B_OK)
9799 		return B_NO_MEMORY;
9800 
9801 	char* path = pathBuffer.LockBuffer();
9802 
9803 	if (userPath != NULL) {
9804 		if (!IS_USER_ADDRESS(userPath)
9805 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9806 			return B_BAD_ADDRESS;
9807 	}
9808 
9809 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9810 }
9811 
9812 
9813 status_t
9814 _user_change_root(const char* userPath)
9815 {
9816 	// only root is allowed to chroot()
9817 	if (geteuid() != 0)
9818 		return B_NOT_ALLOWED;
9819 
9820 	// alloc path buffer
9821 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9822 	if (pathBuffer.InitCheck() != B_OK)
9823 		return B_NO_MEMORY;
9824 
9825 	// copy userland path to kernel
9826 	char* path = pathBuffer.LockBuffer();
9827 	if (userPath != NULL) {
9828 		if (!IS_USER_ADDRESS(userPath)
9829 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9830 			return B_BAD_ADDRESS;
9831 	}
9832 
9833 	// get the vnode
9834 	struct vnode* vnode;
9835 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9836 	if (status != B_OK)
9837 		return status;
9838 
9839 	// set the new root
9840 	struct io_context* context = get_current_io_context(false);
9841 	mutex_lock(&sIOContextRootLock);
9842 	struct vnode* oldRoot = context->root;
9843 	context->root = vnode;
9844 	mutex_unlock(&sIOContextRootLock);
9845 
9846 	put_vnode(oldRoot);
9847 
9848 	return B_OK;
9849 }
9850 
9851 
9852 int
9853 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9854 	uint32 flags, port_id port, int32 token)
9855 {
9856 	char* query;
9857 
9858 	if (device < 0 || userQuery == NULL || queryLength == 0)
9859 		return B_BAD_VALUE;
9860 
9861 	// this is a safety restriction
9862 	if (queryLength >= 65536)
9863 		return B_NAME_TOO_LONG;
9864 
9865 	query = (char*)malloc(queryLength + 1);
9866 	if (query == NULL)
9867 		return B_NO_MEMORY;
9868 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9869 		free(query);
9870 		return B_BAD_ADDRESS;
9871 	}
9872 
9873 	int fd = query_open(device, query, flags, port, token, false);
9874 
9875 	free(query);
9876 	return fd;
9877 }
9878 
9879 
9880 #include "vfs_request_io.cpp"
9881