1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2014, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <KPath.h>
44 #include <lock.h>
45 #include <low_resource_manager.h>
46 #include <syscalls.h>
47 #include <syscall_restart.h>
48 #include <tracing.h>
49 #include <util/atomic.h>
50 #include <util/AutoLock.h>
51 #include <util/DoublyLinkedList.h>
52 #include <vfs.h>
53 #include <vm/vm.h>
54 #include <vm/VMCache.h>
55 
56 #include "EntryCache.h"
57 #include "fifo.h"
58 #include "IORequest.h"
59 #include "unused_vnodes.h"
60 #include "vfs_tracing.h"
61 #include "Vnode.h"
62 #include "../cache/vnode_store.h"
63 
64 
65 //#define TRACE_VFS
66 #ifdef TRACE_VFS
67 #	define TRACE(x) dprintf x
68 #	define FUNCTION(x) dprintf x
69 #else
70 #	define TRACE(x) ;
71 #	define FUNCTION(x) ;
72 #endif
73 
74 #define ADD_DEBUGGER_COMMANDS
75 
76 
77 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
78 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
79 
80 #if KDEBUG
81 #	define FS_CALL(vnode, op, params...) \
82 		( HAS_FS_CALL(vnode, op) ? \
83 			vnode->ops->op(vnode->mount->volume, vnode, params) \
84 			: (panic("FS_CALL op " #op " is NULL"), 0))
85 #	define FS_CALL_NO_PARAMS(vnode, op) \
86 		( HAS_FS_CALL(vnode, op) ? \
87 			vnode->ops->op(vnode->mount->volume, vnode) \
88 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
89 #	define FS_MOUNT_CALL(mount, op, params...) \
90 		( HAS_FS_MOUNT_CALL(mount, op) ? \
91 			mount->volume->ops->op(mount->volume, params) \
92 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
93 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
94 		( HAS_FS_MOUNT_CALL(mount, op) ? \
95 			mount->volume->ops->op(mount->volume) \
96 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
97 #else
98 #	define FS_CALL(vnode, op, params...) \
99 			vnode->ops->op(vnode->mount->volume, vnode, params)
100 #	define FS_CALL_NO_PARAMS(vnode, op) \
101 			vnode->ops->op(vnode->mount->volume, vnode)
102 #	define FS_MOUNT_CALL(mount, op, params...) \
103 			mount->volume->ops->op(mount->volume, params)
104 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
105 			mount->volume->ops->op(mount->volume)
106 #endif
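

// A minimal usage sketch (hypothetical helper, mirroring how these macros
// are used further below, e.g. in normalize_flock()): probe for an optional
// FS hook with HAS_FS_CALL() before dispatching to it via FS_CALL().
#if 0
static status_t
example_read_stat(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return B_UNSUPPORTED;

	return FS_CALL(vnode, read_stat, stat);
}
#endif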
107 
108 
109 const static size_t kMaxPathLength = 65536;
110 	// The absolute maximum path length (for getcwd() - this does not
111 	// depend on PATH_MAX).
112 
113 
114 struct vnode_hash_key {
115 	dev_t	device;
116 	ino_t	vnode;
117 };
118 
119 typedef DoublyLinkedList<vnode> VnodeList;
120 
121 /*!	\brief Structure to manage a mounted file system
122 
123 	Note: The root_vnode and root_vnode->covers fields (what others?) are
124 	initialized in fs_mount() and not changed afterwards. That is, as soon
125 	as the mount is mounted and it is ensured that it won't be unmounted
126 	(e.g. by holding a reference to a vnode of that mount), (read) access
127 	to those fields is always safe, even without additional locking. Moreover,
128 	while mounted, the mount holds a reference to the root_vnode->covers vnode,
129 	thus making the access path vnode->mount->root_vnode->covers->mount->...
130 	safe if a reference to vnode is held (note that for the root mount
131 	root_vnode->covers is NULL, though).
132 */
133 struct fs_mount {
134 	fs_mount()
135 		:
136 		volume(NULL),
137 		device_name(NULL)
138 	{
139 		recursive_lock_init(&rlock, "mount rlock");
140 	}
141 
142 	~fs_mount()
143 	{
144 		recursive_lock_destroy(&rlock);
145 		free(device_name);
146 
147 		while (volume) {
148 			fs_volume* superVolume = volume->super_volume;
149 
150 			if (volume->file_system != NULL)
151 				put_module(volume->file_system->info.name);
152 
153 			free(volume->file_system_name);
154 			free(volume);
155 			volume = superVolume;
156 		}
157 	}
158 
159 	struct fs_mount* next;
160 	dev_t			id;
161 	fs_volume*		volume;
162 	char*			device_name;
163 	recursive_lock	rlock;	// guards the vnodes list
164 		// TODO: Make this a mutex! It is never used recursively.
165 	struct vnode*	root_vnode;
166 	struct vnode*	covers_vnode;	// immutable
167 	KPartition*		partition;
168 	VnodeList		vnodes;
169 	EntryCache		entry_cache;
170 	bool			unmounting;
171 	bool			owns_file_device;
172 };
173 
174 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
175 	list_link		link;
176 	team_id			team;
177 	pid_t			session;
178 	off_t			start;
179 	off_t			end;
180 	bool			shared;
181 };
182 
183 typedef DoublyLinkedList<advisory_lock> LockList;
184 
185 struct advisory_locking {
186 	sem_id			lock;
187 	sem_id			wait_sem;
188 	LockList		locks;
189 
190 	advisory_locking()
191 		:
192 		lock(-1),
193 		wait_sem(-1)
194 	{
195 	}
196 
197 	~advisory_locking()
198 	{
199 		if (lock >= 0)
200 			delete_sem(lock);
201 		if (wait_sem >= 0)
202 			delete_sem(wait_sem);
203 	}
204 };
205 
206 /*!	\brief Guards sMountsTable.
207 
208 	The holder is allowed read/write access to sMountsTable.
209 	Manipulation of the fs_mount structures themselves
210 	(and their destruction) requires different locks though.
211 */
212 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
213 
214 /*!	\brief Guards mount/unmount operations.
215 
216 	fs_mount() and fs_unmount() hold the lock during their whole operation.
217 	That is, locking the lock ensures that no FS is mounted/unmounted. In
218 	particular this means that
219 	- sMountsTable will not be modified,
220 	- the fields immutable after initialization of the fs_mount structures in
221 	  sMountsTable will not be modified.
222 
223 	The thread trying to lock the lock must not hold sVnodeLock or
224 	sMountMutex.
225 */
226 static recursive_lock sMountOpLock;
227 
228 /*!	\brief Guards sVnodeTable.
229 
230 	The holder is allowed read/write access to sVnodeTable and to
231 	any unbusy vnode in that table, save for the immutable fields (device, id,
232 	private_node, mount), to which only read-only access is allowed.
233 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
234 	well as the busy, removed, and unused flags, and the vnode's type can also
235 	be write accessed when holding a read lock to sVnodeLock *and* having the
236 	vnode locked. Write access to covered_by and covers requires write locking
237 	sVnodeLock.
238 
239 	The thread trying to acquire the lock must not hold sMountMutex.
240 	You must not hold this lock when calling create_sem(), as this might call
241 	vfs_free_unused_vnodes() and thus cause a deadlock.
242 */
243 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
244 
245 /*!	\brief Guards io_context::root.
246 
247 	Must be held when setting or getting the io_context::root field.
248 	The only operation allowed while holding this lock besides getting or
249 	setting the field is inc_vnode_ref_count() on io_context::root.
250 */
251 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
252 
253 
254 struct VnodeHash {
255 	typedef vnode_hash_key	KeyType;
256 	typedef	struct vnode	ValueType;
257 
258 #define VHASH(mountid, vnodeid) \
259 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
260 
261 	size_t HashKey(KeyType key) const
262 	{
263 		return VHASH(key.device, key.vnode);
264 	}
265 
266 	size_t Hash(ValueType* vnode) const
267 	{
268 		return VHASH(vnode->device, vnode->id);
269 	}
270 
271 #undef VHASH
272 
273 	bool Compare(KeyType key, ValueType* vnode) const
274 	{
275 		return vnode->device == key.device && vnode->id == key.vnode;
276 	}
277 
278 	ValueType*& GetLink(ValueType* value) const
279 	{
280 		return value->next;
281 	}
282 };
283 
284 typedef BOpenHashTable<VnodeHash> VnodeTable;
285 
286 
287 struct MountHash {
288 	typedef dev_t			KeyType;
289 	typedef	struct fs_mount	ValueType;
290 
291 	size_t HashKey(KeyType key) const
292 	{
293 		return key;
294 	}
295 
296 	size_t Hash(ValueType* mount) const
297 	{
298 		return mount->id;
299 	}
300 
301 	bool Compare(KeyType key, ValueType* mount) const
302 	{
303 		return mount->id == key;
304 	}
305 
306 	ValueType*& GetLink(ValueType* value) const
307 	{
308 		return value->next;
309 	}
310 };
311 
312 typedef BOpenHashTable<MountHash> MountTable;
313 
314 
315 #define VNODE_HASH_TABLE_SIZE 1024
316 static VnodeTable* sVnodeTable;
317 static struct vnode* sRoot;
318 
319 #define MOUNTS_HASH_TABLE_SIZE 16
320 static MountTable* sMountsTable;
321 static dev_t sNextMountID = 1;
322 
323 #define MAX_TEMP_IO_VECS 8
324 
325 mode_t __gUmask = 022;
326 
327 /* function declarations */
328 
329 static void free_unused_vnodes();
330 
331 // file descriptor operation prototypes
332 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
333 	void* buffer, size_t* _bytes);
334 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
335 	const void* buffer, size_t* _bytes);
336 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
337 	int seekType);
338 static void file_free_fd(struct file_descriptor* descriptor);
339 static status_t file_close(struct file_descriptor* descriptor);
340 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
341 	struct selectsync* sync);
342 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
343 	struct selectsync* sync);
344 static status_t dir_read(struct io_context* context,
345 	struct file_descriptor* descriptor, struct dirent* buffer,
346 	size_t bufferSize, uint32* _count);
347 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
348 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
349 static status_t dir_rewind(struct file_descriptor* descriptor);
350 static void dir_free_fd(struct file_descriptor* descriptor);
351 static status_t dir_close(struct file_descriptor* descriptor);
352 static status_t attr_dir_read(struct io_context* context,
353 	struct file_descriptor* descriptor, struct dirent* buffer,
354 	size_t bufferSize, uint32* _count);
355 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
356 static void attr_dir_free_fd(struct file_descriptor* descriptor);
357 static status_t attr_dir_close(struct file_descriptor* descriptor);
358 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
359 	void* buffer, size_t* _bytes);
360 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
361 	const void* buffer, size_t* _bytes);
362 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
363 	int seekType);
364 static void attr_free_fd(struct file_descriptor* descriptor);
365 static status_t attr_close(struct file_descriptor* descriptor);
366 static status_t attr_read_stat(struct file_descriptor* descriptor,
367 	struct stat* statData);
368 static status_t attr_write_stat(struct file_descriptor* descriptor,
369 	const struct stat* stat, int statMask);
370 static status_t index_dir_read(struct io_context* context,
371 	struct file_descriptor* descriptor, struct dirent* buffer,
372 	size_t bufferSize, uint32* _count);
373 static status_t index_dir_rewind(struct file_descriptor* descriptor);
374 static void index_dir_free_fd(struct file_descriptor* descriptor);
375 static status_t index_dir_close(struct file_descriptor* descriptor);
376 static status_t query_read(struct io_context* context,
377 	struct file_descriptor* descriptor, struct dirent* buffer,
378 	size_t bufferSize, uint32* _count);
379 static status_t query_rewind(struct file_descriptor* descriptor);
380 static void query_free_fd(struct file_descriptor* descriptor);
381 static status_t query_close(struct file_descriptor* descriptor);
382 
383 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
384 	void* buffer, size_t length);
385 static status_t common_read_stat(struct file_descriptor* descriptor,
386 	struct stat* statData);
387 static status_t common_write_stat(struct file_descriptor* descriptor,
388 	const struct stat* statData, int statMask);
389 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
390 	struct stat* stat, bool kernel);
391 
392 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
393 	bool traverseLeafLink, int count, bool kernel,
394 	struct vnode** _vnode, ino_t* _parentID);
395 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
396 	size_t bufferSize, bool kernel);
397 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
398 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
399 static void inc_vnode_ref_count(struct vnode* vnode);
400 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
401 	bool reenter);
402 static inline void put_vnode(struct vnode* vnode);
403 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
404 	bool kernel);
405 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
406 
407 
408 static struct fd_ops sFileOps = {
409 	file_read,
410 	file_write,
411 	file_seek,
412 	common_ioctl,
413 	NULL,		// set_flags
414 	file_select,
415 	file_deselect,
416 	NULL,		// read_dir()
417 	NULL,		// rewind_dir()
418 	common_read_stat,
419 	common_write_stat,
420 	file_close,
421 	file_free_fd
422 };
423 
424 static struct fd_ops sDirectoryOps = {
425 	NULL,		// read()
426 	NULL,		// write()
427 	NULL,		// seek()
428 	common_ioctl,
429 	NULL,		// set_flags
430 	NULL,		// select()
431 	NULL,		// deselect()
432 	dir_read,
433 	dir_rewind,
434 	common_read_stat,
435 	common_write_stat,
436 	dir_close,
437 	dir_free_fd
438 };
439 
440 static struct fd_ops sAttributeDirectoryOps = {
441 	NULL,		// read()
442 	NULL,		// write()
443 	NULL,		// seek()
444 	common_ioctl,
445 	NULL,		// set_flags
446 	NULL,		// select()
447 	NULL,		// deselect()
448 	attr_dir_read,
449 	attr_dir_rewind,
450 	common_read_stat,
451 	common_write_stat,
452 	attr_dir_close,
453 	attr_dir_free_fd
454 };
455 
456 static struct fd_ops sAttributeOps = {
457 	attr_read,
458 	attr_write,
459 	attr_seek,
460 	common_ioctl,
461 	NULL,		// set_flags
462 	NULL,		// select()
463 	NULL,		// deselect()
464 	NULL,		// read_dir()
465 	NULL,		// rewind_dir()
466 	attr_read_stat,
467 	attr_write_stat,
468 	attr_close,
469 	attr_free_fd
470 };
471 
472 static struct fd_ops sIndexDirectoryOps = {
473 	NULL,		// read()
474 	NULL,		// write()
475 	NULL,		// seek()
476 	NULL,		// ioctl()
477 	NULL,		// set_flags
478 	NULL,		// select()
479 	NULL,		// deselect()
480 	index_dir_read,
481 	index_dir_rewind,
482 	NULL,		// read_stat()
483 	NULL,		// write_stat()
484 	index_dir_close,
485 	index_dir_free_fd
486 };
487 
488 #if 0
489 static struct fd_ops sIndexOps = {
490 	NULL,		// read()
491 	NULL,		// write()
492 	NULL,		// seek()
493 	NULL,		// ioctl()
494 	NULL,		// set_flags
495 	NULL,		// select()
496 	NULL,		// deselect()
497 	NULL,		// read_dir()
498 	NULL,		// rewind_dir()
499 	index_read_stat,	// read_stat()
500 	NULL,		// write_stat()
501 	NULL,		// close()
502 	NULL		// free_fd()
503 };
504 #endif
505 
506 static struct fd_ops sQueryOps = {
507 	NULL,		// read()
508 	NULL,		// write()
509 	NULL,		// seek()
510 	NULL,		// ioctl()
511 	NULL,		// set_flags
512 	NULL,		// select()
513 	NULL,		// deselect()
514 	query_read,
515 	query_rewind,
516 	NULL,		// read_stat()
517 	NULL,		// write_stat()
518 	query_close,
519 	query_free_fd
520 };
521 
522 
523 // VNodePutter
524 class VNodePutter {
525 public:
526 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
527 
528 	~VNodePutter()
529 	{
530 		Put();
531 	}
532 
533 	void SetTo(struct vnode* vnode)
534 	{
535 		Put();
536 		fVNode = vnode;
537 	}
538 
539 	void Put()
540 	{
541 		if (fVNode) {
542 			put_vnode(fVNode);
543 			fVNode = NULL;
544 		}
545 	}
546 
547 	struct vnode* Detach()
548 	{
549 		struct vnode* vnode = fVNode;
550 		fVNode = NULL;
551 		return vnode;
552 	}
553 
554 private:
555 	struct vnode* fVNode;
556 };
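

// A usage sketch (hypothetical function): VNodePutter binds a vnode
// reference to a scope, so put_vnode() is called on every exit path
// unless ownership is transferred via Detach().
#if 0
static status_t
example_with_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode* vnode;
	status_t status = get_vnode(mountID, vnodeID, &vnode, true, 0);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);
	// ... use the vnode; the reference is put automatically on return
	return B_OK;
}
#endif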
557 
558 
559 class FDCloser {
560 public:
561 	FDCloser() : fFD(-1), fKernel(true) {}
562 
563 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
564 
565 	~FDCloser()
566 	{
567 		Close();
568 	}
569 
570 	void SetTo(int fd, bool kernel)
571 	{
572 		Close();
573 		fFD = fd;
574 		fKernel = kernel;
575 	}
576 
577 	void Close()
578 	{
579 		if (fFD >= 0) {
580 			if (fKernel)
581 				_kern_close(fFD);
582 			else
583 				_user_close(fFD);
584 			fFD = -1;
585 		}
586 	}
587 
588 	int Detach()
589 	{
590 		int fd = fFD;
591 		fFD = -1;
592 		return fd;
593 	}
594 
595 private:
596 	int		fFD;
597 	bool	fKernel;
598 };
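

// A usage sketch (hypothetical function): FDCloser closes the descriptor
// on early error returns; on success Detach() passes ownership of the
// descriptor on to the caller.
#if 0
static int
example_return_fd(int fd, bool kernel)
{
	FDCloser fdCloser(fd, kernel);
	if (fd < 0)
		return fd;

	// ... further setup; any plain `return errorCode;` closes the FD

	return fdCloser.Detach();
}
#endif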
599 
600 
601 #if VFS_PAGES_IO_TRACING
602 
603 namespace VFSPagesIOTracing {
604 
605 class PagesIOTraceEntry : public AbstractTraceEntry {
606 protected:
607 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
608 		const generic_io_vec* vecs, uint32 count, uint32 flags,
609 		generic_size_t bytesRequested, status_t status,
610 		generic_size_t bytesTransferred)
611 		:
612 		fVnode(vnode),
613 		fMountID(vnode->mount->id),
614 		fNodeID(vnode->id),
615 		fCookie(cookie),
616 		fPos(pos),
617 		fCount(count),
618 		fFlags(flags),
619 		fBytesRequested(bytesRequested),
620 		fStatus(status),
621 		fBytesTransferred(bytesTransferred)
622 	{
623 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
624 			sizeof(generic_io_vec) * count, false);
625 	}
626 
627 	void AddDump(TraceOutput& out, const char* mode)
628 	{
629 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
630 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
631 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
632 			(uint64)fBytesRequested);
633 
634 		if (fVecs != NULL) {
635 			for (uint32 i = 0; i < fCount; i++) {
636 				if (i > 0)
637 					out.Print(", ");
638 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
639 					(uint64)fVecs[i].length);
640 			}
641 		}
642 
643 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
644 			"transferred: %" B_PRIu64, fFlags, fStatus,
645 			(uint64)fBytesTransferred);
646 	}
647 
648 protected:
649 	struct vnode*	fVnode;
650 	dev_t			fMountID;
651 	ino_t			fNodeID;
652 	void*			fCookie;
653 	off_t			fPos;
654 	generic_io_vec*	fVecs;
655 	uint32			fCount;
656 	uint32			fFlags;
657 	generic_size_t	fBytesRequested;
658 	status_t		fStatus;
659 	generic_size_t	fBytesTransferred;
660 };
661 
662 
663 class ReadPages : public PagesIOTraceEntry {
664 public:
665 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
666 		const generic_io_vec* vecs, uint32 count, uint32 flags,
667 		generic_size_t bytesRequested, status_t status,
668 		generic_size_t bytesTransferred)
669 		:
670 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
671 			bytesRequested, status, bytesTransferred)
672 	{
673 		Initialized();
674 	}
675 
676 	virtual void AddDump(TraceOutput& out)
677 	{
678 		PagesIOTraceEntry::AddDump(out, "read");
679 	}
680 };
681 
682 
683 class WritePages : public PagesIOTraceEntry {
684 public:
685 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
686 		const generic_io_vec* vecs, uint32 count, uint32 flags,
687 		generic_size_t bytesRequested, status_t status,
688 		generic_size_t bytesTransferred)
689 		:
690 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
691 			bytesRequested, status, bytesTransferred)
692 	{
693 		Initialized();
694 	}
695 
696 	virtual void AddDump(TraceOutput& out)
697 	{
698 		PagesIOTraceEntry::AddDump(out, "write");
699 	}
700 };
701 
702 }	// namespace VFSPagesIOTracing
703 
704 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
705 #else
706 #	define TPIO(x) ;
707 #endif	// VFS_PAGES_IO_TRACING
708 
709 
710 /*! Finds the mounted device (the fs_mount structure) with the given ID.
711 	Note, you must hold the sMountMutex lock when you call this function.
712 */
713 static struct fs_mount*
714 find_mount(dev_t id)
715 {
716 	ASSERT_LOCKED_MUTEX(&sMountMutex);
717 
718 	return sMountsTable->Lookup(id);
719 }
720 
721 
722 static status_t
723 get_mount(dev_t id, struct fs_mount** _mount)
724 {
725 	struct fs_mount* mount;
726 
727 	ReadLocker nodeLocker(sVnodeLock);
728 	MutexLocker mountLocker(sMountMutex);
729 
730 	mount = find_mount(id);
731 	if (mount == NULL)
732 		return B_BAD_VALUE;
733 
734 	struct vnode* rootNode = mount->root_vnode;
735 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
736 		|| rootNode->ref_count == 0) {
737 		// might have been called during a mount/unmount operation
738 		return B_BUSY;
739 	}
740 
741 	inc_vnode_ref_count(rootNode);
742 	*_mount = mount;
743 	return B_OK;
744 }
745 
746 
747 static void
748 put_mount(struct fs_mount* mount)
749 {
750 	if (mount)
751 		put_vnode(mount->root_vnode);
752 }
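

// A usage sketch (hypothetical caller): get_mount()/put_mount() bracket
// accesses to a mount; the reference to its root vnode keeps the mount
// from being unmounted in the meantime.
#if 0
	struct fs_mount* mount;
	if (get_mount(id, &mount) == B_OK) {
		// ... safely access the mount here
		put_mount(mount);
	}
#endif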
753 
754 
755 /*!	Tries to open the specified file system module.
756 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
757 	Returns a pointer to the file system module interface, or NULL if it
758 	could not open the module.
759 */
760 static file_system_module_info*
761 get_file_system(const char* fsName)
762 {
763 	char name[B_FILE_NAME_LENGTH];
764 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
765 		// construct module name if we didn't get one
766 		// (we currently support only one API)
767 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
768 		fsName = NULL;
769 	}
770 
771 	file_system_module_info* info;
772 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
773 		return NULL;
774 
775 	return info;
776 }
777 
778 
779 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
780 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
781 	The name is allocated for you, and you have to free() it when you're
782 	done with it.
783 	Returns NULL if the required memory is not available.
784 */
785 static char*
786 get_file_system_name(const char* fsName)
787 {
788 	const size_t length = strlen("file_systems/");
789 
790 	if (strncmp(fsName, "file_systems/", length)) {
791 		// the name already seems to be the module's file name
792 		return strdup(fsName);
793 	}
794 
795 	fsName += length;
796 	const char* end = strchr(fsName, '/');
797 	if (end == NULL) {
798 		// this doesn't seem to be a valid name, but well...
799 		return strdup(fsName);
800 	}
801 
802 	// cut off the trailing /v1
803 
804 	char* name = (char*)malloc(end + 1 - fsName);
805 	if (name == NULL)
806 		return NULL;
807 
808 	strlcpy(name, fsName, end + 1 - fsName);
809 	return name;
810 }
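

// For example (hypothetical call): both "bfs" and "file_systems/bfs/v1"
// map to the fs_info.fsh_name style name "bfs".
#if 0
	char* name = get_file_system_name("file_systems/bfs/v1");
	// name is now "bfs" -- the caller must free() it
	free(name);
#endif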
811 
812 
813 /*!	Accepts a list of file system names separated by a colon, one for each
814 	layer and returns the file system name for the specified layer.
815 	The name is allocated for you, and you have to free() it when you're
816 	done with it.
817 	Returns NULL if the required memory is not available or if there is no
818 	name for the specified layer.
819 */
820 static char*
821 get_file_system_name_for_layer(const char* fsNames, int32 layer)
822 {
823 	while (layer >= 0) {
824 		const char* end = strchr(fsNames, ':');
825 		if (end == NULL) {
826 			if (layer == 0)
827 				return strdup(fsNames);
828 			return NULL;
829 		}
830 
831 		if (layer == 0) {
832 			size_t length = end - fsNames + 1;
833 			char* result = (char*)malloc(length);
834 			if (result != NULL) strlcpy(result, fsNames, length);
835 			return result;
836 		}
837 
838 		fsNames = end + 1;
839 		layer--;
840 	}
841 
842 	return NULL;
843 }
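

// For example (hypothetical input "encfs:bfs"): layer 0 yields "encfs",
// layer 1 yields "bfs", and layer 2 yields NULL.
#if 0
	char* layerName = get_file_system_name_for_layer("encfs:bfs", 1);
	// layerName is now "bfs" -- the caller must free() it
	free(layerName);
#endif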
844 
845 
846 static void
847 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
848 {
849 	RecursiveLocker _(mount->rlock);
850 	mount->vnodes.Add(vnode);
851 }
852 
853 
854 static void
855 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
856 {
857 	RecursiveLocker _(mount->rlock);
858 	mount->vnodes.Remove(vnode);
859 }
860 
861 
862 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
863 
864 	The caller must hold the sVnodeLock (read lock at least).
865 
866 	\param mountID the mount ID.
867 	\param vnodeID the node ID.
868 
869 	\return The vnode structure, if it was found in the hash table, \c NULL
870 			otherwise.
871 */
872 static struct vnode*
873 lookup_vnode(dev_t mountID, ino_t vnodeID)
874 {
875 	struct vnode_hash_key key;
876 
877 	key.device = mountID;
878 	key.vnode = vnodeID;
879 
880 	return sVnodeTable->Lookup(key);
881 }
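

// A usage sketch: the caller must hold sVnodeLock (read-locked at least),
// e.g.:
#if 0
	ReadLocker locker(sVnodeLock);
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	// vnode is NULL if the node is not in the table; it may still be busy
#endif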
882 
883 
884 /*!	Creates a new vnode with the given mount and node ID.
885 	If the node already exists, it is returned instead and no new node is
886 	created. In either case -- but not if an error occurs -- the function write
887 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
888 	error the lock is not held on return.
889 
890 	\param mountID The mount ID.
891 	\param vnodeID The vnode ID.
892 	\param _vnode Will be set to the new vnode on success.
893 	\param _nodeCreated Will be set to \c true when the returned vnode has
894 		been newly created, \c false when it already existed. Will not be
895 		changed on error.
896 	\return \c B_OK, when the vnode was successfully created and inserted or
897 		a node with the given ID was found, \c B_NO_MEMORY or
898 		\c B_ENTRY_NOT_FOUND on error.
899 */
900 static status_t
901 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
902 	bool& _nodeCreated)
903 {
904 	FUNCTION(("create_new_vnode_and_lock()\n"));
905 
906 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
907 	if (vnode == NULL)
908 		return B_NO_MEMORY;
909 
910 	// initialize basic values
911 	memset(vnode, 0, sizeof(struct vnode));
912 	vnode->device = mountID;
913 	vnode->id = vnodeID;
914 	vnode->ref_count = 1;
915 	vnode->SetBusy(true);
916 
917 	// look up the node -- it might have been added by someone else in the
918 	// meantime
919 	rw_lock_write_lock(&sVnodeLock);
920 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
921 	if (existingVnode != NULL) {
922 		free(vnode);
923 		_vnode = existingVnode;
924 		_nodeCreated = false;
925 		return B_OK;
926 	}
927 
928 	// get the mount structure
929 	mutex_lock(&sMountMutex);
930 	vnode->mount = find_mount(mountID);
931 	if (!vnode->mount || vnode->mount->unmounting) {
932 		mutex_unlock(&sMountMutex);
933 		rw_lock_write_unlock(&sVnodeLock);
934 		free(vnode);
935 		return B_ENTRY_NOT_FOUND;
936 	}
937 
938 	// add the vnode to the mount's node list and the hash table
939 	sVnodeTable->Insert(vnode);
940 	add_vnode_to_mount_list(vnode, vnode->mount);
941 
942 	mutex_unlock(&sMountMutex);
943 
944 	_vnode = vnode;
945 	_nodeCreated = true;
946 
947 	// keep the vnode lock locked
948 	return B_OK;
949 }
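

// A caller sketch (cf. get_vnode() below): on success sVnodeLock is left
// write-locked and must be unlocked by the caller once the new, busy
// vnode has been set up.
#if 0
	struct vnode* vnode;
	bool nodeCreated;
	if (create_new_vnode_and_lock(mountID, vnodeID, vnode, nodeCreated)
			== B_OK) {
		// ... inspect/initialize the vnode
		rw_lock_write_unlock(&sVnodeLock);
	}
#endif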
950 
951 
952 /*!	Frees the vnode and all resources it has acquired, and removes
953 	it from the vnode hash as well as from its mount structure.
954 	Will also make sure that any cache modifications are written back.
955 */
956 static void
957 free_vnode(struct vnode* vnode, bool reenter)
958 {
959 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
960 		vnode);
961 
962 	// write back any changes in this vnode's cache -- but only
963 	// if the vnode won't be deleted, in which case the changes
964 	// will be discarded
965 
966 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
967 		FS_CALL_NO_PARAMS(vnode, fsync);
968 
969 	// Note: If this vnode has a cache attached, there will still be two
970 	// references to that cache at this point. The last one belongs to the vnode
971 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
972 	// cache. Each but the last reference to a cache also includes a reference
973 	// to the vnode. The file cache, however, released its reference (cf.
974 	// file_cache_create()), so that this vnode's ref count has the chance to
975 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
976 	// cache reference to be released, which will also release a (no longer
977 	// existing) vnode reference. To avoid problems, we set the vnode's ref
978 	// count, so that it will neither become negative nor 0.
979 	vnode->ref_count = 2;
980 
981 	if (!vnode->IsUnpublished()) {
982 		if (vnode->IsRemoved())
983 			FS_CALL(vnode, remove_vnode, reenter);
984 		else
985 			FS_CALL(vnode, put_vnode, reenter);
986 	}
987 
988 	// If the vnode has a VMCache attached, make sure that it won't try to get
989 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
990 	// long as the vnode is busy and in the hash, that won't happen, but as
991 	// soon as we've removed it from the hash, it could reload the vnode -- with
992 	// a new cache attached!
993 	if (vnode->cache != NULL)
994 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
995 
996 	// The file system has removed the resources of the vnode now, so we can
997 	// make it available again (by removing the busy vnode from the hash).
998 	rw_lock_write_lock(&sVnodeLock);
999 	sVnodeTable->Remove(vnode);
1000 	rw_lock_write_unlock(&sVnodeLock);
1001 
1002 	// if we have a VMCache attached, remove it
1003 	if (vnode->cache)
1004 		vnode->cache->ReleaseRef();
1005 
1006 	vnode->cache = NULL;
1007 
1008 	remove_vnode_from_mount_list(vnode, vnode->mount);
1009 
1010 	free(vnode);
1011 }
1012 
1013 
1014 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1015 	if the counter dropped to 0.
1016 
1017 	The caller must, of course, own a reference to the vnode to call this
1018 	function.
1019 	The caller must not hold the sVnodeLock or the sMountMutex.
1020 
1021 	\param vnode the vnode.
1022 	\param alwaysFree don't move this vnode into the unused list, but really
1023 		   delete it if possible.
1024 	\param reenter \c true, if this function is called (indirectly) from within
1025 		   a file system. This will be passed to file system hooks only.
1026 	\return \c B_OK, if everything went fine, an error code otherwise.
1027 */
1028 static status_t
1029 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1030 {
1031 	ReadLocker locker(sVnodeLock);
1032 	AutoLocker<Vnode> nodeLocker(vnode);
1033 
1034 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1035 
1036 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1037 
1038 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1039 		vnode->ref_count));
1040 
1041 	if (oldRefCount != 1)
1042 		return B_OK;
1043 
1044 	if (vnode->IsBusy())
1045 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1046 
1047 	bool freeNode = false;
1048 	bool freeUnusedNodes = false;
1049 
1050 	// Just insert the vnode into an unused list if we don't need
1051 	// to delete it
1052 	if (vnode->IsRemoved() || alwaysFree) {
1053 		vnode_to_be_freed(vnode);
1054 		vnode->SetBusy(true);
1055 		freeNode = true;
1056 	} else
1057 		freeUnusedNodes = vnode_unused(vnode);
1058 
1059 	nodeLocker.Unlock();
1060 	locker.Unlock();
1061 
1062 	if (freeNode)
1063 		free_vnode(vnode, reenter);
1064 	else if (freeUnusedNodes)
1065 		free_unused_vnodes();
1066 
1067 	return B_OK;
1068 }
1069 
1070 
1071 /*!	\brief Increments the reference counter of the given vnode.
1072 
1073 	The caller must make sure that the node isn't deleted while this function
1074 	is called. This can be done either:
1075 	- by ensuring that a reference to the node exists and remains in existence,
1076 	  or
1077 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1078 	  or by holding sVnodeLock write locked.
1079 
1080 	In the second case the caller is responsible for dealing with the ref count
1081 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1082 	node is busy in the first place, and 2. vnode_used() must be called for the
1083 	node.
1084 
1085 	\param vnode the vnode.
1086 */
1087 static void
1088 inc_vnode_ref_count(struct vnode* vnode)
1089 {
1090 	atomic_add(&vnode->ref_count, 1);
1091 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1092 		vnode->ref_count));
1093 }
1094 
1095 
1096 static bool
1097 is_special_node_type(int type)
1098 {
1099 	// at the moment only FIFOs are supported
1100 	return S_ISFIFO(type);
1101 }
1102 
1103 
1104 static status_t
1105 create_special_sub_node(struct vnode* vnode, uint32 flags)
1106 {
1107 	if (S_ISFIFO(vnode->Type()))
1108 		return create_fifo_vnode(vnode->mount->volume, vnode);
1109 
1110 	return B_BAD_VALUE;
1111 }
1112 
1113 
1114 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1115 
1116 	If the node is not yet in memory, it will be loaded.
1117 
1118 	The caller must not hold the sVnodeLock or the sMountMutex.
1119 
1120 	\param mountID the mount ID.
1121 	\param vnodeID the node ID.
1122 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1123 		   retrieved vnode structure shall be written.
1124 	\param reenter \c true, if this function is called (indirectly) from within
1125 		   a file system.
1126 	\return \c B_OK, if everything went fine, an error code otherwise.
1127 */
1128 static status_t
1129 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1130 	int reenter)
1131 {
1132 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1133 		mountID, vnodeID, _vnode));
1134 
1135 	rw_lock_read_lock(&sVnodeLock);
1136 
1137 	int32 tries = 2000;
1138 		// try for 10 secs
1139 restart:
1140 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1141 	AutoLocker<Vnode> nodeLocker(vnode);
1142 
1143 	if (vnode && vnode->IsBusy()) {
1144 		nodeLocker.Unlock();
1145 		rw_lock_read_unlock(&sVnodeLock);
1146 		if (!canWait || --tries < 0) {
1147 			// vnode doesn't seem to become unbusy
1148 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is not becoming unbusy!\n",
1149 				mountID, vnodeID);
1150 			return B_BUSY;
1151 		}
1152 		snooze(5000); // 5 ms
1153 		rw_lock_read_lock(&sVnodeLock);
1154 		goto restart;
1155 	}
1156 
1157 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1158 
1159 	status_t status;
1160 
1161 	if (vnode) {
1162 		if (vnode->ref_count == 0) {
1163 			// this vnode has been unused before
1164 			vnode_used(vnode);
1165 		}
1166 		inc_vnode_ref_count(vnode);
1167 
1168 		nodeLocker.Unlock();
1169 		rw_lock_read_unlock(&sVnodeLock);
1170 	} else {
1171 		// we need to create a new vnode and read it in
1172 		rw_lock_read_unlock(&sVnodeLock);
1173 			// unlock -- create_new_vnode_and_lock() write-locks on success
1174 		bool nodeCreated;
1175 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1176 			nodeCreated);
1177 		if (status != B_OK)
1178 			return status;
1179 
1180 		if (!nodeCreated) {
1181 			rw_lock_read_lock(&sVnodeLock);
1182 			rw_lock_write_unlock(&sVnodeLock);
1183 			goto restart;
1184 		}
1185 
1186 		rw_lock_write_unlock(&sVnodeLock);
1187 
1188 		int type;
1189 		uint32 flags;
1190 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1191 			&flags, reenter);
1192 		if (status == B_OK && vnode->private_node == NULL)
1193 			status = B_BAD_VALUE;
1194 
1195 		bool gotNode = status == B_OK;
1196 		bool publishSpecialSubNode = false;
1197 		if (gotNode) {
1198 			vnode->SetType(type);
1199 			publishSpecialSubNode = is_special_node_type(type)
1200 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1201 		}
1202 
1203 		if (gotNode && publishSpecialSubNode)
1204 			status = create_special_sub_node(vnode, flags);
1205 
1206 		if (status != B_OK) {
1207 			if (gotNode)
1208 				FS_CALL(vnode, put_vnode, reenter);
1209 
1210 			rw_lock_write_lock(&sVnodeLock);
1211 			sVnodeTable->Remove(vnode);
1212 			remove_vnode_from_mount_list(vnode, vnode->mount);
1213 			rw_lock_write_unlock(&sVnodeLock);
1214 
1215 			free(vnode);
1216 			return status;
1217 		}
1218 
1219 		rw_lock_read_lock(&sVnodeLock);
1220 		vnode->Lock();
1221 
1222 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1223 		vnode->SetBusy(false);
1224 
1225 		vnode->Unlock();
1226 		rw_lock_read_unlock(&sVnodeLock);
1227 	}
1228 
1229 	TRACE(("get_vnode: returning %p\n", vnode));
1230 
1231 	*_vnode = vnode;
1232 	return B_OK;
1233 }
1234 
1235 
1236 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1237 	if the counter dropped to 0.
1238 
1239 	The caller must, of course, own a reference to the vnode to call this
1240 	function.
1241 	The caller must not hold the sVnodeLock or the sMountMutex.
1242 
1243 	\param vnode the vnode.
1244 */
1245 static inline void
1246 put_vnode(struct vnode* vnode)
1247 {
1248 	dec_vnode_ref_count(vnode, false, false);
1249 }
1250 
1251 
1252 static void
1253 free_unused_vnodes(int32 level)
1254 {
1255 	unused_vnodes_check_started();
1256 
1257 	if (level == B_NO_LOW_RESOURCE) {
1258 		unused_vnodes_check_done();
1259 		return;
1260 	}
1261 
1262 	flush_hot_vnodes();
1263 
1264 	// determine how many nodes to free
1265 	uint32 count = 1;
1266 	{
1267 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1268 
1269 		switch (level) {
1270 			case B_LOW_RESOURCE_NOTE:
1271 				count = sUnusedVnodes / 100;
1272 				break;
1273 			case B_LOW_RESOURCE_WARNING:
1274 				count = sUnusedVnodes / 10;
1275 				break;
1276 			case B_LOW_RESOURCE_CRITICAL:
1277 				count = sUnusedVnodes;
1278 				break;
1279 		}
1280 
1281 		if (count > sUnusedVnodes)
1282 			count = sUnusedVnodes;
1283 	}
1284 
1285 	// Write back the modified pages of some unused vnodes and free them.
1286 
1287 	for (uint32 i = 0; i < count; i++) {
1288 		ReadLocker vnodesReadLocker(sVnodeLock);
1289 
1290 		// get the first node
1291 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1292 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1293 			&sUnusedVnodeList);
1294 		unusedVnodesLocker.Unlock();
1295 
1296 		if (vnode == NULL)
1297 			break;
1298 
1299 		// lock the node
1300 		AutoLocker<Vnode> nodeLocker(vnode);
1301 
1302 		// Check whether the node is still unused -- since we only append to the
1303 		// tail of the unused queue, the vnode should still be at its head.
1304 		// Alternatively we could check its ref count for 0 and its busy flag,
1305 		// but if the node is no longer at the head of the queue, it means it
1306 		// has been touched in the meantime, i.e. it is no longer the least
1307 		// recently used unused vnode and we rather don't free it.
1308 		unusedVnodesLocker.Lock();
1309 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1310 			continue;
1311 		unusedVnodesLocker.Unlock();
1312 
1313 		ASSERT(!vnode->IsBusy());
1314 
1315 		// grab a reference
1316 		inc_vnode_ref_count(vnode);
1317 		vnode_used(vnode);
1318 
1319 		// write back changes and free the node
1320 		nodeLocker.Unlock();
1321 		vnodesReadLocker.Unlock();
1322 
1323 		if (vnode->cache != NULL)
1324 			vnode->cache->WriteModified();
1325 
1326 		dec_vnode_ref_count(vnode, true, false);
1327 			// this should free the vnode when it's still unused
1328 	}
1329 
1330 	unused_vnodes_check_done();
1331 }
1332 
1333 
1334 /*!	Gets the vnode the given vnode is covering.
1335 
1336 	The caller must have \c sVnodeLock read-locked at least.
1337 
1338 	The function returns a reference to the retrieved vnode (if any); the
1339 	caller is responsible for putting it.
1340 
1341 	\param vnode The vnode whose covered node shall be returned.
1342 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1343 		vnode.
1344 */
1345 static inline Vnode*
1346 get_covered_vnode_locked(Vnode* vnode)
1347 {
1348 	if (Vnode* coveredNode = vnode->covers) {
1349 		while (coveredNode->covers != NULL)
1350 			coveredNode = coveredNode->covers;
1351 
1352 		inc_vnode_ref_count(coveredNode);
1353 		return coveredNode;
1354 	}
1355 
1356 	return NULL;
1357 }
1358 
1359 
1360 /*!	Gets the vnode the given vnode is covering.
1361 
1362 	The caller must not hold \c sVnodeLock. Note that this implies a race
1363 	condition, since the situation can change at any time.
1364 
1365 	The function returns a reference to the retrieved vnode (if any); the
1366 	caller is responsible for putting it.
1367 
1368 	\param vnode The vnode whose covered node shall be returned.
1369 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1370 		vnode.
1371 */
1372 static inline Vnode*
1373 get_covered_vnode(Vnode* vnode)
1374 {
1375 	if (!vnode->IsCovering())
1376 		return NULL;
1377 
1378 	ReadLocker vnodeReadLocker(sVnodeLock);
1379 	return get_covered_vnode_locked(vnode);
1380 }
1381 
1382 
1383 /*!	Gets the vnode the given vnode is covered by.
1384 
1385 	The caller must have \c sVnodeLock read-locked at least.
1386 
1387 	The function returns a reference to the retrieved vnode (if any); the
1388 	caller is responsible for putting it.
1389 
1390 	\param vnode The vnode whose covering node shall be returned.
1391 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1392 		any vnode.
1393 */
1394 static Vnode*
1395 get_covering_vnode_locked(Vnode* vnode)
1396 {
1397 	if (Vnode* coveringNode = vnode->covered_by) {
1398 		while (coveringNode->covered_by != NULL)
1399 			coveringNode = coveringNode->covered_by;
1400 
1401 		inc_vnode_ref_count(coveringNode);
1402 		return coveringNode;
1403 	}
1404 
1405 	return NULL;
1406 }
1407 
1408 
1409 /*!	Gets the vnode the given vnode is covered by.
1410 
1411 	The caller must not hold \c sVnodeLock. Note that this implies a race
1412 	condition, since the situation can change at any time.
1413 
1414 	The function returns a reference to the retrieved vnode (if any); the
1415 	caller is responsible for putting it.
1416 
1417 	\param vnode The vnode whose covering node shall be returned.
1418 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1419 		any vnode.
1420 */
1421 static inline Vnode*
1422 get_covering_vnode(Vnode* vnode)
1423 {
1424 	if (!vnode->IsCovered())
1425 		return NULL;
1426 
1427 	ReadLocker vnodeReadLocker(sVnodeLock);
1428 	return get_covering_vnode_locked(vnode);
1429 }
1430 
1431 
1432 static void
1433 free_unused_vnodes()
1434 {
1435 	free_unused_vnodes(
1436 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1437 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1438 }
1439 
1440 
1441 static void
1442 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1443 {
1444 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1445 
1446 	free_unused_vnodes(level);
1447 }
1448 
1449 
1450 static inline void
1451 put_advisory_locking(struct advisory_locking* locking)
1452 {
1453 	release_sem(locking->lock);
1454 }
1455 
1456 
1457 /*!	Returns the advisory_locking object of the \a vnode if it
1458 	has one, and locks it.
1459 	You have to call put_advisory_locking() when you're done with
1460 	it.
1461 	Note, you must not have the vnode mutex locked when calling
1462 	this function.
1463 */
1464 static struct advisory_locking*
1465 get_advisory_locking(struct vnode* vnode)
1466 {
1467 	rw_lock_read_lock(&sVnodeLock);
1468 	vnode->Lock();
1469 
1470 	struct advisory_locking* locking = vnode->advisory_locking;
1471 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1472 
1473 	vnode->Unlock();
1474 	rw_lock_read_unlock(&sVnodeLock);
1475 
1476 	if (lock >= 0)
1477 		lock = acquire_sem(lock);
1478 	if (lock < 0) {
1479 		// This means the locking has been deleted in the mean time
1480 		// or had never existed in the first place - otherwise, we
1481 		// would get the lock at some point.
1482 		return NULL;
1483 	}
1484 
1485 	return locking;
1486 }
1487 
1488 
1489 /*!	Creates a locked advisory_locking object, and attaches it to the
1490 	given \a vnode.
1491 	Returns B_OK in case of success; if the vnode got such an object
1492 	from someone else in the meantime, that one is returned locked
1493 	instead.
1494 */
1495 static status_t
1496 create_advisory_locking(struct vnode* vnode)
1497 {
1498 	if (vnode == NULL)
1499 		return B_FILE_ERROR;
1500 
1501 	ObjectDeleter<advisory_locking> lockingDeleter;
1502 	struct advisory_locking* locking = NULL;
1503 
1504 	while (get_advisory_locking(vnode) == NULL) {
1505 		// no locking object set on the vnode yet, create one
1506 		if (locking == NULL) {
1507 			locking = new(std::nothrow) advisory_locking;
1508 			if (locking == NULL)
1509 				return B_NO_MEMORY;
1510 			lockingDeleter.SetTo(locking);
1511 
1512 			locking->wait_sem = create_sem(0, "advisory lock");
1513 			if (locking->wait_sem < 0)
1514 				return locking->wait_sem;
1515 
1516 			locking->lock = create_sem(0, "advisory locking");
1517 			if (locking->lock < 0)
1518 				return locking->lock;
1519 		}
1520 
1521 		// set our newly created locking object
1522 		ReadLocker _(sVnodeLock);
1523 		AutoLocker<Vnode> nodeLocker(vnode);
1524 		if (vnode->advisory_locking == NULL) {
1525 			vnode->advisory_locking = locking;
1526 			lockingDeleter.Detach();
1527 			return B_OK;
1528 		}
1529 	}
1530 
1531 	// The vnode already had a locking object. That's just as well.
1532 
1533 	return B_OK;
1534 }
1535 
1536 
1537 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1538 	with the advisory_lock \a lock.
1539 */
1540 static bool
1541 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1542 {
1543 	if (flock == NULL)
1544 		return true;
1545 
1546 	return lock->start <= flock->l_start - 1 + flock->l_len
1547 		&& lock->end >= flock->l_start;
1548 }
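

// A worked example (hypothetical values): a lock covering [100, 199] and
// an flock with l_start = 150, l_len = 100 (i.e. [150, 249]) intersect,
// since 100 <= 150 - 1 + 100 and 199 >= 150.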
1549 
1550 
1551 /*!	Tests whether acquiring a lock would block.
1552 */
1553 static status_t
1554 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1555 {
1556 	const int requestedType = flock->l_type;
1557 	flock->l_type = F_UNLCK;
1558 	struct advisory_locking* locking = get_advisory_locking(vnode);
1559 	if (locking == NULL)
1560 		return B_OK;
1561 
1562 	team_id team = team_get_current_team_id();
1563 
1564 	LockList::Iterator iterator = locking->locks.GetIterator();
1565 	while (iterator.HasNext()) {
1566 		struct advisory_lock* lock = iterator.Next();
1567 
1568 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1569 			// locks do overlap
1570 			if (requestedType != F_RDLCK || !lock->shared) {
1571 				// collision
1572 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1573 				flock->l_whence = SEEK_SET;
1574 				flock->l_start = lock->start;
1575 				flock->l_len = lock->end - lock->start + 1;
1576 				flock->l_pid = lock->team;
1577 				break;
1578 			}
1579 		}
1580 	}
1581 
1582 	put_advisory_locking(locking);
1583 	return B_OK;
1584 }
1585 
1586 
1587 /*!	Removes the specified lock, or all locks of the calling team
1588 	if \a flock is NULL.
1589 */
1590 static status_t
1591 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1592 {
1593 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1594 
1595 	struct advisory_locking* locking = get_advisory_locking(vnode);
1596 	if (locking == NULL)
1597 		return B_OK;
1598 
1599 	// TODO: use the thread ID instead??
1600 	team_id team = team_get_current_team_id();
1601 	pid_t session = thread_get_current_thread()->team->session_id;
1602 
1603 	// find matching lock entries
1604 
1605 	LockList::Iterator iterator = locking->locks.GetIterator();
1606 	while (iterator.HasNext()) {
1607 		struct advisory_lock* lock = iterator.Next();
1608 		bool removeLock = false;
1609 
1610 		if (lock->session == session)
1611 			removeLock = true;
1612 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1613 			bool endsBeyond = false;
1614 			bool startsBefore = false;
1615 			if (flock != NULL) {
1616 				startsBefore = lock->start < flock->l_start;
1617 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1618 			}
1619 
1620 			if (!startsBefore && !endsBeyond) {
1621 				// lock is completely contained in flock
1622 				removeLock = true;
1623 			} else if (startsBefore && !endsBeyond) {
1624 				// cut the end of the lock
1625 				lock->end = flock->l_start - 1;
1626 			} else if (!startsBefore && endsBeyond) {
1627 				// cut the start of the lock
1628 				lock->start = flock->l_start + flock->l_len;
1629 			} else {
1630 				// divide the lock into two locks
1631 				struct advisory_lock* secondLock = new(std::nothrow) advisory_lock;
1632 				if (secondLock == NULL) {
1633 					// TODO: we should probably revert the locks we already
1634 					// changed... (ie. allocate upfront)
1635 					put_advisory_locking(locking);
1636 					return B_NO_MEMORY;
1637 				}
1638 
1639 				secondLock->team = lock->team;
1640 				secondLock->session = lock->session;
1641 				secondLock->shared = lock->shared;
1642 				// values must already be normalized when getting here
1643 				secondLock->start = flock->l_start + flock->l_len;
1644 				secondLock->end = lock->end;
1645 					// (the still original end; lock->end is cut below)
1646 				lock->end = flock->l_start - 1;
1647 
1648 				locking->locks.Add(secondLock);
1649 			}
1650 		}
1651 
1652 		if (removeLock) {
1653 			// this lock is no longer used
1654 			iterator.Remove();
1655 			delete lock;
1656 		}
1657 	}
1658 
1659 	bool removeLocking = locking->locks.IsEmpty();
1660 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1661 
1662 	put_advisory_locking(locking);
1663 
1664 	if (removeLocking) {
1665 		// We can remove the whole advisory locking structure; it's no
1666 		// longer used
1667 		locking = get_advisory_locking(vnode);
1668 		if (locking != NULL) {
1669 			ReadLocker locker(sVnodeLock);
1670 			AutoLocker<Vnode> nodeLocker(vnode);
1671 
1672 			// the locking could have been changed in the mean time
1673 			if (locking->locks.IsEmpty()) {
1674 				vnode->advisory_locking = NULL;
1675 				nodeLocker.Unlock();
1676 				locker.Unlock();
1677 
1678 				// we've detached the locking from the vnode, so we can
1679 				// safely delete it
1680 				delete locking;
1681 			} else {
1682 				// the locking is in use again
1683 				nodeLocker.Unlock();
1684 				locker.Unlock();
1685 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1686 			}
1687 		}
1688 	}
1689 
1690 	return B_OK;
1691 }
1692 
1693 
1694 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1695 	will wait for the lock to become available, if there are any collisions
1696 	(if \a wait is \c false, it returns B_PERMISSION_DENIED, or B_WOULD_BLOCK
1697 	for flock() style locks, in this case).
1698 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1699 	BSD flock() semantics are used, that is, all children can unlock the file
1700 	in question (we even allow parents to remove the lock, though, but that
1701 	seems to be in line with what the BSDs are doing).
1702 */
1703 static status_t
1704 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1705 	bool wait)
1706 {
1707 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1708 		vnode, flock, wait ? "yes" : "no"));
1709 
1710 	bool shared = flock->l_type == F_RDLCK;
1711 	status_t status = B_OK;
1712 
1713 	// TODO: do deadlock detection!
1714 
1715 	struct advisory_locking* locking;
1716 
1717 	while (true) {
1718 		// if this vnode has an advisory_locking structure attached,
1719 		// lock that one and search for any colliding file lock
1720 		status = create_advisory_locking(vnode);
1721 		if (status != B_OK)
1722 			return status;
1723 
1724 		locking = vnode->advisory_locking;
1725 		team_id team = team_get_current_team_id();
1726 		sem_id waitForLock = -1;
1727 
1728 		// test for collisions
1729 		LockList::Iterator iterator = locking->locks.GetIterator();
1730 		while (iterator.HasNext()) {
1731 			struct advisory_lock* lock = iterator.Next();
1732 
1733 			// TODO: locks from the same team might be joinable!
1734 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1735 				// locks do overlap
1736 				if (!shared || !lock->shared) {
1737 					// we need to wait
1738 					waitForLock = locking->wait_sem;
1739 					break;
1740 				}
1741 			}
1742 		}
1743 
1744 		if (waitForLock < 0)
1745 			break;
1746 
1747 		// We need to wait. Do that or fail now, if we've been asked not to.
1748 
1749 		if (!wait) {
1750 			put_advisory_locking(locking);
1751 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1752 		}
1753 
1754 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1755 			B_CAN_INTERRUPT, 0);
1756 		if (status != B_OK && status != B_BAD_SEM_ID)
1757 			return status;
1758 
1759 		// We have been notified, but we need to re-lock the locking object. So
1760 		// go another round...
1761 	}
1762 
1763 	// install new lock
1764 
1765 	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
1767 	if (lock == NULL) {
1768 		put_advisory_locking(locking);
1769 		return B_NO_MEMORY;
1770 	}
1771 
1772 	lock->team = team_get_current_team_id();
1773 	lock->session = session;
1774 	// values must already be normalized when getting here
1775 	lock->start = flock->l_start;
1776 	lock->end = flock->l_start - 1 + flock->l_len;
1777 	lock->shared = shared;
1778 
1779 	locking->locks.Add(lock);
1780 	put_advisory_locking(locking);
1781 
1782 	return status;
1783 }
1784 
1785 
1786 /*!	Normalizes the \a flock structure to make it easier to compare the
1787 	structure with others. The l_start and l_len fields are set to absolute
1788 	values according to the l_whence field.
1789 */
1790 static status_t
1791 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1792 {
1793 	switch (flock->l_whence) {
1794 		case SEEK_SET:
1795 			break;
1796 		case SEEK_CUR:
1797 			flock->l_start += descriptor->pos;
1798 			break;
1799 		case SEEK_END:
1800 		{
1801 			struct vnode* vnode = descriptor->u.vnode;
1802 			struct stat stat;
1803 			status_t status;
1804 
1805 			if (!HAS_FS_CALL(vnode, read_stat))
1806 				return B_UNSUPPORTED;
1807 
1808 			status = FS_CALL(vnode, read_stat, &stat);
1809 			if (status != B_OK)
1810 				return status;
1811 
1812 			flock->l_start += stat.st_size;
1813 			break;
1814 		}
1815 		default:
1816 			return B_BAD_VALUE;
1817 	}
1818 
1819 	if (flock->l_start < 0)
1820 		flock->l_start = 0;
1821 	if (flock->l_len == 0)
1822 		flock->l_len = OFF_MAX;
1823 
1824 	// don't let the offset and length overflow
1825 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1826 		flock->l_len = OFF_MAX - flock->l_start;
1827 
1828 	if (flock->l_len < 0) {
1829 		// a negative length reverses the region
1830 		flock->l_start += flock->l_len;
1831 		flock->l_len = -flock->l_len;
1832 	}
1833 
1834 	return B_OK;
1835 }
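

// A worked example (hypothetical values): with l_whence = SEEK_SET,
// l_start = 100, and l_len = -50, the region is reversed to l_start = 50,
// l_len = 50; an l_len of 0 means "lock up to the end", i.e. to OFF_MAX.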
1836 
1837 
1838 static void
1839 replace_vnode_if_disconnected(struct fs_mount* mount,
1840 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1841 	struct vnode* fallBack, bool lockRootLock)
1842 {
1843 	struct vnode* givenVnode = vnode;
1844 	bool vnodeReplaced = false;
1845 
1846 	ReadLocker vnodeReadLocker(sVnodeLock);
1847 
1848 	if (lockRootLock)
1849 		mutex_lock(&sIOContextRootLock);
1850 
1851 	while (vnode != NULL && vnode->mount == mount
1852 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1853 		if (vnode->covers != NULL) {
1854 			// redirect the vnode to the covered vnode
1855 			vnode = vnode->covers;
1856 		} else
1857 			vnode = fallBack;
1858 
1859 		vnodeReplaced = true;
1860 	}
1861 
1862 	// If we've replaced the node, grab a reference for the new one.
1863 	if (vnodeReplaced && vnode != NULL)
1864 		inc_vnode_ref_count(vnode);
1865 
1866 	if (lockRootLock)
1867 		mutex_unlock(&sIOContextRootLock);
1868 
1869 	vnodeReadLocker.Unlock();
1870 
1871 	if (vnodeReplaced)
1872 		put_vnode(givenVnode);
1873 }
1874 
1875 
1876 /*!	Disconnects all file descriptors that are associated with the
1877 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1878 	\a mount object.
1879 
1880 	Note, after you've called this function, there might still be ongoing
1881 	accesses - they won't be interrupted if they were already in progress.
1882 	However, any subsequent access will fail.
1883 
1884 	This is not a cheap function and should be used with care and rarely.
1885 	TODO: there is currently no means to stop a blocking read/write!
1886 */
1887 static void
1888 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1889 	struct vnode* vnodeToDisconnect)
1890 {
1891 	// iterate over all teams and peek into their file descriptors
1892 	TeamListIterator teamIterator;
1893 	while (Team* team = teamIterator.Next()) {
1894 		BReference<Team> teamReference(team, true);
1895 
1896 		// lock the I/O context
1897 		io_context* context = team->io_context;
1898 		MutexLocker contextLocker(context->io_mutex);
1899 
1900 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1901 			sRoot, true);
1902 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1903 			sRoot, false);
1904 
1905 		for (uint32 i = 0; i < context->table_size; i++) {
1906 			if (struct file_descriptor* descriptor = context->fds[i]) {
1907 				inc_fd_ref_count(descriptor);
1908 
1909 				// if this descriptor points at this mount, we
1910 				// need to disconnect it to be able to unmount
1911 				struct vnode* vnode = fd_vnode(descriptor);
1912 				if (vnodeToDisconnect != NULL) {
1913 					if (vnode == vnodeToDisconnect)
1914 						disconnect_fd(descriptor);
1915 				} else if ((vnode != NULL && vnode->mount == mount)
1916 					|| (vnode == NULL && descriptor->u.mount == mount))
1917 					disconnect_fd(descriptor);
1918 
1919 				put_fd(descriptor);
1920 			}
1921 		}
1922 	}
1923 }
1924 
1925 
1926 /*!	\brief Gets the root node of the current IO context.
1927 	If \a kernel is \c true, the kernel IO context will be used.
1928 	The caller obtains a reference to the returned node.
1929 */
1930 struct vnode*
1931 get_root_vnode(bool kernel)
1932 {
1933 	if (!kernel) {
1934 		// Get the root vnode from the current team's IO context
1935 		struct io_context* context = get_current_io_context(kernel);
1936 
1937 		mutex_lock(&sIOContextRootLock);
1938 
1939 		struct vnode* root = context->root;
1940 		if (root != NULL)
1941 			inc_vnode_ref_count(root);
1942 
1943 		mutex_unlock(&sIOContextRootLock);
1944 
1945 		if (root != NULL)
1946 			return root;
1947 
1948 		// That should never happen.
1949 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1950 			"have a root\n", team_get_current_team_id());
1951 	}
1952 
1953 	inc_vnode_ref_count(sRoot);
1954 	return sRoot;
1955 }
1956 
1957 
1958 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
1959 
1960 	Given an arbitrary vnode (identified by mount and node ID), the function
1961 	checks whether the vnode is covered by another vnode. If it is, the
1962 	function returns the mount and node ID of the covering vnode. Otherwise
1963 	it simply returns the supplied mount and node ID.
1964 
1965 	In case of error (e.g. the supplied node could not be found) the variables
1966 	for storing the resolved mount and node ID remain untouched and an error
1967 	code is returned.
1968 
1969 	\param mountID The mount ID of the vnode in question.
1970 	\param nodeID The node ID of the vnode in question.
1971 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1972 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1973 	\return
1974 	- \c B_OK, if everything went fine,
1975 	- another error code, if something went wrong.
1976 */
1977 status_t
1978 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
1979 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1980 {
1981 	// get the node
1982 	struct vnode* node;
1983 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1984 	if (error != B_OK)
1985 		return error;
1986 
1987 	// resolve the node
1988 	if (Vnode* coveringNode = get_covering_vnode(node)) {
1989 		put_vnode(node);
1990 		node = coveringNode;
1991 	}
1992 
1993 	// set the return values
1994 	*resolvedMountID = node->device;
1995 	*resolvedNodeID = node->id;
1996 
1997 	put_vnode(node);
1998 
1999 	return B_OK;
2000 }
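
// Illustrative sketch, not part of the original source: resolving a mount
// point to the root of the volume mounted on it. "device" and "nodeID" are
// hypothetical.
#if 0
	dev_t resolvedDevice;
	ino_t resolvedNode;
	if (vfs_resolve_vnode_to_covering_vnode(device, nodeID, &resolvedDevice,
			&resolvedNode) == B_OK) {
		// if (device, nodeID) was covered by a mount, the resolved pair now
		// identifies the root vnode of that mount; otherwise it is unchanged
	}
#endif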
2001 
2002 
2003 /*!	\brief Gets the directory path and leaf name for a given path.
2004 
2005 	The supplied \a path is transformed to refer to the directory part of
2006 	the entry identified by the original path, and into the buffer \a filename
2007 	the leaf name of the original entry is written.
2008 	Neither the returned path nor the leaf name can be expected to be
2009 	canonical.
2010 
2011 	\param path The path to be analyzed. Must be able to store at least one
2012 		   additional character.
2013 	\param filename The buffer into which the leaf name will be written.
2014 		   Must be of size B_FILE_NAME_LENGTH at least.
2015 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2016 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2017 		   if the given path name is empty.
2018 */
2019 static status_t
2020 get_dir_path_and_leaf(char* path, char* filename)
2021 {
2022 	if (*path == '\0')
2023 		return B_ENTRY_NOT_FOUND;
2024 
2025 	char* last = strrchr(path, '/');
2026 		// '/' is not allowed in file names!
2027 
2028 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2029 
2030 	if (last == NULL) {
2031 		// this path is a single segment with no '/' in it
2032 		// ex. "foo"
2033 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2034 			return B_NAME_TOO_LONG;
2035 
2036 		strcpy(path, ".");
2037 	} else {
2038 		last++;
2039 		if (last[0] == '\0') {
2040 			// special case: the path ends in one or more '/' - remove them
2041 			while (*--last == '/' && last != path);
2042 			last[1] = '\0';
2043 
2044 			if (last == path && last[0] == '/') {
2045 				// This path points to the root of the file system
2046 				strcpy(filename, ".");
2047 				return B_OK;
2048 			}
2049 			for (; last != path && *(last - 1) != '/'; last--);
2050 				// rewind to the start of the leaf before the '/'
2051 		}
2052 
2053 		// normal leaf: replace the leaf portion of the path with a '.'
2054 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2055 			return B_NAME_TOO_LONG;
2056 
2057 		last[0] = '.';
2058 		last[1] = '\0';
2059 	}
2060 	return B_OK;
2061 }
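
// Illustrative examples, not part of the original source, of how
// get_dir_path_and_leaf() transforms its buffer in place:
//   "/a/b/c"   -> path "/a/b/." and filename "c"
//   "/a/b/c//" -> path "/a/b/." and filename "c" (trailing slashes removed)
//   "foo"      -> path "."      and filename "foo"
//   "/"        -> path "/"      and filename "."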
2062 
2063 
2064 static status_t
2065 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2066 	bool traverse, bool kernel, struct vnode** _vnode)
2067 {
2068 	char clonedName[B_FILE_NAME_LENGTH + 1];
2069 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2070 		return B_NAME_TOO_LONG;
2071 
2072 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2073 	struct vnode* directory;
2074 
2075 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2076 	if (status < 0)
2077 		return status;
2078 
2079 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2080 		_vnode, NULL);
2081 }
2082 
2083 
2084 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2085 	and returns the respective vnode.
2086 	On success a reference to the vnode is acquired for the caller.
2087 */
2088 static status_t
2089 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2090 {
2091 	ino_t id;
2092 
2093 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2094 		return get_vnode(dir->device, id, _vnode, true, false);
2095 
2096 	status_t status = FS_CALL(dir, lookup, name, &id);
2097 	if (status != B_OK)
2098 		return status;
2099 
2100 	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
2101 	// have a reference and just need to look the node up.
2102 	rw_lock_read_lock(&sVnodeLock);
2103 	*_vnode = lookup_vnode(dir->device, id);
2104 	rw_lock_read_unlock(&sVnodeLock);
2105 
2106 	if (*_vnode == NULL) {
2107 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2108 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2109 		return B_ENTRY_NOT_FOUND;
2110 	}
2111 
2112 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2113 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2114 //		(*_vnode)->mount->id, (*_vnode)->id);
2115 
2116 	return B_OK;
2117 }
2118 
2119 
2120 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2121 	\a path must not be NULL.
2122 	If it returns successfully, \a path contains the name of the last path
2123 	component. This function clobbers the buffer pointed to by \a path only
2124 	if it contains more than one component.
2125 	Note, this reduces the ref_count of the starting \a vnode, whether the
2126 	call succeeds or not!
2127 */
2128 static status_t
2129 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2130 	int count, struct io_context* ioContext, struct vnode** _vnode,
2131 	ino_t* _parentID)
2132 {
2133 	status_t status = B_OK;
2134 	ino_t lastParentID = vnode->id;
2135 
2136 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2137 
2138 	if (path == NULL) {
2139 		put_vnode(vnode);
2140 		return B_BAD_VALUE;
2141 	}
2142 
2143 	if (*path == '\0') {
2144 		put_vnode(vnode);
2145 		return B_ENTRY_NOT_FOUND;
2146 	}
2147 
2148 	while (true) {
2149 		struct vnode* nextVnode;
2150 		char* nextPath;
2151 
2152 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2153 			path));
2154 
2155 		// done?
2156 		if (path[0] == '\0')
2157 			break;
2158 
2159 		// walk to find the next path component ("path" will point to a single
2160 		// path component), and filter out multiple slashes
2161 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2162 				nextPath++);
2163 
2164 		if (*nextPath == '/') {
2165 			*nextPath = '\0';
2166 			do
2167 				nextPath++;
2168 			while (*nextPath == '/');
2169 		}
2170 
2171 		// If the '..' is at a covering vnode, move to the covered
2172 		// vnode so we pass the '..' path to the underlying file system.
2173 		// Also prevent escaping the root of the IO context.
2174 		if (strcmp("..", path) == 0) {
2175 			if (vnode == ioContext->root) {
2176 				// Attempted prison break! Keep it contained.
2177 				path = nextPath;
2178 				continue;
2179 			}
2180 
2181 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2182 				nextVnode = coveredVnode;
2183 				put_vnode(vnode);
2184 				vnode = nextVnode;
2185 			}
2186 		}
2187 
2188 		// check if vnode is really a directory
2189 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2190 			status = B_NOT_A_DIRECTORY;
2191 
2192 		// Check if we have the right to search the current directory vnode.
2193 		// If a file system doesn't have the access() function, we assume that
2194 		// searching a directory is always allowed
2195 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2196 			status = FS_CALL(vnode, access, X_OK);
2197 
2198 		// Tell the filesystem to get the vnode of this path component (if we
2199 		// got the permission from the call above)
2200 		if (status == B_OK)
2201 			status = lookup_dir_entry(vnode, path, &nextVnode);
2202 
2203 		if (status != B_OK) {
2204 			put_vnode(vnode);
2205 			return status;
2206 		}
2207 
2208 		// If the new node is a symbolic link, resolve it (if we've been told
2209 		// to do it)
2210 		if (S_ISLNK(nextVnode->Type())
2211 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2212 			size_t bufferSize;
2213 			char* buffer;
2214 
2215 			TRACE(("traverse link\n"));
2216 
2217 			// it's not exactly nice style using goto in this way, but hey,
2218 			// it works :-/
2219 			if (count + 1 > B_MAX_SYMLINKS) {
2220 				status = B_LINK_LIMIT;
2221 				goto resolve_link_error;
2222 			}
2223 
2224 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2225 			if (buffer == NULL) {
2226 				status = B_NO_MEMORY;
2227 				goto resolve_link_error;
2228 			}
2229 
2230 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2231 				bufferSize--;
2232 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2233 				// null-terminate
2234 				if (status >= 0)
2235 					buffer[bufferSize] = '\0';
2236 			} else
2237 				status = B_BAD_VALUE;
2238 
2239 			if (status != B_OK) {
2240 				free(buffer);
2241 
2242 		resolve_link_error:
2243 				put_vnode(vnode);
2244 				put_vnode(nextVnode);
2245 
2246 				return status;
2247 			}
2248 			put_vnode(nextVnode);
2249 
2250 			// Check if we start from the root directory or the current
2251 			// directory ("vnode" still points to that one).
2252 			// Cut off all leading slashes if it's the root directory
2253 			path = buffer;
2254 			bool absoluteSymlink = false;
2255 			if (path[0] == '/') {
2256 				// we don't need the old directory anymore
2257 				put_vnode(vnode);
2258 
2259 				while (*++path == '/')
2260 					;
2261 
2262 				mutex_lock(&sIOContextRootLock);
2263 				vnode = ioContext->root;
2264 				inc_vnode_ref_count(vnode);
2265 				mutex_unlock(&sIOContextRootLock);
2266 
2267 				absoluteSymlink = true;
2268 			}
2269 
2270 			inc_vnode_ref_count(vnode);
2271 				// balance the next recursion - we will decrement the
2272 				// ref_count of the vnode, no matter if we succeeded or not
2273 
2274 			if (absoluteSymlink && *path == '\0') {
2275 				// symlink was just "/"
2276 				nextVnode = vnode;
2277 			} else {
2278 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2279 					ioContext, &nextVnode, &lastParentID);
2280 			}
2281 
2282 			free(buffer);
2283 
2284 			if (status != B_OK) {
2285 				put_vnode(vnode);
2286 				return status;
2287 			}
2288 		} else
2289 			lastParentID = vnode->id;
2290 
2291 		// decrease the ref count on the old dir we just looked up into
2292 		put_vnode(vnode);
2293 
2294 		path = nextPath;
2295 		vnode = nextVnode;
2296 
2297 		// see if we hit a covered node
2298 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2299 			put_vnode(vnode);
2300 			vnode = coveringNode;
2301 		}
2302 	}
2303 
2304 	*_vnode = vnode;
2305 	if (_parentID)
2306 		*_parentID = lastParentID;
2307 
2308 	return B_OK;
2309 }
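
// Illustrative walk-through, not part of the original source: resolving the
// relative path "a/b" starting at some directory vnode. First "a" is cut out
// of the buffer and looked up via lookup_dir_entry(); if it is a symlink and
// more path follows, the link target is read and resolved recursively, with
// count + 1 charged against B_MAX_SYMLINKS. The loop then repeats for "b";
// on success *_vnode holds a reference to the final node and *_parentID the
// ID of its parent directory.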
2310 
2311 
2312 static status_t
2313 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2314 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2315 {
2316 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2317 		get_current_io_context(kernel), _vnode, _parentID);
2318 }
2319 
2320 
2321 static status_t
2322 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2323 	ino_t* _parentID, bool kernel)
2324 {
2325 	struct vnode* start = NULL;
2326 
2327 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2328 
2329 	if (!path)
2330 		return B_BAD_VALUE;
2331 
2332 	if (*path == '\0')
2333 		return B_ENTRY_NOT_FOUND;
2334 
2335 	// figure out if we need to start at root or at cwd
2336 	if (*path == '/') {
2337 		if (sRoot == NULL) {
2338 			// we're a bit early, aren't we?
2339 			return B_ERROR;
2340 		}
2341 
2342 		while (*++path == '/')
2343 			;
2344 		start = get_root_vnode(kernel);
2345 
2346 		if (*path == '\0') {
2347 			*_vnode = start;
2348 			return B_OK;
2349 		}
2350 
2351 	} else {
2352 		struct io_context* context = get_current_io_context(kernel);
2353 
2354 		mutex_lock(&context->io_mutex);
2355 		start = context->cwd;
2356 		if (start != NULL)
2357 			inc_vnode_ref_count(start);
2358 		mutex_unlock(&context->io_mutex);
2359 
2360 		if (start == NULL)
2361 			return B_ERROR;
2362 	}
2363 
2364 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2365 		_parentID);
2366 }
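
// Illustrative sketch, not part of the original source: resolving an
// absolute path from kernel context. The buffer must be writable, since
// path_to_vnode() modifies it.
#if 0
	char pathBuffer[B_PATH_NAME_LENGTH] = "/boot/home";
	struct vnode* vnode;
	if (path_to_vnode(pathBuffer, true, &vnode, NULL, true) == B_OK)
		put_vnode(vnode);	// balance the reference path_to_vnode() returned
#endif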
2367 
2368 
2369 /*! Returns the vnode of the next to last segment of the path, and stores
2370 	the last portion in \a filename.
2371 	The path buffer must be able to store at least one additional character.
2372 */
2373 static status_t
2374 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2375 	bool kernel)
2376 {
2377 	status_t status = get_dir_path_and_leaf(path, filename);
2378 	if (status != B_OK)
2379 		return status;
2380 
2381 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2382 }
2383 
2384 
2385 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2386 		   to by a FD + path pair.
2387 
2388 	\a path must be given in either case. \a fd might be omitted, in which
2389 	case \a path is either an absolute path or one relative to the current
2390 	directory. If both are supplied and \a path is relative, it is reckoned off
2391 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2392 	ignored.
2393 
2394 	The caller has the responsibility to call put_vnode() on the returned
2395 	directory vnode.
2396 
2397 	\param fd The FD. May be < 0.
2398 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2399 	       is modified by this function. It must have at least room for a
2400 	       string one character longer than the path it contains.
2401 	\param _vnode A pointer to a variable the directory vnode shall be written
2402 		   into.
2403 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2404 		   the leaf name of the specified entry will be written.
2405 	\param kernel \c true, if invoked from inside the kernel, \c false if
2406 		   invoked from userland.
2407 	\return \c B_OK, if everything went fine, another error code otherwise.
2408 */
2409 static status_t
2410 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2411 	char* filename, bool kernel)
2412 {
2413 	if (!path)
2414 		return B_BAD_VALUE;
2415 	if (*path == '\0')
2416 		return B_ENTRY_NOT_FOUND;
2417 	if (fd < 0)
2418 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2419 
2420 	status_t status = get_dir_path_and_leaf(path, filename);
2421 	if (status != B_OK)
2422 		return status;
2423 
2424 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2425 }
2426 
2427 
2428 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2429 		   to by a vnode + path pair.
2430 
2431 	\a path must be given in either case. \a vnode might be omitted, in which
2432 	directory. If both are supplied and \a path is relative, it is reckoned off
2433 	directory. If both a supplied and \a path is relative it is reckoned off
2434 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2435 	ignored.
2436 
2437 	The caller has the responsibility to call put_vnode() on the returned
2438 	directory vnode.
2439 
2440 	\param vnode The vnode. May be \c NULL.
2441 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2442 	       is modified by this function. It must have at least room for a
2443 	       string one character longer than the path it contains.
2444 	\param _vnode A pointer to a variable the directory vnode shall be written
2445 		   into.
2446 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2447 		   the leaf name of the specified entry will be written.
2448 	\param kernel \c true, if invoked from inside the kernel, \c false if
2449 		   invoked from userland.
2450 	\return \c B_OK, if everything went fine, another error code otherwise.
2451 */
2452 static status_t
2453 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2454 	struct vnode** _vnode, char* filename, bool kernel)
2455 {
2456 	if (!path)
2457 		return B_BAD_VALUE;
2458 	if (*path == '\0')
2459 		return B_ENTRY_NOT_FOUND;
2460 	if (vnode == NULL || path[0] == '/')
2461 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2462 
2463 	status_t status = get_dir_path_and_leaf(path, filename);
2464 	if (status != B_OK)
2465 		return status;
2466 
2467 	inc_vnode_ref_count(vnode);
2468 		// vnode_path_to_vnode() always decrements the ref count
2469 
2470 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2471 }
2472 
2473 
2474 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2475 */
2476 static status_t
2477 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2478 	size_t bufferSize, struct io_context* ioContext)
2479 {
2480 	if (bufferSize < sizeof(struct dirent))
2481 		return B_BAD_VALUE;
2482 
2483 	// See if the vnode is covering another vnode and move to the covered
2484 	// vnode so we get the underlying file system
2485 	VNodePutter vnodePutter;
2486 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2487 		vnode = coveredVnode;
2488 		vnodePutter.SetTo(vnode);
2489 	}
2490 
2491 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2492 		// The FS supports getting the name of a vnode.
2493 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2494 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2495 			return B_OK;
2496 	}
2497 
2498 	// The FS doesn't support getting the name of a vnode. So we search the
2499 	// parent directory for the vnode, if the caller let us.
2500 
2501 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2502 		return B_UNSUPPORTED;
2503 
2504 	void* cookie;
2505 
2506 	status_t status = FS_CALL(parent, open_dir, &cookie);
2507 	if (status >= B_OK) {
2508 		while (true) {
2509 			uint32 num = 1;
2510 			// We use the FS hook directly instead of dir_read(), since we don't
2511 			// want the entries to be fixed up. We have already resolved vnode to
2512 			// the covered node.
2513 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2514 				&num);
2515 			if (status != B_OK)
2516 				break;
2517 			if (num == 0) {
2518 				status = B_ENTRY_NOT_FOUND;
2519 				break;
2520 			}
2521 
2522 			if (vnode->id == buffer->d_ino) {
2523 				// found correct entry!
2524 				break;
2525 			}
2526 		}
2527 
2528 		FS_CALL(parent, close_dir, cookie);
2529 		FS_CALL(parent, free_dir_cookie, cookie);
2530 	}
2531 	return status;
2532 }
2533 
2534 
2535 static status_t
2536 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2537 	size_t nameSize, bool kernel)
2538 {
2539 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2540 	struct dirent* dirent = (struct dirent*)buffer;
2541 
2542 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2543 		get_current_io_context(kernel));
2544 	if (status != B_OK)
2545 		return status;
2546 
2547 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2548 		return B_BUFFER_OVERFLOW;
2549 
2550 	return B_OK;
2551 }
2552 
2553 
2554 /*!	Gets the full path to a given directory vnode.
2555 	It uses the get_vnode_name() hook to get the name of a vnode; if a
2556 	file system doesn't support this hook, it will fall back to iterating
2557 	through the parent directory to get the name of the child.
2558 
2559 	To protect against circular loops, it supports a maximum tree depth
2560 	of 256 levels.
2561 
2562 	Note that the path may no longer be correct by the time this function
2563 	returns! It doesn't use any locking to guarantee a correct path, as
2564 	paths aren't stable anyway: the path to a file can change at any time.
2565 
2566 	It might be a good idea, though, to check if the returned path exists
2567 	in the calling function (it's not done here for efficiency reasons).
2568 */
2569 static status_t
2570 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2571 	bool kernel)
2572 {
2573 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2574 
2575 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2576 		return B_BAD_VALUE;
2577 
2578 	if (!S_ISDIR(vnode->Type()))
2579 		return B_NOT_A_DIRECTORY;
2580 
2581 	char* path = buffer;
2582 	int32 insert = bufferSize;
2583 	int32 maxLevel = 256;
2584 	int32 length;
2585 	status_t status = B_OK;
2586 	struct io_context* ioContext = get_current_io_context(kernel);
2587 
2588 	// we don't use get_vnode() here because this call is more
2589 	// efficient and does all we need from get_vnode()
2590 	inc_vnode_ref_count(vnode);
2591 
2592 	path[--insert] = '\0';
2593 		// the path is filled right to left
2594 
2595 	while (true) {
2596 		// If the node is the context's root, bail out. Otherwise resolve mount
2597 		// points.
2598 		if (vnode == ioContext->root)
2599 			break;
2600 
2601 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2602 			put_vnode(vnode);
2603 			vnode = coveredVnode;
2604 		}
2605 
2606 		// lookup the parent vnode
2607 		struct vnode* parentVnode;
2608 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2609 		if (status != B_OK)
2610 			goto out;
2611 
2612 		if (parentVnode == vnode) {
2613 			// The caller apparently got their hands on a node outside of their
2614 			// context's root. Now we've hit the global root.
2615 			put_vnode(parentVnode);
2616 			break;
2617 		}
2618 
2619 		// get the node's name
2620 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2621 			// also used for fs_read_dir()
2622 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2623 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2624 			sizeof(nameBuffer), ioContext);
2625 
2626 		// release the current vnode, we only need its parent from now on
2627 		put_vnode(vnode);
2628 		vnode = parentVnode;
2629 
2630 		if (status != B_OK)
2631 			goto out;
2632 
2633 		// TODO: add an explicit check for loops in about 10 levels to do
2634 		// real loop detection
2635 
2636 		// don't go deeper than 'maxLevel' to prevent circular loops
2637 		if (maxLevel-- < 0) {
2638 			status = B_LINK_LIMIT;
2639 			goto out;
2640 		}
2641 
2642 		// add the name in front of the current path
2643 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2644 		length = strlen(name);
2645 		insert -= length;
2646 		if (insert <= 0) {
2647 			status = B_RESULT_NOT_REPRESENTABLE;
2648 			goto out;
2649 		}
2650 		memcpy(path + insert, name, length);
2651 		path[--insert] = '/';
2652 	}
2653 
2654 	// the root dir will result in an empty path: fix it
2655 	if (path[insert] == '\0')
2656 		path[--insert] = '/';
2657 
2658 	TRACE(("  path is: %s\n", path + insert));
2659 
2660 	// move the path to the start of the buffer
2661 	length = bufferSize - insert;
2662 	memmove(buffer, path + insert, length);
2663 
2664 out:
2665 	put_vnode(vnode);
2666 	return status;
2667 }
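
// Illustrative trace, not part of the original source: dir_vnode_to_path()
// fills the buffer right to left. For a directory at "/boot/home" it evolves
// roughly as
//   [ ...              '\0']   after the initial termination
//   [ ...          home'\0']   name from the first parent lookup
//   [ ...         /home'\0']
//   [ ...    /boot/home'\0']   when the context's root is reached
// and is finally memmove()d to the start of the buffer.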
2668 
2669 
2670 /*!	Checks the length of every path component, and adds a '.'
2671 	if the path ends in a slash.
2672 	The given path buffer must be able to store at least one
2673 	additional character.
2674 */
2675 static status_t
2676 check_path(char* to)
2677 {
2678 	int32 length = 0;
2679 
2680 	// check length of every path component
2681 
2682 	while (*to) {
2683 		char* begin;
2684 		if (*to == '/')
2685 			to++, length++;
2686 
2687 		begin = to;
2688 		while (*to != '/' && *to)
2689 			to++, length++;
2690 
2691 		if (to - begin > B_FILE_NAME_LENGTH)
2692 			return B_NAME_TOO_LONG;
2693 	}
2694 
2695 	if (length == 0)
2696 		return B_ENTRY_NOT_FOUND;
2697 
2698 	// complete path if there is a slash at the end
2699 
2700 	if (*(to - 1) == '/') {
2701 		if (length > B_PATH_NAME_LENGTH - 2)
2702 			return B_NAME_TOO_LONG;
2703 
2704 		to[0] = '.';
2705 		to[1] = '\0';
2706 	}
2707 
2708 	return B_OK;
2709 }
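
// Illustrative examples, not part of the original source:
//   check_path("/foo/bar")  -> B_OK, buffer unchanged
//   check_path("/foo/bar/") -> B_OK, buffer becomes "/foo/bar/."
//   check_path("")          -> B_ENTRY_NOT_FOUND
// Any single component longer than B_FILE_NAME_LENGTH yields B_NAME_TOO_LONG.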
2710 
2711 
2712 static struct file_descriptor*
2713 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2714 {
2715 	struct file_descriptor* descriptor
2716 		= get_fd(get_current_io_context(kernel), fd);
2717 	if (descriptor == NULL)
2718 		return NULL;
2719 
2720 	struct vnode* vnode = fd_vnode(descriptor);
2721 	if (vnode == NULL) {
2722 		put_fd(descriptor);
2723 		return NULL;
2724 	}
2725 
2726 	// ToDo: when we can close a file descriptor at any point, investigate
2727 	//	if this is still valid to do (accessing the vnode without ref_count
2728 	//	or locking)
2729 	*_vnode = vnode;
2730 	return descriptor;
2731 }
2732 
2733 
2734 static struct vnode*
2735 get_vnode_from_fd(int fd, bool kernel)
2736 {
2737 	struct file_descriptor* descriptor;
2738 	struct vnode* vnode;
2739 
2740 	descriptor = get_fd(get_current_io_context(kernel), fd);
2741 	if (descriptor == NULL)
2742 		return NULL;
2743 
2744 	vnode = fd_vnode(descriptor);
2745 	if (vnode != NULL)
2746 		inc_vnode_ref_count(vnode);
2747 
2748 	put_fd(descriptor);
2749 	return vnode;
2750 }
2751 
2752 
2753 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2754 	only the path will be considered. In this case, the \a path must not be
2755 	NULL.
2756 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2757 	and should be NULL for files.
2758 */
2759 static status_t
2760 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2761 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2762 {
2763 	if (fd < 0 && !path)
2764 		return B_BAD_VALUE;
2765 
2766 	if (path != NULL && *path == '\0')
2767 		return B_ENTRY_NOT_FOUND;
2768 
2769 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2770 		// no FD or absolute path
2771 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2772 	}
2773 
2774 	// FD only, or FD + relative path
2775 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2776 	if (!vnode)
2777 		return B_FILE_ERROR;
2778 
2779 	if (path != NULL) {
2780 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2781 			_vnode, _parentID);
2782 	}
2783 
2784 	// there is no relative path to take into account
2785 
2786 	*_vnode = vnode;
2787 	if (_parentID)
2788 		*_parentID = -1;
2789 
2790 	return B_OK;
2791 }
2792 
2793 
2794 static int
2795 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2796 	void* cookie, int openMode, bool kernel)
2797 {
2798 	struct file_descriptor* descriptor;
2799 	int fd;
2800 
2801 	// If the vnode is locked, we don't allow creating a new file/directory
2802 	// file_descriptor for it
2803 	if (vnode && vnode->mandatory_locked_by != NULL
2804 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2805 		return B_BUSY;
2806 
2807 	descriptor = alloc_fd();
2808 	if (!descriptor)
2809 		return B_NO_MEMORY;
2810 
2811 	if (vnode)
2812 		descriptor->u.vnode = vnode;
2813 	else
2814 		descriptor->u.mount = mount;
2815 	descriptor->cookie = cookie;
2816 
2817 	switch (type) {
2818 		// vnode types
2819 		case FDTYPE_FILE:
2820 			descriptor->ops = &sFileOps;
2821 			break;
2822 		case FDTYPE_DIR:
2823 			descriptor->ops = &sDirectoryOps;
2824 			break;
2825 		case FDTYPE_ATTR:
2826 			descriptor->ops = &sAttributeOps;
2827 			break;
2828 		case FDTYPE_ATTR_DIR:
2829 			descriptor->ops = &sAttributeDirectoryOps;
2830 			break;
2831 
2832 		// mount types
2833 		case FDTYPE_INDEX_DIR:
2834 			descriptor->ops = &sIndexDirectoryOps;
2835 			break;
2836 		case FDTYPE_QUERY:
2837 			descriptor->ops = &sQueryOps;
2838 			break;
2839 
2840 		default:
2841 			panic("get_new_fd() called with unknown type %d\n", type);
2842 			break;
2843 	}
2844 	descriptor->type = type;
2845 	descriptor->open_mode = openMode;
2846 
2847 	io_context* context = get_current_io_context(kernel);
2848 	fd = new_fd(context, descriptor);
2849 	if (fd < 0) {
2850 		free(descriptor);
2851 		return B_NO_MORE_FDS;
2852 	}
2853 
2854 	mutex_lock(&context->io_mutex);
2855 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2856 	mutex_unlock(&context->io_mutex);
2857 
2858 	return fd;
2859 }
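
// Illustrative sketch, not part of the original source: how the result of a
// file system's open() hook typically becomes an FD. "vnode", "cookie",
// "openMode", and "kernel" are assumed to come from the surrounding open
// path; error handling is elided.
#if 0
	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
	if (fd < 0) {
		// the descriptor could not be created or installed; the caller has
		// to undo the open, e.g. by calling the FS's close()/free_cookie()
	}
#endif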
2860 
2861 
2862 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2863 	vfs_normalize_path(). See there for more documentation.
2864 */
2865 static status_t
2866 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2867 {
2868 	VNodePutter dirPutter;
2869 	struct vnode* dir = NULL;
2870 	status_t error;
2871 
2872 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2873 		// get dir vnode + leaf name
2874 		struct vnode* nextDir;
2875 		char leaf[B_FILE_NAME_LENGTH];
2876 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2877 		if (error != B_OK)
2878 			return error;
2879 
2880 		dir = nextDir;
2881 		strcpy(path, leaf);
2882 		dirPutter.SetTo(dir);
2883 
2884 		// get file vnode, if we shall resolve links
2885 		bool fileExists = false;
2886 		struct vnode* fileVnode;
2887 		VNodePutter fileVnodePutter;
2888 		if (traverseLink) {
2889 			inc_vnode_ref_count(dir);
2890 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2891 					NULL) == B_OK) {
2892 				fileVnodePutter.SetTo(fileVnode);
2893 				fileExists = true;
2894 			}
2895 		}
2896 
2897 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2898 			// we're done -- construct the path
2899 			bool hasLeaf = true;
2900 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2901 				// special cases "." and ".." -- get the dir, forget the leaf
2902 				inc_vnode_ref_count(dir);
2903 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2904 					&nextDir, NULL);
2905 				if (error != B_OK)
2906 					return error;
2907 				dir = nextDir;
2908 				dirPutter.SetTo(dir);
2909 				hasLeaf = false;
2910 			}
2911 
2912 			// get the directory path
2913 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2914 			if (error != B_OK)
2915 				return error;
2916 
2917 			// append the leaf name
2918 			if (hasLeaf) {
2919 				// insert a directory separator if this is not the file system
2920 				// root
2921 				if ((strcmp(path, "/") != 0
2922 					&& strlcat(path, "/", pathSize) >= pathSize)
2923 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2924 					return B_NAME_TOO_LONG;
2925 				}
2926 			}
2927 
2928 			return B_OK;
2929 		}
2930 
2931 		// read link
2932 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2933 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2934 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2935 			if (error != B_OK)
2936 				return error;
2937 			path[bufferSize] = '\0';
2938 		} else
2939 			return B_BAD_VALUE;
2940 	}
2941 
2942 	return B_LINK_LIMIT;
2943 }
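
// Illustrative sketch, not part of the original source: in-place
// normalization of a path in kernel context, assuming no symlinks are
// involved.
#if 0
	char path[B_PATH_NAME_LENGTH] = "/boot/./home//Desktop";
	if (normalize_path(path, sizeof(path), true, true) == B_OK) {
		// path now reads "/boot/home/Desktop"
	}
#endif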
2944 
2945 
2946 #ifdef ADD_DEBUGGER_COMMANDS
2947 
2948 
2949 static void
2950 _dump_advisory_locking(advisory_locking* locking)
2951 {
2952 	if (locking == NULL)
2953 		return;
2954 
2955 	kprintf("   lock:        %" B_PRId32 "\n", locking->lock);
2956 	kprintf("   wait_sem:    %" B_PRId32 "\n", locking->wait_sem);
2957 
2958 	int32 index = 0;
2959 	LockList::Iterator iterator = locking->locks.GetIterator();
2960 	while (iterator.HasNext()) {
2961 		struct advisory_lock* lock = iterator.Next();
2962 
2963 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2964 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2965 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2966 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2967 	}
2968 }
2969 
2970 
2971 static void
2972 _dump_mount(struct fs_mount* mount)
2973 {
2974 	kprintf("MOUNT: %p\n", mount);
2975 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
2976 	kprintf(" device_name:   %s\n", mount->device_name);
2977 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2978 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
2979 	kprintf(" partition:     %p\n", mount->partition);
2980 	kprintf(" lock:          %p\n", &mount->rlock);
2981 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2982 		mount->owns_file_device ? " owns_file_device" : "");
2983 
2984 	fs_volume* volume = mount->volume;
2985 	while (volume != NULL) {
2986 		kprintf(" volume %p:\n", volume);
2987 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
2988 		kprintf("  private_volume:   %p\n", volume->private_volume);
2989 		kprintf("  ops:              %p\n", volume->ops);
2990 		kprintf("  file_system:      %p\n", volume->file_system);
2991 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2992 		volume = volume->super_volume;
2993 	}
2994 
2995 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2996 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2997 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
2998 	set_debug_variable("_partition", (addr_t)mount->partition);
2999 }
3000 
3001 
3002 static bool
3003 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3004 	const char* name)
3005 {
3006 	bool insertSlash = buffer[bufferSize] != '\0';
3007 	size_t nameLength = strlen(name);
3008 
3009 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3010 		return false;
3011 
3012 	if (insertSlash)
3013 		buffer[--bufferSize] = '/';
3014 
3015 	bufferSize -= nameLength;
3016 	memcpy(buffer + bufferSize, name, nameLength);
3017 
3018 	return true;
3019 }
3020 
3021 
3022 static bool
3023 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3024 	ino_t nodeID)
3025 {
3026 	if (bufferSize == 0)
3027 		return false;
3028 
3029 	bool insertSlash = buffer[bufferSize] != '\0';
3030 	if (insertSlash)
3031 		buffer[--bufferSize] = '/';
3032 
3033 	size_t size = snprintf(buffer, bufferSize,
3034 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3035 	if (size > bufferSize) {
3036 		if (insertSlash)
3037 			bufferSize++;
3038 		return false;
3039 	}
3040 
3041 	if (size < bufferSize)
3042 		memmove(buffer + bufferSize - size, buffer, size);
3043 
3044 	bufferSize -= size;
3045 	return true;
3046 }
3047 
3048 
3049 static char*
3050 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3051 	bool& _truncated)
3052 {
3053 	// null-terminate the path
3054 	buffer[--bufferSize] = '\0';
3055 
3056 	while (true) {
3057 		while (vnode->covers != NULL)
3058 			vnode = vnode->covers;
3059 
3060 		if (vnode == sRoot) {
3061 			_truncated = bufferSize == 0;
3062 			if (!_truncated)
3063 				buffer[--bufferSize] = '/';
3064 			return buffer + bufferSize;
3065 		}
3066 
3067 		// resolve the name
3068 		ino_t dirID;
3069 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3070 			vnode->id, dirID);
3071 		if (name == NULL) {
3072 			// Failed to resolve the name -- prepend "<dev,node>/".
3073 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3074 				vnode->mount->id, vnode->id);
3075 			return buffer + bufferSize;
3076 		}
3077 
3078 		// prepend the name
3079 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3080 			_truncated = true;
3081 			return buffer + bufferSize;
3082 		}
3083 
3084 		// resolve the directory node
3085 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3086 		if (nextVnode == NULL) {
3087 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3088 				vnode->mount->id, dirID);
3089 			return buffer + bufferSize;
3090 		}
3091 
3092 		vnode = nextVnode;
3093 	}
3094 }
3095 
3096 
3097 static void
3098 _dump_vnode(struct vnode* vnode, bool printPath)
3099 {
3100 	kprintf("VNODE: %p\n", vnode);
3101 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3102 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3103 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3104 	kprintf(" private_node:  %p\n", vnode->private_node);
3105 	kprintf(" mount:         %p\n", vnode->mount);
3106 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3107 	kprintf(" covers:        %p\n", vnode->covers);
3108 	kprintf(" cache:         %p\n", vnode->cache);
3109 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3110 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3111 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3112 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3113 
3114 	_dump_advisory_locking(vnode->advisory_locking);
3115 
3116 	if (printPath) {
3117 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3118 		if (buffer != NULL) {
3119 			bool truncated;
3120 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3121 				B_PATH_NAME_LENGTH, truncated);
3122 			if (path != NULL) {
3123 				kprintf(" path:          ");
3124 				if (truncated)
3125 					kputs("<truncated>/");
3126 				kputs(path);
3127 				kputs("\n");
3128 			} else
3129 				kprintf("Failed to resolve vnode path.\n");
3130 
3131 			debug_free(buffer);
3132 		} else
3133 			kprintf("Failed to allocate memory for constructing the path.\n");
3134 	}
3135 
3136 	set_debug_variable("_node", (addr_t)vnode->private_node);
3137 	set_debug_variable("_mount", (addr_t)vnode->mount);
3138 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3139 	set_debug_variable("_covers", (addr_t)vnode->covers);
3140 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3141 }
3142 
3143 
3144 static int
3145 dump_mount(int argc, char** argv)
3146 {
3147 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3148 		kprintf("usage: %s [id|address]\n", argv[0]);
3149 		return 0;
3150 	}
3151 
3152 	ulong val = parse_expression(argv[1]);
3153 	uint32 id = val;
3154 
3155 	struct fs_mount* mount = sMountsTable->Lookup(id);
3156 	if (mount == NULL) {
3157 		if (IS_USER_ADDRESS(id)) {
3158 			kprintf("fs_mount not found\n");
3159 			return 0;
3160 		}
3161 		mount = (fs_mount*)val;
3162 	}
3163 
3164 	_dump_mount(mount);
3165 	return 0;
3166 }
3167 
3168 
3169 static int
3170 dump_mounts(int argc, char** argv)
3171 {
3172 	if (argc != 1) {
3173 		kprintf("usage: %s\n", argv[0]);
3174 		return 0;
3175 	}
3176 
3177 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3178 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3179 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3180 
3181 	struct fs_mount* mount;
3182 
3183 	MountTable::Iterator iterator(sMountsTable);
3184 	while (iterator.HasNext()) {
3185 		mount = iterator.Next();
3186 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3187 			mount->root_vnode->covers, mount->volume->private_volume,
3188 			mount->volume->file_system_name);
3189 
3190 		fs_volume* volume = mount->volume;
3191 		while (volume->super_volume != NULL) {
3192 			volume = volume->super_volume;
3193 			kprintf("                                     %p %s\n",
3194 				volume->private_volume, volume->file_system_name);
3195 		}
3196 	}
3197 
3198 	return 0;
3199 }
3200 
3201 
3202 static int
3203 dump_vnode(int argc, char** argv)
3204 {
3205 	bool printPath = false;
3206 	int argi = 1;
3207 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3208 		printPath = true;
3209 		argi++;
3210 	}
3211 
3212 	if (argi >= argc || argi + 2 < argc) {
3213 		print_debugger_command_usage(argv[0]);
3214 		return 0;
3215 	}
3216 
3217 	struct vnode* vnode = NULL;
3218 
3219 	if (argi + 1 == argc) {
3220 		vnode = (struct vnode*)parse_expression(argv[argi]);
3221 		if (IS_USER_ADDRESS(vnode)) {
3222 			kprintf("invalid vnode address\n");
3223 			return 0;
3224 		}
3225 		_dump_vnode(vnode, printPath);
3226 		return 0;
3227 	}
3228 
3229 	dev_t device = parse_expression(argv[argi]);
3230 	ino_t id = parse_expression(argv[argi + 1]);
3231 
3232 	VnodeTable::Iterator iterator(sVnodeTable);
3233 	while (iterator.HasNext()) {
3234 		vnode = iterator.Next();
3235 		if (vnode->id != id || vnode->device != device)
3236 			continue;
3237 
3238 		_dump_vnode(vnode, printPath);
3239 	}
3240 
3241 	return 0;
3242 }
3243 
3244 
3245 static int
3246 dump_vnodes(int argc, char** argv)
3247 {
3248 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3249 		kprintf("usage: %s [device]\n", argv[0]);
3250 		return 0;
3251 	}
3252 
3253 	// restrict dumped nodes to a certain device if requested
3254 	dev_t device = parse_expression(argv[1]);
3255 
3256 	struct vnode* vnode;
3257 
3258 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3259 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3260 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3261 
3262 	VnodeTable::Iterator iterator(sVnodeTable);
3263 	while (iterator.HasNext()) {
3264 		vnode = iterator.Next();
3265 		if (vnode->device != device)
3266 			continue;
3267 
3268 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3269 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3270 			vnode->private_node, vnode->advisory_locking,
3271 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3272 			vnode->IsUnpublished() ? "u" : "-");
3273 	}
3274 
3275 	return 0;
3276 }
3277 
3278 
3279 static int
3280 dump_vnode_caches(int argc, char** argv)
3281 {
3282 	struct vnode* vnode;
3283 
3284 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3285 		kprintf("usage: %s [device]\n", argv[0]);
3286 		return 0;
3287 	}
3288 
3289 	// restrict dumped nodes to a certain device if requested
3290 	dev_t device = -1;
3291 	if (argc > 1)
3292 		device = parse_expression(argv[1]);
3293 
3294 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3295 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3296 
3297 	VnodeTable::Iterator iterator(sVnodeTable);
3298 	while (iterator.HasNext()) {
3299 		vnode = iterator.Next();
3300 		if (vnode->cache == NULL)
3301 			continue;
3302 		if (device != -1 && vnode->device != device)
3303 			continue;
3304 
3305 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3306 			vnode, vnode->device, vnode->id, vnode->cache,
3307 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3308 			vnode->cache->page_count);
3309 	}
3310 
3311 	return 0;
3312 }
3313 
3314 
3315 int
3316 dump_io_context(int argc, char** argv)
3317 {
3318 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3319 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3320 		return 0;
3321 	}
3322 
3323 	struct io_context* context = NULL;
3324 
3325 	if (argc > 1) {
3326 		ulong num = parse_expression(argv[1]);
3327 		if (IS_KERNEL_ADDRESS(num))
3328 			context = (struct io_context*)num;
3329 		else {
3330 			Team* team = team_get_team_struct_locked(num);
3331 			if (team == NULL) {
3332 				kprintf("could not find team with ID %lu\n", num);
3333 				return 0;
3334 			}
3335 			context = (struct io_context*)team->io_context;
3336 		}
3337 	} else
3338 		context = get_current_io_context(true);
3339 
3340 	kprintf("I/O CONTEXT: %p\n", context);
3341 	kprintf(" root vnode:\t%p\n", context->root);
3342 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3343 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3344 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3345 
3346 	if (context->num_used_fds) {
3347 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3348 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3349 	}
3350 
3351 	for (uint32 i = 0; i < context->table_size; i++) {
3352 		struct file_descriptor* fd = context->fds[i];
3353 		if (fd == NULL)
3354 			continue;
3355 
3356 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3357 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3358 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3359 			fd->pos, fd->cookie,
3360 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3361 				? "mount" : "vnode",
3362 			fd->u.vnode);
3363 	}
3364 
3365 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3366 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3367 
3368 	set_debug_variable("_cwd", (addr_t)context->cwd);
3369 
3370 	return 0;
3371 }
3372 
3373 
3374 int
3375 dump_vnode_usage(int argc, char** argv)
3376 {
3377 	if (argc != 1) {
3378 		kprintf("usage: %s\n", argv[0]);
3379 		return 0;
3380 	}
3381 
3382 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3383 		sUnusedVnodes, kMaxUnusedVnodes);
3384 
3385 	uint32 count = sVnodeTable->CountElements();
3386 
3387 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3388 		count - sUnusedVnodes);
3389 	return 0;
3390 }
3391 
3392 #endif	// ADD_DEBUGGER_COMMANDS
3393 
3394 
3395 /*!	Clears memory specified by an iovec array.
3396 */
3397 static void
3398 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3399 {
3400 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3401 		size_t length = std::min(vecs[i].iov_len, bytes);
3402 		memset(vecs[i].iov_base, 0, length);
3403 		bytes -= length;
3404 	}
3405 }
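
// Illustrative sketch, not part of the original source: clearing the first
// 6144 bytes described by two iovecs, as is done for sparse reads below.
#if 0
	char a[4096], b[4096];
	iovec vecs[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
	zero_iovecs(vecs, 2, 6144);
		// clears all of "a" and the first 2048 bytes of "b"
#endif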
3406 
3407 
3408 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3409 	and calls the file system hooks to read/write the request to disk.
3410 */
3411 static status_t
3412 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3413 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3414 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3415 	bool doWrite)
3416 {
3417 	if (fileVecCount == 0) {
3418 		// There are no file vecs at this offset, so we're obviously trying
3419 		// to access the file outside of its bounds
3420 		return B_BAD_VALUE;
3421 	}
3422 
3423 	size_t numBytes = *_numBytes;
3424 	uint32 fileVecIndex;
3425 	size_t vecOffset = *_vecOffset;
3426 	uint32 vecIndex = *_vecIndex;
3427 	status_t status;
3428 	size_t size;
3429 
3430 	if (!doWrite && vecOffset == 0) {
3431 		// now directly read the data from the device
3432 		// the first file_io_vec can be read directly
3433 
3434 		if (fileVecs[0].length < (off_t)numBytes)
3435 			size = fileVecs[0].length;
3436 		else
3437 			size = numBytes;
3438 
3439 		if (fileVecs[0].offset >= 0) {
3440 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3441 				&vecs[vecIndex], vecCount - vecIndex, &size);
3442 		} else {
3443 			// sparse read
3444 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3445 			status = B_OK;
3446 		}
3447 		if (status != B_OK)
3448 			return status;
3449 
3450 		// TODO: this is a work-around for buggy device drivers!
3451 		//	When our own drivers honour the length, we can:
3452 		//	a) also use this direct I/O for writes (otherwise, it would
3453 		//	   overwrite precious data)
3454 		//	b) panic if the term below is true (at least for writes)
3455 		if ((off_t)size > fileVecs[0].length) {
3456 			//dprintf("warning: device driver %p doesn't respect total length "
3457 			//	"in read_pages() call!\n", ref->device);
3458 			size = fileVecs[0].length;
3459 		}
3460 
3461 		ASSERT((off_t)size <= fileVecs[0].length);
3462 
3463 		// If the file portion was contiguous, we're already done now
3464 		if (size == numBytes)
3465 			return B_OK;
3466 
3467 		// if we reached the end of the file, we can return as well
3468 		if ((off_t)size != fileVecs[0].length) {
3469 			*_numBytes = size;
3470 			return B_OK;
3471 		}
3472 
3473 		fileVecIndex = 1;
3474 
3475 		// first, find out where we have to continue in our iovecs
3476 		for (; vecIndex < vecCount; vecIndex++) {
3477 			if (size < vecs[vecIndex].iov_len)
3478 				break;
3479 
3480 			size -= vecs[vecIndex].iov_len;
3481 		}
3482 
3483 		vecOffset = size;
3484 	} else {
3485 		fileVecIndex = 0;
3486 		size = 0;
3487 	}
3488 
3489 	// Too bad, let's process the rest of the file_io_vecs
3490 
3491 	size_t totalSize = size;
3492 	size_t bytesLeft = numBytes - size;
3493 
3494 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3495 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3496 		off_t fileOffset = fileVec.offset;
3497 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3498 
3499 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3500 			fileLeft));
3501 
3502 		// process the complete fileVec
3503 		while (fileLeft > 0) {
3504 			iovec tempVecs[MAX_TEMP_IO_VECS];
3505 			uint32 tempCount = 0;
3506 
3507 			// size tracks how much of what is left of the current fileVec
3508 			// (fileLeft) has been assigned to tempVecs
3509 			size = 0;
3510 
3511 			// assign what is left of the current fileVec to the tempVecs
3512 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3513 					&& tempCount < MAX_TEMP_IO_VECS;) {
3514 				// try to satisfy one iovec per iteration (or as much as
3515 				// possible)
3516 
3517 				// bytes left of the current iovec
3518 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3519 				if (vecLeft == 0) {
3520 					vecOffset = 0;
3521 					vecIndex++;
3522 					continue;
3523 				}
3524 
3525 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3526 					vecIndex, vecOffset, size));
3527 
3528 				// actually available bytes
3529 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3530 
3531 				tempVecs[tempCount].iov_base
3532 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3533 				tempVecs[tempCount].iov_len = tempVecSize;
3534 				tempCount++;
3535 
3536 				size += tempVecSize;
3537 				vecOffset += tempVecSize;
3538 			}
3539 
3540 			size_t bytes = size;
3541 
3542 			if (fileOffset == -1) {
3543 				if (doWrite) {
3544 					panic("sparse write attempt: vnode %p", vnode);
3545 					status = B_IO_ERROR;
3546 				} else {
3547 					// sparse read
3548 					zero_iovecs(tempVecs, tempCount, bytes);
3549 					status = B_OK;
3550 				}
3551 			} else if (doWrite) {
3552 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3553 					tempVecs, tempCount, &bytes);
3554 			} else {
3555 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3556 					tempVecs, tempCount, &bytes);
3557 			}
3558 			if (status != B_OK)
3559 				return status;
3560 
3561 			totalSize += bytes;
3562 			bytesLeft -= size;
3563 			if (fileOffset >= 0)
3564 				fileOffset += size;
3565 			fileLeft -= size;
3566 			//dprintf("-> file left = %Lu\n", fileLeft);
3567 
3568 			if (size != bytes || vecIndex >= vecCount) {
3569 				// there are no more bytes or iovecs, let's bail out
3570 				*_numBytes = totalSize;
3571 				return B_OK;
3572 			}
3573 		}
3574 	}
3575 
3576 	*_vecIndex = vecIndex;
3577 	*_vecOffset = vecOffset;
3578 	*_numBytes = totalSize;
3579 	return B_OK;
3580 }
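
// Illustrative example, not part of the original source: a 16 KiB request
// that the file system mapped to two on-disk extents with a sparse hole in
// between (offsets are hypothetical):
//   fileVecs = { { offset 4096,  length 8192 },
//                { offset -1,    length 4096 },	// sparse
//                { offset 65536, length 4096 } }
// For a read, the first extent goes through read_pages() directly, the
// sparse vec is satisfied by zero_iovecs(), and the last extent is read via
// read_pages() again, while *_vecIndex and *_vecOffset track how far the
// iovecs have been consumed.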
3581 
3582 
3583 static bool
3584 is_user_in_group(gid_t gid)
3585 {
3586 	if (gid == getegid())
3587 		return true;
3588 
3589 	gid_t groups[NGROUPS_MAX];
3590 	int groupCount = getgroups(NGROUPS_MAX, groups);
3591 	for (int i = 0; i < groupCount; i++) {
3592 		if (gid == groups[i])
3593 			return true;
3594 	}
3595 
3596 	return false;
3597 }
3598 
3599 
3600 //	#pragma mark - public API for file systems
3601 
3602 
3603 extern "C" status_t
3604 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3605 	fs_vnode_ops* ops)
3606 {
3607 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3608 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3609 
3610 	if (privateNode == NULL)
3611 		return B_BAD_VALUE;
3612 
3613 	// create the node
3614 	bool nodeCreated;
3615 	struct vnode* vnode;
3616 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3617 		nodeCreated);
3618 	if (status != B_OK)
3619 		return status;
3620 
3621 	WriteLocker nodeLocker(sVnodeLock, true);
3622 		// create_new_vnode_and_lock() has locked for us
3623 
3624 	// file system integrity check:
3625 	// test if the vnode already exists and bail out if this is the case!
3626 	if (!nodeCreated) {
3627 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3628 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3629 			vnode->private_node);
3630 		return B_ERROR;
3631 	}
3632 
3633 	vnode->private_node = privateNode;
3634 	vnode->ops = ops;
3635 	vnode->SetUnpublished(true);
3636 
3637 	TRACE(("returns: %s\n", strerror(status)));
3638 
3639 	return status;
3640 }
3641 
3642 
3643 extern "C" status_t
3644 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3645 	fs_vnode_ops* ops, int type, uint32 flags)
3646 {
3647 	FUNCTION(("publish_vnode()\n"));
3648 
3649 	WriteLocker locker(sVnodeLock);
3650 
3651 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3652 
3653 	bool nodeCreated = false;
3654 	if (vnode == NULL) {
3655 		if (privateNode == NULL)
3656 			return B_BAD_VALUE;
3657 
3658 		// create the node
3659 		locker.Unlock();
3660 			// create_new_vnode_and_lock() will re-lock for us on success
3661 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3662 			nodeCreated);
3663 		if (status != B_OK)
3664 			return status;
3665 
3666 		locker.SetTo(sVnodeLock, true);
3667 	}
3668 
3669 	if (nodeCreated) {
3670 		vnode->private_node = privateNode;
3671 		vnode->ops = ops;
3672 		vnode->SetUnpublished(true);
3673 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3674 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3675 		// already known, but not published
3676 	} else
3677 		return B_BAD_VALUE;
3678 
3679 	bool publishSpecialSubNode = false;
3680 
3681 	vnode->SetType(type);
3682 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3683 	publishSpecialSubNode = is_special_node_type(type)
3684 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3685 
3686 	status_t status = B_OK;
3687 
3688 	// create sub vnodes, if necessary
3689 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3690 		locker.Unlock();
3691 
3692 		fs_volume* subVolume = volume;
3693 		if (volume->sub_volume != NULL) {
3694 			while (status == B_OK && subVolume->sub_volume != NULL) {
3695 				subVolume = subVolume->sub_volume;
3696 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3697 					vnode);
3698 			}
3699 		}
3700 
3701 		if (status == B_OK && publishSpecialSubNode)
3702 			status = create_special_sub_node(vnode, flags);
3703 
3704 		if (status != B_OK) {
3705 			// error -- clean up the created sub vnodes
3706 			while (subVolume->super_volume != volume) {
3707 				subVolume = subVolume->super_volume;
3708 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3709 			}
3710 		}
3711 
3712 		if (status == B_OK) {
3713 			ReadLocker vnodesReadLocker(sVnodeLock);
3714 			AutoLocker<Vnode> nodeLocker(vnode);
3715 			vnode->SetBusy(false);
3716 			vnode->SetUnpublished(false);
3717 		} else {
3718 			locker.Lock();
3719 			sVnodeTable->Remove(vnode);
3720 			remove_vnode_from_mount_list(vnode, vnode->mount);
3721 			free(vnode);
3722 		}
3723 	} else {
3724 		// we still hold the write lock -- mark the node unbusy and published
3725 		vnode->SetBusy(false);
3726 		vnode->SetUnpublished(false);
3727 	}
3728 
3729 	TRACE(("returns: %s\n", strerror(status)));
3730 
3731 	return status;
3732 }
3733 
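/* Illustrative sketch (not part of the build): a file system typically pairs
   new_vnode() with a later publish_vnode(), so the node stays invisible to
   everyone else (busy and unpublished) while it is still being set up. The
   type MyInode, the function name, and its accessors are hypothetical.

	static status_t
	my_fs_announce_node(fs_volume* volume, MyInode* inode)
	{
		// announce the node to the VFS; it remains busy/unpublished
		status_t status = new_vnode(volume, inode->ID(), inode, &sMyVnodeOps);
		if (status != B_OK)
			return status;

		// ... finish initializing the node while nobody can see it ...

		// make the node visible; this clears the busy/unpublished flags
		return publish_vnode(volume, inode->ID(), inode, &sMyVnodeOps,
			inode->Mode() & S_IFMT, 0);
	}
*/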
3734 
3735 extern "C" status_t
3736 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3737 {
3738 	struct vnode* vnode;
3739 
3740 	if (volume == NULL)
3741 		return B_BAD_VALUE;
3742 
3743 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3744 	if (status != B_OK)
3745 		return status;
3746 
3747 	// If this is a layered FS, we need to get the node cookie for the requested
3748 	// layer.
3749 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3750 		fs_vnode resolvedNode;
3751 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3752 			&resolvedNode);
3753 		if (status != B_OK) {
3754 			panic("get_vnode(): Failed to get super node for vnode %p, "
3755 				"volume: %p", vnode, volume);
3756 			put_vnode(vnode);
3757 			return status;
3758 		}
3759 
3760 		if (_privateNode != NULL)
3761 			*_privateNode = resolvedNode.private_node;
3762 	} else if (_privateNode != NULL)
3763 		*_privateNode = vnode->private_node;
3764 
3765 	return B_OK;
3766 }
3767 
3768 
3769 extern "C" status_t
3770 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3771 {
3772 	struct vnode* vnode;
3773 
3774 	rw_lock_read_lock(&sVnodeLock);
3775 	vnode = lookup_vnode(volume->id, vnodeID);
3776 	rw_lock_read_unlock(&sVnodeLock);
3777 
3778 	if (vnode == NULL)
3779 		return B_BAD_VALUE;
3780 
3781 	inc_vnode_ref_count(vnode);
3782 	return B_OK;
3783 }
3784 
3785 
3786 extern "C" status_t
3787 put_vnode(fs_volume* volume, ino_t vnodeID)
3788 {
3789 	struct vnode* vnode;
3790 
3791 	rw_lock_read_lock(&sVnodeLock);
3792 	vnode = lookup_vnode(volume->id, vnodeID);
3793 	rw_lock_read_unlock(&sVnodeLock);
3794 
3795 	if (vnode == NULL)
3796 		return B_BAD_VALUE;
3797 
3798 	dec_vnode_ref_count(vnode, false, true);
3799 	return B_OK;
3800 }
3801 
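/* Illustrative sketch (not part of the build): get_vnode() and put_vnode()
   form a reference-count pair -- every successful get_vnode() must
   eventually be matched by a put_vnode(). MyInode is a hypothetical private
   node type.

	void* privateNode;
	if (get_vnode(volume, vnodeID, &privateNode) == B_OK) {
		MyInode* inode = (MyInode*)privateNode;
		// ... the node cannot go away while we hold the reference ...
		put_vnode(volume, vnodeID);
	}
*/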
3802 
3803 extern "C" status_t
3804 remove_vnode(fs_volume* volume, ino_t vnodeID)
3805 {
3806 	ReadLocker locker(sVnodeLock);
3807 
3808 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3809 	if (vnode == NULL)
3810 		return B_ENTRY_NOT_FOUND;
3811 
3812 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3813 		// this vnode is in use
3814 		return B_BUSY;
3815 	}
3816 
3817 	vnode->Lock();
3818 
3819 	vnode->SetRemoved(true);
3820 	bool removeUnpublished = false;
3821 
3822 	if (vnode->IsUnpublished()) {
3823 		// prepare the vnode for deletion
3824 		removeUnpublished = true;
3825 		vnode->SetBusy(true);
3826 	}
3827 
3828 	vnode->Unlock();
3829 	locker.Unlock();
3830 
3831 	if (removeUnpublished) {
3832 		// If the vnode hasn't been published yet, we delete it here
3833 		atomic_add(&vnode->ref_count, -1);
3834 		free_vnode(vnode, true);
3835 	}
3836 
3837 	return B_OK;
3838 }
3839 
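/* Illustrative sketch (not part of the build): a file system usually calls
   remove_vnode() from its unlink hook once the last directory entry for the
   node is gone; the node itself is destroyed only when the last reference
   to it is released. my_remove_dir_entry() is a hypothetical helper.

	static status_t
	my_fs_unlink(fs_volume* volume, fs_vnode* dir, const char* name)
	{
		ino_t id;
		status_t status = my_remove_dir_entry(volume, dir, name, &id);
		if (status == B_OK)
			status = remove_vnode(volume, id);
		return status;
	}
*/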
3840 
3841 extern "C" status_t
3842 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3843 {
3844 	struct vnode* vnode;
3845 
3846 	rw_lock_read_lock(&sVnodeLock);
3847 
3848 	vnode = lookup_vnode(volume->id, vnodeID);
3849 	if (vnode) {
3850 		AutoLocker<Vnode> nodeLocker(vnode);
3851 		vnode->SetRemoved(false);
3852 	}
3853 
3854 	rw_lock_read_unlock(&sVnodeLock);
3855 	return B_OK;
3856 }
3857 
3858 
3859 extern "C" status_t
3860 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3861 {
3862 	ReadLocker _(sVnodeLock);
3863 
3864 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3865 		if (_removed != NULL)
3866 			*_removed = vnode->IsRemoved();
3867 		return B_OK;
3868 	}
3869 
3870 	return B_BAD_VALUE;
3871 }
3872 
3873 
3874 extern "C" fs_volume*
3875 volume_for_vnode(fs_vnode* _vnode)
3876 {
3877 	if (_vnode == NULL)
3878 		return NULL;
3879 
3880 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3881 	return vnode->mount->volume;
3882 }
3883 
3884 
3885 extern "C" status_t
3886 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3887 	uid_t nodeUserID)
3888 {
3889 	// get node permissions
3890 	int userPermissions = (mode & S_IRWXU) >> 6;
3891 	int groupPermissions = (mode & S_IRWXG) >> 3;
3892 	int otherPermissions = mode & S_IRWXO;
3893 
3894 	// get the node permissions for this uid/gid
3895 	int permissions = 0;
3896 	uid_t uid = geteuid();
3897 
3898 	if (uid == 0) {
3899 		// user is root
3900 		// root always has read/write permission, but at least one of the
3901 		// X bits must be set for execute permission
3902 		permissions = userPermissions | groupPermissions | otherPermissions
3903 			| S_IROTH | S_IWOTH;
3904 		if (S_ISDIR(mode))
3905 			permissions |= S_IXOTH;
3906 	} else if (uid == nodeUserID) {
3907 		// user is node owner
3908 		permissions = userPermissions;
3909 	} else if (is_user_in_group(nodeGroupID)) {
3910 		// user is in owning group
3911 		permissions = groupPermissions;
3912 	} else {
3913 		// user is one of the others
3914 		permissions = otherPermissions;
3915 	}
3916 
3917 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
3918 }
3919 
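/* Illustrative sketch (not part of the build): asking whether the caller may
   write to a regular file with mode 0644 owned by user 1000, group 100. The
   access mode uses the R_OK/W_OK/X_OK bits from <unistd.h>, which line up
   with the permission bits this function extracts from \a mode.

	status_t status = check_access_permissions(W_OK, S_IFREG | 0644, 100,
		1000);
	// B_OK if the caller is the owner (or root), B_PERMISSION_DENIED
	// otherwise, since neither the group nor the "other" bits grant write
	// access for mode 0644
*/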
3920 
3921 #if 0
3922 extern "C" status_t
3923 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3924 	size_t* _numBytes)
3925 {
3926 	struct file_descriptor* descriptor;
3927 	struct vnode* vnode;
3928 
3929 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3930 	if (descriptor == NULL)
3931 		return B_FILE_ERROR;
3932 
3933 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3934 		count, 0, _numBytes);
3935 
3936 	put_fd(descriptor);
3937 	return status;
3938 }
3939 
3940 
3941 extern "C" status_t
3942 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3943 	size_t* _numBytes)
3944 {
3945 	struct file_descriptor* descriptor;
3946 	struct vnode* vnode;
3947 
3948 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3949 	if (descriptor == NULL)
3950 		return B_FILE_ERROR;
3951 
3952 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3953 		count, 0, _numBytes);
3954 
3955 	put_fd(descriptor);
3956 	return status;
3957 }
3958 #endif
3959 
3960 
3961 extern "C" status_t
3962 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3963 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3964 	size_t* _bytes)
3965 {
3966 	struct file_descriptor* descriptor;
3967 	struct vnode* vnode;
3968 
3969 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3970 	if (descriptor == NULL)
3971 		return B_FILE_ERROR;
3972 
3973 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3974 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3975 		false);
3976 
3977 	put_fd(descriptor);
3978 	return status;
3979 }
3980 
3981 
3982 extern "C" status_t
3983 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3984 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3985 	size_t* _bytes)
3986 {
3987 	struct file_descriptor* descriptor;
3988 	struct vnode* vnode;
3989 
3990 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3991 	if (descriptor == NULL)
3992 		return B_FILE_ERROR;
3993 
3994 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3995 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3996 		true);
3997 
3998 	put_fd(descriptor);
3999 	return status;
4000 }
4001 
4002 
4003 extern "C" status_t
4004 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4005 {
4006 	// lookup mount -- the caller is required to make sure that the mount
4007 	// won't go away
4008 	MutexLocker locker(sMountMutex);
4009 	struct fs_mount* mount = find_mount(mountID);
4010 	if (mount == NULL)
4011 		return B_BAD_VALUE;
4012 	locker.Unlock();
4013 
4014 	return mount->entry_cache.Add(dirID, name, nodeID);
4015 }
4016 
4017 
4018 extern "C" status_t
4019 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4020 {
4021 	// lookup mount -- the caller is required to make sure that the mount
4022 	// won't go away
4023 	MutexLocker locker(sMountMutex);
4024 	struct fs_mount* mount = find_mount(mountID);
4025 	if (mount == NULL)
4026 		return B_BAD_VALUE;
4027 	locker.Unlock();
4028 
4029 	return mount->entry_cache.Remove(dirID, name);
4030 }
4031 
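/* Illustrative sketch (not part of the build): a file system can keep the
   entry cache coherent from its directory-modifying hooks. The IDs and the
   entry name are hypothetical.

	// after successfully creating "data.log" in directory dirID:
	entry_cache_add(volume->id, dirID, "data.log", newNodeID);

	// after successfully unlinking it again:
	entry_cache_remove(volume->id, dirID, "data.log");
*/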
4032 
4033 //	#pragma mark - private VFS API
4034 //	Functions the VFS exports for other parts of the kernel
4035 
4036 
4037 /*! Acquires another reference to the vnode that has to be released
4038 	by calling vfs_put_vnode().
4039 */
4040 void
4041 vfs_acquire_vnode(struct vnode* vnode)
4042 {
4043 	inc_vnode_ref_count(vnode);
4044 }
4045 
4046 
4047 /*! This is currently called from file_cache_create() only.
4048 	It's probably a temporary solution as long as devfs requires that
4049 	fs_read_pages()/fs_write_pages() are called with the standard
4050 	open cookie and not with a device cookie.
4051 	If that's done differently, remove this call; it has no other
4052 	purpose.
4053 */
4054 extern "C" status_t
4055 vfs_get_cookie_from_fd(int fd, void** _cookie)
4056 {
4057 	struct file_descriptor* descriptor;
4058 
4059 	descriptor = get_fd(get_current_io_context(true), fd);
4060 	if (descriptor == NULL)
4061 		return B_FILE_ERROR;
4062 
4063 	*_cookie = descriptor->cookie;
4064 	return B_OK;
4065 }
4066 
4067 
4068 extern "C" status_t
4069 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4070 {
4071 	*vnode = get_vnode_from_fd(fd, kernel);
4072 
4073 	if (*vnode == NULL)
4074 		return B_FILE_ERROR;
4075 
4076 	return B_OK;
4077 }
4078 
4079 
4080 extern "C" status_t
4081 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4082 {
4083 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4084 		path, kernel));
4085 
4086 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4087 	if (pathBuffer.InitCheck() != B_OK)
4088 		return B_NO_MEMORY;
4089 
4090 	char* buffer = pathBuffer.LockBuffer();
4091 	strlcpy(buffer, path, pathBuffer.BufferSize());
4092 
4093 	struct vnode* vnode;
4094 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4095 	if (status != B_OK)
4096 		return status;
4097 
4098 	*_vnode = vnode;
4099 	return B_OK;
4100 }
4101 
4102 
4103 extern "C" status_t
4104 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4105 {
4106 	struct vnode* vnode;
4107 
4108 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4109 	if (status != B_OK)
4110 		return status;
4111 
4112 	*_vnode = vnode;
4113 	return B_OK;
4114 }
4115 
4116 
4117 extern "C" status_t
4118 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4119 	const char* name, struct vnode** _vnode)
4120 {
4121 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4122 }
4123 
4124 
4125 extern "C" void
4126 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4127 {
4128 	*_mountID = vnode->device;
4129 	*_vnodeID = vnode->id;
4130 }
4131 
4132 
4133 /*!
4134 	Helper function abstracting the process of "converting" a given
4135 	vnode-pointer to a fs_vnode-pointer.
4136 	Currently only used in bindfs.
4137 */
4138 extern "C" fs_vnode*
4139 vfs_fsnode_for_vnode(struct vnode* vnode)
4140 {
4141 	return vnode;
4142 }
4143 
4144 
4145 /*!
4146 	Calls fs_open() on the given vnode and returns a new
4147 	file descriptor for it
4148 */
4149 int
4150 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4151 {
4152 	return open_vnode(vnode, openMode, kernel);
4153 }
4154 
4155 
4156 /*!	Looks up a vnode with the given mount and vnode ID.
4157 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4158 	to the node.
4159 	It's currently only used by file_cache_create().
4160 */
4161 extern "C" status_t
4162 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4163 {
4164 	rw_lock_read_lock(&sVnodeLock);
4165 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4166 	rw_lock_read_unlock(&sVnodeLock);
4167 
4168 	if (vnode == NULL)
4169 		return B_ERROR;
4170 
4171 	*_vnode = vnode;
4172 	return B_OK;
4173 }
4174 
4175 
4176 extern "C" status_t
4177 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4178 	bool traverseLeafLink, bool kernel, void** _node)
4179 {
4180 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4181 		volume, path, kernel));
4182 
4183 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4184 	if (pathBuffer.InitCheck() != B_OK)
4185 		return B_NO_MEMORY;
4186 
4187 	fs_mount* mount;
4188 	status_t status = get_mount(volume->id, &mount);
4189 	if (status != B_OK)
4190 		return status;
4191 
4192 	char* buffer = pathBuffer.LockBuffer();
4193 	strlcpy(buffer, path, pathBuffer.BufferSize());
4194 
4195 	struct vnode* vnode = mount->root_vnode;
4196 
4197 	if (buffer[0] == '/')
4198 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4199 	else {
4200 		inc_vnode_ref_count(vnode);
4201 			// vnode_path_to_vnode() releases a reference to the starting vnode
4202 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4203 			kernel, &vnode, NULL);
4204 	}
4205 
4206 	put_mount(mount);
4207 
4208 	if (status != B_OK)
4209 		return status;
4210 
4211 	if (vnode->device != volume->id) {
4212 		// wrong mount ID - must not gain access on foreign file system nodes
4213 		put_vnode(vnode);
4214 		return B_BAD_VALUE;
4215 	}
4216 
4217 	// Use get_vnode() to resolve the cookie for the right layer.
4218 	status = get_vnode(volume, vnode->id, _node);
4219 	put_vnode(vnode);
4220 
4221 	return status;
4222 }
4223 
4224 
4225 status_t
4226 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4227 	struct stat* stat, bool kernel)
4228 {
4229 	status_t status;
4230 
4231 	if (path) {
4232 		// path given: get the stat of the node referred to by (fd, path)
4233 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4234 		if (pathBuffer.InitCheck() != B_OK)
4235 			return B_NO_MEMORY;
4236 
4237 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4238 			traverseLeafLink, stat, kernel);
4239 	} else {
4240 		// no path given: get the FD and use the FD operation
4241 		struct file_descriptor* descriptor
4242 			= get_fd(get_current_io_context(kernel), fd);
4243 		if (descriptor == NULL)
4244 			return B_FILE_ERROR;
4245 
4246 		if (descriptor->ops->fd_read_stat)
4247 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4248 		else
4249 			status = B_UNSUPPORTED;
4250 
4251 		put_fd(descriptor);
4252 	}
4253 
4254 	return status;
4255 }
4256 
4257 
4258 /*!	Finds the full path to the file that contains the module \a moduleName,
4259 	puts it into \a pathBuffer, and returns B_OK for success.
4260 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4261 	\c B_ENTRY_NOT_FOUND if no file could be found.
4262 	\a pathBuffer is clobbered in any case and must not be relied on if this
4263 	function returns unsuccessfully.
4264 	\a basePath and \a pathBuffer must not point to the same space.
4265 */
4266 status_t
4267 vfs_get_module_path(const char* basePath, const char* moduleName,
4268 	char* pathBuffer, size_t bufferSize)
4269 {
4270 	struct vnode* dir;
4271 	struct vnode* file;
4272 	status_t status;
4273 	size_t length;
4274 	char* path;
4275 
4276 	if (bufferSize == 0
4277 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4278 		return B_BUFFER_OVERFLOW;
4279 
4280 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4281 	if (status != B_OK)
4282 		return status;
4283 
4284 	// the path buffer had been clobbered by the above call
4285 	length = strlcpy(pathBuffer, basePath, bufferSize);
4286 	if (pathBuffer[length - 1] != '/')
4287 		pathBuffer[length++] = '/';
4288 
4289 	path = pathBuffer + length;
4290 	bufferSize -= length;
4291 
4292 	while (moduleName) {
4293 		char* nextPath = strchr(moduleName, '/');
4294 		if (nextPath == NULL)
4295 			length = strlen(moduleName);
4296 		else {
4297 			length = nextPath - moduleName;
4298 			nextPath++;
4299 		}
4300 
4301 		if (length + 1 >= bufferSize) {
4302 			status = B_BUFFER_OVERFLOW;
4303 			goto err;
4304 		}
4305 
4306 		memcpy(path, moduleName, length);
4307 		path[length] = '\0';
4308 		moduleName = nextPath;
4309 
4310 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4311 		if (status != B_OK) {
4312 			// vnode_path_to_vnode() has already released the reference to dir
4313 			return status;
4314 		}
4315 
4316 		if (S_ISDIR(file->Type())) {
4317 			// go to the next directory
4318 			path[length] = '/';
4319 			path[length + 1] = '\0';
4320 			path += length + 1;
4321 			bufferSize -= length + 1;
4322 
4323 			dir = file;
4324 		} else if (S_ISREG(file->Type())) {
4325 			// it's a file so it should be what we've searched for
4326 			put_vnode(file);
4327 
4328 			return B_OK;
4329 		} else {
4330 			TRACE(("vfs_get_module_path(): something is strange here: "
4331 				"0x%08" B_PRIx32 "...\n", file->Type()));
4332 			status = B_ERROR;
4333 			dir = file;
4334 			goto err;
4335 		}
4336 	}
4337 
4338 	// if we got here, the moduleName just pointed to a directory, not to
4339 	// a real module - what should we do in this case?
4340 	status = B_ENTRY_NOT_FOUND;
4341 
4342 err:
4343 	put_vnode(dir);
4344 	return status;
4345 }
4346 
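/* Illustrative sketch (not part of the build): resolving a module name
   against a base directory. The base path shown is hypothetical; leading
   components of the module name are treated as subdirectories, and the
   search stops at the first regular file.

	char path[B_PATH_NAME_LENGTH];
	status_t status = vfs_get_module_path("/boot/system/add-ons/kernel",
		"drivers/bus/usb/v1", path, sizeof(path));
	// on success, path names the file that contains the module
*/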
4347 
4348 /*!	\brief Normalizes a given path.
4349 
4350 	The path must refer to an existing or non-existing entry in an existing
4351 	directory, that is chopping off the leaf component the remaining path must
4352 	refer to an existing directory.
4353 
4354 	The returned path will be canonical in that it will be absolute, will not
4355 	contain any "." or ".." components or duplicate occurrences of '/'s,
4356 	and none of the directory components will be symbolic links.
4357 
4358 	Any two paths referring to the same entry will result in the same
4359 	normalized path (well, that is pretty much the definition of `normalized',
4360 	isn't it :-).
4361 
4362 	\param path The path to be normalized.
4363 	\param buffer The buffer into which the normalized path will be written.
4364 		   May be the same one as \a path.
4365 	\param bufferSize The size of \a buffer.
4366 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4367 	\param kernel \c true, if the IO context of the kernel shall be used,
4368 		   otherwise that of the team this thread belongs to. Only relevant,
4369 		   if the path is relative (to get the CWD).
4370 	\return \c B_OK if everything went fine, another error code otherwise.
4371 */
4372 status_t
4373 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4374 	bool traverseLink, bool kernel)
4375 {
4376 	if (!path || !buffer || bufferSize < 1)
4377 		return B_BAD_VALUE;
4378 
4379 	if (path != buffer) {
4380 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4381 			return B_BUFFER_OVERFLOW;
4382 	}
4383 
4384 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4385 }
4386 
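/* Illustrative sketch (not part of the build): normalizing a path in place,
   which the API explicitly allows (\a buffer may equal \a path). The example
   path is hypothetical and assumes the directories involved exist.

	char path[B_PATH_NAME_LENGTH];
	strlcpy(path, "/boot/./system//lib/../bin", sizeof(path));
	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
		// path is now "/boot/system/bin", provided no symlinks are
		// involved along the way
	}
*/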
4387 
4388 /*!	\brief Creates a special node in the file system.
4389 
4390 	The caller gets a reference to the newly created node (which is passed
4391 	back through \a _createdVnode) and is responsible for releasing it.
4392 
4393 	\param path The path where to create the entry for the node. Can be \c NULL,
4394 		in which case the node is created without an entry in the root FS -- it
4395 		will automatically be deleted when the last reference has been released.
4396 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4397 		the target file system will just create the node with its standard
4398 		operations. Depending on the type of the node a subnode might be created
4399 		automatically, though.
4400 	\param mode The type and permissions for the node to be created.
4401 	\param flags Flags to be passed to the creating FS.
4402 	\param kernel \c true, if called in the kernel context (relevant only if
4403 		\a path is not \c NULL and not absolute).
4404 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4405 		file system creating the node, with the private data pointer and
4406 		operations for the super node. Can be \c NULL.
4407 	\param _createdVnode Pointer to pre-allocated storage where to store the
4408 		pointer to the newly created node.
4409 	\return \c B_OK if everything went fine, another error code otherwise.
4410 */
4411 status_t
4412 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4413 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4414 	struct vnode** _createdVnode)
4415 {
4416 	struct vnode* dirNode;
4417 	char _leaf[B_FILE_NAME_LENGTH];
4418 	char* leaf = NULL;
4419 
4420 	if (path) {
4421 		// We've got a path. Get the dir vnode and the leaf name.
4422 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4423 		if (tmpPathBuffer.InitCheck() != B_OK)
4424 			return B_NO_MEMORY;
4425 
4426 		char* tmpPath = tmpPathBuffer.LockBuffer();
4427 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4428 			return B_NAME_TOO_LONG;
4429 
4430 		// get the dir vnode and the leaf name
4431 		leaf = _leaf;
4432 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4433 		if (error != B_OK)
4434 			return error;
4435 	} else {
4436 		// No path. Create the node in the root FS.
4437 		dirNode = sRoot;
4438 		inc_vnode_ref_count(dirNode);
4439 	}
4440 
4441 	VNodePutter _(dirNode);
4442 
4443 	// check support for creating special nodes
4444 	if (!HAS_FS_CALL(dirNode, create_special_node))
4445 		return B_UNSUPPORTED;
4446 
4447 	// create the node
4448 	fs_vnode superVnode;
4449 	ino_t nodeID;
4450 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4451 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4452 	if (status != B_OK)
4453 		return status;
4454 
4455 	// lookup the node
4456 	rw_lock_read_lock(&sVnodeLock);
4457 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4458 	rw_lock_read_unlock(&sVnodeLock);
4459 
4460 	if (*_createdVnode == NULL) {
4461 		panic("vfs_create_special_node(): lookup of node failed");
4462 		return B_ERROR;
4463 	}
4464 
4465 	return B_OK;
4466 }
4467 
4468 
4469 extern "C" void
4470 vfs_put_vnode(struct vnode* vnode)
4471 {
4472 	put_vnode(vnode);
4473 }
4474 
4475 
4476 extern "C" status_t
4477 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4478 {
4479 	// Get current working directory from io context
4480 	struct io_context* context = get_current_io_context(false);
4481 	status_t status = B_OK;
4482 
4483 	mutex_lock(&context->io_mutex);
4484 
4485 	if (context->cwd != NULL) {
4486 		*_mountID = context->cwd->device;
4487 		*_vnodeID = context->cwd->id;
4488 	} else
4489 		status = B_ERROR;
4490 
4491 	mutex_unlock(&context->io_mutex);
4492 	return status;
4493 }
4494 
4495 
4496 status_t
4497 vfs_unmount(dev_t mountID, uint32 flags)
4498 {
4499 	return fs_unmount(NULL, mountID, flags, true);
4500 }
4501 
4502 
4503 extern "C" status_t
4504 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4505 {
4506 	struct vnode* vnode;
4507 
4508 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4509 	if (status != B_OK)
4510 		return status;
4511 
4512 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4513 	put_vnode(vnode);
4514 	return B_OK;
4515 }
4516 
4517 
4518 extern "C" void
4519 vfs_free_unused_vnodes(int32 level)
4520 {
4521 	vnode_low_resource_handler(NULL,
4522 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4523 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4524 		level);
4525 }
4526 
4527 
4528 extern "C" bool
4529 vfs_can_page(struct vnode* vnode, void* cookie)
4530 {
4531 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4532 
4533 	if (HAS_FS_CALL(vnode, can_page))
4534 		return FS_CALL(vnode, can_page, cookie);
4535 	return false;
4536 }
4537 
4538 
4539 extern "C" status_t
4540 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4541 	const generic_io_vec* vecs, size_t count, uint32 flags,
4542 	generic_size_t* _numBytes)
4543 {
4544 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4545 		vecs, pos));
4546 
4547 #if VFS_PAGES_IO_TRACING
4548 	generic_size_t bytesRequested = *_numBytes;
4549 #endif
4550 
4551 	IORequest request;
4552 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4553 	if (status == B_OK) {
4554 		status = vfs_vnode_io(vnode, cookie, &request);
4555 		if (status == B_OK)
4556 			status = request.Wait();
4557 		*_numBytes = request.TransferredBytes();
4558 	}
4559 
4560 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4561 		status, *_numBytes));
4562 
4563 	return status;
4564 }
4565 
4566 
4567 extern "C" status_t
4568 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4569 	const generic_io_vec* vecs, size_t count, uint32 flags,
4570 	generic_size_t* _numBytes)
4571 {
4572 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4573 		vecs, pos));
4574 
4575 #if VFS_PAGES_IO_TRACING
4576 	generic_size_t bytesRequested = *_numBytes;
4577 #endif
4578 
4579 	IORequest request;
4580 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4581 	if (status == B_OK) {
4582 		status = vfs_vnode_io(vnode, cookie, &request);
4583 		if (status == B_OK)
4584 			status = request.Wait();
4585 		*_numBytes = request.TransferredBytes();
4586 	}
4587 
4588 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4589 		status, *_numBytes));
4590 
4591 	return status;
4592 }
4593 
4594 
4595 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, one will
4596 	be created, provided \a allocate is \c true.
4597 	On success, this function also acquires a reference to the cache
4598 	it returns.
4599 */
4600 extern "C" status_t
4601 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4602 {
4603 	if (vnode->cache != NULL) {
4604 		vnode->cache->AcquireRef();
4605 		*_cache = vnode->cache;
4606 		return B_OK;
4607 	}
4608 
4609 	rw_lock_read_lock(&sVnodeLock);
4610 	vnode->Lock();
4611 
4612 	status_t status = B_OK;
4613 
4614 	// The cache could have been created in the meantime
4615 	if (vnode->cache == NULL) {
4616 		if (allocate) {
4617 			// TODO: actually the vnode needs to be busy already here, or
4618 			//	else this won't work...
4619 			bool wasBusy = vnode->IsBusy();
4620 			vnode->SetBusy(true);
4621 
4622 			vnode->Unlock();
4623 			rw_lock_read_unlock(&sVnodeLock);
4624 
4625 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4626 
4627 			rw_lock_read_lock(&sVnodeLock);
4628 			vnode->Lock();
4629 			vnode->SetBusy(wasBusy);
4630 		} else
4631 			status = B_BAD_VALUE;
4632 	}
4633 
4634 	vnode->Unlock();
4635 	rw_lock_read_unlock(&sVnodeLock);
4636 
4637 	if (status == B_OK) {
4638 		vnode->cache->AcquireRef();
4639 		*_cache = vnode->cache;
4640 	}
4641 
4642 	return status;
4643 }
4644 
4645 
4646 status_t
4647 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4648 	file_io_vec* vecs, size_t* _count)
4649 {
4650 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4651 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4652 
4653 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4654 }
4655 
4656 
4657 status_t
4658 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4659 {
4660 	status_t status = FS_CALL(vnode, read_stat, stat);
4661 
4662 	// fill in the st_dev and st_ino fields
4663 	if (status == B_OK) {
4664 		stat->st_dev = vnode->device;
4665 		stat->st_ino = vnode->id;
4666 		stat->st_rdev = -1;
4667 	}
4668 
4669 	return status;
4670 }
4671 
4672 
4673 status_t
4674 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4675 {
4676 	struct vnode* vnode;
4677 	status_t status = get_vnode(device, inode, &vnode, true, false);
4678 	if (status != B_OK)
4679 		return status;
4680 
4681 	status = FS_CALL(vnode, read_stat, stat);
4682 
4683 	// fill in the st_dev and st_ino fields
4684 	if (status == B_OK) {
4685 		stat->st_dev = vnode->device;
4686 		stat->st_ino = vnode->id;
4687 		stat->st_rdev = -1;
4688 	}
4689 
4690 	put_vnode(vnode);
4691 	return status;
4692 }
4693 
4694 
4695 status_t
4696 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4697 {
4698 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4699 }
4700 
4701 
4702 status_t
4703 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4704 	bool kernel, char* path, size_t pathLength)
4705 {
4706 	struct vnode* vnode;
4707 	status_t status;
4708 
4709 	// filter invalid leaf names
4710 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4711 		return B_BAD_VALUE;
4712 
4713 	// get the vnode matching the dir's node_ref
4714 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4715 		// special cases "." and "..": we can directly get the vnode of the
4716 		// referenced directory
4717 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4718 		leaf = NULL;
4719 	} else
4720 		status = get_vnode(device, inode, &vnode, true, false);
4721 	if (status != B_OK)
4722 		return status;
4723 
4724 	// get the directory path
4725 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4726 	put_vnode(vnode);
4727 		// we don't need the vnode anymore
4728 	if (status != B_OK)
4729 		return status;
4730 
4731 	// append the leaf name
4732 	if (leaf) {
4733 		// insert a directory separator if this is not the file system root
4734 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4735 				>= pathLength)
4736 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4737 			return B_NAME_TOO_LONG;
4738 		}
4739 	}
4740 
4741 	return B_OK;
4742 }
4743 
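/* Illustrative sketch (not part of the build): turning an entry-ref style
   (device, directory inode, leaf name) triple into an absolute path. The
   device and inode values are hypothetical.

	char path[B_PATH_NAME_LENGTH];
	status_t status = vfs_entry_ref_to_path(device, directoryInode,
		"settings.ini", true, path, sizeof(path));
	// on success, path is "<path-of-directory>/settings.ini"; leaves of
	// "." and ".." are resolved to the directory they actually refer to
*/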
4744 
4745 /*!	If the given descriptor locked its vnode, that lock will be released. */
4746 void
4747 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4748 {
4749 	struct vnode* vnode = fd_vnode(descriptor);
4750 
4751 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4752 		vnode->mandatory_locked_by = NULL;
4753 }
4754 
4755 
4756 /*!	Closes all file descriptors of the specified I/O context that
4757 	have the O_CLOEXEC flag set.
4758 */
4759 void
4760 vfs_exec_io_context(io_context* context)
4761 {
4762 	uint32 i;
4763 
4764 	for (i = 0; i < context->table_size; i++) {
4765 		mutex_lock(&context->io_mutex);
4766 
4767 		struct file_descriptor* descriptor = context->fds[i];
4768 		bool remove = false;
4769 
4770 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4771 			context->fds[i] = NULL;
4772 			context->num_used_fds--;
4773 
4774 			remove = true;
4775 		}
4776 
4777 		mutex_unlock(&context->io_mutex);
4778 
4779 		if (remove) {
4780 			close_fd(descriptor);
4781 			put_fd(descriptor);
4782 		}
4783 	}
4784 }
4785 
4786 
4787 /*! Sets up a new io_context structure, and inherits the properties
4788 	of the parent io_context if it is given.
4789 */
4790 io_context*
4791 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4792 {
4793 	io_context* context = (io_context*)malloc(sizeof(io_context));
4794 	if (context == NULL)
4795 		return NULL;
4796 
4797 	TIOC(NewIOContext(context, parentContext));
4798 
4799 	memset(context, 0, sizeof(io_context));
4800 	context->ref_count = 1;
4801 
4802 	MutexLocker parentLocker;
4803 
4804 	size_t tableSize;
4805 	if (parentContext) {
4806 		parentLocker.SetTo(parentContext->io_mutex, false);
4807 		tableSize = parentContext->table_size;
4808 	} else
4809 		tableSize = DEFAULT_FD_TABLE_SIZE;
4810 
4811 	// allocate space for FDs and their close-on-exec flag
4812 	context->fds = (file_descriptor**)malloc(
4813 		sizeof(struct file_descriptor*) * tableSize
4814 		+ sizeof(struct select_sync*) * tableSize
4815 		+ (tableSize + 7) / 8);
4816 	if (context->fds == NULL) {
4817 		free(context);
4818 		return NULL;
4819 	}
4820 
4821 	context->select_infos = (select_info**)(context->fds + tableSize);
4822 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4823 
4824 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4825 		+ sizeof(struct select_sync*) * tableSize
4826 		+ (tableSize + 7) / 8);
4827 
4828 	mutex_init(&context->io_mutex, "I/O context");
4829 
4830 	// Copy all parent file descriptors
4831 
4832 	if (parentContext) {
4833 		size_t i;
4834 
4835 		mutex_lock(&sIOContextRootLock);
4836 		context->root = parentContext->root;
4837 		if (context->root)
4838 			inc_vnode_ref_count(context->root);
4839 		mutex_unlock(&sIOContextRootLock);
4840 
4841 		context->cwd = parentContext->cwd;
4842 		if (context->cwd)
4843 			inc_vnode_ref_count(context->cwd);
4844 
4845 		for (i = 0; i < tableSize; i++) {
4846 			struct file_descriptor* descriptor = parentContext->fds[i];
4847 
4848 			if (descriptor != NULL) {
4849 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4850 				if (closeOnExec && purgeCloseOnExec)
4851 					continue;
4852 
4853 				TFD(InheritFD(context, i, descriptor, parentContext));
4854 
4855 				context->fds[i] = descriptor;
4856 				context->num_used_fds++;
4857 				atomic_add(&descriptor->ref_count, 1);
4858 				atomic_add(&descriptor->open_count, 1);
4859 
4860 				if (closeOnExec)
4861 					fd_set_close_on_exec(context, i, true);
4862 			}
4863 		}
4864 
4865 		parentLocker.Unlock();
4866 	} else {
4867 		context->root = sRoot;
4868 		context->cwd = sRoot;
4869 
4870 		if (context->root)
4871 			inc_vnode_ref_count(context->root);
4872 
4873 		if (context->cwd)
4874 			inc_vnode_ref_count(context->cwd);
4875 	}
4876 
4877 	context->table_size = tableSize;
4878 
4879 	list_init(&context->node_monitors);
4880 	context->max_monitors = DEFAULT_NODE_MONITORS;
4881 
4882 	return context;
4883 }
4884 
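/* A note on the layout (not part of the build): the single allocation made
   above for context->fds is carved into three consecutive regions:

	[ file_descriptor*  fds[tableSize]                                  ]
	[ select_info*      select_infos[tableSize]                         ]
	[ uint8             close-on-exec bitmap, (tableSize + 7) / 8 bytes ]

   vfs_resize_fd_table() below reproduces the same layout for the new size
   before copying the old contents over.
*/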
4885 
4886 static status_t
4887 vfs_free_io_context(io_context* context)
4888 {
4889 	uint32 i;
4890 
4891 	TIOC(FreeIOContext(context));
4892 
4893 	if (context->root)
4894 		put_vnode(context->root);
4895 
4896 	if (context->cwd)
4897 		put_vnode(context->cwd);
4898 
4899 	mutex_lock(&context->io_mutex);
4900 
4901 	for (i = 0; i < context->table_size; i++) {
4902 		if (struct file_descriptor* descriptor = context->fds[i]) {
4903 			close_fd(descriptor);
4904 			put_fd(descriptor);
4905 		}
4906 	}
4907 
4908 	mutex_destroy(&context->io_mutex);
4909 
4910 	remove_node_monitors(context);
4911 	free(context->fds);
4912 	free(context);
4913 
4914 	return B_OK;
4915 }
4916 
4917 
4918 void
4919 vfs_get_io_context(io_context* context)
4920 {
4921 	atomic_add(&context->ref_count, 1);
4922 }
4923 
4924 
4925 void
4926 vfs_put_io_context(io_context* context)
4927 {
4928 	if (atomic_add(&context->ref_count, -1) == 1)
4929 		vfs_free_io_context(context);
4930 }
4931 
4932 
4933 status_t
4934 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
4935 {
4936 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
4937 		return B_BAD_VALUE;
4938 
4939 	TIOC(ResizeIOContext(context, newSize));
4940 
4941 	MutexLocker _(context->io_mutex);
4942 
4943 	uint32 oldSize = context->table_size;
4944 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4945 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4946 
4947 	// If the tables shrink, make sure none of the fds being dropped are in use.
4948 	if (newSize < oldSize) {
4949 		for (uint32 i = oldSize; i-- > newSize;) {
4950 			if (context->fds[i])
4951 				return B_BUSY;
4952 		}
4953 	}
4954 
4955 	// store pointers to the old tables
4956 	file_descriptor** oldFDs = context->fds;
4957 	select_info** oldSelectInfos = context->select_infos;
4958 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4959 
4960 	// allocate new tables
4961 	file_descriptor** newFDs = (file_descriptor**)malloc(
4962 		sizeof(struct file_descriptor*) * newSize
4963 		+ sizeof(struct select_sync*) * newSize
4964 		+ newCloseOnExitBitmapSize);
4965 	if (newFDs == NULL)
4966 		return B_NO_MEMORY;
4967 
4968 	context->fds = newFDs;
4969 	context->select_infos = (select_info**)(context->fds + newSize);
4970 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4971 	context->table_size = newSize;
4972 
4973 	// copy entries from old tables
4974 	uint32 toCopy = min_c(oldSize, newSize);
4975 
4976 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4977 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4978 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4979 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4980 
4981 	// clear additional entries, if the tables grow
4982 	if (newSize > oldSize) {
4983 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4984 		memset(context->select_infos + oldSize, 0,
4985 			sizeof(void*) * (newSize - oldSize));
4986 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4987 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4988 	}
4989 
4990 	free(oldFDs);
4991 
4992 	return B_OK;
4993 }
4994 
4995 
4996 static status_t
4997 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4998 {
4999 	status_t status = B_OK;
5000 
5001 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
5002 		return B_BAD_VALUE;
5003 
5004 	mutex_lock(&context->io_mutex);
5005 
5006 	if ((size_t)newSize < context->num_monitors) {
5007 		status = B_BUSY;
5008 		goto out;
5009 	}
5010 	context->max_monitors = newSize;
5011 
5012 out:
5013 	mutex_unlock(&context->io_mutex);
5014 	return status;
5015 }
5016 
5017 
5018 status_t
5019 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5020 	ino_t* _mountPointNodeID)
5021 {
5022 	ReadLocker nodeLocker(sVnodeLock);
5023 	MutexLocker mountLocker(sMountMutex);
5024 
5025 	struct fs_mount* mount = find_mount(mountID);
5026 	if (mount == NULL)
5027 		return B_BAD_VALUE;
5028 
5029 	Vnode* mountPoint = mount->covers_vnode;
5030 
5031 	*_mountPointMountID = mountPoint->device;
5032 	*_mountPointNodeID = mountPoint->id;
5033 
5034 	return B_OK;
5035 }
5036 
5037 
5038 status_t
5039 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5040 	ino_t coveredNodeID)
5041 {
5042 	// get the vnodes
5043 	Vnode* vnode;
5044 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5045 	if (error != B_OK)
5046 		return B_BAD_VALUE;
5047 	VNodePutter vnodePutter(vnode);
5048 
5049 	Vnode* coveredVnode;
5050 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5051 		false);
5052 	if (error != B_OK)
5053 		return B_BAD_VALUE;
5054 	VNodePutter coveredVnodePutter(coveredVnode);
5055 
5056 	// establish the covered/covering links
5057 	WriteLocker locker(sVnodeLock);
5058 
5059 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5060 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5061 		return B_BUSY;
5062 	}
5063 
5064 	vnode->covers = coveredVnode;
5065 	vnode->SetCovering(true);
5066 
5067 	coveredVnode->covered_by = vnode;
5068 	coveredVnode->SetCovered(true);
5069 
5070 	// the vnodes do now reference each other
5071 	inc_vnode_ref_count(vnode);
5072 	inc_vnode_ref_count(coveredVnode);
5073 
5074 	return B_OK;
5075 }
5076 
5077 
5078 int
5079 vfs_getrlimit(int resource, struct rlimit* rlp)
5080 {
5081 	if (!rlp)
5082 		return B_BAD_ADDRESS;
5083 
5084 	switch (resource) {
5085 		case RLIMIT_NOFILE:
5086 		{
5087 			struct io_context* context = get_current_io_context(false);
5088 			MutexLocker _(context->io_mutex);
5089 
5090 			rlp->rlim_cur = context->table_size;
5091 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5092 			return 0;
5093 		}
5094 
5095 		case RLIMIT_NOVMON:
5096 		{
5097 			struct io_context* context = get_current_io_context(false);
5098 			MutexLocker _(context->io_mutex);
5099 
5100 			rlp->rlim_cur = context->max_monitors;
5101 			rlp->rlim_max = MAX_NODE_MONITORS;
5102 			return 0;
5103 		}
5104 
5105 		default:
5106 			return B_BAD_VALUE;
5107 	}
5108 }
5109 
5110 
5111 int
5112 vfs_setrlimit(int resource, const struct rlimit* rlp)
5113 {
5114 	if (!rlp)
5115 		return B_BAD_ADDRESS;
5116 
5117 	switch (resource) {
5118 		case RLIMIT_NOFILE:
5119 			/* TODO: check getuid() */
5120 			if (rlp->rlim_max != RLIM_SAVED_MAX
5121 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5122 				return B_NOT_ALLOWED;
5123 
5124 			return vfs_resize_fd_table(get_current_io_context(false),
5125 				rlp->rlim_cur);
5126 
5127 		case RLIMIT_NOVMON:
5128 			/* TODO: check getuid() */
5129 			if (rlp->rlim_max != RLIM_SAVED_MAX
5130 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5131 				return B_NOT_ALLOWED;
5132 
5133 			return vfs_resize_monitor_table(get_current_io_context(false),
5134 				rlp->rlim_cur);
5135 
5136 		default:
5137 			return B_BAD_VALUE;
5138 	}
5139 }
5140 
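/* Illustrative sketch (not part of the build): growing the caller's FD table
   through the rlimit interface, which is what a userland setrlimit() call
   ends up doing here.

	struct rlimit rl;
	rl.rlim_cur = 1024;					// requested new table size
	rl.rlim_max = MAX_FD_TABLE_SIZE;	// hard limit must stay at the maximum
	int result = vfs_setrlimit(RLIMIT_NOFILE, &rl);
	// result is 0 (B_OK) on success, B_BUSY if shrinking would drop FDs
	// that are still in use, or B_NOT_ALLOWED for a disallowed rlim_max
*/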
5141 
5142 status_t
5143 vfs_init(kernel_args* args)
5144 {
5145 	vnode::StaticInit();
5146 
5147 	sVnodeTable = new(std::nothrow) VnodeTable();
5148 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5149 		panic("vfs_init: error creating vnode hash table\n");
5150 
5151 	struct vnode dummy_vnode;
5152 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5153 
5154 	struct fs_mount dummyMount;
5155 	sMountsTable = new(std::nothrow) MountTable();
5156 	if (sMountsTable == NULL
5157 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5158 		panic("vfs_init: error creating mounts hash table\n");
5159 
5160 	node_monitor_init();
5161 
5162 	sRoot = NULL;
5163 
5164 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5165 
5166 	if (block_cache_init() != B_OK)
5167 		return B_ERROR;
5168 
5169 #ifdef ADD_DEBUGGER_COMMANDS
5170 	// add some debugger commands
5171 	add_debugger_command_etc("vnode", &dump_vnode,
5172 		"Print info about the specified vnode",
5173 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5174 		"Prints information about the vnode specified by address <vnode> or\n"
5175 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5176 		"constructed and printed. It might not be possible to construct a\n"
5177 		"complete path, though.\n",
5178 		0);
5179 	add_debugger_command("vnodes", &dump_vnodes,
5180 		"list all vnodes (from the specified device)");
5181 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5182 		"list all vnode caches");
5183 	add_debugger_command("mount", &dump_mount,
5184 		"info about the specified fs_mount");
5185 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5186 	add_debugger_command("io_context", &dump_io_context,
5187 		"info about the I/O context");
5188 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5189 		"info about vnode usage");
5190 #endif
5191 
5192 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5193 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5194 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5195 		0);
5196 
5197 	fifo_init();
5198 	file_map_init();
5199 
5200 	return file_cache_init();
5201 }
5202 
5203 
5204 //	#pragma mark - fd_ops implementations
5205 
5206 
5207 /*!
5208 	Calls fs_open() on the given vnode and returns a new
5209 	file descriptor for it
5210 */
5211 static int
5212 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5213 {
5214 	void* cookie;
5215 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5216 	if (status != B_OK)
5217 		return status;
5218 
5219 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5220 	if (fd < 0) {
5221 		FS_CALL(vnode, close, cookie);
5222 		FS_CALL(vnode, free_cookie, cookie);
5223 	}
5224 	return fd;
5225 }
5226 
5227 
5228 /*!
5229 	Looks up the entry \a name in \a directory, creating the node first
5230 	if it doesn't exist yet, and returns a new file descriptor for it
5231 */
5232 static int
5233 create_vnode(struct vnode* directory, const char* name, int openMode,
5234 	int perms, bool kernel)
5235 {
5236 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5237 	status_t status = B_ERROR;
5238 	struct vnode* vnode;
5239 	void* cookie;
5240 	ino_t newID;
5241 
5242 	// This is somewhat tricky: If the entry already exists, the FS responsible
5243 	// for the directory might not necessarily also be the one responsible for
5244 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5245 	// we can actually never call the create() hook without O_EXCL. Instead we
5246 	// try to look the entry up first. If it already exists, we just open the
5247 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5248 	// introduces a race condition, since someone else might have created the
5249 	// entry in the meantime. We hope the respective FS returns the correct
5250 	// error code in that case, and we then retry (up to 3 times).
5251 
5252 	for (int i = 0; i < 3 && status != B_OK; i++) {
5253 		// look the node up
5254 		status = lookup_dir_entry(directory, name, &vnode);
5255 		if (status == B_OK) {
5256 			VNodePutter putter(vnode);
5257 
5258 			if ((openMode & O_EXCL) != 0)
5259 				return B_FILE_EXISTS;
5260 
5261 			// If the node is a symlink, we have to follow it, unless
5262 			// O_NOTRAVERSE is set.
5263 			if (S_ISLNK(vnode->Type()) && traverse) {
5264 				putter.Put();
5265 				char clonedName[B_FILE_NAME_LENGTH + 1];
5266 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5267 						>= B_FILE_NAME_LENGTH) {
5268 					return B_NAME_TOO_LONG;
5269 				}
5270 
5271 				inc_vnode_ref_count(directory);
5272 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5273 					kernel, &vnode, NULL);
5274 				if (status != B_OK)
5275 					return status;
5276 
5277 				putter.SetTo(vnode);
5278 			}
5279 
5280 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5281 				return B_LINK_LIMIT;
5282 
5283 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5284 			// on success keep the vnode reference for the FD
5285 			if (fd >= 0)
5286 				putter.Detach();
5287 
5288 			return fd;
5289 		}
5290 
5291 		// it doesn't exist yet -- try to create it
5292 
5293 		if (!HAS_FS_CALL(directory, create))
5294 			return B_READ_ONLY_DEVICE;
5295 
5296 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5297 			&cookie, &newID);
5298 		if (status != B_OK
5299 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5300 			return status;
5301 		}
5302 	}
5303 
5304 	if (status != B_OK)
5305 		return status;
5306 
5307 	// the node has been created successfully
5308 
5309 	rw_lock_read_lock(&sVnodeLock);
5310 	vnode = lookup_vnode(directory->device, newID);
5311 	rw_lock_read_unlock(&sVnodeLock);
5312 
5313 	if (vnode == NULL) {
5314 		panic("vfs: fs_create() returned success but there is no vnode, "
5315 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5316 		return B_BAD_VALUE;
5317 	}
5318 
5319 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5320 	if (fd >= 0)
5321 		return fd;
5322 
5323 	status = fd;
5324 
5325 	// something went wrong, clean up
5326 
5327 	FS_CALL(vnode, close, cookie);
5328 	FS_CALL(vnode, free_cookie, cookie);
5329 	put_vnode(vnode);
5330 
5331 	FS_CALL(directory, unlink, name);
5332 
5333 	return status;
5334 }
5335 
5336 
5337 /*! Calls fs open_dir() on the given vnode and returns a new
5338 	file descriptor for it
5339 */
5340 static int
5341 open_dir_vnode(struct vnode* vnode, bool kernel)
5342 {
5343 	void* cookie;
5344 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5345 	if (status != B_OK)
5346 		return status;
5347 
5348 	// directory is opened, create a fd
5349 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5350 	if (status >= 0)
5351 		return status;
5352 
5353 	FS_CALL(vnode, close_dir, cookie);
5354 	FS_CALL(vnode, free_dir_cookie, cookie);
5355 
5356 	return status;
5357 }
5358 
5359 
5360 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5361 	file descriptor for it.
5362 	Used by attr_dir_open() and attr_dir_open_fd().
5363 */
5364 static int
5365 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5366 {
5367 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5368 		return B_UNSUPPORTED;
5369 
5370 	void* cookie;
5371 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5372 	if (status != B_OK)
5373 		return status;
5374 
5375 	// directory is opened, create a fd
5376 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5377 		kernel);
5378 	if (status >= 0)
5379 		return status;
5380 
5381 	FS_CALL(vnode, close_attr_dir, cookie);
5382 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5383 
5384 	return status;
5385 }
5386 
5387 
5388 static int
5389 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5390 	int openMode, int perms, bool kernel)
5391 {
5392 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5393 		"kernel %d\n", name, openMode, perms, kernel));
5394 
5395 	// get directory to put the new file in
5396 	struct vnode* directory;
5397 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5398 	if (status != B_OK)
5399 		return status;
5400 
5401 	status = create_vnode(directory, name, openMode, perms, kernel);
5402 	put_vnode(directory);
5403 
5404 	return status;
5405 }
5406 
5407 
5408 static int
5409 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5410 {
5411 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5412 		openMode, perms, kernel));
5413 
5414 	// get directory to put the new file in
5415 	char name[B_FILE_NAME_LENGTH];
5416 	struct vnode* directory;
5417 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5418 		kernel);
5419 	if (status < 0)
5420 		return status;
5421 
5422 	status = create_vnode(directory, name, openMode, perms, kernel);
5423 
5424 	put_vnode(directory);
5425 	return status;
5426 }
5427 
5428 
5429 static int
5430 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5431 	int openMode, bool kernel)
5432 {
5433 	if (name == NULL || *name == '\0')
5434 		return B_BAD_VALUE;
5435 
5436 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5437 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5438 
5439 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5440 
5441 	// get the vnode matching the entry_ref
5442 	struct vnode* vnode;
5443 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5444 		kernel, &vnode);
5445 	if (status != B_OK)
5446 		return status;
5447 
5448 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5449 		put_vnode(vnode);
5450 		return B_LINK_LIMIT;
5451 	}
5452 
5453 	int newFD = open_vnode(vnode, openMode, kernel);
5454 	if (newFD >= 0) {
5455 		// The vnode reference has been transferred to the FD
5456 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5457 			directoryID, vnode->id, name);
5458 	} else
5459 		put_vnode(vnode);
5460 
5461 	return newFD;
5462 }
5463 
5464 
5465 static int
5466 file_open(int fd, char* path, int openMode, bool kernel)
5467 {
5468 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5469 
5470 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5471 		fd, path, openMode, kernel));
5472 
5473 	// get the vnode matching the vnode + path combination
5474 	struct vnode* vnode;
5475 	ino_t parentID;
5476 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5477 		&parentID, kernel);
5478 	if (status != B_OK)
5479 		return status;
5480 
5481 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5482 		put_vnode(vnode);
5483 		return B_LINK_LIMIT;
5484 	}
5485 
5486 	// open the vnode
5487 	int newFD = open_vnode(vnode, openMode, kernel);
5488 	if (newFD >= 0) {
5489 		// The vnode reference has been transferred to the FD
5490 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5491 			vnode->device, parentID, vnode->id, NULL);
5492 	} else
5493 		put_vnode(vnode);
5494 
5495 	return newFD;
5496 }
5497 
5498 
5499 static status_t
5500 file_close(struct file_descriptor* descriptor)
5501 {
5502 	struct vnode* vnode = descriptor->u.vnode;
5503 	status_t status = B_OK;
5504 
5505 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5506 
5507 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5508 		vnode->id);
5509 	if (HAS_FS_CALL(vnode, close)) {
5510 		status = FS_CALL(vnode, close, descriptor->cookie);
5511 	}
5512 
5513 	if (status == B_OK) {
5514 		// remove all outstanding locks for this team
5515 		if (HAS_FS_CALL(vnode, release_lock))
5516 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5517 		else
5518 			status = release_advisory_lock(vnode, NULL);
5519 	}
5520 	return status;
5521 }
5522 
5523 
5524 static void
5525 file_free_fd(struct file_descriptor* descriptor)
5526 {
5527 	struct vnode* vnode = descriptor->u.vnode;
5528 
5529 	if (vnode != NULL) {
5530 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5531 		put_vnode(vnode);
5532 	}
5533 }
5534 
5535 
5536 static status_t
5537 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5538 	size_t* length)
5539 {
5540 	struct vnode* vnode = descriptor->u.vnode;
5541 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5542 		pos, length, *length));
5543 
5544 	if (S_ISDIR(vnode->Type()))
5545 		return B_IS_A_DIRECTORY;
5546 
5547 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5548 }
5549 
5550 
5551 static status_t
5552 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5553 	size_t* length)
5554 {
5555 	struct vnode* vnode = descriptor->u.vnode;
5556 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5557 		length));
5558 
5559 	if (S_ISDIR(vnode->Type()))
5560 		return B_IS_A_DIRECTORY;
5561 	if (!HAS_FS_CALL(vnode, write))
5562 		return B_READ_ONLY_DEVICE;
5563 
5564 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5565 }
5566 
5567 
5568 static off_t
5569 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5570 {
5571 	struct vnode* vnode = descriptor->u.vnode;
5572 	off_t offset;
5573 	bool isDevice = false;
5574 
5575 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5576 		seekType));
5577 
5578 	// some kinds of files are not seekable
5579 	switch (vnode->Type() & S_IFMT) {
5580 		case S_IFIFO:
5581 		case S_IFSOCK:
5582 			return ESPIPE;
5583 
5584 		// drivers publish block devices as character devices, so check both
5585 		case S_IFBLK:
5586 		case S_IFCHR:
5587 			isDevice = true;
5588 			break;
5589 		// The Open Group Base Specs don't single out any file types besides
5590 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5591 		case S_IFREG:
5592 		case S_IFDIR:
5593 		case S_IFLNK:
5594 			break;
5595 	}
5596 
5597 	switch (seekType) {
5598 		case SEEK_SET:
5599 			offset = 0;
5600 			break;
5601 		case SEEK_CUR:
5602 			offset = descriptor->pos;
5603 			break;
5604 		case SEEK_END:
5605 		{
5606 			// stat() the node
5607 			if (!HAS_FS_CALL(vnode, read_stat))
5608 				return B_UNSUPPORTED;
5609 
5610 			struct stat stat;
5611 			status_t status = FS_CALL(vnode, read_stat, &stat);
5612 			if (status != B_OK)
5613 				return status;
5614 
5615 			offset = stat.st_size;
5616 
5617 			if (offset == 0 && isDevice) {
5618 				// stat() on regular drivers doesn't report size
5619 				device_geometry geometry;
5620 
5621 				if (HAS_FS_CALL(vnode, ioctl)) {
5622 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5623 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5624 					if (status == B_OK)
5625 						offset = (off_t)geometry.bytes_per_sector
5626 							* geometry.sectors_per_track
5627 							* geometry.cylinder_count
5628 							* geometry.head_count;
5629 				}
5630 			}
5631 
5632 			break;
5633 		}
5634 		default:
5635 			return B_BAD_VALUE;
5636 	}
5637 
5638 	// assumes off_t is 64 bits wide
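	// (e.g. with offset == LONGLONG_MAX, adding a pos of 1 would otherwise
	// wrap around to a negative value)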
5639 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5640 		return B_BUFFER_OVERFLOW;
5641 
5642 	pos += offset;
5643 	if (pos < 0)
5644 		return B_BAD_VALUE;
5645 
5646 	return descriptor->pos = pos;
5647 }
5648 
5649 
5650 static status_t
5651 file_select(struct file_descriptor* descriptor, uint8 event,
5652 	struct selectsync* sync)
5653 {
5654 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5655 
5656 	struct vnode* vnode = descriptor->u.vnode;
5657 
5658 	// If the FS has no select() hook, notify select() now.
5659 	if (!HAS_FS_CALL(vnode, select))
5660 		return notify_select_event(sync, event);
5661 
5662 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5663 }
5664 
5665 
5666 static status_t
5667 file_deselect(struct file_descriptor* descriptor, uint8 event,
5668 	struct selectsync* sync)
5669 {
5670 	struct vnode* vnode = descriptor->u.vnode;
5671 
5672 	if (!HAS_FS_CALL(vnode, deselect))
5673 		return B_OK;
5674 
5675 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5676 }
5677 
5678 
5679 static status_t
5680 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5681 	bool kernel)
5682 {
5683 	struct vnode* vnode;
5684 	status_t status;
5685 
5686 	if (name == NULL || *name == '\0')
5687 		return B_BAD_VALUE;
5688 
5689 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5690 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5691 
5692 	status = get_vnode(mountID, parentID, &vnode, true, false);
5693 	if (status != B_OK)
5694 		return status;
5695 
5696 	if (HAS_FS_CALL(vnode, create_dir))
5697 		status = FS_CALL(vnode, create_dir, name, perms);
5698 	else
5699 		status = B_READ_ONLY_DEVICE;
5700 
5701 	put_vnode(vnode);
5702 	return status;
5703 }
5704 
5705 
5706 static status_t
5707 dir_create(int fd, char* path, int perms, bool kernel)
5708 {
5709 	char filename[B_FILE_NAME_LENGTH];
5710 	struct vnode* vnode;
5711 	status_t status;
5712 
5713 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5714 		kernel));
5715 
5716 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5717 	if (status < 0)
5718 		return status;
5719 
5720 	if (HAS_FS_CALL(vnode, create_dir)) {
5721 		status = FS_CALL(vnode, create_dir, filename, perms);
5722 	} else
5723 		status = B_READ_ONLY_DEVICE;
5724 
5725 	put_vnode(vnode);
5726 	return status;
5727 }
5728 
5729 
5730 static int
5731 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5732 {
5733 	FUNCTION(("dir_open_entry_ref()\n"));
5734 
5735 	if (name && name[0] == '\0')
5736 		return B_BAD_VALUE;
5737 
5738 	// get the vnode matching the entry_ref/node_ref
5739 	struct vnode* vnode;
5740 	status_t status;
5741 	if (name) {
5742 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5743 			&vnode);
5744 	} else
5745 		status = get_vnode(mountID, parentID, &vnode, true, false);
5746 	if (status != B_OK)
5747 		return status;
5748 
5749 	int newFD = open_dir_vnode(vnode, kernel);
5750 	if (newFD >= 0) {
5751 		// The vnode reference has been transferred to the FD
5752 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5753 			vnode->id, name);
5754 	} else
5755 		put_vnode(vnode);
5756 
5757 	return newFD;
5758 }
5759 
5760 
5761 static int
5762 dir_open(int fd, char* path, bool kernel)
5763 {
5764 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5765 		kernel));
5766 
5767 	// get the vnode matching the vnode + path combination
5768 	struct vnode* vnode = NULL;
5769 	ino_t parentID;
5770 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5771 		kernel);
5772 	if (status != B_OK)
5773 		return status;
5774 
5775 	// open the dir
5776 	int newFD = open_dir_vnode(vnode, kernel);
5777 	if (newFD >= 0) {
5778 		// The vnode reference has been transferred to the FD
5779 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5780 			parentID, vnode->id, NULL);
5781 	} else
5782 		put_vnode(vnode);
5783 
5784 	return newFD;
5785 }
5786 
5787 
5788 static status_t
5789 dir_close(struct file_descriptor* descriptor)
5790 {
5791 	struct vnode* vnode = descriptor->u.vnode;
5792 
5793 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5794 
5795 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5796 		vnode->id);
5797 	if (HAS_FS_CALL(vnode, close_dir))
5798 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5799 
5800 	return B_OK;
5801 }
5802 
5803 
5804 static void
5805 dir_free_fd(struct file_descriptor* descriptor)
5806 {
5807 	struct vnode* vnode = descriptor->u.vnode;
5808 
5809 	if (vnode != NULL) {
5810 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5811 		put_vnode(vnode);
5812 	}
5813 }
5814 
5815 
5816 static status_t
5817 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5818 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5819 {
5820 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5821 		bufferSize, _count);
5822 }
5823 
5824 
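/*!	Post-processes a dirent returned by a file system:
	- fills in d_pdev/d_pino with the parent directory's device and node ID,
	- for the ".." entry of a directory that covers another vnode (i.e. a
	  mount root), resolves ".." across the mount boundary, unless that
	  would bypass the I/O context's root,
	- for entries that are themselves covered by another mount, rewrites
	  d_dev/d_ino to refer to the topmost covering vnode.
*/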
5825 static status_t
5826 fix_dirent(struct vnode* parent, struct dirent* entry,
5827 	struct io_context* ioContext)
5828 {
5829 	// set d_pdev and d_pino
5830 	entry->d_pdev = parent->device;
5831 	entry->d_pino = parent->id;
5832 
5833 	// If this is the ".." entry and the directory is covering another vnode,
5834 	// we need to replace d_dev and d_ino with the actual values.
5835 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
5836 		// Make sure the IO context root is not bypassed.
5837 		if (parent == ioContext->root) {
5838 			entry->d_dev = parent->device;
5839 			entry->d_ino = parent->id;
5840 		} else {
5841 			inc_vnode_ref_count(parent);
5842 				// vnode_path_to_vnode() puts the node
5843 
5844 			// ".." is guaranteed not to be clobbered by this call
5845 			struct vnode* vnode;
5846 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5847 				ioContext, &vnode, NULL);
5848 
5849 			if (status == B_OK) {
5850 				entry->d_dev = vnode->device;
5851 				entry->d_ino = vnode->id;
5852 				put_vnode(vnode);
5853 			}
5854 		}
5855 	} else {
5856 		// resolve covered vnodes
5857 		ReadLocker _(&sVnodeLock);
5858 
5859 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5860 		if (vnode != NULL && vnode->covered_by != NULL) {
5861 			do {
5862 				vnode = vnode->covered_by;
5863 			} while (vnode->covered_by != NULL);
5864 
5865 			entry->d_dev = vnode->device;
5866 			entry->d_ino = vnode->id;
5867 		}
5868 	}
5869 
5870 	return B_OK;
5871 }
5872 
5873 
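/*!	Reads the next dirents from \a vnode's directory \a cookie into
	\a buffer and fixes each of them up via fix_dirent(). The entries are
	laid out back to back; d_reclen gives the byte offset from one dirent
	to the next.
*/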
5874 static status_t
5875 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5876 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5877 {
5878 	if (!HAS_FS_CALL(vnode, read_dir))
5879 		return B_UNSUPPORTED;
5880 
5881 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5882 		_count);
5883 	if (error != B_OK)
5884 		return error;
5885 
5886 	// we need to adjust the read dirents
5887 	uint32 count = *_count;
5888 	for (uint32 i = 0; i < count; i++) {
5889 		error = fix_dirent(vnode, buffer, ioContext);
5890 		if (error != B_OK)
5891 			return error;
5892 
5893 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5894 	}
5895 
5896 	return error;
5897 }
5898 
5899 
5900 static status_t
5901 dir_rewind(struct file_descriptor* descriptor)
5902 {
5903 	struct vnode* vnode = descriptor->u.vnode;
5904 
5905 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5906 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5907 	}
5908 
5909 	return B_UNSUPPORTED;
5910 }
5911 
5912 
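/*!	Removes the directory at fd + path. Before resolving the path, trailing
	"/" and "/." components are stripped, so e.g. "foo/bar/" and "foo/bar/."
	both remove "foo/bar", while paths ending in ".." (or consisting only of
	"." or "..") are rejected with B_NOT_ALLOWED.
*/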
5913 static status_t
5914 dir_remove(int fd, char* path, bool kernel)
5915 {
5916 	char name[B_FILE_NAME_LENGTH];
5917 	struct vnode* directory;
5918 	status_t status;
5919 
5920 	if (path != NULL) {
5921 		// we need to make sure our path name doesn't end with "/", ".",
5922 		// or ".."
5923 		char* lastSlash;
5924 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5925 			char* leaf = lastSlash + 1;
5926 			if (!strcmp(leaf, ".."))
5927 				return B_NOT_ALLOWED;
5928 
5929 			// omit multiple slashes
5930 			while (lastSlash > path && lastSlash[-1] == '/')
5931 				lastSlash--;
5932 
5933 			if (leaf[0]
5934 				&& strcmp(leaf, ".")) {
5935 				break;
5936 			}
5937 			// "name/" -> "name", or "name/." -> "name"
5938 			lastSlash[0] = '\0';
5939 		}
5940 
5941 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5942 			return B_NOT_ALLOWED;
5943 	}
5944 
5945 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5946 	if (status != B_OK)
5947 		return status;
5948 
5949 	if (HAS_FS_CALL(directory, remove_dir))
5950 		status = FS_CALL(directory, remove_dir, name);
5951 	else
5952 		status = B_READ_ONLY_DEVICE;
5953 
5954 	put_vnode(directory);
5955 	return status;
5956 }
5957 
5958 
5959 static status_t
5960 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5961 	size_t length)
5962 {
5963 	struct vnode* vnode = descriptor->u.vnode;
5964 
5965 	if (HAS_FS_CALL(vnode, ioctl))
5966 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5967 
5968 	return B_DEV_INVALID_IOCTL;
5969 }
5970 
5971 
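/*!	Back end for the fcntl() syscall. Handles descriptor-level ops
	(F_SETFD/F_GETFD, F_SETFL/F_GETFL, F_DUPFD) as well as the advisory
	locking ops (F_GETLK, F_SETLK, F_SETLKW), for which \a argument is a
	userland pointer to a struct flock that is copied in first.
*/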
5972 static status_t
5973 common_fcntl(int fd, int op, size_t argument, bool kernel)
5974 {
5975 	struct flock flock;
5976 
5977 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5978 		fd, op, argument, kernel ? "kernel" : "user"));
5979 
5980 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5981 		fd);
5982 	if (descriptor == NULL)
5983 		return B_FILE_ERROR;
5984 
5985 	struct vnode* vnode = fd_vnode(descriptor);
5986 
5987 	status_t status = B_OK;
5988 
5989 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5990 		if (descriptor->type != FDTYPE_FILE)
5991 			status = B_BAD_VALUE;
5992 		else if (user_memcpy(&flock, (struct flock*)argument,
5993 				sizeof(struct flock)) != B_OK)
5994 			status = B_BAD_ADDRESS;
5995 
5996 		if (status != B_OK) {
5997 			put_fd(descriptor);
5998 			return status;
5999 		}
6000 	}
6001 
6002 	switch (op) {
6003 		case F_SETFD:
6004 		{
6005 			struct io_context* context = get_current_io_context(kernel);
6006 
6007 			// Set the file descriptor flags;
6008 			// O_CLOEXEC is the only flag available at this time
6009 			mutex_lock(&context->io_mutex);
6010 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6011 			mutex_unlock(&context->io_mutex);
6012 
6013 			status = B_OK;
6014 			break;
6015 		}
6016 
6017 		case F_GETFD:
6018 		{
6019 			struct io_context* context = get_current_io_context(kernel);
6020 
6021 			// Get file descriptor flags
6022 			mutex_lock(&context->io_mutex);
6023 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6024 			mutex_unlock(&context->io_mutex);
6025 			break;
6026 		}
6027 
6028 		case F_SETFL:
6029 			// Set file descriptor open mode
6030 
6031 			// we only accept changes to O_APPEND and O_NONBLOCK
6032 			argument &= O_APPEND | O_NONBLOCK;
6033 			if (descriptor->ops->fd_set_flags != NULL) {
6034 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6035 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6036 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6037 					(int)argument);
6038 			} else
6039 				status = B_UNSUPPORTED;
6040 
6041 			if (status == B_OK) {
6042 				// update this descriptor's open_mode field
6043 				descriptor->open_mode = (descriptor->open_mode
6044 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6045 			}
6046 
6047 			break;
6048 
6049 		case F_GETFL:
6050 			// Get file descriptor open mode
6051 			status = descriptor->open_mode;
6052 			break;
6053 
6054 		case F_DUPFD:
6055 		{
6056 			struct io_context* context = get_current_io_context(kernel);
6057 
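			// POSIX F_DUPFD semantics: allocate the lowest free descriptor
			// not less than argument (presumably what new_fd_etc() does)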
6058 			status = new_fd_etc(context, descriptor, (int)argument);
6059 			if (status >= 0) {
6060 				mutex_lock(&context->io_mutex);
6061 				fd_set_close_on_exec(context, fd, false);
6062 				mutex_unlock(&context->io_mutex);
6063 
6064 				atomic_add(&descriptor->ref_count, 1);
6065 			}
6066 			break;
6067 		}
6068 
6069 		case F_GETLK:
6070 			if (vnode != NULL) {
6071 				struct flock normalizedLock;
6072 
6073 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6074 				status = normalize_flock(descriptor, &normalizedLock);
6075 				if (status != B_OK)
6076 					break;
6077 
6078 				if (HAS_FS_CALL(vnode, test_lock)) {
6079 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6080 						&normalizedLock);
6081 				} else
6082 					status = test_advisory_lock(vnode, &normalizedLock);
6083 				if (status == B_OK) {
6084 					if (normalizedLock.l_type == F_UNLCK) {
6085 						// no conflicting lock found, copy back the same struct
6086 						// we were given except change type to F_UNLCK
6087 						flock.l_type = F_UNLCK;
6088 						status = user_memcpy((struct flock*)argument, &flock,
6089 							sizeof(struct flock));
6090 					} else {
6091 						// a conflicting lock was found, copy back its range and
6092 						// type
6093 						if (normalizedLock.l_len == OFF_MAX)
6094 							normalizedLock.l_len = 0;
6095 
6096 						status = user_memcpy((struct flock*)argument,
6097 							&normalizedLock, sizeof(struct flock));
6098 					}
6099 				}
6100 			} else
6101 				status = B_BAD_VALUE;
6102 			break;
6103 
6104 		case F_SETLK:
6105 		case F_SETLKW:
6106 			status = normalize_flock(descriptor, &flock);
6107 			if (status != B_OK)
6108 				break;
6109 
6110 			if (vnode == NULL) {
6111 				status = B_BAD_VALUE;
6112 			} else if (flock.l_type == F_UNLCK) {
6113 				if (HAS_FS_CALL(vnode, release_lock)) {
6114 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6115 						&flock);
6116 				} else
6117 					status = release_advisory_lock(vnode, &flock);
6118 			} else {
6119 				// the open mode must match the lock type
6120 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6121 						&& flock.l_type == F_WRLCK)
6122 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6123 						&& flock.l_type == F_RDLCK))
6124 					status = B_FILE_ERROR;
6125 				else {
6126 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6127 						status = FS_CALL(vnode, acquire_lock,
6128 							descriptor->cookie, &flock, op == F_SETLKW);
6129 					} else {
6130 						status = acquire_advisory_lock(vnode, -1,
6131 							&flock, op == F_SETLKW);
6132 					}
6133 				}
6134 			}
6135 			break;
6136 
6137 		// ToDo: add support for more ops?
6138 
6139 		default:
6140 			status = B_BAD_VALUE;
6141 	}
6142 
6143 	put_fd(descriptor);
6144 	return status;
6145 }
6146 
6147 
6148 static status_t
6149 common_sync(int fd, bool kernel)
6150 {
6151 	struct file_descriptor* descriptor;
6152 	struct vnode* vnode;
6153 	status_t status;
6154 
6155 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6156 
6157 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6158 	if (descriptor == NULL)
6159 		return B_FILE_ERROR;
6160 
6161 	if (HAS_FS_CALL(vnode, fsync))
6162 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6163 	else
6164 		status = B_UNSUPPORTED;
6165 
6166 	put_fd(descriptor);
6167 	return status;
6168 }
6169 
6170 
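/*!	Marks the node referred to by \a fd as mandatorily locked by this
	descriptor (cf. vnode->mandatory_locked_by). The flag is set with an
	atomic compare-and-swap, so of two racing callers only one can succeed;
	the loser gets B_BUSY.
*/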
6171 static status_t
6172 common_lock_node(int fd, bool kernel)
6173 {
6174 	struct file_descriptor* descriptor;
6175 	struct vnode* vnode;
6176 
6177 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6178 	if (descriptor == NULL)
6179 		return B_FILE_ERROR;
6180 
6181 	status_t status = B_OK;
6182 
6183 	// We need to set the locking atomically - someone
6184 	// else might set one at the same time
6185 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6186 			(file_descriptor*)NULL) != NULL)
6187 		status = B_BUSY;
6188 
6189 	put_fd(descriptor);
6190 	return status;
6191 }
6192 
6193 
6194 static status_t
6195 common_unlock_node(int fd, bool kernel)
6196 {
6197 	struct file_descriptor* descriptor;
6198 	struct vnode* vnode;
6199 
6200 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6201 	if (descriptor == NULL)
6202 		return B_FILE_ERROR;
6203 
6204 	status_t status = B_OK;
6205 
6206 	// We need to clear the lock atomically - someone
6207 	// else might clear it at the same time
6208 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6209 			(file_descriptor*)NULL, descriptor) != descriptor)
6210 		status = B_BAD_VALUE;
6211 
6212 	put_fd(descriptor);
6213 	return status;
6214 }
6215 
6216 
6217 static status_t
6218 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6219 	bool kernel)
6220 {
6221 	struct vnode* vnode;
6222 	status_t status;
6223 
6224 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6225 	if (status != B_OK)
6226 		return status;
6227 
6228 	if (HAS_FS_CALL(vnode, read_symlink)) {
6229 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6230 	} else
6231 		status = B_BAD_VALUE;
6232 
6233 	put_vnode(vnode);
6234 	return status;
6235 }
6236 
6237 
6238 static status_t
6239 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6240 	bool kernel)
6241 {
6242 	// path validity checks have to be in the calling function!
6243 	char name[B_FILE_NAME_LENGTH];
6244 	struct vnode* vnode;
6245 	status_t status;
6246 
6247 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6248 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6249 
6250 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6251 	if (status != B_OK)
6252 		return status;
6253 
6254 	if (HAS_FS_CALL(vnode, create_symlink))
6255 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6256 	else {
6257 		status = HAS_FS_CALL(vnode, write)
6258 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6259 	}
6260 
6261 	put_vnode(vnode);
6262 
6263 	return status;
6264 }
6265 
6266 
6267 static status_t
6268 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6269 	bool traverseLeafLink, bool kernel)
6270 {
6271 	// path validity checks have to be in the calling function!
6272 
6273 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6274 		toPath, kernel));
6275 
6276 	char name[B_FILE_NAME_LENGTH];
6277 	struct vnode* directory;
6278 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6279 		kernel);
6280 	if (status != B_OK)
6281 		return status;
6282 
6283 	struct vnode* vnode;
6284 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6285 		kernel);
6286 	if (status != B_OK)
6287 		goto err;
6288 
6289 	if (directory->mount != vnode->mount) {
6290 		status = B_CROSS_DEVICE_LINK;
6291 		goto err1;
6292 	}
6293 
6294 	if (HAS_FS_CALL(directory, link))
6295 		status = FS_CALL(directory, link, name, vnode);
6296 	else
6297 		status = B_READ_ONLY_DEVICE;
6298 
6299 err1:
6300 	put_vnode(vnode);
6301 err:
6302 	put_vnode(directory);
6303 
6304 	return status;
6305 }
6306 
6307 
6308 static status_t
6309 common_unlink(int fd, char* path, bool kernel)
6310 {
6311 	char filename[B_FILE_NAME_LENGTH];
6312 	struct vnode* vnode;
6313 	status_t status;
6314 
6315 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6316 		kernel));
6317 
6318 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6319 	if (status < 0)
6320 		return status;
6321 
6322 	if (HAS_FS_CALL(vnode, unlink))
6323 		status = FS_CALL(vnode, unlink, filename);
6324 	else
6325 		status = B_READ_ONLY_DEVICE;
6326 
6327 	put_vnode(vnode);
6328 
6329 	return status;
6330 }
6331 
6332 
6333 static status_t
6334 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6335 {
6336 	struct vnode* vnode;
6337 	status_t status;
6338 
6339 	// TODO: honor effectiveUserGroup argument
6340 
6341 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6342 	if (status != B_OK)
6343 		return status;
6344 
6345 	if (HAS_FS_CALL(vnode, access))
6346 		status = FS_CALL(vnode, access, mode);
6347 	else
6348 		status = B_OK;
6349 
6350 	put_vnode(vnode);
6351 
6352 	return status;
6353 }
6354 
6355 
6356 static status_t
6357 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6358 {
6359 	struct vnode* fromVnode;
6360 	struct vnode* toVnode;
6361 	char fromName[B_FILE_NAME_LENGTH];
6362 	char toName[B_FILE_NAME_LENGTH];
6363 	status_t status;
6364 
6365 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6366 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6367 
6368 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6369 	if (status != B_OK)
6370 		return status;
6371 
6372 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6373 	if (status != B_OK)
6374 		goto err1;
6375 
6376 	if (fromVnode->device != toVnode->device) {
6377 		status = B_CROSS_DEVICE_LINK;
6378 		goto err2;
6379 	}
6380 
6381 	if (fromName[0] == '\0' || toName[0] == '\0'
6382 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6383 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6384 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6385 		status = B_BAD_VALUE;
6386 		goto err2;
6387 	}
6388 
6389 	if (HAS_FS_CALL(fromVnode, rename))
6390 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6391 	else
6392 		status = B_READ_ONLY_DEVICE;
6393 
6394 err2:
6395 	put_vnode(toVnode);
6396 err1:
6397 	put_vnode(fromVnode);
6398 
6399 	return status;
6400 }
6401 
6402 
6403 static status_t
6404 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6405 {
6406 	struct vnode* vnode = descriptor->u.vnode;
6407 
6408 	FUNCTION(("common_read_stat: stat %p\n", stat));
6409 
6410 	// TODO: remove this once all file systems properly set them!
6411 	stat->st_crtim.tv_nsec = 0;
6412 	stat->st_ctim.tv_nsec = 0;
6413 	stat->st_mtim.tv_nsec = 0;
6414 	stat->st_atim.tv_nsec = 0;
6415 
6416 	status_t status = FS_CALL(vnode, read_stat, stat);
6417 
6418 	// fill in the st_dev and st_ino fields
6419 	if (status == B_OK) {
6420 		stat->st_dev = vnode->device;
6421 		stat->st_ino = vnode->id;
6422 		stat->st_rdev = -1;
6423 	}
6424 
6425 	return status;
6426 }
6427 
6428 
6429 static status_t
6430 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6431 	int statMask)
6432 {
6433 	struct vnode* vnode = descriptor->u.vnode;
6434 
6435 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6436 		vnode, stat, statMask));
6437 
6438 	if (!HAS_FS_CALL(vnode, write_stat))
6439 		return B_READ_ONLY_DEVICE;
6440 
6441 	return FS_CALL(vnode, write_stat, stat, statMask);
6442 }
6443 
6444 
6445 static status_t
6446 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6447 	struct stat* stat, bool kernel)
6448 {
6449 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6450 		stat));
6451 
6452 	struct vnode* vnode;
6453 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6454 		NULL, kernel);
6455 	if (status != B_OK)
6456 		return status;
6457 
6458 	status = FS_CALL(vnode, read_stat, stat);
6459 
6460 	// fill in the st_dev and st_ino fields
6461 	if (status == B_OK) {
6462 		stat->st_dev = vnode->device;
6463 		stat->st_ino = vnode->id;
6464 		stat->st_rdev = -1;
6465 	}
6466 
6467 	put_vnode(vnode);
6468 	return status;
6469 }
6470 
6471 
6472 static status_t
6473 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6474 	const struct stat* stat, int statMask, bool kernel)
6475 {
6476 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6477 		"kernel %d\n", fd, path, stat, statMask, kernel));
6478 
6479 	struct vnode* vnode;
6480 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6481 		NULL, kernel);
6482 	if (status != B_OK)
6483 		return status;
6484 
6485 	if (HAS_FS_CALL(vnode, write_stat))
6486 		status = FS_CALL(vnode, write_stat, stat, statMask);
6487 	else
6488 		status = B_READ_ONLY_DEVICE;
6489 
6490 	put_vnode(vnode);
6491 
6492 	return status;
6493 }
6494 
6495 
6496 static int
6497 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6498 {
6499 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6500 		kernel));
6501 
6502 	struct vnode* vnode;
6503 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6504 		NULL, kernel);
6505 	if (status != B_OK)
6506 		return status;
6507 
6508 	status = open_attr_dir_vnode(vnode, kernel);
6509 	if (status < 0)
6510 		put_vnode(vnode);
6511 
6512 	return status;
6513 }
6514 
6515 
6516 static status_t
6517 attr_dir_close(struct file_descriptor* descriptor)
6518 {
6519 	struct vnode* vnode = descriptor->u.vnode;
6520 
6521 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6522 
6523 	if (HAS_FS_CALL(vnode, close_attr_dir))
6524 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6525 
6526 	return B_OK;
6527 }
6528 
6529 
6530 static void
6531 attr_dir_free_fd(struct file_descriptor* descriptor)
6532 {
6533 	struct vnode* vnode = descriptor->u.vnode;
6534 
6535 	if (vnode != NULL) {
6536 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6537 		put_vnode(vnode);
6538 	}
6539 }
6540 
6541 
6542 static status_t
6543 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6544 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6545 {
6546 	struct vnode* vnode = descriptor->u.vnode;
6547 
6548 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6549 
6550 	if (HAS_FS_CALL(vnode, read_attr_dir))
6551 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6552 			bufferSize, _count);
6553 
6554 	return B_UNSUPPORTED;
6555 }
6556 
6557 
6558 static status_t
6559 attr_dir_rewind(struct file_descriptor* descriptor)
6560 {
6561 	struct vnode* vnode = descriptor->u.vnode;
6562 
6563 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6564 
6565 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6566 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6567 
6568 	return B_UNSUPPORTED;
6569 }
6570 
6571 
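/*!	Creates and opens an attribute \a name on the node at fd + path,
	returning a new FD on success. If allocating the FD fails, the
	half-created attribute is rolled back: the attribute cookie is closed
	and freed, and the attribute itself is removed again.
*/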
6572 static int
6573 attr_create(int fd, char* path, const char* name, uint32 type,
6574 	int openMode, bool kernel)
6575 {
6576 	if (name == NULL || *name == '\0')
6577 		return B_BAD_VALUE;
6578 
6579 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6580 	struct vnode* vnode;
6581 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6582 		kernel);
6583 	if (status != B_OK)
6584 		return status;
6585 
6586 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6587 		status = B_LINK_LIMIT;
6588 		goto err;
6589 	}
6590 
6591 	if (!HAS_FS_CALL(vnode, create_attr)) {
6592 		status = B_READ_ONLY_DEVICE;
6593 		goto err;
6594 	}
6595 
6596 	void* cookie;
6597 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6598 	if (status != B_OK)
6599 		goto err;
6600 
6601 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6602 	if (fd >= 0)
6603 		return fd;
6604 
6605 	status = fd;
6606 
6607 	FS_CALL(vnode, close_attr, cookie);
6608 	FS_CALL(vnode, free_attr_cookie, cookie);
6609 
6610 	FS_CALL(vnode, remove_attr, name);
6611 
6612 err:
6613 	put_vnode(vnode);
6614 
6615 	return status;
6616 }
6617 
6618 
6619 static int
6620 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6621 {
6622 	if (name == NULL || *name == '\0')
6623 		return B_BAD_VALUE;
6624 
6625 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6626 	struct vnode* vnode;
6627 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6628 		kernel);
6629 	if (status != B_OK)
6630 		return status;
6631 
6632 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6633 		status = B_LINK_LIMIT;
6634 		goto err;
6635 	}
6636 
6637 	if (!HAS_FS_CALL(vnode, open_attr)) {
6638 		status = B_UNSUPPORTED;
6639 		goto err;
6640 	}
6641 
6642 	void* cookie;
6643 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6644 	if (status != B_OK)
6645 		goto err;
6646 
6647 	// now we only need a file descriptor for this attribute and we're done
6648 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6649 	if (fd >= 0)
6650 		return fd;
6651 
6652 	status = fd;
6653 
6654 	FS_CALL(vnode, close_attr, cookie);
6655 	FS_CALL(vnode, free_attr_cookie, cookie);
6656 
6657 err:
6658 	put_vnode(vnode);
6659 
6660 	return status;
6661 }
6662 
6663 
6664 static status_t
6665 attr_close(struct file_descriptor* descriptor)
6666 {
6667 	struct vnode* vnode = descriptor->u.vnode;
6668 
6669 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6670 
6671 	if (HAS_FS_CALL(vnode, close_attr))
6672 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6673 
6674 	return B_OK;
6675 }
6676 
6677 
6678 static void
6679 attr_free_fd(struct file_descriptor* descriptor)
6680 {
6681 	struct vnode* vnode = descriptor->u.vnode;
6682 
6683 	if (vnode != NULL) {
6684 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6685 		put_vnode(vnode);
6686 	}
6687 }
6688 
6689 
6690 static status_t
6691 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6692 	size_t* length)
6693 {
6694 	struct vnode* vnode = descriptor->u.vnode;
6695 
6696 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6697 		pos, length, *length));
6698 
6699 	if (!HAS_FS_CALL(vnode, read_attr))
6700 		return B_UNSUPPORTED;
6701 
6702 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6703 }
6704 
6705 
6706 static status_t
6707 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6708 	size_t* length)
6709 {
6710 	struct vnode* vnode = descriptor->u.vnode;
6711 
6712 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6713 		length));
6714 
6715 	if (!HAS_FS_CALL(vnode, write_attr))
6716 		return B_UNSUPPORTED;
6717 
6718 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6719 }
6720 
6721 
6722 static off_t
6723 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6724 {
6725 	off_t offset;
6726 
6727 	switch (seekType) {
6728 		case SEEK_SET:
6729 			offset = 0;
6730 			break;
6731 		case SEEK_CUR:
6732 			offset = descriptor->pos;
6733 			break;
6734 		case SEEK_END:
6735 		{
6736 			struct vnode* vnode = descriptor->u.vnode;
6737 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6738 				return B_UNSUPPORTED;
6739 
6740 			struct stat stat;
6741 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6742 				&stat);
6743 			if (status != B_OK)
6744 				return status;
6745 
6746 			offset = stat.st_size;
6747 			break;
6748 		}
6749 		default:
6750 			return B_BAD_VALUE;
6751 	}
6752 
6753 	// assumes off_t is 64 bits wide
6754 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6755 		return B_BUFFER_OVERFLOW;
6756 
6757 	pos += offset;
6758 	if (pos < 0)
6759 		return B_BAD_VALUE;
6760 
6761 	return descriptor->pos = pos;
6762 }
6763 
6764 
6765 static status_t
6766 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6767 {
6768 	struct vnode* vnode = descriptor->u.vnode;
6769 
6770 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6771 
6772 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6773 		return B_UNSUPPORTED;
6774 
6775 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6776 }
6777 
6778 
6779 static status_t
6780 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6781 	int statMask)
6782 {
6783 	struct vnode* vnode = descriptor->u.vnode;
6784 
6785 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6786 
6787 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6788 		return B_READ_ONLY_DEVICE;
6789 
6790 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6791 }
6792 
6793 
6794 static status_t
6795 attr_remove(int fd, const char* name, bool kernel)
6796 {
6797 	struct file_descriptor* descriptor;
6798 	struct vnode* vnode;
6799 	status_t status;
6800 
6801 	if (name == NULL || *name == '\0')
6802 		return B_BAD_VALUE;
6803 
6804 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6805 		kernel));
6806 
6807 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6808 	if (descriptor == NULL)
6809 		return B_FILE_ERROR;
6810 
6811 	if (HAS_FS_CALL(vnode, remove_attr))
6812 		status = FS_CALL(vnode, remove_attr, name);
6813 	else
6814 		status = B_READ_ONLY_DEVICE;
6815 
6816 	put_fd(descriptor);
6817 
6818 	return status;
6819 }
6820 
6821 
6822 static status_t
6823 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6824 	bool kernel)
6825 {
6826 	struct file_descriptor* fromDescriptor;
6827 	struct file_descriptor* toDescriptor;
6828 	struct vnode* fromVnode;
6829 	struct vnode* toVnode;
6830 	status_t status;
6831 
6832 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6833 		|| *toName == '\0')
6834 		return B_BAD_VALUE;
6835 
6836 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6837 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6838 
6839 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6840 	if (fromDescriptor == NULL)
6841 		return B_FILE_ERROR;
6842 
6843 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6844 	if (toDescriptor == NULL) {
6845 		status = B_FILE_ERROR;
6846 		goto err;
6847 	}
6848 
6849 	// are the files on the same volume?
6850 	if (fromVnode->device != toVnode->device) {
6851 		status = B_CROSS_DEVICE_LINK;
6852 		goto err1;
6853 	}
6854 
6855 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6856 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6857 	} else
6858 		status = B_READ_ONLY_DEVICE;
6859 
6860 err1:
6861 	put_fd(toDescriptor);
6862 err:
6863 	put_fd(fromDescriptor);
6864 
6865 	return status;
6866 }
6867 
6868 
6869 static int
6870 index_dir_open(dev_t mountID, bool kernel)
6871 {
6872 	struct fs_mount* mount;
6873 	void* cookie;
6874 
6875 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
6876 		kernel));
6877 
6878 	status_t status = get_mount(mountID, &mount);
6879 	if (status != B_OK)
6880 		return status;
6881 
6882 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6883 		status = B_UNSUPPORTED;
6884 		goto error;
6885 	}
6886 
6887 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6888 	if (status != B_OK)
6889 		goto error;
6890 
6891 	// get fd for the index directory
6892 	int fd;
6893 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6894 	if (fd >= 0)
6895 		return fd;
6896 
6897 	// something went wrong
6898 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6899 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6900 
6901 	status = fd;
6902 
6903 error:
6904 	put_mount(mount);
6905 	return status;
6906 }
6907 
6908 
6909 static status_t
6910 index_dir_close(struct file_descriptor* descriptor)
6911 {
6912 	struct fs_mount* mount = descriptor->u.mount;
6913 
6914 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6915 
6916 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6917 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6918 
6919 	return B_OK;
6920 }
6921 
6922 
6923 static void
6924 index_dir_free_fd(struct file_descriptor* descriptor)
6925 {
6926 	struct fs_mount* mount = descriptor->u.mount;
6927 
6928 	if (mount != NULL) {
6929 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6930 		put_mount(mount);
6931 	}
6932 }
6933 
6934 
6935 static status_t
6936 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6937 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6938 {
6939 	struct fs_mount* mount = descriptor->u.mount;
6940 
6941 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6942 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6943 			bufferSize, _count);
6944 	}
6945 
6946 	return B_UNSUPPORTED;
6947 }
6948 
6949 
6950 static status_t
6951 index_dir_rewind(struct file_descriptor* descriptor)
6952 {
6953 	struct fs_mount* mount = descriptor->u.mount;
6954 
6955 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6956 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6957 
6958 	return B_UNSUPPORTED;
6959 }
6960 
6961 
6962 static status_t
6963 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6964 	bool kernel)
6965 {
6966 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
6967 		mountID, name, kernel));
6968 
6969 	struct fs_mount* mount;
6970 	status_t status = get_mount(mountID, &mount);
6971 	if (status != B_OK)
6972 		return status;
6973 
6974 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6975 		status = B_READ_ONLY_DEVICE;
6976 		goto out;
6977 	}
6978 
6979 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6980 
6981 out:
6982 	put_mount(mount);
6983 	return status;
6984 }
6985 
6986 
6987 #if 0
6988 static status_t
6989 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6990 {
6991 	struct vnode* vnode = descriptor->u.vnode;
6992 
6993 	// ToDo: currently unused!
6994 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6995 	if (!HAS_FS_CALL(vnode, read_index_stat))
6996 		return B_UNSUPPORTED;
6997 
6998 	return B_UNSUPPORTED;
6999 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7000 }
7001 
7002 
7003 static void
7004 index_free_fd(struct file_descriptor* descriptor)
7005 {
7006 	struct vnode* vnode = descriptor->u.vnode;
7007 
7008 	if (vnode != NULL) {
7009 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7010 		put_vnode(vnode);
7011 	}
7012 }
7013 #endif
7014 
7015 
7016 static status_t
7017 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7018 	bool kernel)
7019 {
7020 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7021 		mountID, name, kernel));
7022 
7023 	struct fs_mount* mount;
7024 	status_t status = get_mount(mountID, &mount);
7025 	if (status != B_OK)
7026 		return status;
7027 
7028 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7029 		status = B_UNSUPPORTED;
7030 		goto out;
7031 	}
7032 
7033 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7034 
7035 out:
7036 	put_mount(mount);
7037 	return status;
7038 }
7039 
7040 
7041 static status_t
7042 index_remove(dev_t mountID, const char* name, bool kernel)
7043 {
7044 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7045 		mountID, name, kernel));
7046 
7047 	struct fs_mount* mount;
7048 	status_t status = get_mount(mountID, &mount);
7049 	if (status != B_OK)
7050 		return status;
7051 
7052 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7053 		status = B_READ_ONLY_DEVICE;
7054 		goto out;
7055 	}
7056 
7057 	status = FS_MOUNT_CALL(mount, remove_index, name);
7058 
7059 out:
7060 	put_mount(mount);
7061 	return status;
7062 }
7063 
7064 
7065 /*!	TODO: the query FS API is still pretty much the same as in R5.
7066 		It would be nice if queries would get some more support from the
7067 		kernel.
7068 		For example, query parsing should be moved into the kernel.
7069 */
7070 static int
7071 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7072 	int32 token, bool kernel)
7073 {
7074 	struct fs_mount* mount;
7075 	void* cookie;
7076 
7077 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7078 		device, query, kernel));
7079 
7080 	status_t status = get_mount(device, &mount);
7081 	if (status != B_OK)
7082 		return status;
7083 
7084 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7085 		status = B_UNSUPPORTED;
7086 		goto error;
7087 	}
7088 
7089 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7090 		&cookie);
7091 	if (status != B_OK)
7092 		goto error;
7093 
7094 	// get fd for the query
7095 	int fd;
7096 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7097 	if (fd >= 0)
7098 		return fd;
7099 
7100 	status = fd;
7101 
7102 	// something went wrong
7103 	FS_MOUNT_CALL(mount, close_query, cookie);
7104 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7105 
7106 error:
7107 	put_mount(mount);
7108 	return status;
7109 }
7110 
7111 
7112 static status_t
7113 query_close(struct file_descriptor* descriptor)
7114 {
7115 	struct fs_mount* mount = descriptor->u.mount;
7116 
7117 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7118 
7119 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7120 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7121 
7122 	return B_OK;
7123 }
7124 
7125 
7126 static void
7127 query_free_fd(struct file_descriptor* descriptor)
7128 {
7129 	struct fs_mount* mount = descriptor->u.mount;
7130 
7131 	if (mount != NULL) {
7132 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7133 		put_mount(mount);
7134 	}
7135 }
7136 
7137 
7138 static status_t
7139 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7140 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7141 {
7142 	struct fs_mount* mount = descriptor->u.mount;
7143 
7144 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7145 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7146 			bufferSize, _count);
7147 	}
7148 
7149 	return B_UNSUPPORTED;
7150 }
7151 
7152 
7153 static status_t
7154 query_rewind(struct file_descriptor* descriptor)
7155 {
7156 	struct fs_mount* mount = descriptor->u.mount;
7157 
7158 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7159 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7160 
7161 	return B_UNSUPPORTED;
7162 }
7163 
7164 
7165 //	#pragma mark - General File System functions
7166 
7167 
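/*!	Mounts the file system \a fsName on the directory \a path (or, for the
	very first mount, on "/"). A sketch of the flow:
	  1. resolve/register the device with the disk device manager (an image
	     file gets a temporary file device),
	  2. build the fs_volume chain -- one fs_volume per layer that
	     get_file_system_name_for_layer() reports for \a fsName, linked via
	     super_volume/sub_volume,
	  3. call each layer's mount() hook; the FS must publish its root node,
	  4. link the new root vnode with the covered vnode at \a path.
	Returns the new mount ID or an error code.
*/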
7168 static dev_t
7169 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7170 	const char* args, bool kernel)
7171 {
7172 	struct ::fs_mount* mount;
7173 	status_t status = B_OK;
7174 	fs_volume* volume = NULL;
7175 	int32 layer = 0;
7176 	Vnode* coveredNode = NULL;
7177 
7178 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7179 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7180 
7181 	// The path is always safe; we just have to make sure that fsName is
7182 	// at least superficially valid - we can't make assumptions about args.
7183 	// A NULL fsName is OK if a device was given and the FS is not virtual.
7184 	// We'll get it from the DDM later.
7185 	if (fsName == NULL) {
7186 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7187 			return B_BAD_VALUE;
7188 	} else if (fsName[0] == '\0')
7189 		return B_BAD_VALUE;
7190 
7191 	RecursiveLocker mountOpLocker(sMountOpLock);
7192 
7193 	// Helper to delete a newly created file device on failure.
7194 	// Not exactly beautiful, but helps to keep the code below cleaner.
7195 	struct FileDeviceDeleter {
7196 		FileDeviceDeleter() : id(-1) {}
7197 		~FileDeviceDeleter()
7198 		{
7199 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7200 		}
7201 
7202 		partition_id id;
7203 	} fileDeviceDeleter;
7204 
7205 	// If the file system is not a "virtual" one, the device argument should
7206 	// point to a real file/device (if given at all).
7207 	// get the partition
7208 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7209 	KPartition* partition = NULL;
7210 	KPath normalizedDevice;
7211 	bool newlyCreatedFileDevice = false;
7212 
7213 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7214 		// normalize the device path
7215 		status = normalizedDevice.SetTo(device, true);
7216 		if (status != B_OK)
7217 			return status;
7218 
7219 		// get a corresponding partition from the DDM
7220 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7221 		if (partition == NULL) {
7222 			// Partition not found: This either means the user supplied
7223 			// an invalid path, or the path refers to an image file. We try
7224 			// to let the DDM create a file device for the path.
7225 			partition_id deviceID = ddm->CreateFileDevice(
7226 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7227 			if (deviceID >= 0) {
7228 				partition = ddm->RegisterPartition(deviceID);
7229 				if (newlyCreatedFileDevice)
7230 					fileDeviceDeleter.id = deviceID;
7231 			}
7232 		}
7233 
7234 		if (!partition) {
7235 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7236 				normalizedDevice.Path()));
7237 			return B_ENTRY_NOT_FOUND;
7238 		}
7239 
7240 		device = normalizedDevice.Path();
7241 			// correct path to file device
7242 	}
7243 	PartitionRegistrar partitionRegistrar(partition, true);
7244 
7245 	// Write lock the partition's device. For the time being, we keep the lock
7246 	// until we're done mounting -- not nice, but it ensures that no one
7247 	// interferes.
7248 	// TODO: Just mark the partition busy while mounting!
7249 	KDiskDevice* diskDevice = NULL;
7250 	if (partition) {
7251 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7252 		if (!diskDevice) {
7253 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7254 			return B_ERROR;
7255 		}
7256 	}
7257 
7258 	DeviceWriteLocker writeLocker(diskDevice, true);
7259 		// this takes over the write lock acquired before
7260 
7261 	if (partition != NULL) {
7262 		// make sure that the partition is not busy
7263 		if (partition->IsBusy()) {
7264 			TRACE(("fs_mount(): Partition is busy.\n"));
7265 			return B_BUSY;
7266 		}
7267 
7268 		// if no FS name had been supplied, we get it from the partition
7269 		if (fsName == NULL) {
7270 			KDiskSystem* diskSystem = partition->DiskSystem();
7271 			if (!diskSystem) {
7272 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7273 					"recognize it.\n"));
7274 				return B_BAD_VALUE;
7275 			}
7276 
7277 			if (!diskSystem->IsFileSystem()) {
7278 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7279 					"partitioning system.\n"));
7280 				return B_BAD_VALUE;
7281 			}
7282 
7283 			// The disk system name will not change, and the KDiskSystem
7284 			// object will not go away while the disk device is locked (and
7285 			// the partition has a reference to it), so this is safe.
7286 			fsName = diskSystem->Name();
7287 		}
7288 	}
7289 
7290 	mount = new(std::nothrow) (struct ::fs_mount);
7291 	if (mount == NULL)
7292 		return B_NO_MEMORY;
7293 
7294 	mount->device_name = strdup(device);
7295 		// "device" can be NULL
7296 
7297 	status = mount->entry_cache.Init();
7298 	if (status != B_OK)
7299 		goto err1;
7300 
7301 	// initialize structure
7302 	mount->id = sNextMountID++;
7303 	mount->partition = NULL;
7304 	mount->root_vnode = NULL;
7305 	mount->covers_vnode = NULL;
7306 	mount->unmounting = false;
7307 	mount->owns_file_device = false;
7308 	mount->volume = NULL;
7309 
7310 	// build up the volume(s)
7311 	while (true) {
7312 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7313 		if (layerFSName == NULL) {
7314 			if (layer == 0) {
7315 				status = B_NO_MEMORY;
7316 				goto err1;
7317 			}
7318 
7319 			break;
7320 		}
7321 		MemoryDeleter layerFSNameDeleter(layerFSName);
7322 
7323 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7324 		if (volume == NULL) {
7325 			status = B_NO_MEMORY;
7326 			goto err1;
7327 		}
7328 
7329 		volume->id = mount->id;
7330 		volume->partition = partition != NULL ? partition->ID() : -1;
7331 		volume->layer = layer++;
7332 		volume->private_volume = NULL;
7333 		volume->ops = NULL;
7334 		volume->sub_volume = NULL;
7335 		volume->super_volume = NULL;
7336 		volume->file_system = NULL;
7337 		volume->file_system_name = NULL;
7338 
7339 		volume->file_system_name = get_file_system_name(layerFSName);
7340 		if (volume->file_system_name == NULL) {
7341 			status = B_NO_MEMORY;
7342 			free(volume);
7343 			goto err1;
7344 		}
7345 
7346 		volume->file_system = get_file_system(layerFSName);
7347 		if (volume->file_system == NULL) {
7348 			status = B_DEVICE_NOT_FOUND;
7349 			free(volume->file_system_name);
7350 			free(volume);
7351 			goto err1;
7352 		}
7353 
7354 		if (mount->volume == NULL)
7355 			mount->volume = volume;
7356 		else {
7357 			volume->super_volume = mount->volume;
7358 			mount->volume->sub_volume = volume;
7359 			mount->volume = volume;
7360 		}
7361 	}
7362 
7363 	// insert mount struct into list before we call FS's mount() function
7364 	// so that vnodes can be created for this mount
7365 	mutex_lock(&sMountMutex);
7366 	sMountsTable->Insert(mount);
7367 	mutex_unlock(&sMountMutex);
7368 
7369 	ino_t rootID;
7370 
7371 	if (!sRoot) {
7372 		// we haven't mounted anything yet
7373 		if (strcmp(path, "/") != 0) {
7374 			status = B_ERROR;
7375 			goto err2;
7376 		}
7377 
7378 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7379 			args, &rootID);
7380 		if (status != 0)
7381 			goto err2;
7382 	} else {
7383 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7384 		if (status != B_OK)
7385 			goto err2;
7386 
7387 		mount->covers_vnode = coveredNode;
7388 
7389 		// make sure coveredNode is a directory
7390 		if (!S_ISDIR(coveredNode->Type())) {
7391 			status = B_NOT_A_DIRECTORY;
7392 			goto err3;
7393 		}
7394 
7395 		if (coveredNode->IsCovered()) {
7396 			// this is already a covered vnode
7397 			status = B_BUSY;
7398 			goto err3;
7399 		}
7400 
7401 		// mount it/them
7402 		fs_volume* volume = mount->volume;
7403 		while (volume) {
7404 			status = volume->file_system->mount(volume, device, flags, args,
7405 				&rootID);
7406 			if (status != B_OK) {
7407 				if (volume->sub_volume)
7408 					goto err4;
7409 				goto err3;
7410 			}
7411 
7412 			volume = volume->super_volume;
7413 		}
7414 
7415 		volume = mount->volume;
7416 		while (volume) {
7417 			if (volume->ops->all_layers_mounted != NULL)
7418 				volume->ops->all_layers_mounted(volume);
7419 			volume = volume->super_volume;
7420 		}
7421 	}
7422 
7423 	// the root node is supposed to be owned by the file system - it must
7424 	// exist at this point
7425 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7426 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7427 		panic("fs_mount: file system does not own its root node!\n");
7428 		status = B_ERROR;
7429 		goto err4;
7430 	}
7431 
7432 	// set up the links between the root vnode and the vnode it covers
7433 	rw_lock_write_lock(&sVnodeLock);
7434 	if (coveredNode != NULL) {
7435 		if (coveredNode->IsCovered()) {
7436 			// the vnode got covered in the meantime
7437 			status = B_BUSY;
7438 			rw_lock_write_unlock(&sVnodeLock);
7439 			goto err4;
7440 		}
7441 
7442 		mount->root_vnode->covers = coveredNode;
7443 		mount->root_vnode->SetCovering(true);
7444 
7445 		coveredNode->covered_by = mount->root_vnode;
7446 		coveredNode->SetCovered(true);
7447 	}
7448 	rw_lock_write_unlock(&sVnodeLock);
7449 
7450 	if (!sRoot) {
7451 		sRoot = mount->root_vnode;
7452 		mutex_lock(&sIOContextRootLock);
7453 		get_current_io_context(true)->root = sRoot;
7454 		mutex_unlock(&sIOContextRootLock);
7455 		inc_vnode_ref_count(sRoot);
7456 	}
7457 
7458 	// supply the partition (if any) with the mount cookie and mark it mounted
7459 	if (partition) {
7460 		partition->SetMountCookie(mount->volume->private_volume);
7461 		partition->SetVolumeID(mount->id);
7462 
7463 		// keep a partition reference as long as the partition is mounted
7464 		partitionRegistrar.Detach();
7465 		mount->partition = partition;
7466 		mount->owns_file_device = newlyCreatedFileDevice;
7467 		fileDeviceDeleter.id = -1;
7468 	}
7469 
7470 	notify_mount(mount->id,
7471 		coveredNode != NULL ? coveredNode->device : -1,
7472 		coveredNode ? coveredNode->id : -1);
7473 
7474 	return mount->id;
7475 
7476 err4:
7477 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7478 err3:
7479 	if (coveredNode != NULL)
7480 		put_vnode(coveredNode);
7481 err2:
7482 	mutex_lock(&sMountMutex);
7483 	sMountsTable->Remove(mount);
7484 	mutex_unlock(&sMountMutex);
7485 err1:
7486 	delete mount;
7487 
7488 	return status;
7489 }
7490 
7491 
7492 static status_t
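/*!	Unmounts the volume given either by \a path (which must be the mount
	point) or by \a mountID. The unmount only proceeds once no vnode of the
	mount is busy or externally referenced; with B_FORCE_UNMOUNT, open file
	descriptors are forcibly disconnected first and the check is retried.
	All covers/covered_by links are undone before the vnodes are freed and
	the FS's unmount() hook is called.
*/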
7493 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7494 {
7495 	struct fs_mount* mount;
7496 	status_t err;
7497 
7498 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7499 		mountID, kernel));
7500 
7501 	struct vnode* pathVnode = NULL;
7502 	if (path != NULL) {
7503 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7504 		if (err != B_OK)
7505 			return B_ENTRY_NOT_FOUND;
7506 	}
7507 
7508 	RecursiveLocker mountOpLocker(sMountOpLock);
7509 
7510 	// This lock is not strictly necessary, but it is taken in KDEBUG builds
7511 	// to keep the ASSERT in find_mount() working.
7512 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7513 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7514 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7515 	if (mount == NULL) {
7516 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7517 			pathVnode);
7518 	}
7519 
7520 	if (path != NULL) {
7521 		put_vnode(pathVnode);
7522 
7523 		if (mount->root_vnode != pathVnode) {
7524 			// not a mount point
7525 			return B_BAD_VALUE;
7526 		}
7527 	}
7528 
7529 	// if the volume is associated with a partition, lock the device of the
7530 	// partition as long as we are unmounting
7531 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7532 	KPartition* partition = mount->partition;
7533 	KDiskDevice* diskDevice = NULL;
7534 	if (partition != NULL) {
7535 		if (partition->Device() == NULL) {
7536 			dprintf("fs_unmount(): There is no device!\n");
7537 			return B_ERROR;
7538 		}
7539 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7540 		if (!diskDevice) {
7541 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7542 			return B_ERROR;
7543 		}
7544 	}
7545 	DeviceWriteLocker writeLocker(diskDevice, true);
7546 
7547 	// make sure that the partition is not busy
7548 	if (partition != NULL) {
7549 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7550 			TRACE(("fs_unmount(): Partition is busy.\n"));
7551 			return B_BUSY;
7552 		}
7553 	}
7554 
7555 	// grab the vnode lock (write access) to keep anyone from creating
7556 	// a vnode while we're figuring out if we can continue
7557 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7558 
7559 	bool disconnectedDescriptors = false;
7560 
7561 	while (true) {
7562 		bool busy = false;
7563 
7564 		// cycle through the list of vnodes associated with this mount and
7565 		// make sure none of them is busy or still has references on it
7566 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7567 		while (struct vnode* vnode = iterator.Next()) {
7568 			if (vnode->IsBusy()) {
7569 				busy = true;
7570 				break;
7571 			}
7572 
7573 			// check the vnode's ref count -- subtract additional references for
7574 			// covering
7575 			int32 refCount = vnode->ref_count;
7576 			if (vnode->covers != NULL)
7577 				refCount--;
7578 			if (vnode->covered_by != NULL)
7579 				refCount--;
7580 
7581 			if (refCount != 0) {
7582 				// there are still vnodes in use on this mount, so we cannot
7583 				// unmount yet
7584 				busy = true;
7585 				break;
7586 			}
7587 		}
7588 
7589 		if (!busy)
7590 			break;
7591 
7592 		if ((flags & B_FORCE_UNMOUNT) == 0)
7593 			return B_BUSY;
7594 
7595 		if (disconnectedDescriptors) {
7596 			// wait a bit until the last access is finished, and then try again
7597 			vnodesWriteLocker.Unlock();
7598 			snooze(100000);
7599 			// TODO: if there is some kind of bug that prevents the ref counts
7600 			// from getting back to zero, this will fall into an endless loop...
7601 			vnodesWriteLocker.Lock();
7602 			continue;
7603 		}
7604 
7605 		// the file system is still busy - but we're forced to unmount it,
7606 		// so let's disconnect all open file descriptors
7607 
7608 		mount->unmounting = true;
7609 			// prevent new vnodes from being created
7610 
7611 		vnodesWriteLocker.Unlock();
7612 
7613 		disconnect_mount_or_vnode_fds(mount, NULL);
7614 		disconnectedDescriptors = true;
7615 
7616 		vnodesWriteLocker.Lock();
7617 	}
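
	// At this point no vnode of this mount is busy or externally referenced:
	// either the mount was idle to begin with, or B_FORCE_UNMOUNT was given
	// and all open descriptors have been disconnected above.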
7618 
7619 	// We can safely continue. Mark all of the vnodes busy and this mount
7620 	// structure in unmounting state. Also undo the vnode covers/covered_by
7621 	// links.
7622 	mount->unmounting = true;
7623 
7624 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7625 	while (struct vnode* vnode = iterator.Next()) {
7626 		// Remove all covers/covered_by links from other mounts' nodes to this
7627 		// vnode and adjust the node ref count accordingly. We will release the
7628 		// references to the external vnodes below.
7629 		if (Vnode* coveredNode = vnode->covers) {
7630 			if (Vnode* coveringNode = vnode->covered_by) {
7631 				// We have both covered and covering vnodes, so just remove us
7632 				// from the chain.
7633 				coveredNode->covered_by = coveringNode;
7634 				coveringNode->covers = coveredNode;
7635 				vnode->ref_count -= 2;
7636 
7637 				vnode->covered_by = NULL;
7638 				vnode->covers = NULL;
7639 				vnode->SetCovering(false);
7640 				vnode->SetCovered(false);
7641 			} else {
7642 				// We only have a covered vnode. Remove its link to us.
7643 				coveredNode->covered_by = NULL;
7644 				coveredNode->SetCovered(false);
7645 				vnode->ref_count--;
7646 
7647 				// If the other node is an external vnode, we keep its link
7648 				// around so we can put the reference later on. Otherwise we
7649 				// get rid of it right now.
7650 				if (coveredNode->mount == mount) {
7651 					vnode->covers = NULL;
7652 					coveredNode->ref_count--;
7653 				}
7654 			}
7655 		} else if (Vnode* coveringNode = vnode->covered_by) {
7656 			// We only have a covering vnode. Remove its link to us.
7657 			coveringNode->covers = NULL;
7658 			coveringNode->SetCovering(false);
7659 			vnode->ref_count--;
7660 
7661 			// If the other node is an external vnode, we keep its link
7662 			// around so we can put the reference later on. Otherwise we
7663 			// get rid of it right now.
7664 			if (coveringNode->mount == mount) {
7665 				vnode->covered_by = NULL;
7666 				coveringNode->ref_count--;
7667 			}
7668 		}
7669 
7670 		vnode->SetBusy(true);
7671 		vnode_to_be_freed(vnode);
7672 	}
7673 
7674 	vnodesWriteLocker.Unlock();
7675 
7676 	// Free all vnodes associated with this mount.
7677 	// They will be removed from the mount list by free_vnode(), so
7678 	// we don't have to do that ourselves.
7679 	while (struct vnode* vnode = mount->vnodes.Head()) {
7680 		// Put the references to external covered/covering vnodes we kept above.
7681 		if (Vnode* coveredNode = vnode->covers)
7682 			put_vnode(coveredNode);
7683 		if (Vnode* coveringNode = vnode->covered_by)
7684 			put_vnode(coveringNode);
7685 
7686 		free_vnode(vnode, false);
7687 	}
7688 
7689 	// remove the mount structure from the hash table
7690 	mutex_lock(&sMountMutex);
7691 	sMountsTable->Remove(mount);
7692 	mutex_unlock(&sMountMutex);
7693 
7694 	mountOpLocker.Unlock();
7695 
7696 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7697 	notify_unmount(mount->id);
7698 
7699 	// dereference the partition and mark it unmounted
7700 	if (partition) {
7701 		partition->SetVolumeID(-1);
7702 		partition->SetMountCookie(NULL);
7703 
7704 		if (mount->owns_file_device)
7705 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7706 		partition->Unregister();
7707 	}
7708 
7709 	delete mount;
7710 	return B_OK;
7711 }
7712 
7713 
7714 static status_t
7715 fs_sync(dev_t device)
7716 {
7717 	struct fs_mount* mount;
7718 	status_t status = get_mount(device, &mount);
7719 	if (status != B_OK)
7720 		return status;
7721 
7722 	struct vnode marker;
7723 	memset(&marker, 0, sizeof(marker));
7724 	marker.SetBusy(true);
7725 	marker.SetRemoved(true);
7726 
7727 	// First, synchronize all file caches
7728 
7729 	while (true) {
7730 		WriteLocker locker(sVnodeLock);
7731 			// Note: That's the easy way, which is probably OK for sync(),
7732 			// since it's a relatively rare call and doesn't need to allow for
7733 			// a lot of concurrency. Using a read lock would be possible, but
7734 			// also more involved, since we would have to lock the individual
7735 			// nodes and take care of the locking order, which we might not
7736 			// want to do while holding fs_mount::rlock.
7737 
7738 		// synchronize access to vnode list
7739 		recursive_lock_lock(&mount->rlock);
7740 
7741 		struct vnode* vnode;
7742 		if (!marker.IsRemoved()) {
7743 			vnode = mount->vnodes.GetNext(&marker);
7744 			mount->vnodes.Remove(&marker);
7745 			marker.SetRemoved(true);
7746 		} else
7747 			vnode = mount->vnodes.First();
7748 
7749 		while (vnode != NULL && (vnode->cache == NULL
7750 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7751 			// TODO: we could track writes (and writable mapped vnodes)
7752 			//	and have a simple flag that we could test for here
7753 			vnode = mount->vnodes.GetNext(vnode);
7754 		}
7755 
7756 		if (vnode != NULL) {
7757 			// insert marker vnode again
7758 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7759 			marker.SetRemoved(false);
7760 		}
7761 
7762 		recursive_lock_unlock(&mount->rlock);
7763 
7764 		if (vnode == NULL)
7765 			break;
7766 
7767 		vnode = lookup_vnode(mount->id, vnode->id);
7768 		if (vnode == NULL || vnode->IsBusy())
7769 			continue;
7770 
7771 		if (vnode->ref_count == 0) {
7772 			// this vnode has been unused before
7773 			vnode_used(vnode);
7774 		}
7775 		inc_vnode_ref_count(vnode);
7776 
7777 		locker.Unlock();
7778 
7779 		if (vnode->cache != NULL && !vnode->IsRemoved())
7780 			vnode->cache->WriteModified();
7781 
7782 		put_vnode(vnode);
7783 	}
7784 
7785 	// And then, let the file systems do their synchronizing work
7786 
7787 	if (HAS_FS_MOUNT_CALL(mount, sync))
7788 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7789 
7790 	put_mount(mount);
7791 	return status;
7792 }
7793 
7794 
7795 static status_t
7796 fs_read_info(dev_t device, struct fs_info* info)
7797 {
7798 	struct fs_mount* mount;
7799 	status_t status = get_mount(device, &mount);
7800 	if (status != B_OK)
7801 		return status;
7802 
7803 	memset(info, 0, sizeof(struct fs_info));
7804 
7805 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7806 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7807 
7808 	// fill in info the file system doesn't (have to) know about
7809 	if (status == B_OK) {
7810 		info->dev = mount->id;
7811 		info->root = mount->root_vnode->id;
7812 
7813 		fs_volume* volume = mount->volume;
7814 		while (volume->super_volume != NULL)
7815 			volume = volume->super_volume;
7816 
7817 		strlcpy(info->fsh_name, volume->file_system_name,
7818 			sizeof(info->fsh_name));
7819 		if (mount->device_name != NULL) {
7820 			strlcpy(info->device_name, mount->device_name,
7821 				sizeof(info->device_name));
7822 		}
7823 	}
7824 
7825 	// if the call is not supported by the file system, there are still
7826 	// the parts that we filled out ourselves
7827 
7828 	put_mount(mount);
7829 	return status;
7830 }
7831 
7832 
7833 static status_t
7834 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7835 {
7836 	struct fs_mount* mount;
7837 	status_t status = get_mount(device, &mount);
7838 	if (status != B_OK)
7839 		return status;
7840 
7841 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7842 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7843 	else
7844 		status = B_READ_ONLY_DEVICE;
7845 
7846 	put_mount(mount);
7847 	return status;
7848 }
7849 
7850 
7851 static dev_t
7852 fs_next_device(int32* _cookie)
7853 {
7854 	struct fs_mount* mount = NULL;
7855 	dev_t device = *_cookie;
7856 
7857 	mutex_lock(&sMountMutex);
7858 
7859 	// Since device IDs are assigned sequentially, this algorithm
7860 	// works well enough. It makes sure that the device list
7861 	// returned is sorted, and that no device is skipped when an
7862 	// already visited device has been unmounted.
7863 
7864 	while (device < sNextMountID) {
7865 		mount = find_mount(device++);
7866 		if (mount != NULL && mount->volume->private_volume != NULL)
7867 			break;
7868 	}
7869 
7870 	*_cookie = device;
7871 
7872 	if (mount != NULL)
7873 		device = mount->id;
7874 	else
7875 		device = B_BAD_VALUE;
7876 
7877 	mutex_unlock(&sMountMutex);
7878 
7879 	return device;
7880 }
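
/*
	Illustrative iteration sketch (not part of this file's code): callers walk
	all mounted volumes with the cookie protocol implemented above (exposed
	below as _kern_next_device()), stopping when the function runs out of
	mounts and returns B_BAD_VALUE, a negative value:

		int32 cookie = 0;
		dev_t device;
		while ((device = fs_next_device(&cookie)) >= 0) {
			// use 'device' ...
		}
*/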
7881 
7882 
7883 ssize_t
7884 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7885 	void *buffer, size_t readBytes)
7886 {
7887 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7888 	if (attrFD < 0)
7889 		return attrFD;
7890 
7891 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7892 
7893 	_kern_close(attrFD);
7894 
7895 	return bytesRead;
7896 }
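
/*
	Usage sketch (illustrative; the attribute name is only an example):
	reading a file's MIME type attribute from kernel code. Note that the
	implementation above opens the attribute read-only and does not evaluate
	the 'type' argument.

		char mimeType[B_MIME_TYPE_LENGTH];
		ssize_t bytesRead = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE,
			0, mimeType, sizeof(mimeType));
		if (bytesRead < 0)
			return bytesRead;	// an error code
*/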
7897 
7898 
7899 static status_t
7900 get_cwd(char* buffer, size_t size, bool kernel)
7901 {
7902 	// Get current working directory from io context
7903 	struct io_context* context = get_current_io_context(kernel);
7904 	status_t status;
7905 
7906 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7907 
7908 	mutex_lock(&context->io_mutex);
7909 
7910 	struct vnode* vnode = context->cwd;
7911 	if (vnode)
7912 		inc_vnode_ref_count(vnode);
7913 
7914 	mutex_unlock(&context->io_mutex);
7915 
7916 	if (vnode) {
7917 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7918 		put_vnode(vnode);
7919 	} else
7920 		status = B_ERROR;
7921 
7922 	return status;
7923 }
7924 
7925 
7926 static status_t
7927 set_cwd(int fd, char* path, bool kernel)
7928 {
7929 	struct io_context* context;
7930 	struct vnode* vnode = NULL;
7931 	struct vnode* oldDirectory;
7932 	status_t status;
7933 
7934 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7935 
7936 	// Get vnode for passed path, and bail if it failed
7937 	// Get the vnode for the passed path, and bail if that fails
7938 	if (status < 0)
7939 		return status;
7940 
7941 	if (!S_ISDIR(vnode->Type())) {
7942 		// nope, can't cwd to here
7943 		status = B_NOT_A_DIRECTORY;
7944 		goto err;
7945 	}
7946 
7947 	// We need to have the permission to enter the directory, too
7948 	if (HAS_FS_CALL(vnode, access)) {
7949 		status = FS_CALL(vnode, access, X_OK);
7950 		if (status != B_OK)
7951 			goto err;
7952 	}
7953 
7954 	// Get current io context and lock
7955 	context = get_current_io_context(kernel);
7956 	mutex_lock(&context->io_mutex);
7957 
7958 	// save the old current working directory first
7959 	oldDirectory = context->cwd;
7960 	context->cwd = vnode;
7961 
7962 	mutex_unlock(&context->io_mutex);
7963 
7964 	if (oldDirectory)
7965 		put_vnode(oldDirectory);
7966 
7967 	return B_NO_ERROR;
7968 
7969 err:
7970 	put_vnode(vnode);
7971 	return status;
7972 }
7973 
7974 
7975 //	#pragma mark - kernel mirrored syscalls
7976 
7977 
7978 dev_t
7979 _kern_mount(const char* path, const char* device, const char* fsName,
7980 	uint32 flags, const char* args, size_t argsLength)
7981 {
7982 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7983 	if (pathBuffer.InitCheck() != B_OK)
7984 		return B_NO_MEMORY;
7985 
7986 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7987 }
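
/*
	A minimal usage sketch (the device path is an assumed example, not taken
	from this file): mounting a BFS volume at /boot from kernel code.

		dev_t device = _kern_mount("/boot",
			"/dev/disk/scsi/0/0/0/0_2", "bfs", 0, NULL, 0);
		if (device < 0)
			return device;	// an error code
*/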
7988 
7989 
7990 status_t
7991 _kern_unmount(const char* path, uint32 flags)
7992 {
7993 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7994 	if (pathBuffer.InitCheck() != B_OK)
7995 		return B_NO_MEMORY;
7996 
7997 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7998 }
7999 
8000 
8001 status_t
8002 _kern_read_fs_info(dev_t device, struct fs_info* info)
8003 {
8004 	if (info == NULL)
8005 		return B_BAD_VALUE;
8006 
8007 	return fs_read_info(device, info);
8008 }
8009 
8010 
8011 status_t
8012 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8013 {
8014 	if (info == NULL)
8015 		return B_BAD_VALUE;
8016 
8017 	return fs_write_info(device, info, mask);
8018 }
8019 
8020 
8021 status_t
8022 _kern_sync(void)
8023 {
8024 	// Note: _kern_sync() is also called from _user_sync()
8025 	int32 cookie = 0;
8026 	dev_t device;
8027 	while ((device = next_dev(&cookie)) >= 0) {
8028 		status_t status = fs_sync(device);
8029 		if (status != B_OK && status != B_BAD_VALUE) {
8030 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8031 				strerror(status));
8032 		}
8033 	}
8034 
8035 	return B_OK;
8036 }
8037 
8038 
8039 dev_t
8040 _kern_next_device(int32* _cookie)
8041 {
8042 	return fs_next_device(_cookie);
8043 }
8044 
8045 
8046 status_t
8047 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8048 	size_t infoSize)
8049 {
8050 	if (infoSize != sizeof(fd_info))
8051 		return B_BAD_VALUE;
8052 
8053 	// get the team
8054 	Team* team = Team::Get(teamID);
8055 	if (team == NULL)
8056 		return B_BAD_TEAM_ID;
8057 	BReference<Team> teamReference(team, true);
8058 
8059 	// now that we have a team reference, its I/O context won't go away
8060 	io_context* context = team->io_context;
8061 	MutexLocker contextLocker(context->io_mutex);
8062 
8063 	uint32 slot = *_cookie;
8064 
8065 	struct file_descriptor* descriptor;
8066 	while (slot < context->table_size
8067 		&& (descriptor = context->fds[slot]) == NULL) {
8068 		slot++;
8069 	}
8070 
8071 	if (slot >= context->table_size)
8072 		return B_ENTRY_NOT_FOUND;
8073 
8074 	info->number = slot;
8075 	info->open_mode = descriptor->open_mode;
8076 
8077 	struct vnode* vnode = fd_vnode(descriptor);
8078 	if (vnode != NULL) {
8079 		info->device = vnode->device;
8080 		info->node = vnode->id;
8081 	} else if (descriptor->u.mount != NULL) {
8082 		info->device = descriptor->u.mount->id;
8083 		info->node = -1;
8084 	}
8085 
8086 	*_cookie = slot + 1;
8087 	return B_OK;
8088 }
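
/*
	Iteration sketch (illustrative): the cookie is simply the next FD slot to
	inspect, so all open descriptors of a team can be enumerated like this:

		uint32 cookie = 0;
		fd_info info;
		while (_kern_get_next_fd_info(teamID, &cookie, &info,
				sizeof(info)) == B_OK) {
			// info.number, info.open_mode, info.device, info.node are valid
		}
*/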
8089 
8090 
8091 int
8092 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8093 	int perms)
8094 {
8095 	if ((openMode & O_CREAT) != 0) {
8096 		return file_create_entry_ref(device, inode, name, openMode, perms,
8097 			true);
8098 	}
8099 
8100 	return file_open_entry_ref(device, inode, name, openMode, true);
8101 }
8102 
8103 
8104 /*!	\brief Opens a node specified by a FD + path pair.
8105 
8106 	At least one of \a fd and \a path must be specified.
8107 	If only \a fd is given, the function opens the node identified by this
8108 	FD. If only a path is given, this path is opened. If both are given and
8109 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8110 	of the directory (!) identified by \a fd.
8111 
8112 	\param fd The FD. May be < 0.
8113 	\param path The absolute or relative path. May be \c NULL.
8114 	\param openMode The open mode.
8115 	\return A FD referring to the newly opened node, or an error code,
8116 			if an error occurs.
8117 */
8118 int
8119 _kern_open(int fd, const char* path, int openMode, int perms)
8120 {
8121 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8122 	if (pathBuffer.InitCheck() != B_OK)
8123 		return B_NO_MEMORY;
8124 
8125 	if (openMode & O_CREAT)
8126 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8127 
8128 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8129 }
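
/*
	Usage sketch (illustrative; the file name and permissions are
	assumptions): creating a file relative to a directory FD, analogous to
	POSIX openat().

		int fd = _kern_open(directoryFD, "output.log",
			O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR);
		if (fd < 0)
			return fd;	// an error code
		// ... use the FD ...
		_kern_close(fd);
*/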
8130 
8131 
8132 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8133 
8134 	The supplied name may be \c NULL, in which case the directory identified
8135 	by \a device and \a inode will be opened. Otherwise \a device and
8136 	\a inode identify the parent directory of the directory to be opened
8137 	and \a name its entry name.
8138 
8139 	\param device If \a name is specified the ID of the device the parent
8140 		   directory of the directory to be opened resides on, otherwise
8141 		   the device of the directory itself.
8142 	\param inode If \a name is specified the node ID of the parent
8143 		   directory of the directory to be opened, otherwise node ID of the
8144 		   directory of the directory to be opened, otherwise the node ID of
8145 		   the directory itself.
8146 		   the \a device + \a inode pair identify the node to be opened.
8147 	\return The FD of the newly opened directory or an error code, if
8148 			something went wrong.
8149 */
8150 int
8151 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8152 {
8153 	return dir_open_entry_ref(device, inode, name, true);
8154 }
8155 
8156 
8157 /*!	\brief Opens a directory specified by a FD + path pair.
8158 
8159 	At least one of \a fd and \a path must be specified.
8160 	If only \a fd is given, the function opens the directory identified by this
8161 	FD. If only a path is given, this path is opened. If both are given and
8162 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8163 	of the directory (!) identified by \a fd.
8164 
8165 	\param fd The FD. May be < 0.
8166 	\param path The absolute or relative path. May be \c NULL.
8167 	\return A FD referring to the newly opened directory, or an error code,
8168 			if an error occurs.
8169 */
8170 int
8171 _kern_open_dir(int fd, const char* path)
8172 {
8173 	if (path == NULL)
8174 		return dir_open(fd, NULL, true);
8175 
8176 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8177 	if (pathBuffer.InitCheck() != B_OK)
8178 		return B_NO_MEMORY;
8179 
8180 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8181 }
8182 
8183 
8184 status_t
8185 _kern_fcntl(int fd, int op, size_t argument)
8186 {
8187 	return common_fcntl(fd, op, argument, true);
8188 }
8189 
8190 
8191 status_t
8192 _kern_fsync(int fd)
8193 {
8194 	return common_sync(fd, true);
8195 }
8196 
8197 
8198 status_t
8199 _kern_lock_node(int fd)
8200 {
8201 	return common_lock_node(fd, true);
8202 }
8203 
8204 
8205 status_t
8206 _kern_unlock_node(int fd)
8207 {
8208 	return common_unlock_node(fd, true);
8209 }
8210 
8211 
8212 status_t
8213 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8214 	int perms)
8215 {
8216 	return dir_create_entry_ref(device, inode, name, perms, true);
8217 }
8218 
8219 
8220 /*!	\brief Creates a directory specified by a FD + path pair.
8221 
8222 	\a path must always be specified (it contains the name of the new directory
8223 	at least). If only a path is given, this path identifies the location at
8224 	which the directory shall be created. If both \a fd and \a path are given
8225 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8226 	of the directory (!) identified by \a fd.
8227 
8228 	\param fd The FD. May be < 0.
8229 	\param path The absolute or relative path. Must not be \c NULL.
8230 	\param perms The access permissions the new directory shall have.
8231 	\return \c B_OK, if the directory has been created successfully, another
8232 			error code otherwise.
8233 */
8234 status_t
8235 _kern_create_dir(int fd, const char* path, int perms)
8236 {
8237 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8238 	if (pathBuffer.InitCheck() != B_OK)
8239 		return B_NO_MEMORY;
8240 
8241 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8242 }
8243 
8244 
8245 status_t
8246 _kern_remove_dir(int fd, const char* path)
8247 {
8248 	if (path) {
8249 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8250 		if (pathBuffer.InitCheck() != B_OK)
8251 			return B_NO_MEMORY;
8252 
8253 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
8254 	}
8255 
8256 	return dir_remove(fd, NULL, true);
8257 }
8258 
8259 
8260 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8261 
8262 	At least one of \a fd and \a path must be specified.
8263 	If only \a fd is given, the symlink to be read is the node identified by
8264 	this FD. If only a path is given, this path identifies the
8265 	symlink to be read. If both are given and the path is absolute, \a fd is
8266 	ignored; a relative path is reckoned off of the directory (!) identified
8267 	by \a fd.
8268 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8269 	will still be updated to reflect the required buffer size.
8270 
8271 	\param fd The FD. May be < 0.
8272 	\param path The absolute or relative path. May be \c NULL.
8273 	\param buffer The buffer into which the contents of the symlink shall be
8274 		   written.
8275 	\param _bufferSize A pointer to the size of the supplied buffer.
8276 	\return The length of the link on success or an appropriate error code.
8277 */
8278 status_t
8279 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8280 {
8281 	if (path) {
8282 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8283 		if (pathBuffer.InitCheck() != B_OK)
8284 			return B_NO_MEMORY;
8285 
8286 		return common_read_link(fd, pathBuffer.LockBuffer(),
8287 			buffer, _bufferSize, true);
8288 	}
8289 
8290 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8291 }
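
/*
	Sketch of the B_BUFFER_OVERFLOW protocol documented above (illustrative):
	on overflow *_bufferSize is updated, so the caller can retry with a
	sufficiently large buffer.

		char buffer[64];
		size_t size = sizeof(buffer);
		status_t status = _kern_read_link(fd, NULL, buffer, &size);
		if (status == B_BUFFER_OVERFLOW) {
			// 'size' now holds the required buffer size
		}
*/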
8292 
8293 
8294 /*!	\brief Creates a symlink specified by a FD + path pair.
8295 
8296 	\a path must always be specified (it contains the name of the new symlink
8297 	at least). If only a path is given, this path identifies the location at
8298 	which the symlink shall be created. If both \a fd and \a path are given and
8299 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8300 	of the directory (!) identified by \a fd.
8301 
8302 	\param fd The FD. May be < 0.
8303 	\param path The absolute or relative path. Must not be \c NULL.
8304 	\param mode The access permissions the new symlink shall have.
8305 	\return \c B_OK, if the symlink has been created successfully, another
8306 			error code otherwise.
8307 */
8308 status_t
8309 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8310 {
8311 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8312 	if (pathBuffer.InitCheck() != B_OK)
8313 		return B_NO_MEMORY;
8314 
8315 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8316 		toPath, mode, true);
8317 }
8318 
8319 
8320 status_t
8321 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8322 	bool traverseLeafLink)
8323 {
8324 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8325 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8326 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8327 		return B_NO_MEMORY;
8328 
8329 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8330 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8331 }
8332 
8333 
8334 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8335 
8336 	\a path must always be specified (it contains at least the name of the entry
8337 	to be deleted). If only a path is given, this path identifies the entry
8338 	directly. If both \a fd and \a path are given and the path is absolute,
8339 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8340 	identified by \a fd.
8341 
8342 	\param fd The FD. May be < 0.
8343 	\param path The absolute or relative path. Must not be \c NULL.
8344 	\return \c B_OK, if the entry has been removed successfully, another
8345 			error code otherwise.
8346 */
8347 status_t
8348 _kern_unlink(int fd, const char* path)
8349 {
8350 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8351 	if (pathBuffer.InitCheck() != B_OK)
8352 		return B_NO_MEMORY;
8353 
8354 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8355 }
8356 
8357 
8358 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8359 		   by another FD + path pair.
8360 
8361 	\a oldPath and \a newPath must always be specified (they contain at least
8362 	the name of the entry). If only a path is given, this path identifies the
8363 	entry directly. If both a FD and a path are given and the path is absolute,
8364 	the FD is ignored; a relative path is reckoned off of the directory (!)
8365 	identified by the respective FD.
8366 
8367 	\param oldFD The FD of the old location. May be < 0.
8368 	\param oldPath The absolute or relative path of the old location. Must not
8369 		   be \c NULL.
8370 	\param newFD The FD of the new location. May be < 0.
8371 	\param newPath The absolute or relative path of the new location. Must not
8372 		   be \c NULL.
8373 	\return \c B_OK, if the entry has been moved successfully, another
8374 			error code otherwise.
8375 */
8376 status_t
8377 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8378 {
8379 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8380 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8381 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8382 		return B_NO_MEMORY;
8383 
8384 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8385 		newFD, newPathBuffer.LockBuffer(), true);
8386 }
8387 
8388 
8389 status_t
8390 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8391 {
8392 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8393 	if (pathBuffer.InitCheck() != B_OK)
8394 		return B_NO_MEMORY;
8395 
8396 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8397 		true);
8398 }
8399 
8400 
8401 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8402 
8403 	If only \a fd is given, the stat operation associated with the type
8404 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8405 	given, this path identifies the entry for whose node to retrieve the
8406 	stat data. If both \a fd and \a path are given and the path is absolute,
8407 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8408 	identified by \a fd and specifies the entry whose stat data shall be
8409 	retrieved.
8410 
8411 	\param fd The FD. May be < 0.
8412 	\param path The absolute or relative path. May be \c NULL.
8413 	\param traverseLeafLink If \a path is given, \c true specifies that the
8414 		   function shall not stick to symlinks, but traverse them.
8415 	\param stat The buffer the stat data shall be written into.
8416 	\param statSize The size of the supplied stat buffer.
8417 	\return \c B_OK, if the stat data have been read successfully, another
8418 			error code otherwise.
8419 */
8420 status_t
8421 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8422 	struct stat* stat, size_t statSize)
8423 {
8424 	struct stat completeStat;
8425 	struct stat* originalStat = NULL;
8426 	status_t status;
8427 
8428 	if (statSize > sizeof(struct stat))
8429 		return B_BAD_VALUE;
8430 
8431 	// this supports different stat extensions
8432 	if (statSize < sizeof(struct stat)) {
8433 		originalStat = stat;
8434 		stat = &completeStat;
8435 	}
8436 
8437 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8438 
8439 	if (status == B_OK && originalStat != NULL)
8440 		memcpy(originalStat, stat, statSize);
8441 
8442 	return status;
8443 }
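
/*
	The statSize handling above exists for binary compatibility: a caller
	compiled against an older, smaller struct stat passes its own size and
	receives just that prefix of the data. Sketch (OldStat is a hypothetical
	legacy layout with sizeof(OldStat) < sizeof(struct stat)):

		OldStat oldStat;
		status_t status = _kern_read_stat(fd, NULL, false,
			(struct stat*)&oldStat, sizeof(oldStat));
*/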
8444 
8445 
8446 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8447 
8448 	If only \a fd is given, the stat operation associated with the type
8449 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8450 	given, this path identifies the entry for whose node to write the
8451 	stat data. If both \a fd and \a path are given and the path is absolute,
8452 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8453 	identified by \a fd and specifies the entry whose stat data shall be
8454 	written.
8455 
8456 	\param fd The FD. May be < 0.
8457 	\param path The absolute or relative path. May be \c NULL.
8458 	\param traverseLeafLink If \a path is given, \c true specifies that the
8459 		   function shall not stick to symlinks, but traverse them.
8460 	\param stat The buffer containing the stat data to be written.
8461 	\param statSize The size of the supplied stat buffer.
8462 	\param statMask A mask specifying which parts of the stat data shall be
8463 		   written.
8464 	\return \c B_OK, if the stat data have been written successfully,
8465 			another error code otherwise.
8466 */
8467 status_t
8468 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8469 	const struct stat* stat, size_t statSize, int statMask)
8470 {
8471 	struct stat completeStat;
8472 
8473 	if (statSize > sizeof(struct stat))
8474 		return B_BAD_VALUE;
8475 
8476 	// this supports different stat extensions
8477 	if (statSize < sizeof(struct stat)) {
8478 		memset((uint8*)&completeStat + statSize, 0,
8479 			sizeof(struct stat) - statSize);
8480 		memcpy(&completeStat, stat, statSize);
8481 		stat = &completeStat;
8482 	}
8483 
8484 	status_t status;
8485 
8486 	if (path) {
8487 		// path given: write the stat of the node referred to by (fd, path)
8488 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8489 		if (pathBuffer.InitCheck() != B_OK)
8490 			return B_NO_MEMORY;
8491 
8492 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8493 			traverseLeafLink, stat, statMask, true);
8494 	} else {
8495 		// no path given: get the FD and use the FD operation
8496 		struct file_descriptor* descriptor
8497 			= get_fd(get_current_io_context(true), fd);
8498 		if (descriptor == NULL)
8499 			return B_FILE_ERROR;
8500 
8501 		if (descriptor->ops->fd_write_stat)
8502 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8503 		else
8504 			status = B_UNSUPPORTED;
8505 
8506 		put_fd(descriptor);
8507 	}
8508 
8509 	return status;
8510 }
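
/*
	Partial updates are selected via statMask; only the masked fields of the
	passed stat are applied. Sketch (B_STAT_MODE is the mask bit for st_mode,
	assumed from <NodeMonitor.h>; 'newMode' is a placeholder):

		struct stat stat;
		stat.st_mode = newMode;
		status_t status = _kern_write_stat(fd, NULL, false, &stat,
			sizeof(stat), B_STAT_MODE);
*/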
8511 
8512 
8513 int
8514 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8515 {
8516 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8517 	if (pathBuffer.InitCheck() != B_OK)
8518 		return B_NO_MEMORY;
8519 
8520 	if (path != NULL)
8521 		pathBuffer.SetTo(path);
8522 
8523 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL,
8524 		traverseLeafLink, true);
8525 }
8526 
8527 
8528 int
8529 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8530 	int openMode)
8531 {
8532 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8533 	if (pathBuffer.InitCheck() != B_OK)
8534 		return B_NO_MEMORY;
8535 
8536 	if ((openMode & O_CREAT) != 0) {
8537 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8538 			true);
8539 	}
8540 
8541 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8542 }
8543 
8544 
8545 status_t
8546 _kern_remove_attr(int fd, const char* name)
8547 {
8548 	return attr_remove(fd, name, true);
8549 }
8550 
8551 
8552 status_t
8553 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8554 	const char* toName)
8555 {
8556 	return attr_rename(fromFile, fromName, toFile, toName, true);
8557 }
8558 
8559 
8560 int
8561 _kern_open_index_dir(dev_t device)
8562 {
8563 	return index_dir_open(device, true);
8564 }
8565 
8566 
8567 status_t
8568 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8569 {
8570 	return index_create(device, name, type, flags, true);
8571 }
8572 
8573 
8574 status_t
8575 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8576 {
8577 	return index_name_read_stat(device, name, stat, true);
8578 }
8579 
8580 
8581 status_t
8582 _kern_remove_index(dev_t device, const char* name)
8583 {
8584 	return index_remove(device, name, true);
8585 }
8586 
8587 
8588 status_t
8589 _kern_getcwd(char* buffer, size_t size)
8590 {
8591 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8592 
8593 	// Call vfs to get current working directory
8594 	return get_cwd(buffer, size, true);
8595 }
8596 
8597 
8598 status_t
8599 _kern_setcwd(int fd, const char* path)
8600 {
8601 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8602 	if (pathBuffer.InitCheck() != B_OK)
8603 		return B_NO_MEMORY;
8604 
8605 	if (path != NULL)
8606 		pathBuffer.SetTo(path);
8607 
8608 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8609 }
8610 
8611 
8612 //	#pragma mark - userland syscalls
8613 
8614 
8615 dev_t
8616 _user_mount(const char* userPath, const char* userDevice,
8617 	const char* userFileSystem, uint32 flags, const char* userArgs,
8618 	size_t argsLength)
8619 {
8620 	char fileSystem[B_FILE_NAME_LENGTH];
8621 	KPath path, device;
8622 	char* args = NULL;
8623 	status_t status;
8624 
8625 	if (!IS_USER_ADDRESS(userPath)
8626 		|| !IS_USER_ADDRESS(userFileSystem)
8627 		|| !IS_USER_ADDRESS(userDevice))
8628 		return B_BAD_ADDRESS;
8629 
8630 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8631 		return B_NO_MEMORY;
8632 
8633 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8634 		return B_BAD_ADDRESS;
8635 
8636 	if (userFileSystem != NULL
8637 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8638 		return B_BAD_ADDRESS;
8639 
8640 	if (userDevice != NULL
8641 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8642 			< B_OK)
8643 		return B_BAD_ADDRESS;
8644 
8645 	if (userArgs != NULL && argsLength > 0) {
8646 		// this is a safety restriction
8647 		if (argsLength >= 65536)
8648 			return B_NAME_TOO_LONG;
8649 
8650 		args = (char*)malloc(argsLength + 1);
8651 		if (args == NULL)
8652 			return B_NO_MEMORY;
8653 
8654 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8655 			free(args);
8656 			return B_BAD_ADDRESS;
8657 		}
8658 	}
8659 	path.UnlockBuffer();
8660 	device.UnlockBuffer();
8661 
8662 	status = fs_mount(path.LockBuffer(),
8663 		userDevice != NULL ? device.Path() : NULL,
8664 		userFileSystem ? fileSystem : NULL, flags, args, false);
8665 
8666 	free(args);
8667 	return status;
8668 }
8669 
8670 
8671 status_t
8672 _user_unmount(const char* userPath, uint32 flags)
8673 {
8674 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8675 	if (pathBuffer.InitCheck() != B_OK)
8676 		return B_NO_MEMORY;
8677 
8678 	char* path = pathBuffer.LockBuffer();
8679 
8680 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8681 		return B_BAD_ADDRESS;
8682 
8683 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8684 }
8685 
8686 
8687 status_t
8688 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8689 {
8690 	struct fs_info info;
8691 	status_t status;
8692 
8693 	if (userInfo == NULL)
8694 		return B_BAD_VALUE;
8695 
8696 	if (!IS_USER_ADDRESS(userInfo))
8697 		return B_BAD_ADDRESS;
8698 
8699 	status = fs_read_info(device, &info);
8700 	if (status != B_OK)
8701 		return status;
8702 
8703 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8704 		return B_BAD_ADDRESS;
8705 
8706 	return B_OK;
8707 }
8708 
8709 
8710 status_t
8711 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8712 {
8713 	struct fs_info info;
8714 
8715 	if (userInfo == NULL)
8716 		return B_BAD_VALUE;
8717 
8718 	if (!IS_USER_ADDRESS(userInfo)
8719 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8720 		return B_BAD_ADDRESS;
8721 
8722 	return fs_write_info(device, &info, mask);
8723 }
8724 
8725 
8726 dev_t
8727 _user_next_device(int32* _userCookie)
8728 {
8729 	int32 cookie;
8730 	dev_t device;
8731 
8732 	if (!IS_USER_ADDRESS(_userCookie)
8733 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8734 		return B_BAD_ADDRESS;
8735 
8736 	device = fs_next_device(&cookie);
8737 
8738 	if (device >= B_OK) {
8739 		// update user cookie
8740 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8741 			return B_BAD_ADDRESS;
8742 	}
8743 
8744 	return device;
8745 }
8746 
8747 
8748 status_t
8749 _user_sync(void)
8750 {
8751 	return _kern_sync();
8752 }
8753 
8754 
8755 status_t
8756 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8757 	size_t infoSize)
8758 {
8759 	struct fd_info info;
8760 	uint32 cookie;
8761 
8762 	// only root can do this (or should root's group be enough?)
8763 	if (geteuid() != 0)
8764 		return B_NOT_ALLOWED;
8765 
8766 	if (infoSize != sizeof(fd_info))
8767 		return B_BAD_VALUE;
8768 
8769 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8770 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8771 		return B_BAD_ADDRESS;
8772 
8773 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8774 	if (status != B_OK)
8775 		return status;
8776 
8777 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8778 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8779 		return B_BAD_ADDRESS;
8780 
8781 	return status;
8782 }
8783 
8784 
8785 status_t
8786 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8787 	char* userPath, size_t pathLength)
8788 {
8789 	if (!IS_USER_ADDRESS(userPath))
8790 		return B_BAD_ADDRESS;
8791 
8792 	KPath path(B_PATH_NAME_LENGTH + 1);
8793 	if (path.InitCheck() != B_OK)
8794 		return B_NO_MEMORY;
8795 
8796 	// copy the leaf name onto the stack
8797 	char stackLeaf[B_FILE_NAME_LENGTH];
8798 	if (leaf) {
8799 		if (!IS_USER_ADDRESS(leaf))
8800 			return B_BAD_ADDRESS;
8801 
8802 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8803 		if (length < 0)
8804 			return length;
8805 		if (length >= B_FILE_NAME_LENGTH)
8806 			return B_NAME_TOO_LONG;
8807 
8808 		leaf = stackLeaf;
8809 	}
8810 
8811 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8812 		false, path.LockBuffer(), path.BufferSize());
8813 	if (status != B_OK)
8814 		return status;
8815 
8816 	path.UnlockBuffer();
8817 
8818 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8819 	if (length < 0)
8820 		return length;
8821 	if (length >= (int)pathLength)
8822 		return B_BUFFER_OVERFLOW;
8823 
8824 	return B_OK;
8825 }
8826 
8827 
8828 status_t
8829 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8830 {
8831 	if (userPath == NULL || buffer == NULL)
8832 		return B_BAD_VALUE;
8833 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8834 		return B_BAD_ADDRESS;
8835 
8836 	// copy path from userland
8837 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8838 	if (pathBuffer.InitCheck() != B_OK)
8839 		return B_NO_MEMORY;
8840 	char* path = pathBuffer.LockBuffer();
8841 
8842 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8843 		return B_BAD_ADDRESS;
8844 
8845 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8846 		false);
8847 	if (error != B_OK)
8848 		return error;
8849 
8850 	// copy back to userland
8851 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8852 	if (len < 0)
8853 		return len;
8854 	if (len >= B_PATH_NAME_LENGTH)
8855 		return B_BUFFER_OVERFLOW;
8856 
8857 	return B_OK;
8858 }
8859 
8860 
8861 int
8862 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8863 	int openMode, int perms)
8864 {
8865 	char name[B_FILE_NAME_LENGTH];
8866 
8867 	if (userName == NULL || device < 0 || inode < 0)
8868 		return B_BAD_VALUE;
8869 	if (!IS_USER_ADDRESS(userName)
8870 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8871 		return B_BAD_ADDRESS;
8872 
8873 	if ((openMode & O_CREAT) != 0) {
8874 		return file_create_entry_ref(device, inode, name, openMode, perms,
8875 			false);
8876 	}
8877 
8878 	return file_open_entry_ref(device, inode, name, openMode, false);
8879 }
8880 
8881 
8882 int
8883 _user_open(int fd, const char* userPath, int openMode, int perms)
8884 {
8885 	KPath path(B_PATH_NAME_LENGTH + 1);
8886 	if (path.InitCheck() != B_OK)
8887 		return B_NO_MEMORY;
8888 
8889 	char* buffer = path.LockBuffer();
8890 
8891 	if (!IS_USER_ADDRESS(userPath)
8892 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8893 		return B_BAD_ADDRESS;
8894 
8895 	if ((openMode & O_CREAT) != 0)
8896 		return file_create(fd, buffer, openMode, perms, false);
8897 
8898 	return file_open(fd, buffer, openMode, false);
8899 }
8900 
8901 
8902 int
8903 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8904 {
8905 	if (userName != NULL) {
8906 		char name[B_FILE_NAME_LENGTH];
8907 
8908 		if (!IS_USER_ADDRESS(userName)
8909 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8910 			return B_BAD_ADDRESS;
8911 
8912 		return dir_open_entry_ref(device, inode, name, false);
8913 	}
8914 	return dir_open_entry_ref(device, inode, NULL, false);
8915 }
8916 
8917 
8918 int
8919 _user_open_dir(int fd, const char* userPath)
8920 {
8921 	if (userPath == NULL)
8922 		return dir_open(fd, NULL, false);
8923 
8924 	KPath path(B_PATH_NAME_LENGTH + 1);
8925 	if (path.InitCheck() != B_OK)
8926 		return B_NO_MEMORY;
8927 
8928 	char* buffer = path.LockBuffer();
8929 
8930 	if (!IS_USER_ADDRESS(userPath)
8931 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8932 		return B_BAD_ADDRESS;
8933 
8934 	return dir_open(fd, buffer, false);
8935 }
8936 
8937 
8938 /*!	\brief Opens a directory's parent directory and returns the entry name
8939 		   of the former.
8940 
8941 	Aside from returning the directory's entry name, this method is
8942 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8943 	equivalent if \a userName is \c NULL.
8944 
8945 	If a name buffer is supplied and the name does not fit the buffer, the
8946 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8947 
8948 	\param fd A FD referring to a directory.
8949 	\param userName Buffer the directory's entry name shall be written into.
8950 		   May be \c NULL.
8951 	\param nameLength Size of the name buffer.
8952 	\return The file descriptor of the opened parent directory, if everything
8953 			went fine, an error code otherwise.
8954 */
8955 int
8956 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8957 {
8958 	bool kernel = false;
8959 
8960 	if (userName && !IS_USER_ADDRESS(userName))
8961 		return B_BAD_ADDRESS;
8962 
8963 	// open the parent dir
8964 	int parentFD = dir_open(fd, (char*)"..", kernel);
8965 	if (parentFD < 0)
8966 		return parentFD;
8967 	FDCloser fdCloser(parentFD, kernel);
8968 
8969 	if (userName) {
8970 		// get the vnodes
8971 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8972 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8973 		VNodePutter parentVNodePutter(parentVNode);
8974 		VNodePutter dirVNodePutter(dirVNode);
8975 		if (!parentVNode || !dirVNode)
8976 			return B_FILE_ERROR;
8977 
8978 		// get the vnode name
8979 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8980 		struct dirent* buffer = (struct dirent*)_buffer;
8981 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8982 			sizeof(_buffer), get_current_io_context(false));
8983 		if (status != B_OK)
8984 			return status;
8985 
8986 		// copy the name to the userland buffer
8987 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8988 		if (len < 0)
8989 			return len;
8990 		if (len >= (int)nameLength)
8991 			return B_BUFFER_OVERFLOW;
8992 	}
8993 
8994 	return fdCloser.Detach();
8995 }
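
/*
	Usage sketch (illustrative, as reached through the matching userland
	syscall stub): walking one level up the tree while learning the entry
	name of the directory we came from.

		char name[B_FILE_NAME_LENGTH];
		int parentFD = _user_open_parent_dir(fd, name, sizeof(name));
		if (parentFD >= 0) {
			// 'name' is the entry name of 'fd' within its parent
		}
*/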
8996 
8997 
8998 status_t
8999 _user_fcntl(int fd, int op, size_t argument)
9000 {
9001 	status_t status = common_fcntl(fd, op, argument, false);
9002 	if (op == F_SETLKW)
9003 		syscall_restart_handle_post(status);
9004 
9005 	return status;
9006 }
9007 
9008 
9009 status_t
9010 _user_fsync(int fd)
9011 {
9012 	return common_sync(fd, false);
9013 }
9014 
9015 
9016 status_t
9017 _user_flock(int fd, int operation)
9018 {
9019 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9020 
9021 	// Check if the operation is valid
9022 	switch (operation & ~LOCK_NB) {
9023 		case LOCK_UN:
9024 		case LOCK_SH:
9025 		case LOCK_EX:
9026 			break;
9027 
9028 		default:
9029 			return B_BAD_VALUE;
9030 	}
9031 
9032 	struct file_descriptor* descriptor;
9033 	struct vnode* vnode;
9034 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9035 	if (descriptor == NULL)
9036 		return B_FILE_ERROR;
9037 
9038 	if (descriptor->type != FDTYPE_FILE) {
9039 		put_fd(descriptor);
9040 		return B_BAD_VALUE;
9041 	}
9042 
9043 	struct flock flock;
9044 	flock.l_start = 0;
9045 	flock.l_len = OFF_MAX;
9046 	flock.l_whence = 0;
9047 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9048 
9049 	status_t status;
9050 	if ((operation & LOCK_UN) != 0)
9051 		status = release_advisory_lock(vnode, &flock);
9052 	else {
9053 		status = acquire_advisory_lock(vnode,
9054 			thread_get_current_thread()->team->session_id, &flock,
9055 			(operation & LOCK_NB) == 0);
9056 	}
9057 
9058 	syscall_restart_handle_post(status);
9059 
9060 	put_fd(descriptor);
9061 	return status;
9062 }
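
// Note on the mapping implemented above: flock()-style whole-file locks are
// expressed as advisory byte-range locks spanning [0, OFF_MAX], with
// LOCK_SH/LOCK_EX translated to F_RDLCK/F_WRLCK and LOCK_NB suppressing the
// blocking wait. An unlock (LOCK_UN) releases the same full range.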
9063 
9064 
9065 status_t
9066 _user_lock_node(int fd)
9067 {
9068 	return common_lock_node(fd, false);
9069 }
9070 
9071 
9072 status_t
9073 _user_unlock_node(int fd)
9074 {
9075 	return common_unlock_node(fd, false);
9076 }
9077 
9078 
9079 status_t
9080 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9081 	int perms)
9082 {
9083 	char name[B_FILE_NAME_LENGTH];
9084 	status_t status;
9085 
9086 	if (!IS_USER_ADDRESS(userName))
9087 		return B_BAD_ADDRESS;
9088 
9089 	status = user_strlcpy(name, userName, sizeof(name));
9090 	if (status < 0)
9091 		return status;
9092 
9093 	return dir_create_entry_ref(device, inode, name, perms, false);
9094 }
9095 
9096 
9097 status_t
9098 _user_create_dir(int fd, const char* userPath, int perms)
9099 {
9100 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9101 	if (pathBuffer.InitCheck() != B_OK)
9102 		return B_NO_MEMORY;
9103 
9104 	char* path = pathBuffer.LockBuffer();
9105 
9106 	if (!IS_USER_ADDRESS(userPath)
9107 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9108 		return B_BAD_ADDRESS;
9109 
9110 	return dir_create(fd, path, perms, false);
9111 }
9112 
9113 
9114 status_t
9115 _user_remove_dir(int fd, const char* userPath)
9116 {
9117 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9118 	if (pathBuffer.InitCheck() != B_OK)
9119 		return B_NO_MEMORY;
9120 
9121 	char* path = pathBuffer.LockBuffer();
9122 
9123 	if (userPath != NULL) {
9124 		if (!IS_USER_ADDRESS(userPath)
9125 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9126 			return B_BAD_ADDRESS;
9127 	}
9128 
9129 	return dir_remove(fd, userPath ? path : NULL, false);
9130 }
9131 
9132 
9133 status_t
9134 _user_read_link(int fd, const char* userPath, char* userBuffer,
9135 	size_t* userBufferSize)
9136 {
9137 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
9138 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9139 		return B_NO_MEMORY;
9140 
9141 	size_t bufferSize;
9142 
9143 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9144 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9145 		return B_BAD_ADDRESS;
9146 
9147 	char* path = pathBuffer.LockBuffer();
9148 	char* buffer = linkBuffer.LockBuffer();
9149 
9150 	if (userPath) {
9151 		if (!IS_USER_ADDRESS(userPath)
9152 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9153 			return B_BAD_ADDRESS;
9154 
9155 		if (bufferSize > B_PATH_NAME_LENGTH)
9156 			bufferSize = B_PATH_NAME_LENGTH;
9157 	}
9158 
9159 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9160 		&bufferSize, false);
9161 
9162 	// we also update the bufferSize in case of errors
9163 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9164 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
9165 		return B_BAD_ADDRESS;
9166 
9167 	if (status != B_OK)
9168 		return status;
9169 
9170 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9171 		return B_BAD_ADDRESS;
9172 
9173 	return B_OK;
9174 }
9175 
9176 
9177 status_t
9178 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9179 	int mode)
9180 {
9181 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9182 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9183 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9184 		return B_NO_MEMORY;
9185 
9186 	char* path = pathBuffer.LockBuffer();
9187 	char* toPath = toPathBuffer.LockBuffer();
9188 
9189 	if (!IS_USER_ADDRESS(userPath)
9190 		|| !IS_USER_ADDRESS(userToPath)
9191 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9192 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9193 		return B_BAD_ADDRESS;
9194 
9195 	return common_create_symlink(fd, path, toPath, mode, false);
9196 }
9197 
9198 
9199 status_t
9200 _user_create_link(int pathFD, const char* userPath, int toFD,
9201 	const char* userToPath, bool traverseLeafLink)
9202 {
9203 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9204 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
9205 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9206 		return B_NO_MEMORY;
9207 
9208 	char* path = pathBuffer.LockBuffer();
9209 	char* toPath = toPathBuffer.LockBuffer();
9210 
9211 	if (!IS_USER_ADDRESS(userPath)
9212 		|| !IS_USER_ADDRESS(userToPath)
9213 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
9214 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
9215 		return B_BAD_ADDRESS;
9216 
9217 	status_t status = check_path(toPath);
9218 	if (status != B_OK)
9219 		return status;
9220 
9221 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9222 		false);
9223 }
9224 
9225 
9226 status_t
9227 _user_unlink(int fd, const char* userPath)
9228 {
9229 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9230 	if (pathBuffer.InitCheck() != B_OK)
9231 		return B_NO_MEMORY;
9232 
9233 	char* path = pathBuffer.LockBuffer();
9234 
9235 	if (!IS_USER_ADDRESS(userPath)
9236 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9237 		return B_BAD_ADDRESS;
9238 
9239 	return common_unlink(fd, path, false);
9240 }
9241 
9242 
9243 status_t
9244 _user_rename(int oldFD, const char* userOldPath, int newFD,
9245 	const char* userNewPath)
9246 {
9247 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
9248 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
9249 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9250 		return B_NO_MEMORY;
9251 
9252 	char* oldPath = oldPathBuffer.LockBuffer();
9253 	char* newPath = newPathBuffer.LockBuffer();
9254 
9255 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
9256 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
9257 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
9258 		return B_BAD_ADDRESS;
9259 
9260 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9261 }
9262 
9263 
9264 status_t
9265 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9266 {
9267 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9268 	if (pathBuffer.InitCheck() != B_OK)
9269 		return B_NO_MEMORY;
9270 
9271 	char* path = pathBuffer.LockBuffer();
9272 
9273 	if (!IS_USER_ADDRESS(userPath)
9274 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9275 		return B_BAD_ADDRESS;
9276 	}
9277 
9278 	// split into directory vnode and filename path
9279 	char filename[B_FILE_NAME_LENGTH];
9280 	struct vnode* dir;
9281 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9282 	if (status != B_OK)
9283 		return status;
9284 
9285 	VNodePutter _(dir);
9286 
9287 	// the underlying FS needs to support creating FIFOs
9288 	if (!HAS_FS_CALL(dir, create_special_node))
9289 		return B_UNSUPPORTED;
9290 
9291 	// create the entry	-- the FIFO sub node is set up automatically
9292 	fs_vnode superVnode;
9293 	ino_t nodeID;
9294 	status = FS_CALL(dir, create_special_node, filename, NULL,
9295 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9296 
9297 	// create_special_node() acquired a reference for us that we don't need.
9298 	if (status == B_OK)
9299 		put_vnode(dir->mount->volume, nodeID);
9300 
9301 	return status;
9302 }
9303 
9304 
9305 status_t
9306 _user_create_pipe(int* userFDs)
9307 {
9308 	// rootfs should support creating FIFOs, but let's be sure
9309 	if (!HAS_FS_CALL(sRoot, create_special_node))
9310 		return B_UNSUPPORTED;
9311 
9312 	// create the node	-- the FIFO sub node is set up automatically
9313 	fs_vnode superVnode;
9314 	ino_t nodeID;
9315 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9316 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9317 	if (status != B_OK)
9318 		return status;
9319 
9320 	// We've got one reference to the node and need another one.
9321 	struct vnode* vnode;
9322 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9323 	if (status != B_OK) {
9324 		// that should not happen
9325 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9326 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9327 		return status;
9328 	}
9329 
9330 	// Everything looks good so far. Open two FDs, one for reading and one
9331 	// for writing.
9332 	int fds[2];
9333 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9334 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9335 
9336 	FDCloser closer0(fds[0], false);
9337 	FDCloser closer1(fds[1], false);
9338 
9339 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9340 
9341 	// copy FDs to userland
9342 	if (status == B_OK) {
9343 		if (!IS_USER_ADDRESS(userFDs)
9344 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9345 			status = B_BAD_ADDRESS;
9346 		}
9347 	}
9348 
9349 	// keep FDs, if everything went fine
9350 	if (status == B_OK) {
9351 		closer0.Detach();
9352 		closer1.Detach();
9353 	}
9354 
9355 	return status;
9356 }
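
/*
	This backs the POSIX pipe() call (presumably reached through a matching
	_kern_create_pipe() stub, following the _user_/_kern_ pairing seen
	elsewhere in this file): fds[0] is opened O_RDONLY and fds[1] O_WRONLY,
	matching the usual read end/write end convention. Userland sketch:

		int fds[2];
		if (pipe(fds) == 0) {
			// write to fds[1], read from fds[0]
		}
*/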
9357 
9358 
9359 status_t
9360 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9361 {
9362 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9363 	if (pathBuffer.InitCheck() != B_OK)
9364 		return B_NO_MEMORY;
9365 
9366 	char* path = pathBuffer.LockBuffer();
9367 
9368 	if (!IS_USER_ADDRESS(userPath)
9369 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9370 		return B_BAD_ADDRESS;
9371 
9372 	return common_access(fd, path, mode, effectiveUserGroup, false);
9373 }
9374 
9375 
9376 status_t
9377 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9378 	struct stat* userStat, size_t statSize)
9379 {
9380 	struct stat stat;
9381 	status_t status;
9382 
9383 	if (statSize > sizeof(struct stat))
9384 		return B_BAD_VALUE;
9385 
9386 	if (!IS_USER_ADDRESS(userStat))
9387 		return B_BAD_ADDRESS;
9388 
9389 	if (userPath) {
9390 		// path given: get the stat of the node referred to by (fd, path)
9391 		if (!IS_USER_ADDRESS(userPath))
9392 			return B_BAD_ADDRESS;
9393 
9394 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9395 		if (pathBuffer.InitCheck() != B_OK)
9396 			return B_NO_MEMORY;
9397 
9398 		char* path = pathBuffer.LockBuffer();
9399 
9400 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9401 		if (length < B_OK)
9402 			return length;
9403 		if (length >= B_PATH_NAME_LENGTH)
9404 			return B_NAME_TOO_LONG;
9405 
9406 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9407 	} else {
9408 		// no path given: get the FD and use the FD operation
9409 		struct file_descriptor* descriptor
9410 			= get_fd(get_current_io_context(false), fd);
9411 		if (descriptor == NULL)
9412 			return B_FILE_ERROR;
9413 
9414 		if (descriptor->ops->fd_read_stat)
9415 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9416 		else
9417 			status = B_UNSUPPORTED;
9418 
9419 		put_fd(descriptor);
9420 	}
9421 
9422 	if (status != B_OK)
9423 		return status;
9424 
9425 	return user_memcpy(userStat, &stat, statSize);
9426 }
9427 
9428 
9429 status_t
9430 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9431 	const struct stat* userStat, size_t statSize, int statMask)
9432 {
9433 	if (statSize > sizeof(struct stat))
9434 		return B_BAD_VALUE;
9435 
9436 	struct stat stat;
9437 
9438 	if (!IS_USER_ADDRESS(userStat)
9439 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9440 		return B_BAD_ADDRESS;
9441 
9442 	// clear additional stat fields
9443 	if (statSize < sizeof(struct stat))
9444 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9445 
9446 	status_t status;
9447 
9448 	if (userPath) {
9449 		// path given: write the stat of the node referred to by (fd, path)
9450 		if (!IS_USER_ADDRESS(userPath))
9451 			return B_BAD_ADDRESS;
9452 
9453 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9454 		if (pathBuffer.InitCheck() != B_OK)
9455 			return B_NO_MEMORY;
9456 
9457 		char* path = pathBuffer.LockBuffer();
9458 
9459 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9460 		if (length < B_OK)
9461 			return length;
9462 		if (length >= B_PATH_NAME_LENGTH)
9463 			return B_NAME_TOO_LONG;
9464 
9465 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9466 			statMask, false);
9467 	} else {
9468 		// no path given: get the FD and use the FD operation
9469 		struct file_descriptor* descriptor
9470 			= get_fd(get_current_io_context(false), fd);
9471 		if (descriptor == NULL)
9472 			return B_FILE_ERROR;
9473 
9474 		if (descriptor->ops->fd_write_stat) {
9475 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9476 				statMask);
9477 		} else
9478 			status = B_UNSUPPORTED;
9479 
9480 		put_fd(descriptor);
9481 	}
9482 
9483 	return status;
9484 }
9485 
9486 
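/*!	Opens the attribute directory of the node referred to by (fd, path),
	or of the node \a fd itself when no path is given.
*/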
9487 int
9488 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9489 {
9490 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9491 	if (pathBuffer.InitCheck() != B_OK)
9492 		return B_NO_MEMORY;
9493 
9494 	char* path = pathBuffer.LockBuffer();
9495 
9496 	if (userPath != NULL) {
9497 		if (!IS_USER_ADDRESS(userPath)
9498 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9499 			return B_BAD_ADDRESS;
9500 	}
9501 
9502 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9503 }
9504 
9505 
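/*!	The kernel end of fs_read_attr(): implemented by opening the attribute
	read-only, reading through the regular I/O path and closing it again.
*/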
9506 ssize_t
9507 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9508 	size_t readBytes)
9509 {
9510 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9511 	if (attr < 0)
9512 		return attr;
9513 
9514 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9515 	_user_close(attr);
9516 
9517 	return bytes;
9518 }
9519 
9520 
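/*!	The kernel end of fs_write_attr(); creates the attribute if it does
	not exist yet. See the comment below for the BeOS truncation semantics.
*/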
9521 ssize_t
9522 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9523 	const void* buffer, size_t writeBytes)
9524 {
	// Support the BeOS-typical truncation semantics as well as the position
	// argument: the attribute is only truncated when writing starts at
	// offset 0.
9527 	int attr = attr_create(fd, NULL, attribute, type,
9528 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9529 	if (attr < 0)
9530 		return attr;
9531 
9532 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9533 	_user_close(attr);
9534 
9535 	return bytes;
9536 }
9537 
9538 
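/*!	The kernel end of fs_stat_attr(): opens the attribute read-only, reads
	its stat info and condenses it into the type/size pair of an attr_info.
*/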
9539 status_t
9540 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9541 {
9542 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9543 	if (attr < 0)
9544 		return attr;
9545 
9546 	struct file_descriptor* descriptor
9547 		= get_fd(get_current_io_context(false), attr);
9548 	if (descriptor == NULL) {
9549 		_user_close(attr);
9550 		return B_FILE_ERROR;
9551 	}
9552 
9553 	struct stat stat;
9554 	status_t status;
9555 	if (descriptor->ops->fd_read_stat)
9556 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9557 	else
9558 		status = B_UNSUPPORTED;
9559 
9560 	put_fd(descriptor);
9561 	_user_close(attr);
9562 
9563 	if (status == B_OK) {
9564 		attr_info info;
9565 		info.type = stat.st_type;
9566 		info.size = stat.st_size;
9567 
9568 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9569 			return B_BAD_ADDRESS;
9570 	}
9571 
9572 	return status;
9573 }
9574 
9575 
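/*!	Opens or, if \a openMode contains O_CREAT, creates the named attribute
	of the node referred to by (fd, path); with a NULL path the node that
	\a fd refers to is used directly.
*/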
9576 int
9577 _user_open_attr(int fd, const char* userPath, const char* userName,
9578 	uint32 type, int openMode)
9579 {
9580 	char name[B_FILE_NAME_LENGTH];
9581 
9582 	if (!IS_USER_ADDRESS(userName)
9583 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9584 		return B_BAD_ADDRESS;
9585 
9586 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9587 	if (pathBuffer.InitCheck() != B_OK)
9588 		return B_NO_MEMORY;
9589 
9590 	char* path = pathBuffer.LockBuffer();
9591 
9592 	if (userPath != NULL) {
9593 		if (!IS_USER_ADDRESS(userPath)
9594 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9595 			return B_BAD_ADDRESS;
9596 	}
9597 
9598 	if ((openMode & O_CREAT) != 0) {
9599 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9600 			false);
9601 	}
9602 
9603 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9604 }
9605 
9606 
9607 status_t
9608 _user_remove_attr(int fd, const char* userName)
9609 {
9610 	char name[B_FILE_NAME_LENGTH];
9611 
9612 	if (!IS_USER_ADDRESS(userName)
9613 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9614 		return B_BAD_ADDRESS;
9615 
9616 	return attr_remove(fd, name, false);
9617 }
9618 
9619 
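/*!	Renames an attribute, possibly moving it to another file: the source
	and target attributes are named relative to the two given FDs.
*/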
9620 status_t
9621 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9622 	const char* userToName)
9623 {
9624 	if (!IS_USER_ADDRESS(userFromName)
9625 		|| !IS_USER_ADDRESS(userToName))
9626 		return B_BAD_ADDRESS;
9627 
9628 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9629 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9630 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9631 		return B_NO_MEMORY;
9632 
9633 	char* fromName = fromNameBuffer.LockBuffer();
9634 	char* toName = toNameBuffer.LockBuffer();
9635 
9636 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9637 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9638 		return B_BAD_ADDRESS;
9639 
9640 	return attr_rename(fromFile, fromName, toFile, toName, false);
9641 }
9642 
9643 
9644 int
9645 _user_open_index_dir(dev_t device)
9646 {
9647 	return index_dir_open(device, false);
9648 }
9649 
9650 
9651 status_t
9652 _user_create_index(dev_t device, const char* userName, uint32 type,
9653 	uint32 flags)
9654 {
9655 	char name[B_FILE_NAME_LENGTH];
9656 
9657 	if (!IS_USER_ADDRESS(userName)
9658 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9659 		return B_BAD_ADDRESS;
9660 
9661 	return index_create(device, name, type, flags, false);
9662 }
9663 
9664 
9665 status_t
9666 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9667 {
9668 	char name[B_FILE_NAME_LENGTH];
9669 	struct stat stat;
9670 	status_t status;
9671 
9672 	if (!IS_USER_ADDRESS(userName)
9673 		|| !IS_USER_ADDRESS(userStat)
9674 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9675 		return B_BAD_ADDRESS;
9676 
9677 	status = index_name_read_stat(device, name, &stat, false);
9678 	if (status == B_OK) {
9679 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9680 			return B_BAD_ADDRESS;
9681 	}
9682 
9683 	return status;
9684 }
9685 
9686 
9687 status_t
9688 _user_remove_index(dev_t device, const char* userName)
9689 {
9690 	char name[B_FILE_NAME_LENGTH];
9691 
9692 	if (!IS_USER_ADDRESS(userName)
9693 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9694 		return B_BAD_ADDRESS;
9695 
9696 	return index_remove(device, name, false);
9697 }
9698 
9699 
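/*!	The kernel end of getcwd(): writes the current working directory of
	the calling team into \a userBuffer; \a size is clamped to
	kMaxPathLength.
*/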
9700 status_t
9701 _user_getcwd(char* userBuffer, size_t size)
9702 {
9703 	if (size == 0)
9704 		return B_BAD_VALUE;
9705 	if (!IS_USER_ADDRESS(userBuffer))
9706 		return B_BAD_ADDRESS;
9707 
9708 	if (size > kMaxPathLength)
9709 		size = kMaxPathLength;
9710 
9711 	KPath pathBuffer(size);
9712 	if (pathBuffer.InitCheck() != B_OK)
9713 		return B_NO_MEMORY;
9714 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9716 
9717 	char* path = pathBuffer.LockBuffer();
9718 
9719 	status_t status = get_cwd(path, size, false);
9720 	if (status != B_OK)
9721 		return status;
9722 
9723 	// Copy back the result
9724 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9725 		return B_BAD_ADDRESS;
9726 
	return B_OK;
9728 }
9729 
9730 
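/*!	The kernel end of chdir() and fchdir(): with a path the new working
	directory is the node referred to by (fd, path), without one it is the
	directory \a fd refers to.
*/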
9731 status_t
9732 _user_setcwd(int fd, const char* userPath)
9733 {
9734 	TRACE(("user_setcwd: path = %p\n", userPath));
9735 
9736 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9737 	if (pathBuffer.InitCheck() != B_OK)
9738 		return B_NO_MEMORY;
9739 
9740 	char* path = pathBuffer.LockBuffer();
9741 
9742 	if (userPath != NULL) {
9743 		if (!IS_USER_ADDRESS(userPath)
9744 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9745 			return B_BAD_ADDRESS;
9746 	}
9747 
9748 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9749 }
9750 
9751 
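/*!	The kernel end of chroot(). Only affects the I/O context of the
	calling team; the reference to the previous root vnode is released
	after the swap.
*/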
9752 status_t
9753 _user_change_root(const char* userPath)
9754 {
9755 	// only root is allowed to chroot()
9756 	if (geteuid() != 0)
9757 		return B_NOT_ALLOWED;
9758 
9759 	// alloc path buffer
9760 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9761 	if (pathBuffer.InitCheck() != B_OK)
9762 		return B_NO_MEMORY;
9763 
9764 	// copy userland path to kernel
9765 	char* path = pathBuffer.LockBuffer();
	// unlike most other path-based syscalls, a NULL path is not meaningful here
	if (userPath == NULL || !IS_USER_ADDRESS(userPath)
		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;
9771 
9772 	// get the vnode
9773 	struct vnode* vnode;
9774 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9775 	if (status != B_OK)
9776 		return status;
9777 
9778 	// set the new root
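	// (sIOContextRootLock makes the swap atomic with respect to readers of
	// context->root, which take their own reference while holding the lock)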
9779 	struct io_context* context = get_current_io_context(false);
9780 	mutex_lock(&sIOContextRootLock);
9781 	struct vnode* oldRoot = context->root;
9782 	context->root = vnode;
9783 	mutex_unlock(&sIOContextRootLock);
9784 
9785 	put_vnode(oldRoot);
9786 
9787 	return B_OK;
9788 }
9789 
9790 
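/*!	The kernel end of fs_open_query() and fs_open_live_query(): opens a
	query on \a device for the given query string; \a port and \a token
	are only used for live queries, which send their updates there.
*/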
9791 int
9792 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9793 	uint32 flags, port_id port, int32 token)
9794 {
	if (device < 0 || userQuery == NULL || queryLength == 0)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userQuery))
		return B_BAD_ADDRESS;

	// this is a safety restriction
	if (queryLength >= 65536)
		return B_NAME_TOO_LONG;

	char* query = (char*)malloc(queryLength + 1);
	if (query == NULL)
		return B_NO_MEMORY;

	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
		free(query);
		return B_BAD_ADDRESS;
	}
9811 
9812 	int fd = query_open(device, query, flags, port, token, false);
9813 
9814 	free(query);
9815 	return fd;
9816 }
9817 
9818 
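// vfs_request_io.cpp is included textually (rather than compiled as a
// separate unit) so that it can use the static helpers defined in this file.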
9819 #include "vfs_request_io.cpp"
9820