xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 91054f1d38dd7827c0f0ba9490c213775ec7b471)
1 /*
2  * Copyright 2005-2008, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 /*! Virtual File System and File System Interface Layer */
11 
12 
13 #include <ctype.h>
14 #include <fcntl.h>
15 #include <limits.h>
16 #include <stddef.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/file.h>
20 #include <sys/resource.h>
21 #include <sys/stat.h>
22 #include <unistd.h>
23 
24 #include <fs_info.h>
25 #include <fs_interface.h>
26 #include <fs_volume.h>
27 #include <OS.h>
28 #include <StorageDefs.h>
29 
30 #include <AutoDeleter.h>
31 #include <block_cache.h>
32 #include <boot/kernel_args.h>
33 #include <disk_device_manager/KDiskDevice.h>
34 #include <disk_device_manager/KDiskDeviceManager.h>
35 #include <disk_device_manager/KDiskDeviceUtils.h>
36 #include <disk_device_manager/KDiskSystem.h>
37 #include <fd.h>
38 #include <file_cache.h>
39 #include <fs/node_monitor.h>
40 #include <khash.h>
41 #include <KPath.h>
42 #include <lock.h>
43 #include <low_resource_manager.h>
44 #include <syscalls.h>
45 #include <syscall_restart.h>
46 #include <tracing.h>
47 #include <util/atomic.h>
48 #include <util/AutoLock.h>
49 #include <util/DoublyLinkedList.h>
50 #include <util/OpenHashTable.h>
51 #include <vfs.h>
52 #include <vm.h>
53 #include <vm_cache.h>
54 
55 #include "fifo.h"
56 #include "IORequest.h"
57 
58 
59 //#define TRACE_VFS
60 #ifdef TRACE_VFS
61 #	define TRACE(x) dprintf x
62 #	define FUNCTION(x) dprintf x
63 #else
64 #	define TRACE(x) ;
65 #	define FUNCTION(x) ;
66 #endif
67 
68 #define ADD_DEBUGGER_COMMANDS
69 
70 
71 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
72 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
73 
74 #if KDEBUG
75 #	define FS_CALL(vnode, op, params...) \
76 		( HAS_FS_CALL(vnode, op) ? \
77 			vnode->ops->op(vnode->mount->volume, vnode, params) \
78 			: (panic("FS_CALL op " #op " is NULL"), 0))
79 #	define FS_CALL_NO_PARAMS(vnode, op) \
80 		( HAS_FS_CALL(vnode, op) ? \
81 			vnode->ops->op(vnode->mount->volume, vnode) \
82 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
83 #	define FS_MOUNT_CALL(mount, op, params...) \
84 		( HAS_FS_MOUNT_CALL(mount, op) ? \
85 			mount->volume->ops->op(mount->volume, params) \
86 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
87 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
88 		( HAS_FS_MOUNT_CALL(mount, op) ? \
89 			mount->volume->ops->op(mount->volume) \
90 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
91 #else
92 #	define FS_CALL(vnode, op, params...) \
93 			vnode->ops->op(vnode->mount->volume, vnode, params)
94 #	define FS_CALL_NO_PARAMS(vnode, op) \
95 			vnode->ops->op(vnode->mount->volume, vnode)
96 #	define FS_MOUNT_CALL(mount, op, params...) \
97 			mount->volume->ops->op(mount->volume, params)
98 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
99 			mount->volume->ops->op(mount->volume)
100 #endif
101 
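/* Illustrative sketch: how the FS_CALL() macros are typically used. The
   example function below is hypothetical; normalize_flock() further down
   uses exactly this pattern for the optional read_stat() hook. */
#if 0
static status_t
example_fs_call_usage(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return EOPNOTSUPP;

	// expands to: vnode->ops->read_stat(vnode->mount->volume, vnode, stat)
	return FS_CALL(vnode, read_stat, stat);
}
#endif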
102 
103 const static uint32 kMaxUnusedVnodes = 8192;
104 	// This is the maximum number of unused vnodes that the system
105 	// will keep around (a weak limit: if there is enough memory left,
106 	// they won't get flushed even when hitting that limit).
107 	// It may be chosen with respect to the available memory or enhanced
108 	// by some timestamp/frequency heuristic.
109 
110 const static uint32 kMaxEntryCacheEntryCount = 8192;
111 	// Maximum number of entries per entry cache. It's a hard limit ATM.
112 
113 struct EntryCacheKey {
114 	EntryCacheKey(ino_t dirID, const char* name)
115 		:
116 		dir_id(dirID),
117 		name(name)
118 	{
119 	}
120 
121 	ino_t		dir_id;
122 	const char*	name;
123 };
124 
125 
126 struct EntryCacheEntry : HashTableLink<EntryCacheEntry>,
127 		DoublyLinkedListLinkImpl<EntryCacheEntry> {
128 	ino_t	node_id;
129 	ino_t	dir_id;
130 	char	name[1];
131 };
132 
133 
134 struct EntryCacheHashDefinition {
135 	typedef EntryCacheKey	KeyType;
136 	typedef EntryCacheEntry	ValueType;
137 
138 	uint32 HashKey(const EntryCacheKey& key) const
139 	{
140 		return (uint32)key.dir_id ^ (uint32)(key.dir_id >> 32)
141 			^ hash_hash_string(key.name);
142 	}
143 
144 	size_t Hash(const EntryCacheEntry* value) const
145 	{
146 		return (uint32)value->dir_id ^ (uint32)(value->dir_id >> 32)
147 			^ hash_hash_string(value->name);
148 	}
149 
150 	bool Compare(const EntryCacheKey& key, const EntryCacheEntry* value) const
151 	{
152 		return value->dir_id == key.dir_id
153 			&& strcmp(value->name, key.name) == 0;
154 	}
155 
156 	HashTableLink<EntryCacheEntry>* GetLink(EntryCacheEntry* value) const
157 	{
158 		return value;
159 	}
160 };
161 
162 
163 class EntryCache {
164 public:
165 	EntryCache()
166 	{
167 		mutex_init(&fLock, "entry cache");
168 
169 		new(&fEntries) EntryTable;
170 		new(&fUsedEntries) EntryList;
171 		fEntryCount = 0;
172 	}
173 
174 	~EntryCache()
175 	{
176 		while (EntryCacheEntry* entry = fUsedEntries.Head())
177 			_Remove(entry);
178 
179 		mutex_destroy(&fLock);
180 	}
181 
182 	status_t Init()
183 	{
184 		return fEntries.Init();
185 	}
186 
187 	status_t Add(ino_t dirID, const char* name, ino_t nodeID)
188 	{
189 		MutexLocker _(fLock);
190 
191 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
192 		if (entry != NULL) {
193 			entry->node_id = nodeID;
194 			return B_OK;
195 		}
196 
197 		if (fEntryCount >= kMaxEntryCacheEntryCount)
198 			_Remove(fUsedEntries.Head());
199 
200 		entry = (EntryCacheEntry*)malloc(sizeof(EntryCacheEntry)
201 			+ strlen(name));
202 		if (entry == NULL)
203 			return B_NO_MEMORY;
204 
205 		entry->node_id = nodeID;
206 		entry->dir_id = dirID;
207 		strcpy(entry->name, name);
208 
209 		fEntries.Insert(entry);
210 		fUsedEntries.Add(entry);
211 		fEntryCount++;
212 
213 		return B_OK;
214 	}
215 
216 	status_t Remove(ino_t dirID, const char* name)
217 	{
218 		MutexLocker _(fLock);
219 
220 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
221 		if (entry == NULL)
222 			return B_ENTRY_NOT_FOUND;
223 
224 		_Remove(entry);
225 
226 		return B_OK;
227 	}
228 
229 	bool Lookup(ino_t dirID, const char* name, ino_t& nodeID)
230 	{
231 		MutexLocker _(fLock);
232 
233 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
234 		if (entry == NULL)
235 			return false;
236 
237 		// requeue at the end
238 		fUsedEntries.Remove(entry);
239 		fUsedEntries.Add(entry);
240 
241 		nodeID = entry->node_id;
242 		return true;
243 	}
244 
245 	void _Remove(EntryCacheEntry* entry)
246 	{
247 		fEntries.Remove(entry);
248 		fUsedEntries.Remove(entry);
249 		free(entry);
250 		fEntryCount--;
251 	}
252 
253 private:
254 	typedef OpenHashTable<EntryCacheHashDefinition> EntryTable;
255 	typedef DoublyLinkedList<EntryCacheEntry> EntryList;
256 
257 	mutex		fLock;
258 	EntryTable	fEntries;
259 	EntryList	fUsedEntries;	// LRU queue (LRU entry at the head)
260 	uint32		fEntryCount;
261 };
262 
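/* Illustrative sketch: typical use of the per-mount EntryCache above. The
   directory and node IDs are made up for the example; note that Lookup()
   also requeues a hit at the MRU end of the internal LRU list. */
#if 0
static void
example_entry_cache_usage(EntryCache& cache)
{
	// remember that entry "foo" in directory 42 resolves to node 1000
	cache.Add(42, "foo", 1000);

	ino_t nodeID;
	if (cache.Lookup(42, "foo", nodeID)) {
		// nodeID is now 1000
	}

	// forget the entry again (e.g. after an unlink)
	cache.Remove(42, "foo");
}
#endif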
263 
264 struct vnode : fs_vnode, DoublyLinkedListLinkImpl<vnode> {
265 	struct vnode*	next;
266 	vm_cache*		cache;
267 	dev_t			device;
268 	list_link		unused_link;
269 	ino_t			id;
270 	struct fs_mount* mount;
271 	struct vnode*	covered_by;
272 	int32			ref_count;
273 	uint32			type : 29;
274 						// TODO: S_INDEX_DIR actually needs another bit.
275 						// Better combine this field with the following ones.
276 	uint32			remove : 1;
277 	uint32			busy : 1;
278 	uint32			unpublished : 1;
279 	struct advisory_locking* advisory_locking;
280 	struct file_descriptor* mandatory_locked_by;
281 };
282 
283 struct vnode_hash_key {
284 	dev_t	device;
285 	ino_t	vnode;
286 };
287 
288 typedef DoublyLinkedList<vnode> VnodeList;
289 
290 /*!	\brief Structure to manage a mounted file system
291 
292 	Note: The root_vnode and covers_vnode fields (what others?) are
293 	initialized in fs_mount() and not changed afterwards. That is, as soon
294 	as the mount is mounted and it is made sure it won't be unmounted
295 	(e.g. by holding a reference to a vnode of that mount), (read) access
296 	to those fields is always safe, even without additional locking.
297 	Moreover, while mounted the mount holds a reference to the
298 	covers_vnode, thus making the access path
299 	vnode->mount->covers_vnode->mount->... safe if a reference to vnode is
300 	held (note that for the root mount covers_vnode is NULL, though).
301 */
302 struct fs_mount {
303 	fs_mount()
304 		:
305 		volume(NULL),
306 		device_name(NULL)
307 	{
308 		recursive_lock_init(&rlock, "mount rlock");
309 	}
310 
311 	~fs_mount()
312 	{
313 		recursive_lock_destroy(&rlock);
314 		free(device_name);
315 
316 		while (volume) {
317 			fs_volume* superVolume = volume->super_volume;
318 
319 			if (volume->file_system != NULL)
320 				put_module(volume->file_system->info.name);
321 
322 			free(volume->file_system_name);
323 			free(volume);
324 			volume = superVolume;
325 		}
326 	}
327 
328 	struct fs_mount* next;
329 	dev_t			id;
330 	fs_volume*		volume;
331 	char*			device_name;
332 	recursive_lock	rlock;	// guards the vnodes list
333 	struct vnode*	root_vnode;
334 	struct vnode*	covers_vnode;
335 	KPartition*		partition;
336 	VnodeList		vnodes;
337 	EntryCache		entry_cache;
338 	bool			unmounting;
339 	bool			owns_file_device;
340 };
341 
342 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
343 	list_link		link;
344 	team_id			team;
345 	pid_t			session;
346 	off_t			start;
347 	off_t			end;
348 	bool			shared;
349 };
350 
351 typedef DoublyLinkedList<advisory_lock> LockList;
352 
353 struct advisory_locking {
354 	sem_id			lock;
355 	sem_id			wait_sem;
356 	LockList		locks;
357 
358 	advisory_locking()
359 		:
360 		lock(-1),
361 		wait_sem(-1)
362 	{
363 	}
364 
365 	~advisory_locking()
366 	{
367 		if (lock >= 0)
368 			delete_sem(lock);
369 		if (wait_sem >= 0)
370 			delete_sem(wait_sem);
371 	}
372 };
373 
374 /*!	\brief Guards sMountsTable.
375 
376 	The holder is allowed read/write access to sMountsTable.
377 	Manipulation of the fs_mount structures themselves
378 	(and their destruction) requires different locks though.
379 */
380 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
381 
382 /*!	\brief Guards mount/unmount operations.
383 
384 	fs_mount() and fs_unmount() hold the lock during their whole operation.
385 	That is, locking the lock ensures that no FS is mounted/unmounted. In
386 	particular this means that
387 	- sMountsTable will not be modified,
388 	- the fields immutable after initialization of the fs_mount structures in
389 	  sMountsTable will not be modified,
390 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
391 
392 	The thread trying to lock the lock must not hold sVnodeMutex or
393 	sMountMutex.
394 */
395 static recursive_lock sMountOpLock;
396 
397 /*!	\brief Guards the vnode::covered_by field of any vnode
398 
399 	The holder is allowed read access to the vnode::covered_by field of
400 	any vnode. Additionally, holding sMountOpLock allows for write access.
401 
402 	The thread trying to lock the mutex must not hold sVnodeMutex.
403 */
404 static mutex sVnodeCoveredByMutex
405 	= MUTEX_INITIALIZER("vfs_vnode_covered_by_lock");
406 
407 /*!	\brief Guards sVnodeTable.
408 
409 	The holder is allowed read/write access to sVnodeTable and to any
410 	unbusy vnode in that table, save for the immutable fields (device, id,
411 	private_node, mount), to which only read-only access is allowed, and
412 	for the covered_by field, which is guarded by sMountOpLock and
413 	sVnodeCoveredByMutex.
414 
415 	The thread trying to lock the mutex must not hold sMountMutex.
416 	You must not have this mutex held when calling create_sem(), as this
417 	might call vfs_free_unused_vnodes().
418 */
419 static mutex sVnodeMutex = MUTEX_INITIALIZER("vfs_vnode_lock");
420 
421 /*!	\brief Guards io_context::root.
422 
423 	Must be held when setting or getting the io_context::root field.
424 	The only operation allowed while holding this lock besides getting or
425 	setting the field is inc_vnode_ref_count() on io_context::root.
426 */
427 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
428 
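/* Illustrative sketch: the lock ordering that follows from the comments
   above. get_mount() below acquires the locks in exactly this order --
   sVnodeMutex may be held when taking sMountMutex, never the reverse. */
#if 0
static void
example_lock_ordering()
{
	MutexLocker nodeLocker(sVnodeMutex);	// first the vnode table lock...
	MutexLocker mountLocker(sMountMutex);	// ...then the mounts table lock
	// ... look up vnodes/mounts here ...
}	// the MutexLockers release in reverse order
#endif
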
429 #define VNODE_HASH_TABLE_SIZE 1024
430 static hash_table* sVnodeTable;
431 static list sUnusedVnodeList;
432 static uint32 sUnusedVnodes = 0;
433 static struct vnode* sRoot;
434 
435 #define MOUNTS_HASH_TABLE_SIZE 16
436 static hash_table* sMountsTable;
437 static dev_t sNextMountID = 1;
438 
439 #define MAX_TEMP_IO_VECS 8
440 
441 mode_t __gUmask = 022;
442 
443 /* function declarations */
444 
445 // file descriptor operation prototypes
446 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
447 	void* buffer, size_t* _bytes);
448 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
449 	const void* buffer, size_t* _bytes);
450 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
451 	int seekType);
452 static void file_free_fd(struct file_descriptor* descriptor);
453 static status_t file_close(struct file_descriptor* descriptor);
454 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
455 	struct selectsync* sync);
456 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
457 	struct selectsync* sync);
458 static status_t dir_read(struct io_context* context,
459 	struct file_descriptor* descriptor, struct dirent* buffer, size_t bufferSize,
460 	uint32* _count);
461 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
462 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
463 static status_t dir_rewind(struct file_descriptor* descriptor);
464 static void dir_free_fd(struct file_descriptor* descriptor);
465 static status_t dir_close(struct file_descriptor* descriptor);
466 static status_t attr_dir_read(struct io_context* context,
467 	struct file_descriptor* descriptor, struct dirent* buffer, size_t bufferSize,
468 	uint32* _count);
469 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
470 static void attr_dir_free_fd(struct file_descriptor* descriptor);
471 static status_t attr_dir_close(struct file_descriptor* descriptor);
472 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
473 	void* buffer, size_t* _bytes);
474 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
475 	const void* buffer, size_t* _bytes);
476 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
477 	int seekType);
478 static void attr_free_fd(struct file_descriptor* descriptor);
479 static status_t attr_close(struct file_descriptor* descriptor);
480 static status_t attr_read_stat(struct file_descriptor* descriptor,
481 	struct stat* statData);
482 static status_t attr_write_stat(struct file_descriptor* descriptor,
483 	const struct stat* stat, int statMask);
484 static status_t index_dir_read(struct io_context* context,
485 	struct file_descriptor* descriptor, struct dirent* buffer, size_t bufferSize,
486 	uint32* _count);
487 static status_t index_dir_rewind(struct file_descriptor* descriptor);
488 static void index_dir_free_fd(struct file_descriptor* descriptor);
489 static status_t index_dir_close(struct file_descriptor* descriptor);
490 static status_t query_read(struct io_context* context,
491 	struct file_descriptor* descriptor, struct dirent* buffer, size_t bufferSize,
492 	uint32* _count);
493 static status_t query_rewind(struct file_descriptor* descriptor);
494 static void query_free_fd(struct file_descriptor* descriptor);
495 static status_t query_close(struct file_descriptor* descriptor);
496 
497 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
498 	void* buffer, size_t length);
499 static status_t common_read_stat(struct file_descriptor* descriptor,
500 	struct stat* statData);
501 static status_t common_write_stat(struct file_descriptor* descriptor,
502 	const struct stat* statData, int statMask);
503 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
504 	struct stat* stat, bool kernel);
505 
506 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
507 	bool traverseLeafLink, int count, bool kernel,
508 	struct vnode** _vnode, ino_t* _parentID);
509 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
510 	size_t bufferSize, bool kernel);
511 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
512 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
513 static void inc_vnode_ref_count(struct vnode* vnode);
514 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
515 	bool reenter);
516 static inline void put_vnode(struct vnode* vnode);
517 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
518 	bool kernel);
519 
520 
521 static struct fd_ops sFileOps = {
522 	file_read,
523 	file_write,
524 	file_seek,
525 	common_ioctl,
526 	NULL,		// set_flags
527 	file_select,
528 	file_deselect,
529 	NULL,		// read_dir()
530 	NULL,		// rewind_dir()
531 	common_read_stat,
532 	common_write_stat,
533 	file_close,
534 	file_free_fd
535 };
536 
537 static struct fd_ops sDirectoryOps = {
538 	NULL,		// read()
539 	NULL,		// write()
540 	NULL,		// seek()
541 	common_ioctl,
542 	NULL,		// set_flags
543 	NULL,		// select()
544 	NULL,		// deselect()
545 	dir_read,
546 	dir_rewind,
547 	common_read_stat,
548 	common_write_stat,
549 	dir_close,
550 	dir_free_fd
551 };
552 
553 static struct fd_ops sAttributeDirectoryOps = {
554 	NULL,		// read()
555 	NULL,		// write()
556 	NULL,		// seek()
557 	common_ioctl,
558 	NULL,		// set_flags
559 	NULL,		// select()
560 	NULL,		// deselect()
561 	attr_dir_read,
562 	attr_dir_rewind,
563 	common_read_stat,
564 	common_write_stat,
565 	attr_dir_close,
566 	attr_dir_free_fd
567 };
568 
569 static struct fd_ops sAttributeOps = {
570 	attr_read,
571 	attr_write,
572 	attr_seek,
573 	common_ioctl,
574 	NULL,		// set_flags
575 	NULL,		// select()
576 	NULL,		// deselect()
577 	NULL,		// read_dir()
578 	NULL,		// rewind_dir()
579 	attr_read_stat,
580 	attr_write_stat,
581 	attr_close,
582 	attr_free_fd
583 };
584 
585 static struct fd_ops sIndexDirectoryOps = {
586 	NULL,		// read()
587 	NULL,		// write()
588 	NULL,		// seek()
589 	NULL,		// ioctl()
590 	NULL,		// set_flags
591 	NULL,		// select()
592 	NULL,		// deselect()
593 	index_dir_read,
594 	index_dir_rewind,
595 	NULL,		// read_stat()
596 	NULL,		// write_stat()
597 	index_dir_close,
598 	index_dir_free_fd
599 };
600 
601 #if 0
602 static struct fd_ops sIndexOps = {
603 	NULL,		// read()
604 	NULL,		// write()
605 	NULL,		// seek()
606 	NULL,		// ioctl()
607 	NULL,		// set_flags
608 	NULL,		// select()
609 	NULL,		// deselect()
610 	NULL,		// dir_read()
611 	NULL,		// dir_rewind()
612 	index_read_stat,	// read_stat()
613 	NULL,		// write_stat()
614 	NULL,		// dir_close()
615 	NULL		// free_fd()
616 };
617 #endif
618 
619 static struct fd_ops sQueryOps = {
620 	NULL,		// read()
621 	NULL,		// write()
622 	NULL,		// seek()
623 	NULL,		// ioctl()
624 	NULL,		// set_flags
625 	NULL,		// select()
626 	NULL,		// deselect()
627 	query_read,
628 	query_rewind,
629 	NULL,		// read_stat()
630 	NULL,		// write_stat()
631 	query_close,
632 	query_free_fd
633 };
634 
635 
636 // VNodePutter
637 class VNodePutter {
638 public:
639 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
640 
641 	~VNodePutter()
642 	{
643 		Put();
644 	}
645 
646 	void SetTo(struct vnode* vnode)
647 	{
648 		Put();
649 		fVNode = vnode;
650 	}
651 
652 	void Put()
653 	{
654 		if (fVNode) {
655 			put_vnode(fVNode);
656 			fVNode = NULL;
657 		}
658 	}
659 
660 	struct vnode* Detach()
661 	{
662 		struct vnode* vnode = fVNode;
663 		fVNode = NULL;
664 		return vnode;
665 	}
666 
667 private:
668 	struct vnode* fVNode;
669 };
670 
671 
672 class FDCloser {
673 public:
674 	FDCloser() : fFD(-1), fKernel(true) {}
675 
676 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
677 
678 	~FDCloser()
679 	{
680 		Close();
681 	}
682 
683 	void SetTo(int fd, bool kernel)
684 	{
685 		Close();
686 		fFD = fd;
687 		fKernel = kernel;
688 	}
689 
690 	void Close()
691 	{
692 		if (fFD >= 0) {
693 			if (fKernel)
694 				_kern_close(fFD);
695 			else
696 				_user_close(fFD);
697 			fFD = -1;
698 		}
699 	}
700 
701 	int Detach()
702 	{
703 		int fd = fFD;
704 		fFD = -1;
705 		return fd;
706 	}
707 
708 private:
709 	int		fFD;
710 	bool	fKernel;
711 };
712 
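/* Illustrative sketch: how the two RAII helpers above are typically used.
   The function is hypothetical; the vnode reference and the FD are released
   on every early-return path unless explicitly Detach()ed. */
#if 0
static int
example_raii_usage(struct vnode* vnode, int fd, bool kernel)
{
	inc_vnode_ref_count(vnode);
	VNodePutter vnodePutter(vnode);	// calls put_vnode() on scope exit
	FDCloser fdCloser(fd, kernel);	// closes the FD on scope exit

	// ... operations that may fail and just return an error code ...

	// success: hand the FD to the caller; the vnode reference is still put
	return fdCloser.Detach();
}
#endif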
713 
714 #if VFS_PAGES_IO_TRACING
715 
716 namespace VFSPagesIOTracing {
717 
718 class PagesIOTraceEntry : public AbstractTraceEntry {
719 protected:
720 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
721 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
722 		status_t status, size_t bytesTransferred)
723 		:
724 		fVnode(vnode),
725 		fMountID(vnode->mount->id),
726 		fNodeID(vnode->id),
727 		fCookie(cookie),
728 		fPos(pos),
729 		fCount(count),
730 		fFlags(flags),
731 		fBytesRequested(bytesRequested),
732 		fStatus(status),
733 		fBytesTransferred(bytesTransferred)
734 	{
735 		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
736 			false);
737 	}
738 
739 	void AddDump(TraceOutput& out, const char* mode)
740 	{
741 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
742 			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
743 			fCookie, fPos, fBytesRequested);
744 
745 		if (fVecs != NULL) {
746 			for (uint32 i = 0; i < fCount; i++) {
747 				if (i > 0)
748 					out.Print(", ");
749 				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
750 			}
751 		}
752 
753 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
754 			fFlags, fStatus, fBytesTransferred);
755 	}
756 
757 protected:
758 	struct vnode*	fVnode;
759 	dev_t			fMountID;
760 	ino_t			fNodeID;
761 	void*			fCookie;
762 	off_t			fPos;
763 	iovec*			fVecs;
764 	uint32			fCount;
765 	uint32			fFlags;
766 	size_t			fBytesRequested;
767 	status_t		fStatus;
768 	size_t			fBytesTransferred;
769 };
770 
771 
772 class ReadPages : public PagesIOTraceEntry {
773 public:
774 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
775 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
776 		status_t status, size_t bytesTransferred)
777 		:
778 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
779 			bytesRequested, status, bytesTransferred)
780 	{
781 		Initialized();
782 	}
783 
784 	virtual void AddDump(TraceOutput& out)
785 	{
786 		PagesIOTraceEntry::AddDump(out, "read");
787 	}
788 };
789 
790 
791 class WritePages : public PagesIOTraceEntry {
792 public:
793 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
794 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
795 		status_t status, size_t bytesTransferred)
796 		:
797 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
798 			bytesRequested, status, bytesTransferred)
799 	{
800 		Initialized();
801 	}
802 
803 	virtual void AddDump(TraceOutput& out)
804 	{
805 		PagesIOTraceEntry::AddDump(out, "write");
806 	}
807 };
808 
809 }	// namespace VFSPagesIOTracing
810 
811 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
812 #else
813 #	define TPIO(x) ;
814 #endif	// VFS_PAGES_IO_TRACING
815 
816 
817 static int
818 mount_compare(void* _m, const void* _key)
819 {
820 	struct fs_mount* mount = (fs_mount*)_m;
821 	const dev_t* id = (dev_t*)_key;
822 
823 	if (mount->id == *id)
824 		return 0;
825 
826 	return -1;
827 }
828 
829 
830 static uint32
831 mount_hash(void* _m, const void* _key, uint32 range)
832 {
833 	struct fs_mount* mount = (fs_mount*)_m;
834 	const dev_t* id = (dev_t*)_key;
835 
836 	if (mount)
837 		return mount->id % range;
838 
839 	return (uint32)*id % range;
840 }
841 
842 
843 /*! Finds the mounted device (the fs_mount structure) with the given ID.
844 	Note, you must hold the sMountMutex lock when you call this function.
845 */
846 static struct fs_mount*
847 find_mount(dev_t id)
848 {
849 	ASSERT_LOCKED_MUTEX(&sMountMutex);
850 
851 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
852 }
853 
854 
855 static status_t
856 get_mount(dev_t id, struct fs_mount** _mount)
857 {
858 	struct fs_mount* mount;
859 
860 	MutexLocker nodeLocker(sVnodeMutex);
861 	MutexLocker mountLocker(sMountMutex);
862 
863 	mount = find_mount(id);
864 	if (mount == NULL)
865 		return B_BAD_VALUE;
866 
867 	struct vnode* rootNode = mount->root_vnode;
868 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
869 		// might have been called during a mount/unmount operation
870 		return B_BUSY;
871 	}
872 
873 	inc_vnode_ref_count(mount->root_vnode);
874 	*_mount = mount;
875 	return B_OK;
876 }
877 
878 
879 static void
880 put_mount(struct fs_mount* mount)
881 {
882 	if (mount)
883 		put_vnode(mount->root_vnode);
884 }
885 
886 
887 /*!	Tries to open the specified file system module.
888 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
889 	Returns a pointer to the file system module interface, or NULL if it
890 	could not open the module.
891 */
892 static file_system_module_info*
893 get_file_system(const char* fsName)
894 {
895 	char name[B_FILE_NAME_LENGTH];
896 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
897 		// construct module name if we didn't get one
898 		// (we currently support only one API)
899 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
900 		fsName = NULL;
901 	}
902 
903 	file_system_module_info* info;
904 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
905 		return NULL;
906 
907 	return info;
908 }
909 
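/* Illustrative sketch: both spellings from the comment above end up loading
   the same module. */
#if 0
	file_system_module_info* info = get_file_system("bfs");
		// loads "file_systems/bfs/v1"
	if (info != NULL) {
		// ... use info, then release it via put_module(info->info.name) ...
	}
#endif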
910 
911 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
912 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
913 	The name is allocated for you, and you have to free() it when you're
914 	done with it.
915 	Returns NULL if the required memory is not available.
916 */
917 static char*
918 get_file_system_name(const char* fsName)
919 {
920 	const size_t length = strlen("file_systems/");
921 
922 	if (strncmp(fsName, "file_systems/", length)) {
923 		// the name already seems to be the module's file name
924 		return strdup(fsName);
925 	}
926 
927 	fsName += length;
928 	const char* end = strchr(fsName, '/');
929 	if (end == NULL) {
930 		// this doesn't seem to be a valid name, but well...
931 		return strdup(fsName);
932 	}
933 
934 	// cut off the trailing /v1
935 
936 	char* name = (char*)malloc(end + 1 - fsName);
937 	if (name == NULL)
938 		return NULL;
939 
940 	strlcpy(name, fsName, end + 1 - fsName);
941 	return name;
942 }
943 
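/* Worked example, per the comment above: get_file_system_name("bfs") and
   get_file_system_name("file_systems/bfs/v1") both return a newly allocated
   "bfs", which the caller must free(). */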
944 
945 /*!	Accepts a list of file system names separated by colons, one for each
946 	layer, and returns the file system name for the specified layer.
947 	The name is allocated for you, and you have to free() it when you're
948 	done with it.
949 	Returns NULL if the required memory is not available or if there is no
950 	name for the specified layer.
951 */
952 static char*
953 get_file_system_name_for_layer(const char* fsNames, int32 layer)
954 {
955 	while (layer >= 0) {
956 		const char* end = strchr(fsNames, ':');
957 		if (end == NULL) {
958 			if (layer == 0)
959 				return strdup(fsNames);
960 			return NULL;
961 		}
962 
963 		if (layer == 0) {
964 			size_t length = end - fsNames + 1;
965 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
966 			strlcpy(result, fsNames, length);
967 			return result;
968 		}
969 
970 		fsNames = end + 1;
971 		layer--;
972 	}
973 
974 	return NULL;
975 }
976 
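/* Illustrative sketch: for a (hypothetical) layered specification
   "layerfs:bfs", layer 0 yields "layerfs", layer 1 yields "bfs", and
   layer 2 yields NULL. Each result is malloc()ed and must be free()d. */
#if 0
	char* name = get_file_system_name_for_layer("layerfs:bfs", 1);
	if (name != NULL) {
		// name is "bfs" here
		free(name);
	}
#endif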
977 
978 static int
979 vnode_compare(void* _vnode, const void* _key)
980 {
981 	struct vnode* vnode = (struct vnode*)_vnode;
982 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
983 
984 	if (vnode->device == key->device && vnode->id == key->vnode)
985 		return 0;
986 
987 	return -1;
988 }
989 
990 
991 static uint32
992 vnode_hash(void* _vnode, const void* _key, uint32 range)
993 {
994 	struct vnode* vnode = (struct vnode*)_vnode;
995 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
996 
997 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
998 
999 	if (vnode != NULL)
1000 		return VHASH(vnode->device, vnode->id) % range;
1001 
1002 	return VHASH(key->device, key->vnode) % range;
1003 
1004 #undef VHASH
1005 }
1006 
1007 
1008 static void
1009 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
1010 {
1011 	RecursiveLocker _(mount->rlock);
1012 	mount->vnodes.Add(vnode);
1013 }
1014 
1015 
1016 static void
1017 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
1018 {
1019 	RecursiveLocker _(mount->rlock);
1020 	mount->vnodes.Remove(vnode);
1021 }
1022 
1023 
1024 static status_t
1025 create_new_vnode(struct vnode** _vnode, dev_t mountID, ino_t vnodeID)
1026 {
1027 	FUNCTION(("create_new_vnode()\n"));
1028 
1029 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
1030 	if (vnode == NULL)
1031 		return B_NO_MEMORY;
1032 
1033 	// initialize basic values
1034 	memset(vnode, 0, sizeof(struct vnode));
1035 	vnode->device = mountID;
1036 	vnode->id = vnodeID;
1037 
1038 	// add the vnode to the mount structure
1039 	mutex_lock(&sMountMutex);
1040 	vnode->mount = find_mount(mountID);
1041 	if (!vnode->mount || vnode->mount->unmounting) {
1042 		mutex_unlock(&sMountMutex);
1043 		free(vnode);
1044 		return B_ENTRY_NOT_FOUND;
1045 	}
1046 
1047 	hash_insert(sVnodeTable, vnode);
1048 	add_vnode_to_mount_list(vnode, vnode->mount);
1049 
1050 	mutex_unlock(&sMountMutex);
1051 
1052 	vnode->ref_count = 1;
1053 	*_vnode = vnode;
1054 
1055 	return B_OK;
1056 }
1057 
1058 
1059 /*!	Frees the vnode and all resources it has acquired, and removes
1060 	it from the vnode hash as well as from its mount structure.
1061 	Will also make sure that any cache modifications are written back.
1062 */
1063 static void
1064 free_vnode(struct vnode* vnode, bool reenter)
1065 {
1066 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);
1067 
1068 	// write back any changes in this vnode's cache -- but only
1069 	// if the vnode won't be deleted, in which case the changes
1070 	// will be discarded
1071 
1072 	if (!vnode->remove && HAS_FS_CALL(vnode, fsync))
1073 		FS_CALL_NO_PARAMS(vnode, fsync);
1074 
1075 	// Note: If this vnode has a cache attached, there will still be two
1076 	// references to that cache at this point. The last one belongs to the
1077 	// vnode itself (cf. vfs_get_vnode_cache()) and one belongs to the node's
1078 	// file cache. Each reference to a cache but the last also includes a
1079 	// reference to the vnode. The file cache, however, has released its
1080 	// reference (cf. file_cache_create()), so that this vnode's ref count
1081 	// had a chance to drop to 0 at all. Deleting the file cache now will
1082 	// cause the next to last cache reference to be released, which will also
1083 	// release a (no longer existing) vnode reference. To avoid problems, we
1084 	// set the vnode's ref count so that it will neither become negative nor 0.
1085 	vnode->ref_count = 2;
1086 
1087 	// TODO: Usually, when the vnode is unreferenced, no one can get hold of
1088 	// the cache either (i.e. no one can get a cache reference while we're
1089 	// deleting the vnode). This is, however, not the case for the page
1090 	// daemon. It gets its cache references via the pages it scans, so it can
1091 	// in fact get a vnode reference while we're deleting the vnode.
1092 
1093 	if (!vnode->unpublished) {
1094 		if (vnode->remove)
1095 			FS_CALL(vnode, remove_vnode, reenter);
1096 		else
1097 			FS_CALL(vnode, put_vnode, reenter);
1098 	}
1099 
1100 	// The file system has removed the resources of the vnode now, so we can
1101 	// make it available again (and remove the busy vnode from the hash)
1102 	mutex_lock(&sVnodeMutex);
1103 	hash_remove(sVnodeTable, vnode);
1104 	mutex_unlock(&sVnodeMutex);
1105 
1106 	// if we have a vm_cache attached, remove it
1107 	if (vnode->cache)
1108 		vnode->cache->ReleaseRef();
1109 
1110 	vnode->cache = NULL;
1111 
1112 	remove_vnode_from_mount_list(vnode, vnode->mount);
1113 
1114 	free(vnode);
1115 }
1116 
1117 
1118 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1119 	if the counter dropped to 0.
1120 
1121 	The caller must, of course, own a reference to the vnode to call this
1122 	function.
1123 	The caller must not hold the sVnodeMutex or the sMountMutex.
1124 
1125 	\param vnode the vnode.
1126 	\param alwaysFree don't move this vnode into the unused list, but really
1127 		   delete it if possible.
1128 	\param reenter \c true, if this function is called (indirectly) from within
1129 		   a file system. This will be passed to file system hooks only.
1130 	\return \c B_OK, if everything went fine, an error code otherwise.
1131 */
1132 static status_t
1133 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1134 {
1135 	MutexLocker locker(sVnodeMutex);
1136 
1137 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1138 
1139 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1140 
1141 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1142 		vnode->ref_count));
1143 
1144 	if (oldRefCount != 1)
1145 		return B_OK;
1146 
1147 	if (vnode->busy)
1148 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1149 
1150 	bool freeNode = false;
1151 
1152 	// Just insert the vnode into the unused list if we don't need
1153 	// to delete it
1154 	if (vnode->remove || alwaysFree) {
1155 		vnode->busy = true;
1156 		freeNode = true;
1157 	} else {
1158 		list_add_item(&sUnusedVnodeList, vnode);
1159 		if (++sUnusedVnodes > kMaxUnusedVnodes
1160 			&& low_resource_state(
1161 				B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)
1162 					!= B_NO_LOW_RESOURCE) {
1163 			// there are too many unused vnodes so we free the oldest one
1164 			// TODO: evaluate this mechanism
1165 			vnode = (struct vnode*)list_remove_head_item(&sUnusedVnodeList);
1166 			vnode->busy = true;
1167 			freeNode = true;
1168 			sUnusedVnodes--;
1169 		}
1170 	}
1171 
1172 	locker.Unlock();
1173 
1174 	if (freeNode)
1175 		free_vnode(vnode, reenter);
1176 
1177 	return B_OK;
1178 }
1179 
1180 
1181 /*!	\brief Increments the reference counter of the given vnode.
1182 
1183 	The caller must either already have a reference to the vnode or hold
1184 	the sVnodeMutex.
1185 
1186 	\param vnode the vnode.
1187 */
1188 static void
1189 inc_vnode_ref_count(struct vnode* vnode)
1190 {
1191 	atomic_add(&vnode->ref_count, 1);
1192 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
1193 }
1194 
1195 
1196 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
1197 
1198 	The caller must hold the sVnodeMutex.
1199 
1200 	\param mountID the mount ID.
1201 	\param vnodeID the node ID.
1202 
1203 	\return The vnode structure, if it was found in the hash table, \c NULL
1204 			otherwise.
1205 */
1206 static struct vnode*
1207 lookup_vnode(dev_t mountID, ino_t vnodeID)
1208 {
1209 	struct vnode_hash_key key;
1210 
1211 	key.device = mountID;
1212 	key.vnode = vnodeID;
1213 
1214 	return (vnode*)hash_lookup(sVnodeTable, &key);
1215 }
1216 
1217 
1218 static bool
1219 is_special_node_type(int type)
1220 {
1221 	// at the moment only FIFOs are supported
1222 	return S_ISFIFO(type);
1223 }
1224 
1225 
1226 static status_t
1227 create_special_sub_node(struct vnode* vnode, uint32 flags)
1228 {
1229 	if (S_ISFIFO(vnode->type))
1230 		return create_fifo_vnode(vnode->mount->volume, vnode);
1231 
1232 	return B_BAD_VALUE;
1233 }
1234 
1235 
1236 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1237 
1238 	If the node is not yet in memory, it will be loaded.
1239 
1240 	The caller must not hold the sVnodeMutex or the sMountMutex.
1241 
1242 	\param mountID the mount ID.
1243 	\param vnodeID the node ID.
1244 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1245 		   retrieved vnode structure shall be written.
1246 	\param reenter \c true, if this function is called (indirectly) from within
1247 		   a file system.
1248 	\return \c B_OK, if everything went fine, an error code otherwise.
1249 */
1250 static status_t
1251 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1252 	int reenter)
1253 {
1254 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));
1255 
1256 	mutex_lock(&sVnodeMutex);
1257 
1258 	int32 tries = 1000;
1259 		// try for 10 secs
1260 restart:
1261 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1262 	if (vnode && vnode->busy) {
1263 		mutex_unlock(&sVnodeMutex);
1264 		if (!canWait || --tries < 0) {
1265 			// vnode doesn't seem to become unbusy
1266 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
1267 			return B_BUSY;
1268 		}
1269 		snooze(10000); // 10 ms
1270 		mutex_lock(&sVnodeMutex);
1271 		goto restart;
1272 	}
1273 
1274 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1275 
1276 	status_t status;
1277 
1278 	if (vnode) {
1279 		if (vnode->ref_count == 0) {
1280 			// this vnode has been unused before
1281 			list_remove_item(&sUnusedVnodeList, vnode);
1282 			sUnusedVnodes--;
1283 		}
1284 		inc_vnode_ref_count(vnode);
1285 	} else {
1286 		// we need to create a new vnode and read it in
1287 		status = create_new_vnode(&vnode, mountID, vnodeID);
1288 		if (status < B_OK)
1289 			goto err;
1290 
1291 		vnode->busy = true;
1292 		mutex_unlock(&sVnodeMutex);
1293 
1294 		int type;
1295 		uint32 flags;
1296 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1297 			&flags, reenter);
1298 		if (status == B_OK && vnode->private_node == NULL)
1299 			status = B_BAD_VALUE;
1300 
1301 		bool gotNode = status == B_OK;
1302 		bool publishSpecialSubNode = false;
1303 		if (gotNode) {
1304 			vnode->type = type;
1305 			publishSpecialSubNode = is_special_node_type(type)
1306 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1307 		}
1308 
1309 		if (gotNode && publishSpecialSubNode)
1310 			status = create_special_sub_node(vnode, flags);
1311 
1312 		mutex_lock(&sVnodeMutex);
1313 
1314 		if (status < B_OK) {
1315 			if (gotNode)
1316 				FS_CALL(vnode, put_vnode, reenter);
1317 
1318 			goto err1;
1319 		}
1320 
1321 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
1322 		vnode->busy = false;
1323 	}
1324 
1325 	mutex_unlock(&sVnodeMutex);
1326 
1327 	TRACE(("get_vnode: returning %p\n", vnode));
1328 
1329 	*_vnode = vnode;
1330 	return B_OK;
1331 
1332 err1:
1333 	hash_remove(sVnodeTable, vnode);
1334 	remove_vnode_from_mount_list(vnode, vnode->mount);
1335 err:
1336 	mutex_unlock(&sVnodeMutex);
1337 	if (vnode)
1338 		free(vnode);
1339 
1340 	return status;
1341 }
1342 
1343 
1344 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1345 	if the counter dropped to 0.
1346 
1347 	The caller must, of course, own a reference to the vnode to call this
1348 	function.
1349 	The caller must not hold the sVnodeMutex or the sMountMutex.
1350 
1351 	\param vnode the vnode.
1352 */
1353 static inline void
1354 put_vnode(struct vnode* vnode)
1355 {
1356 	dec_vnode_ref_count(vnode, false, false);
1357 }
1358 
1359 
1360 static void
1361 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1362 {
1363 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1364 
1365 	uint32 count = 1;
1366 	switch (level) {
1367 		case B_NO_LOW_RESOURCE:
1368 			return;
1369 		case B_LOW_RESOURCE_NOTE:
1370 			count = sUnusedVnodes / 100;
1371 			break;
1372 		case B_LOW_RESOURCE_WARNING:
1373 			count = sUnusedVnodes / 10;
1374 			break;
1375 		case B_LOW_RESOURCE_CRITICAL:
1376 			count = sUnusedVnodes;
1377 			break;
1378 	}
1379 
1380 	if (count > sUnusedVnodes)
1381 		count = sUnusedVnodes;
1382 
1383 	// Write back the modified pages of some unused vnodes and free them
1384 
1385 	for (uint32 i = 0; i < count; i++) {
1386 		mutex_lock(&sVnodeMutex);
1387 		struct vnode* vnode = (struct vnode*)list_remove_head_item(
1388 			&sUnusedVnodeList);
1389 		if (vnode == NULL) {
1390 			mutex_unlock(&sVnodeMutex);
1391 			break;
1392 		}
1393 
1394 		inc_vnode_ref_count(vnode);
1395 		sUnusedVnodes--;
1396 
1397 		mutex_unlock(&sVnodeMutex);
1398 
1399 		if (vnode->cache != NULL)
1400 			vnode->cache->WriteModified();
1401 
1402 		dec_vnode_ref_count(vnode, true, false);
1403 			// this should free the vnode when it's still unused
1404 	}
1405 }
1406 
1407 
1408 static inline void
1409 put_advisory_locking(struct advisory_locking* locking)
1410 {
1411 	release_sem(locking->lock);
1412 }
1413 
1414 
1415 /*!	Returns the advisory_locking object of the \a vnode in case it
1416 	has one, and locks it.
1417 	You have to call put_advisory_locking() when you're done with
1418 	it.
1419 	Note, you must not have the vnode mutex locked when calling
1420 	this function.
1421 */
1422 static struct advisory_locking*
1423 get_advisory_locking(struct vnode* vnode)
1424 {
1425 	mutex_lock(&sVnodeMutex);
1426 
1427 	struct advisory_locking* locking = vnode->advisory_locking;
1428 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1429 
1430 	mutex_unlock(&sVnodeMutex);
1431 
1432 	if (lock >= B_OK)
1433 		lock = acquire_sem(lock);
1434 	if (lock < B_OK) {
1435 		// This means the locking has been deleted in the mean time
1436 		// or had never existed in the first place - otherwise, we
1437 		// would get the lock at some point.
1438 		return NULL;
1439 	}
1440 
1441 	return locking;
1442 }
1443 
1444 
1445 /*!	Creates a locked advisory_locking object, and attaches it to the
1446 	given \a vnode.
1447 	Returns B_OK in case of success - also if the vnode got such an
1448 	object from someone else in the mean time; in that case, too, you
1449 	end up with the object locked.
1450 */
1451 static status_t
1452 create_advisory_locking(struct vnode* vnode)
1453 {
1454 	if (vnode == NULL)
1455 		return B_FILE_ERROR;
1456 
1457 	ObjectDeleter<advisory_locking> lockingDeleter;
1458 	struct advisory_locking* locking = NULL;
1459 
1460 	while (get_advisory_locking(vnode) == NULL) {
1461 		// no locking object set on the vnode yet, create one
1462 		if (locking == NULL) {
1463 			locking = new(std::nothrow) advisory_locking;
1464 			if (locking == NULL)
1465 				return B_NO_MEMORY;
1466 			lockingDeleter.SetTo(locking);
1467 
1468 			locking->wait_sem = create_sem(0, "advisory lock");
1469 			if (locking->wait_sem < B_OK)
1470 				return locking->wait_sem;
1471 
1472 			locking->lock = create_sem(0, "advisory locking");
1473 			if (locking->lock < B_OK)
1474 				return locking->lock;
1475 		}
1476 
1477 		// set our newly created locking object
1478 		MutexLocker _(sVnodeMutex);
1479 		if (vnode->advisory_locking == NULL) {
1480 			vnode->advisory_locking = locking;
1481 			lockingDeleter.Detach();
1482 			return B_OK;
1483 		}
1484 	}
1485 
1486 	// The vnode already had a locking object. That's just as well.
1487 
1488 	return B_OK;
1489 }
1490 
1491 
1492 /*!	Retrieves the first lock that has been set by the current team.
1493 */
1494 static status_t
1495 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1496 {
1497 	struct advisory_locking* locking = get_advisory_locking(vnode);
1498 	if (locking == NULL)
1499 		return B_BAD_VALUE;
1500 
1501 	// TODO: this should probably get the flock by its file descriptor!
1502 	team_id team = team_get_current_team_id();
1503 	status_t status = B_BAD_VALUE;
1504 
1505 	LockList::Iterator iterator = locking->locks.GetIterator();
1506 	while (iterator.HasNext()) {
1507 		struct advisory_lock* lock = iterator.Next();
1508 
1509 		if (lock->team == team) {
1510 			flock->l_start = lock->start;
1511 			flock->l_len = lock->end - lock->start + 1;
1512 			status = B_OK;
1513 			break;
1514 		}
1515 	}
1516 
1517 	put_advisory_locking(locking);
1518 	return status;
1519 }
1520 
1521 
1522 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1523 	with the advisory_lock \a lock.
1524 */
1525 static bool
1526 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1527 {
1528 	if (flock == NULL)
1529 		return true;
1530 
1531 	return lock->start <= flock->l_start - 1 + flock->l_len
1532 		&& lock->end >= flock->l_start;
1533 }
1534 
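/* Worked example: a lock covering bytes [10, 19] and a flock request with
   l_start = 15, l_len = 10 (i.e. bytes [15, 24]) intersect, since
   10 <= 15 - 1 + 10 and 19 >= 15 both hold. */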
1535 
1536 /*!	Removes the specified lock, or all locks of the calling team
1537 	if \a flock is NULL.
1538 */
1539 static status_t
1540 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1541 {
1542 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1543 
1544 	struct advisory_locking* locking = get_advisory_locking(vnode);
1545 	if (locking == NULL)
1546 		return B_OK;
1547 
1548 	// TODO: use the thread ID instead??
1549 	team_id team = team_get_current_team_id();
1550 	pid_t session = thread_get_current_thread()->team->session_id;
1551 
1552 	// find matching lock entries
1553 
1554 	LockList::Iterator iterator = locking->locks.GetIterator();
1555 	while (iterator.HasNext()) {
1556 		struct advisory_lock* lock = iterator.Next();
1557 		bool removeLock = false;
1558 
1559 		if (lock->session == session)
1560 			removeLock = true;
1561 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1562 			bool endsBeyond = false;
1563 			bool startsBefore = false;
1564 			if (flock != NULL) {
1565 				startsBefore = lock->start < flock->l_start;
1566 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1567 			}
1568 
1569 			if (!startsBefore && !endsBeyond) {
1570 				// lock is completely contained in flock
1571 				removeLock = true;
1572 			} else if (startsBefore && !endsBeyond) {
1573 				// cut the end of the lock
1574 				lock->end = flock->l_start - 1;
1575 			} else if (!startsBefore && endsBeyond) {
1576 				// cut the start of the lock
1577 				lock->start = flock->l_start + flock->l_len;
1578 			} else {
1579 				// divide the lock into two locks
1580 				struct advisory_lock* secondLock = new advisory_lock;
1581 				if (secondLock == NULL) {
1582 					// TODO: we should probably revert the locks we already
1583 					// changed... (ie. allocate upfront)
1584 					put_advisory_locking(locking);
1585 					return B_NO_MEMORY;
1586 				}
1587 
1588 				secondLock->team = lock->team;
1589 				secondLock->session = lock->session;
1590 				// values must already be normalized when getting here
1591 				secondLock->start = flock->l_start + flock->l_len;
1592 				secondLock->end = lock->end;
1593 				secondLock->shared = lock->shared;
1594 				// only cut the first lock now, so secondLock kept its old end
1595 				lock->end = flock->l_start - 1;
1596 
1597 				locking->locks.Add(secondLock);
1598 			}
1599 		}
1600 
1601 		if (removeLock) {
1602 			// this lock is no longer used
1603 			iterator.Remove();
1604 			free(lock);
1605 		}
1606 	}
1607 
1608 	bool removeLocking = locking->locks.IsEmpty();
1609 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1610 
1611 	put_advisory_locking(locking);
1612 
1613 	if (removeLocking) {
1614 		// We can remove the whole advisory locking structure; it's no
1615 		// longer used
1616 		locking = get_advisory_locking(vnode);
1617 		if (locking != NULL) {
1618 			MutexLocker locker(sVnodeMutex);
1619 
1620 			// the locking could have been changed in the mean time
1621 			if (locking->locks.IsEmpty()) {
1622 				vnode->advisory_locking = NULL;
1623 				locker.Unlock();
1624 
1625 				// we've detached the locking from the vnode, so we can
1626 				// safely delete it
1627 				delete_sem(locking->lock);
1628 				delete_sem(locking->wait_sem);
1629 				delete locking;
1630 			} else {
1631 				// the locking is in use again
1632 				locker.Unlock();
1633 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1634 			}
1635 		}
1636 	}
1637 
1638 	return B_OK;
1639 }
1640 
1641 
1642 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1643 	will wait for the lock to become available, if there are any collisions
1644 	(if \a wait is \c false, it returns B_WOULD_BLOCK resp.
1645 	B_PERMISSION_DENIED in this case, depending on the semantics in use).
1646 
1647 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1648 	BSD flock() semantics are used: all children can unlock the file in
1649 	question (we even allow parents to remove the lock, in line with the BSDs).
1650 */
1651 static status_t
1652 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1653 	bool wait)
1654 {
1655 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1656 		vnode, flock, wait ? "yes" : "no"));
1657 
1658 	bool shared = flock->l_type == F_RDLCK;
1659 	status_t status = B_OK;
1660 
1661 	// TODO: do deadlock detection!
1662 
1663 	struct advisory_locking* locking;
1664 	sem_id waitForLock;
1665 
1666 	while (true) {
1667 		// if this vnode has an advisory_locking structure attached,
1668 		// lock that one and search for any colliding file lock
1669 		status = create_advisory_locking(vnode);
1670 		if (status != B_OK)
1671 			return status;
1672 
1673 		locking = vnode->advisory_locking;
1674 		team_id team = team_get_current_team_id();
1675 		waitForLock = -1;
1676 
1677 		// test for collisions
1678 		LockList::Iterator iterator = locking->locks.GetIterator();
1679 		while (iterator.HasNext()) {
1680 			struct advisory_lock* lock = iterator.Next();
1681 
1682 			// TODO: locks from the same team might be joinable!
1683 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1684 				// locks do overlap
1685 				if (!shared || !lock->shared) {
1686 					// we need to wait
1687 					waitForLock = locking->wait_sem;
1688 					break;
1689 				}
1690 			}
1691 		}
1692 
1693 		if (waitForLock < 0)
1694 			break;
1695 
1696 		// We need to wait. Do that or fail now, if we've been asked not to.
1697 
1698 		if (!wait) {
1699 			put_advisory_locking(locking);
1700 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1701 		}
1702 
1703 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1704 			B_CAN_INTERRUPT, 0);
1705 		if (status != B_OK && status != B_BAD_SEM_ID)
1706 			return status;
1707 
1708 		// We have been notified, but we need to re-lock the locking object. So
1709 		// go another round...
1710 	}
1711 
1712 	// install new lock
1713 
1714 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1715 		sizeof(struct advisory_lock));
1716 	if (lock == NULL) {
1717 		if (waitForLock >= B_OK)
1718 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1719 		release_sem(locking->lock);
1720 		return B_NO_MEMORY;
1721 	}
1722 
1723 	lock->team = team_get_current_team_id();
1724 	lock->session = session;
1725 	// values must already be normalized when getting here
1726 	lock->start = flock->l_start;
1727 	lock->end = flock->l_start - 1 + flock->l_len;
1728 	lock->shared = shared;
1729 
1730 	locking->locks.Add(lock);
1731 	put_advisory_locking(locking);
1732 
1733 	return status;
1734 }
1735 
1736 
1737 /*!	Normalizes the \a flock structure to make it easier to compare the
1738 	structure with others. The l_start and l_len fields are set to absolute
1739 	values according to the l_whence field.
1740 */
1741 static status_t
1742 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1743 {
1744 	switch (flock->l_whence) {
1745 		case SEEK_SET:
1746 			break;
1747 		case SEEK_CUR:
1748 			flock->l_start += descriptor->pos;
1749 			break;
1750 		case SEEK_END:
1751 		{
1752 			struct vnode* vnode = descriptor->u.vnode;
1753 			struct stat stat;
1754 			status_t status;
1755 
1756 			if (!HAS_FS_CALL(vnode, read_stat))
1757 				return EOPNOTSUPP;
1758 
1759 			status = FS_CALL(vnode, read_stat, &stat);
1760 			if (status < B_OK)
1761 				return status;
1762 
1763 			flock->l_start += stat.st_size;
1764 			break;
1765 		}
1766 		default:
1767 			return B_BAD_VALUE;
1768 	}
1769 
1770 	if (flock->l_start < 0)
1771 		flock->l_start = 0;
1772 	if (flock->l_len == 0)
1773 		flock->l_len = OFF_MAX;
1774 
1775 	// don't let the offset and length overflow
1776 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1777 		flock->l_len = OFF_MAX - flock->l_start;
1778 
1779 	if (flock->l_len < 0) {
1780 		// a negative length reverses the region
1781 		flock->l_start += flock->l_len;
1782 		flock->l_len = -flock->l_len;
1783 	}
1784 
1785 	return B_OK;
1786 }
1787 
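/* Illustrative sketch: a request relative to the current descriptor
   position, with a negative length. Assuming descriptor->pos == 100,
   { SEEK_CUR, l_start = 10, l_len = -20 } first becomes l_start = 110,
   and the negative length then reverses the region: */
#if 0
	struct flock flock = {};
	flock.l_whence = SEEK_CUR;
	flock.l_start = 10;
	flock.l_len = -20;
	if (normalize_flock(descriptor, &flock) == B_OK) {
		// now: flock.l_start == 90, flock.l_len == 20,
		// i.e. the absolute byte range [90, 109]
	}
#endif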
1788 
1789 static void
1790 replace_vnode_if_disconnected(struct fs_mount* mount,
1791 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1792 	struct vnode* fallBack, bool lockRootLock)
1793 {
1794 	if (lockRootLock)
1795 		mutex_lock(&sIOContextRootLock);
1796 
1797 	struct vnode* obsoleteVnode = NULL;
1798 
1799 	if (vnode != NULL && vnode->mount == mount
1800 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1801 		obsoleteVnode = vnode;
1802 
1803 		if (vnode == mount->root_vnode) {
1804 			// redirect the vnode to the covered vnode
1805 			vnode = mount->covers_vnode;
1806 		} else
1807 			vnode = fallBack;
1808 
1809 		if (vnode != NULL)
1810 			inc_vnode_ref_count(vnode);
1811 	}
1812 
1813 	if (lockRootLock)
1814 		mutex_unlock(&sIOContextRootLock);
1815 
1816 	if (obsoleteVnode != NULL)
1817 		put_vnode(obsoleteVnode);
1818 }
1819 
1820 
1821 /*!	Disconnects all file descriptors that are associated with the
1822 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1823 	\a mount object.
1824 
1825 	Note, after you've called this function, there might still be ongoing
1826 	accesses - they won't be interrupted if they were already in progress.
1827 	However, any subsequent access will fail.
1828 
1829 	This is not a cheap function and should be used with care and rarely.
1830 	TODO: there is currently no means to stop a blocking read/write!
1831 */
1832 void
1833 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1834 	struct vnode* vnodeToDisconnect)
1835 {
1836 	// iterate over all teams and peek into their file descriptors
1837 	int32 nextTeamID = 0;
1838 
1839 	while (true) {
1840 		struct io_context* context = NULL;
1841 		bool contextLocked = false;
1842 		struct team* team = NULL;
1843 		team_id lastTeamID;
1844 
1845 		cpu_status state = disable_interrupts();
1846 		SpinLocker teamsLock(gTeamSpinlock);
1847 
1848 		lastTeamID = peek_next_thread_id();
1849 		if (nextTeamID < lastTeamID) {
1850 			// get next valid team
1851 			while (nextTeamID < lastTeamID
1852 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1853 				nextTeamID++;
1854 			}
1855 
1856 			if (team) {
1857 				context = (io_context*)team->io_context;
1858 
1859 				// Some acrobatics to lock the context in a safe way
1860 				// (cf. _kern_get_next_fd_info() for details).
1861 				GRAB_THREAD_LOCK();
1862 				teamsLock.Unlock();
1863 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1864 					== B_OK;
1865 				RELEASE_THREAD_LOCK();
1866 
1867 				nextTeamID++;
1868 			}
1869 		}
1870 
1871 		teamsLock.Unlock();
1872 		restore_interrupts(state);
1873 
1874 		if (context == NULL)
1875 			break;
1876 
1877 		// We now have a context - since we couldn't lock it while having
1878 		// safe access to the team structure, locking may have failed; in
1879 		// that case the team is already gone.
1880 
1881 		if (!contextLocked) {
1882 			// team seems to be gone, go over to the next team
1883 			continue;
1884 		}
1885 
1886 		// the team cannot be deleted completely while we own its
1887 		// io_context mutex, so we can safely play with it now
1888 
1889 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1890 			sRoot, true);
1891 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1892 			sRoot, false);
1893 
1894 		for (uint32 i = 0; i < context->table_size; i++) {
1895 			if (struct file_descriptor* descriptor = context->fds[i]) {
1896 				inc_fd_ref_count(descriptor);
1897 
1898 				// if this descriptor points at this mount, we
1899 				// need to disconnect it to be able to unmount
1900 				struct vnode* vnode = fd_vnode(descriptor);
1901 				if (vnodeToDisconnect != NULL) {
1902 					if (vnode == vnodeToDisconnect)
1903 						disconnect_fd(descriptor);
1904 				} else if ((vnode != NULL && vnode->mount == mount)
1905 					|| (vnode == NULL && descriptor->u.mount == mount))
1906 					disconnect_fd(descriptor);
1907 
1908 				put_fd(descriptor);
1909 			}
1910 		}
1911 
1912 		mutex_unlock(&context->io_mutex);
1913 	}
1914 }
1915 
1916 
1917 /*!	\brief Gets the root node of the current IO context.
1918 	If \a kernel is \c true, the kernel IO context will be used.
1919 	The caller obtains a reference to the returned node.
1920 */
1921 struct vnode*
1922 get_root_vnode(bool kernel)
1923 {
1924 	if (!kernel) {
1925 		// Get the root vnode from the current team's IO context
1926 		struct io_context* context = get_current_io_context(kernel);
1927 
1928 		mutex_lock(&sIOContextRootLock);
1929 
1930 		struct vnode* root = context->root;
1931 		if (root != NULL)
1932 			inc_vnode_ref_count(root);
1933 
1934 		mutex_unlock(&sIOContextRootLock);
1935 
1936 		if (root != NULL)
1937 			return root;
1938 
1939 		// That should never happen.
1940 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1941 			"root\n", team_get_current_team_id());
1942 	}
1943 
1944 	inc_vnode_ref_count(sRoot);
1945 	return sRoot;
1946 }
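
/*	Editor's usage sketch (not part of the original source): the caller owns
	the reference returned by get_root_vnode() and has to drop it again:

		struct vnode* root = get_root_vnode(false);
		// ... resolve paths against the team's root ...
		put_vnode(root);
*/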
1947 
1948 
1949 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1950 		   by.
1951 
1952 	Given an arbitrary vnode, the function checks whether the node is covered
1953 	by the root of a volume. If it is, the function obtains a reference to the
1954 	volume root node and returns it.
1955 
1956 	\param vnode The vnode in question.
1957 	\return The volume root vnode the given vnode is covered by, if it is
1958 			indeed a mount point, or \c NULL otherwise.
1959 */
1960 static struct vnode*
1961 resolve_mount_point_to_volume_root(struct vnode* vnode)
1962 {
1963 	if (!vnode)
1964 		return NULL;
1965 
1966 	struct vnode* volumeRoot = NULL;
1967 
1968 	mutex_lock(&sVnodeCoveredByMutex);
1969 	if (vnode->covered_by) {
1970 		volumeRoot = vnode->covered_by;
1971 		inc_vnode_ref_count(volumeRoot);
1972 	}
1973 	mutex_unlock(&sVnodeCoveredByMutex);
1974 
1975 	return volumeRoot;
1976 }
1977 
1978 
1979 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1980 		   by.
1981 
1982 	Given an arbitrary vnode (identified by mount and node ID), the function
1983 	checks whether the node is covered by the root of a volume. If it is, the
1984 	function returns the mount and node ID of the volume root node. Otherwise
1985 	it simply returns the supplied mount and node ID.
1986 
1987 	In case of error (e.g. the supplied node could not be found) the variables
1988 	for storing the resolved mount and node ID remain untouched and an error
1989 	code is returned.
1990 
1991 	\param mountID The mount ID of the vnode in question.
1992 	\param nodeID The node ID of the vnode in question.
1993 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1994 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1995 	\return
1996 	- \c B_OK, if everything went fine,
1997 	- another error code, if something went wrong.
1998 */
1999 status_t
2000 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
2001 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
2002 {
2003 	// get the node
2004 	struct vnode* node;
2005 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
2006 	if (error != B_OK)
2007 		return error;
2008 
2009 	// resolve the node
2010 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
2011 	if (resolvedNode) {
2012 		put_vnode(node);
2013 		node = resolvedNode;
2014 	}
2015 
2016 	// set the return values
2017 	*resolvedMountID = node->device;
2018 	*resolvedNodeID = node->id;
2019 
2020 	put_vnode(node);
2021 
2022 	return B_OK;
2023 }
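
/*	Editor's example (not part of the original source; the IDs are made up):
	assuming a volume with device ID 2 is mounted on the directory (1, 42),
	resolving (1, 42) yields the root node of that volume, e.g. (2, 1):

		dev_t device;
		ino_t node;
		if (resolve_mount_point_to_volume_root(1, 42, &device, &node)
				== B_OK) {
			// device == 2, node == ID of the volume's root node; a node
			// that is not a mount point is returned unchanged
		}
*/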
2024 
2025 
2026 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
2027 
2028 	Given an arbitrary vnode, the function checks whether the node is the
2029 	root of a volume. If it is (and if it is not "/"), the function obtains
2030 	a reference to the underlying mount point node and returns it.
2031 
2032 	\param vnode The vnode in question (caller must have a reference).
2033 	\return The mount point vnode the vnode covers, if it is indeed a volume
2034 			root and not "/", or \c NULL otherwise.
2035 */
2036 static struct vnode*
2037 resolve_volume_root_to_mount_point(struct vnode* vnode)
2038 {
2039 	if (!vnode)
2040 		return NULL;
2041 
2042 	struct vnode* mountPoint = NULL;
2043 
2044 	struct fs_mount* mount = vnode->mount;
2045 	if (vnode == mount->root_vnode && mount->covers_vnode) {
2046 		mountPoint = mount->covers_vnode;
2047 		inc_vnode_ref_count(mountPoint);
2048 	}
2049 
2050 	return mountPoint;
2051 }
2052 
2053 
2054 /*!	\brief Gets the directory path and leaf name for a given path.
2055 
2056 	The supplied \a path is transformed to refer to the directory part of
2057 	the entry identified by the original path, and into the buffer \a filename
2058 	the leaf name of the original entry is written.
2059 	Neither the returned path nor the leaf name can be expected to be
2060 	canonical.
2061 
2062 	\param path The path to be analyzed. Must be able to store at least one
2063 		   additional character.
2064 	\param filename The buffer into which the leaf name will be written.
2065 		   Must be of size B_FILE_NAME_LENGTH at least.
2066 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2067 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2068 		   if the given path name is empty.
2069 */
2070 static status_t
2071 get_dir_path_and_leaf(char* path, char* filename)
2072 {
2073 	if (*path == '\0')
2074 		return B_ENTRY_NOT_FOUND;
2075 
2076 	char* p = strrchr(path, '/');
2077 		// '/' are not allowed in file names!
2078 
2079 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2080 
2081 	if (!p) {
2082 		// this path is single segment with no '/' in it
2083 		// ex. "foo"
2084 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2085 			return B_NAME_TOO_LONG;
2086 		strcpy(path, ".");
2087 	} else {
2088 		p++;
2089 		if (p[0] == '\0') {
2090 			// special case: the path ends in one or more '/' - remove them
2091 			while (*--p == '/' && p != path);
2092 			p[1] = '\0';
2093 
2094 			if (p == path && p[0] == '/') {
2095 				// This path points to the root of the file system
2096 				strcpy(filename, ".");
2097 				return B_OK;
2098 			}
2099 			for (; p != path && *(p - 1) != '/'; p--);
2100 				// rewind to the start of the leaf before the '/'
2101 		}
2102 
2103 		// normal leaf: replace the leaf portion of the path with a '.'
2104 		if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
2105 				>= B_FILE_NAME_LENGTH) {
2106 			return B_NAME_TOO_LONG;
2107 		}
2108 		p[0] = '.';
2109 		p[1] = '\0';
2110 	}
2111 	return B_OK;
2112 }
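
/*	Editor's usage sketch (not part of the original source): the
	transformation happens in place, e.g.:

		char path[B_PATH_NAME_LENGTH] = "/boot/home/Desktop/";
		char filename[B_FILE_NAME_LENGTH];
		if (get_dir_path_and_leaf(path, filename) == B_OK) {
			// path is now "/boot/home/.", filename is "Desktop"; a single
			// segment path like "Desktop" would leave path as "."
		}
*/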
2113 
2114 
2115 static status_t
2116 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2117 	bool traverse, bool kernel, struct vnode** _vnode)
2118 {
2119 	char clonedName[B_FILE_NAME_LENGTH + 1];
2120 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2121 		return B_NAME_TOO_LONG;
2122 
2123 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2124 	struct vnode* directory;
2125 
2126 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2127 	if (status < 0)
2128 		return status;
2129 
2130 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2131 		_vnode, NULL);
2132 }
2133 
2134 
2135 static status_t
2136 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2137 {
2138 	ino_t id;
2139 
2140 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2141 		return get_vnode(dir->device, id, _vnode, true, false);
2142 
2143 	status_t status = FS_CALL(dir, lookup, name, &id);
2144 	if (status < B_OK)
2145 		return status;
2146 
2147 	mutex_lock(&sVnodeMutex);
2148 	*_vnode = lookup_vnode(dir->device, id);
2149 	mutex_unlock(&sVnodeMutex);
2150 
2151 	if (*_vnode == NULL) {
2152 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2153 			"0x%Lx)\n", dir->device, id);
2154 		return B_ENTRY_NOT_FOUND;
2155 	}
2156 
2157 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2158 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2159 //		(*_vnode)->mount->id, (*_vnode)->id);
2160 
2161 	return B_OK;
2162 }
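
/*	Editor's usage sketch (not part of the original source): on success the
	caller owns a reference to the child vnode; "dir" stands for any
	directory vnode the caller already holds:

		struct vnode* child;
		if (lookup_dir_entry(dir, "home", &child) == B_OK) {
			// the entry cache may have answered the lookup without
			// calling into the file system's lookup() hook at all
			put_vnode(child);
		}
*/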
2163 
2164 
2165 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2166 	\a path must not be NULL.
2167 	If it returns successfully, \a path contains the name of the last path
2168 	component. This function clobbers the buffer pointed to by \a path only
2169 	if it does contain more than one component.
2170 	Note that this function always decrements the ref count of the starting
2171 	\a vnode, no matter whether it succeeds or not!
2172 */
2173 static status_t
2174 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2175 	int count, struct io_context* ioContext, struct vnode** _vnode,
2176 	ino_t* _parentID)
2177 {
2178 	status_t status = B_OK;
2179 	ino_t lastParentID = vnode->id;
2180 
2181 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2182 
2183 	if (path == NULL) {
2184 		put_vnode(vnode);
2185 		return B_BAD_VALUE;
2186 	}
2187 
2188 	if (*path == '\0') {
2189 		put_vnode(vnode);
2190 		return B_ENTRY_NOT_FOUND;
2191 	}
2192 
2193 	while (true) {
2194 		struct vnode* nextVnode;
2195 		char* nextPath;
2196 
2197 		TRACE(("vnode_path_to_vnode: top of loop. path = %p, path = '%s'\n", path, path));
2198 
2199 		// done?
2200 		if (path[0] == '\0')
2201 			break;
2202 
2203 		// walk to find the next path component ("path" will point to a single
2204 		// path component), and filter out multiple slashes
2205 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2206 				nextPath++);
2207 
2208 		if (*nextPath == '/') {
2209 			*nextPath = '\0';
2210 			do
2211 				nextPath++;
2212 			while (*nextPath == '/');
2213 		}
2214 
2215 		// See if the '..' is at the root of a mount and move to the covered
2216 		// vnode so we pass the '..' path to the underlying filesystem.
2217 		// Also prevent breaking the root of the IO context.
2218 		if (strcmp("..", path) == 0) {
2219 			if (vnode == ioContext->root) {
2220 				// Attempted prison break! Keep it contained.
2221 				path = nextPath;
2222 				continue;
2223 			} else if (vnode->mount->root_vnode == vnode
2224 				&& vnode->mount->covers_vnode) {
2225 				nextVnode = vnode->mount->covers_vnode;
2226 				inc_vnode_ref_count(nextVnode);
2227 				put_vnode(vnode);
2228 				vnode = nextVnode;
2229 			}
2230 		}
2231 
2232 		// check if vnode is really a directory
2233 		if (status == B_OK && !S_ISDIR(vnode->type))
2234 			status = B_NOT_A_DIRECTORY;
2235 
2236 		// Check if we have the right to search the current directory vnode.
2237 		// If a file system doesn't have the access() function, we assume that
2238 		// searching a directory is always allowed
2239 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2240 			status = FS_CALL(vnode, access, X_OK);
2241 
2242 		// Tell the filesystem to get the vnode of this path component (if we got the
2243 		// permission from the call above)
2244 		if (status >= B_OK)
2245 			status = lookup_dir_entry(vnode, path, &nextVnode);
2246 
2247 		if (status < B_OK) {
2248 			put_vnode(vnode);
2249 			return status;
2250 		}
2251 
2252 		// If the new node is a symbolic link, resolve it (if we've been told
2253 		// to do it)
2254 		if (S_ISLNK(nextVnode->type)
2255 			&& !(!traverseLeafLink && nextPath[0] == '\0')) {
2256 			size_t bufferSize;
2257 			char* buffer;
2258 
2259 			TRACE(("traverse link\n"));
2260 
2261 			// it's not exactly nice style using goto in this way, but hey, it works :-/
2262 			if (count + 1 > B_MAX_SYMLINKS) {
2263 				status = B_LINK_LIMIT;
2264 				goto resolve_link_error;
2265 			}
2266 
2267 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2268 			if (buffer == NULL) {
2269 				status = B_NO_MEMORY;
2270 				goto resolve_link_error;
2271 			}
2272 
2273 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2274 				bufferSize--;
2275 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2276 				// null-terminate
2277 				if (status >= 0)
2278 					buffer[bufferSize] = '\0';
2279 			} else
2280 				status = B_BAD_VALUE;
2281 
2282 			if (status < B_OK) {
2283 				free(buffer);
2284 
2285 		resolve_link_error:
2286 				put_vnode(vnode);
2287 				put_vnode(nextVnode);
2288 
2289 				return status;
2290 			}
2291 			put_vnode(nextVnode);
2292 
2293 			// Check if we start from the root directory or the current
2294 			// directory ("vnode" still points to that one).
2295 			// Cut off all leading slashes if it's the root directory
2296 			path = buffer;
2297 			bool absoluteSymlink = false;
2298 			if (path[0] == '/') {
2299 				// we don't need the old directory anymore
2300 				put_vnode(vnode);
2301 
2302 				while (*++path == '/')
2303 					;
2304 
2305 				mutex_lock(&sIOContextRootLock);
2306 				vnode = ioContext->root;
2307 				inc_vnode_ref_count(vnode);
2308 				mutex_unlock(&sIOContextRootLock);
2309 
2310 				absoluteSymlink = true;
2311 			}
2312 
2313 			inc_vnode_ref_count(vnode);
2314 				// balance the next recursion - we will decrement the
2315 				// ref_count of the vnode, no matter if we succeeded or not
2316 
2317 			if (absoluteSymlink && *path == '\0') {
2318 				// symlink was just "/"
2319 				nextVnode = vnode;
2320 			} else {
2321 				status = vnode_path_to_vnode(vnode, path, traverseLeafLink,
2322 					count + 1, ioContext, &nextVnode, &lastParentID);
2323 			}
2324 
2325 			free(buffer);
2326 
2327 			if (status < B_OK) {
2328 				put_vnode(vnode);
2329 				return status;
2330 			}
2331 		} else
2332 			lastParentID = vnode->id;
2333 
2334 		// decrease the ref count on the old dir we just looked up into
2335 		put_vnode(vnode);
2336 
2337 		path = nextPath;
2338 		vnode = nextVnode;
2339 
2340 		// see if we hit a mount point
2341 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2342 		if (mountPoint) {
2343 			put_vnode(vnode);
2344 			vnode = mountPoint;
2345 		}
2346 	}
2347 
2348 	*_vnode = vnode;
2349 	if (_parentID)
2350 		*_parentID = lastParentID;
2351 
2352 	return B_OK;
2353 }
2354 
2355 
2356 static status_t
2357 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2358 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2359 {
2360 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2361 		get_current_io_context(kernel), _vnode, _parentID);
2362 }
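
/*	Editor's note (not part of the original source): since
	vnode_path_to_vnode() always consumes the reference to the starting
	vnode, a caller that wants to keep using its directory vnode has to
	acquire an extra reference first:

		inc_vnode_ref_count(dir);
			// balances the reference consumed by the call below
		status_t status = vnode_path_to_vnode(dir, path, true, 0, kernel,
			&vnode, NULL);
		// the caller's own reference to dir is still intact here
*/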
2363 
2364 
2365 static status_t
2366 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2367 	ino_t* _parentID, bool kernel)
2368 {
2369 	struct vnode* start = NULL;
2370 
2371 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2372 
2373 	if (!path)
2374 		return B_BAD_VALUE;
2375 
2376 	if (*path == '\0')
2377 		return B_ENTRY_NOT_FOUND;
2378 
2379 	// figure out if we need to start at root or at cwd
2380 	if (*path == '/') {
2381 		if (sRoot == NULL) {
2382 			// we're a bit early, aren't we?
2383 			return B_ERROR;
2384 		}
2385 
2386 		while (*++path == '/')
2387 			;
2388 		start = get_root_vnode(kernel);
2389 
2390 		if (*path == '\0') {
2391 			*_vnode = start;
2392 			return B_OK;
2393 		}
2394 
2395 	} else {
2396 		struct io_context* context = get_current_io_context(kernel);
2397 
2398 		mutex_lock(&context->io_mutex);
2399 		start = context->cwd;
2400 		if (start != NULL)
2401 			inc_vnode_ref_count(start);
2402 		mutex_unlock(&context->io_mutex);
2403 
2404 		if (start == NULL)
2405 			return B_ERROR;
2406 	}
2407 
2408 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2409 		_parentID);
2410 }
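
/*	Editor's usage sketch (not part of the original source): the buffer is
	modified during traversal, so a mutable copy of the path is needed:

		char buffer[B_PATH_NAME_LENGTH];
		strlcpy(buffer, "/boot/home", sizeof(buffer));

		struct vnode* vnode;
		if (path_to_vnode(buffer, true, &vnode, NULL, true) == B_OK) {
			// absolute paths start at the IO context's root, relative
			// ones at its current working directory
			put_vnode(vnode);
		}
*/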
2411 
2412 
2413 /*! Returns the vnode in the next to last segment of the path, and returns
2414 	the last portion in filename.
2415 	The path buffer must be able to store at least one additional character.
2416 */
2417 static status_t
2418 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename, bool kernel)
2419 {
2420 	status_t status = get_dir_path_and_leaf(path, filename);
2421 	if (status != B_OK)
2422 		return status;
2423 
2424 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2425 }
2426 
2427 
2428 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2429 		   to by a FD + path pair.
2430 
2431 	\a path must be given in either case. \a fd might be omitted, in which
2432 	case \a path is either an absolute path or one relative to the current
2433 	directory. If both a supplied and \a path is relative it is reckoned off
2434 	directory. If both are supplied and \a path is relative, it is reckoned
2435 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2436 	is ignored.
2437 	The caller has the responsibility to call put_vnode() on the returned
2438 	directory vnode.
2439 
2440 	\param fd The FD. May be < 0.
2441 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2442 	       is modified by this function. It must have at least room for a
2443 	       string one character longer than the path it contains.
2444 	\param _vnode A pointer to a variable the directory vnode shall be written
2445 		   into.
2446 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2447 		   the leaf name of the specified entry will be written.
2448 	\param kernel \c true, if invoked from inside the kernel, \c false if
2449 		   invoked from userland.
2450 	\return \c B_OK, if everything went fine, another error code otherwise.
2451 */
2452 static status_t
2453 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2454 	char* filename, bool kernel)
2455 {
2456 	if (!path)
2457 		return B_BAD_VALUE;
2458 	if (*path == '\0')
2459 		return B_ENTRY_NOT_FOUND;
2460 	if (fd < 0)
2461 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2462 
2463 	status_t status = get_dir_path_and_leaf(path, filename);
2464 	if (status != B_OK)
2465 		return status;
2466 
2467 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2468 }
2469 
2470 
2471 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2472 		   to by a vnode + path pair.
2473 
2474 	\a path must be given in either case. \a vnode might be omitted, in which
2475 	case \a path is either an absolute path or one relative to the current
2476 	directory. If both are supplied and \a path is relative, it is reckoned
2477 	off of the directory referred to by \a vnode. If \a path is absolute,
2478 	\a vnode is ignored.
2479 
2480 	The caller has the responsibility to call put_vnode() on the returned
2481 	directory vnode.
2482 
2483 	\param vnode The vnode. May be \c NULL.
2484 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2485 	       is modified by this function. It must have at least room for a
2486 	       string one character longer than the path it contains.
2487 	\param _vnode A pointer to a variable the directory vnode shall be written
2488 		   into.
2489 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2490 		   the leaf name of the specified entry will be written.
2491 	\param kernel \c true, if invoked from inside the kernel, \c false if
2492 		   invoked from userland.
2493 	\return \c B_OK, if everything went fine, another error code otherwise.
2494 */
2495 static status_t
2496 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2497 	struct vnode** _vnode, char* filename, bool kernel)
2498 {
2499 	if (!path)
2500 		return B_BAD_VALUE;
2501 	if (*path == '\0')
2502 		return B_ENTRY_NOT_FOUND;
2503 	if (vnode == NULL || path[0] == '/')
2504 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2505 
2506 	status_t status = get_dir_path_and_leaf(path, filename);
2507 	if (status != B_OK)
2508 		return status;
2509 
2510 	inc_vnode_ref_count(vnode);
2511 		// vnode_path_to_vnode() always decrements the ref count
2512 
2513 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2514 }
2515 
2516 
2517 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2518 */
2519 static status_t
2520 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2521 	size_t bufferSize, struct io_context* ioContext)
2522 {
2523 	if (bufferSize < sizeof(struct dirent))
2524 		return B_BAD_VALUE;
2525 
2526 	// See if vnode is the root of a mount and move to the covered
2527 	// vnode so we get the underlying file system
2528 	VNodePutter vnodePutter;
2529 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
2530 		vnode = vnode->mount->covers_vnode;
2531 		inc_vnode_ref_count(vnode);
2532 		vnodePutter.SetTo(vnode);
2533 	}
2534 
2535 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2536 		// The FS supports getting the name of a vnode.
2537 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2538 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2539 			return B_OK;
2540 	}
2541 
2542 	// The FS doesn't support getting the name of a vnode. So we search the
2543 	// parent directory for the vnode, if the caller let us.
2544 
2545 	if (parent == NULL)
2546 		return EOPNOTSUPP;
2547 
2548 	void* cookie;
2549 
2550 	status_t status = FS_CALL(parent, open_dir, &cookie);
2551 	if (status >= B_OK) {
2552 		while (true) {
2553 			uint32 num = 1;
2554 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2555 				&num);
2556 			if (status < B_OK)
2557 				break;
2558 			if (num == 0) {
2559 				status = B_ENTRY_NOT_FOUND;
2560 				break;
2561 			}
2562 
2563 			if (vnode->id == buffer->d_ino) {
2564 				// found correct entry!
2565 				break;
2566 			}
2567 		}
2568 
2569 		FS_CALL(parent, close_dir, cookie);
2570 		FS_CALL(parent, free_dir_cookie, cookie);
2571 	}
2572 	return status;
2573 }
2574 
2575 
2576 static status_t
2577 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2578 	size_t nameSize, bool kernel)
2579 {
2580 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2581 	struct dirent* dirent = (struct dirent*)buffer;
2582 
2583 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2584 		get_current_io_context(kernel));
2585 	if (status != B_OK)
2586 		return status;
2587 
2588 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2589 		return B_BUFFER_OVERFLOW;
2590 
2591 	return B_OK;
2592 }
2593 
2594 
2595 /*!	Gets the full path to a given directory vnode.
2596 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2597 	file system doesn't support this call, it will fall back to iterating
2598 	through the parent directory to get the name of the child.
2599 
2600 	To protect against circular loops, it supports a maximum tree depth
2601 	of 256 levels.
2602 
2603 	Note that the path may no longer be correct by the time this function
2604 	returns! It doesn't use any locking to guarantee a consistent path, as
2605 	paths aren't safe anyway: the path to a file can change at any time.
2606 
2607 	It might be a good idea, though, to check if the returned path exists
2608 	in the calling function (it's not done here because of efficiency)
2609 */
2610 static status_t
2611 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2612 	bool kernel)
2613 {
2614 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2615 
2616 	if (vnode == NULL || buffer == NULL)
2617 		return B_BAD_VALUE;
2618 
2619 	if (!S_ISDIR(vnode->type))
2620 		return B_NOT_A_DIRECTORY;
2621 
2622 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
2623 	KPath pathBuffer;
2624 	if (pathBuffer.InitCheck() != B_OK)
2625 		return B_NO_MEMORY;
2626 
2627 	char* path = pathBuffer.LockBuffer();
2628 	int32 insert = pathBuffer.BufferSize();
2629 	int32 maxLevel = 256;
2630 	int32 length;
2631 	status_t status;
2632 
2633 	// we don't use get_vnode() here because this call is more
2634 	// efficient and does all we need from get_vnode()
2635 	inc_vnode_ref_count(vnode);
2636 
2637 	// resolve a volume root to its mount point
2638 	struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2639 	if (mountPoint) {
2640 		put_vnode(vnode);
2641 		vnode = mountPoint;
2642 	}
2643 
2644 	path[--insert] = '\0';
2645 
2646 	struct io_context* ioContext = get_current_io_context(kernel);
2647 
2648 	while (true) {
2649 		// the name buffer is also used for fs_read_dir()
2650 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2651 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2652 		struct vnode* parentVnode;
2653 		ino_t parentID;
2654 
2655 		// lookup the parent vnode
2656 		if (vnode == ioContext->root) {
2657 			// we hit the IO context root
2658 			parentVnode = vnode;
2659 			inc_vnode_ref_count(vnode);
2660 		} else {
2661 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2662 			if (status < B_OK)
2663 				goto out;
2664 		}
2665 
2666 		// get the node's name
2667 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2668 			sizeof(nameBuffer), ioContext);
2669 
2670 		// resolve a volume root to its mount point
2671 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
2672 		if (mountPoint) {
2673 			put_vnode(parentVnode);
2674 			parentVnode = mountPoint;
2675 			parentID = parentVnode->id;
2676 		}
2677 
2678 		bool hitRoot = (parentVnode == vnode);
2679 
2680 		// release the current vnode, we only need its parent from now on
2681 		put_vnode(vnode);
2682 		vnode = parentVnode;
2683 
2684 		if (status < B_OK)
2685 			goto out;
2686 
2687 		if (hitRoot) {
2688 			// we have reached "/", which means we have constructed the full
2689 			// path
2690 			break;
2691 		}
2692 
2693 		// TODO: add an explicit check for loops (about every 10 levels) to
2694 		// get real loop detection
2695 
2696 		// don't go deeper than 'maxLevel' to prevent circular loops
2697 		if (maxLevel-- < 0) {
2698 			status = ELOOP;
2699 			goto out;
2700 		}
2701 
2702 		// add the name in front of the current path
2703 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2704 		length = strlen(name);
2705 		insert -= length;
2706 		if (insert <= 0) {
2707 			status = ENOBUFS;
2708 			goto out;
2709 		}
2710 		memcpy(path + insert, name, length);
2711 		path[--insert] = '/';
2712 	}
2713 
2714 	// the root dir will result in an empty path: fix it
2715 	if (path[insert] == '\0')
2716 		path[--insert] = '/';
2717 
2718 	TRACE(("  path is: %s\n", path + insert));
2719 
2720 	// copy the path to the output buffer
2721 	length = pathBuffer.BufferSize() - insert;
2722 	if (length <= (int)bufferSize)
2723 		memcpy(buffer, path + insert, length);
2724 	else
2725 		status = ENOBUFS;
2726 
2727 out:
2728 	put_vnode(vnode);
2729 	return status;
2730 }
2731 
2732 
2733 /*!	Checks the length of every path component, and adds a '.'
2734 	if the path ends in a slash.
2735 	The given path buffer must be able to store at least one
2736 	additional character.
2737 */
2738 static status_t
2739 check_path(char* to)
2740 {
2741 	int32 length = 0;
2742 
2743 	// check length of every path component
2744 
2745 	while (*to) {
2746 		char* begin;
2747 		if (*to == '/')
2748 			to++, length++;
2749 
2750 		begin = to;
2751 		while (*to != '/' && *to)
2752 			to++, length++;
2753 
2754 		if (to - begin > B_FILE_NAME_LENGTH)
2755 			return B_NAME_TOO_LONG;
2756 	}
2757 
2758 	if (length == 0)
2759 		return B_ENTRY_NOT_FOUND;
2760 
2761 	// complete path if there is a slash at the end
2762 
2763 	if (*(to - 1) == '/') {
2764 		if (length > B_PATH_NAME_LENGTH - 2)
2765 			return B_NAME_TOO_LONG;
2766 
2767 		to[0] = '.';
2768 		to[1] = '\0';
2769 	}
2770 
2771 	return B_OK;
2772 }
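
/*	Editor's example (not part of the original source): the completion
	happens in place, which is why the buffer needs room for one more
	character:

		char path[B_PATH_NAME_LENGTH] = "/boot/home/";
		if (check_path(path) == B_OK) {
			// path is now "/boot/home/."
		}
*/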
2773 
2774 
2775 static struct file_descriptor*
2776 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2777 {
2778 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel), fd);
2779 	if (descriptor == NULL)
2780 		return NULL;
2781 
2782 	struct vnode* vnode = fd_vnode(descriptor);
2783 	if (vnode == NULL) {
2784 		put_fd(descriptor);
2785 		return NULL;
2786 	}
2787 
2788 	// ToDo: when we can close a file descriptor at any point, investigate
2789 	//	if this is still valid to do (accessing the vnode without ref_count
2790 	//	or locking)
2791 	*_vnode = vnode;
2792 	return descriptor;
2793 }
2794 
2795 
2796 static struct vnode*
2797 get_vnode_from_fd(int fd, bool kernel)
2798 {
2799 	struct file_descriptor* descriptor;
2800 	struct vnode* vnode;
2801 
2802 	descriptor = get_fd(get_current_io_context(kernel), fd);
2803 	if (descriptor == NULL)
2804 		return NULL;
2805 
2806 	vnode = fd_vnode(descriptor);
2807 	if (vnode != NULL)
2808 		inc_vnode_ref_count(vnode);
2809 
2810 	put_fd(descriptor);
2811 	return vnode;
2812 }
2813 
2814 
2815 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2816 	only the path will be considered. In this case, the \a path must not be
2817 	NULL.
2818 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2819 	and should be NULL for files.
2820 */
2821 static status_t
2822 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2823 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2824 {
2825 	if (fd < 0 && !path)
2826 		return B_BAD_VALUE;
2827 
2828 	if (path != NULL && *path == '\0')
2829 		return B_ENTRY_NOT_FOUND;
2830 
2831 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2832 		// no FD or absolute path
2833 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2834 	}
2835 
2836 	// FD only, or FD + relative path
2837 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2838 	if (!vnode)
2839 		return B_FILE_ERROR;
2840 
2841 	if (path != NULL) {
2842 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2843 			_vnode, _parentID);
2844 	}
2845 
2846 	// there is no relative path to take into account
2847 
2848 	*_vnode = vnode;
2849 	if (_parentID)
2850 		*_parentID = -1;
2851 
2852 	return B_OK;
2853 }
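
/*	Editor's sketch (not part of the original source): the three cases
	mirror the POSIX *at() semantics. With a hypothetical directory FD
	"dirFD":

		char buffer[B_PATH_NAME_LENGTH];
		strlcpy(buffer, "data/file", sizeof(buffer));

		struct vnode* vnode;
		status_t status = fd_and_path_to_vnode(dirFD, buffer, true, &vnode,
			NULL, false);
			// relative path: resolved against dirFD's directory; an
			// absolute path or dirFD < 0 would ignore the FD instead
*/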
2854 
2855 
2856 static int
2857 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2858 	void* cookie, int openMode, bool kernel)
2859 {
2860 	struct file_descriptor* descriptor;
2861 	int fd;
2862 
2863 	// If the vnode is mandatorily locked, we don't allow creating a new
2864 	// file or directory file_descriptor for it
2865 	if (vnode && vnode->mandatory_locked_by != NULL
2866 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2867 		return B_BUSY;
2868 
2869 	descriptor = alloc_fd();
2870 	if (!descriptor)
2871 		return B_NO_MEMORY;
2872 
2873 	if (vnode)
2874 		descriptor->u.vnode = vnode;
2875 	else
2876 		descriptor->u.mount = mount;
2877 	descriptor->cookie = cookie;
2878 
2879 	switch (type) {
2880 		// vnode types
2881 		case FDTYPE_FILE:
2882 			descriptor->ops = &sFileOps;
2883 			break;
2884 		case FDTYPE_DIR:
2885 			descriptor->ops = &sDirectoryOps;
2886 			break;
2887 		case FDTYPE_ATTR:
2888 			descriptor->ops = &sAttributeOps;
2889 			break;
2890 		case FDTYPE_ATTR_DIR:
2891 			descriptor->ops = &sAttributeDirectoryOps;
2892 			break;
2893 
2894 		// mount types
2895 		case FDTYPE_INDEX_DIR:
2896 			descriptor->ops = &sIndexDirectoryOps;
2897 			break;
2898 		case FDTYPE_QUERY:
2899 			descriptor->ops = &sQueryOps;
2900 			break;
2901 
2902 		default:
2903 			panic("get_new_fd() called with unknown type %d\n", type);
2904 			break;
2905 	}
2906 	descriptor->type = type;
2907 	descriptor->open_mode = openMode;
2908 
2909 	fd = new_fd(get_current_io_context(kernel), descriptor);
2910 	if (fd < 0) {
2911 		free(descriptor);
2912 		return B_NO_MORE_FDS;
2913 	}
2914 
2915 	return fd;
2916 }
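
/*	Editor's sketch (not part of the original source): a typical open path
	first asks the file system for a cookie and only then wraps it into a
	descriptor. If no FD slot is left, the caller still owns both the
	cookie and its vnode reference and has to clean up itself:

		void* cookie;
		status_t status = FS_CALL(vnode, open, openMode, &cookie);
		if (status == B_OK) {
			int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
				kernel);
			if (fd < 0) {
				FS_CALL(vnode, close, cookie);
				FS_CALL(vnode, free_cookie, cookie);
			}
		}
*/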
2917 
2918 
2919 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2920 	vfs_normalize_path(). See there for more documentation.
2921 */
2922 static status_t
2923 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2924 {
2925 	VNodePutter dirPutter;
2926 	struct vnode* dir = NULL;
2927 	status_t error;
2928 
2929 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2930 		// get dir vnode + leaf name
2931 		struct vnode* nextDir;
2932 		char leaf[B_FILE_NAME_LENGTH];
2933 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2934 		if (error != B_OK)
2935 			return error;
2936 
2937 		dir = nextDir;
2938 		strcpy(path, leaf);
2939 		dirPutter.SetTo(dir);
2940 
2941 		// get file vnode, if we shall resolve links
2942 		bool fileExists = false;
2943 		struct vnode* fileVnode;
2944 		VNodePutter fileVnodePutter;
2945 		if (traverseLink) {
2946 			inc_vnode_ref_count(dir);
2947 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2948 					NULL) == B_OK) {
2949 				fileVnodePutter.SetTo(fileVnode);
2950 				fileExists = true;
2951 			}
2952 		}
2953 
2954 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->type)) {
2955 			// we're done -- construct the path
2956 			bool hasLeaf = true;
2957 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2958 				// special cases "." and ".." -- get the dir, forget the leaf
2959 				inc_vnode_ref_count(dir);
2960 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2961 					&nextDir, NULL);
2962 				if (error != B_OK)
2963 					return error;
2964 				dir = nextDir;
2965 				dirPutter.SetTo(dir);
2966 				hasLeaf = false;
2967 			}
2968 
2969 			// get the directory path
2970 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2971 			if (error != B_OK)
2972 				return error;
2973 
2974 			// append the leaf name
2975 			if (hasLeaf) {
2976 				// insert a directory separator if this is not the file system
2977 				// root
2978 				if ((strcmp(path, "/") != 0
2979 					&& strlcat(path, "/", pathSize) >= pathSize)
2980 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2981 					return B_NAME_TOO_LONG;
2982 				}
2983 			}
2984 
2985 			return B_OK;
2986 		}
2987 
2988 		// read link
2989 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2990 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2991 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2992 			if (error != B_OK)
2993 				return error;
2994 			path[bufferSize] = '\0';
2995 		} else
2996 			return B_BAD_VALUE;
2997 	}
2998 
2999 	return B_LINK_LIMIT;
3000 }
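
/*	Editor's example (not part of the original source): normalization
	happens in place and resolves ".", "..", redundant slashes, and - if
	\a traverseLink is \c true - symlinks:

		char path[B_PATH_NAME_LENGTH];
		strlcpy(path, "/boot/home//Desktop/..", sizeof(path));
		if (normalize_path(path, sizeof(path), true, true) == B_OK) {
			// path is now "/boot/home"
		}
*/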
3001 
3002 
3003 #ifdef ADD_DEBUGGER_COMMANDS
3004 
3005 
3006 static void
3007 _dump_advisory_locking(advisory_locking* locking)
3008 {
3009 	if (locking == NULL)
3010 		return;
3011 
3012 	kprintf("   lock:        %ld\n", locking->lock);
3013 	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
3014 
3015 	int32 index = 0;
3016 	LockList::Iterator iterator = locking->locks.GetIterator();
3017 	while (iterator.HasNext()) {
3018 		struct advisory_lock* lock = iterator.Next();
3019 
3020 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
3021 		kprintf("        start:  %Ld\n", lock->start);
3022 		kprintf("        end:    %Ld\n", lock->end);
3023 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3024 	}
3025 }
3026 
3027 
3028 static void
3029 _dump_mount(struct fs_mount* mount)
3030 {
3031 	kprintf("MOUNT: %p\n", mount);
3032 	kprintf(" id:            %ld\n", mount->id);
3033 	kprintf(" device_name:   %s\n", mount->device_name);
3034 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3035 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
3036 	kprintf(" partition:     %p\n", mount->partition);
3037 	kprintf(" lock:          %p\n", &mount->rlock);
3038 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3039 		mount->owns_file_device ? " owns_file_device" : "");
3040 
3041 	fs_volume* volume = mount->volume;
3042 	while (volume != NULL) {
3043 		kprintf(" volume %p:\n", volume);
3044 		kprintf("  layer:            %ld\n", volume->layer);
3045 		kprintf("  private_volume:   %p\n", volume->private_volume);
3046 		kprintf("  ops:              %p\n", volume->ops);
3047 		kprintf("  file_system:      %p\n", volume->file_system);
3048 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3049 		volume = volume->super_volume;
3050 	}
3051 
3052 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3053 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3054 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3055 	set_debug_variable("_partition", (addr_t)mount->partition);
3056 }
3057 
3058 
3059 static void
3060 _dump_vnode(struct vnode* vnode)
3061 {
3062 	kprintf("VNODE: %p\n", vnode);
3063 	kprintf(" device:        %ld\n", vnode->device);
3064 	kprintf(" id:            %Ld\n", vnode->id);
3065 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3066 	kprintf(" private_node:  %p\n", vnode->private_node);
3067 	kprintf(" mount:         %p\n", vnode->mount);
3068 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3069 	kprintf(" cache:         %p\n", vnode->cache);
3070 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
3071 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3072 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3073 
3074 	_dump_advisory_locking(vnode->advisory_locking);
3075 
3076 	set_debug_variable("_node", (addr_t)vnode->private_node);
3077 	set_debug_variable("_mount", (addr_t)vnode->mount);
3078 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3079 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3080 }
3081 
3082 
3083 static int
3084 dump_mount(int argc, char** argv)
3085 {
3086 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3087 		kprintf("usage: %s [id|address]\n", argv[0]);
3088 		return 0;
3089 	}
3090 
3091 	uint32 id = parse_expression(argv[1]);
3092 	struct fs_mount* mount = NULL;
3093 
3094 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3095 	if (mount == NULL) {
3096 		if (IS_USER_ADDRESS(id)) {
3097 			kprintf("fs_mount not found\n");
3098 			return 0;
3099 		}
3100 		mount = (fs_mount*)id;
3101 	}
3102 
3103 	_dump_mount(mount);
3104 	return 0;
3105 }
3106 
3107 
3108 static int
3109 dump_mounts(int argc, char** argv)
3110 {
3111 	if (argc != 1) {
3112 		kprintf("usage: %s\n", argv[0]);
3113 		return 0;
3114 	}
3115 
3116 	kprintf("address     id root       covers     cookie     fs_name\n");
3117 
3118 	struct hash_iterator iterator;
3119 	struct fs_mount* mount;
3120 
3121 	hash_open(sMountsTable, &iterator);
3122 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator)) != NULL) {
3123 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3124 			mount->covers_vnode, mount->volume->private_volume,
3125 			mount->volume->file_system_name);
3126 
3127 		fs_volume* volume = mount->volume;
3128 		while (volume->super_volume != NULL) {
3129 			volume = volume->super_volume;
3130 			kprintf("                                     %p %s\n",
3131 				volume->private_volume, volume->file_system_name);
3132 		}
3133 	}
3134 
3135 	hash_close(sMountsTable, &iterator, false);
3136 	return 0;
3137 }
3138 
3139 
3140 static int
3141 dump_vnode(int argc, char** argv)
3142 {
3143 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
3144 		kprintf("usage: %s <device> <id>\n"
3145 			"   or: %s <address>\n", argv[0], argv[0]);
3146 		return 0;
3147 	}
3148 
3149 	struct vnode* vnode = NULL;
3150 
3151 	if (argc == 2) {
3152 		vnode = (struct vnode*)parse_expression(argv[1]);
3153 		if (IS_USER_ADDRESS(vnode)) {
3154 			kprintf("invalid vnode address\n");
3155 			return 0;
3156 		}
3157 		_dump_vnode(vnode);
3158 		return 0;
3159 	}
3160 
3161 	struct hash_iterator iterator;
3162 	dev_t device = parse_expression(argv[1]);
3163 	ino_t id = parse_expression(argv[2]);
3164 
3165 	hash_open(sVnodeTable, &iterator);
3166 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3167 		if (vnode->id != id || vnode->device != device)
3168 			continue;
3169 
3170 		_dump_vnode(vnode);
3171 	}
3172 
3173 	hash_close(sVnodeTable, &iterator, false);
3174 	return 0;
3175 }
3176 
3177 
3178 static int
3179 dump_vnodes(int argc, char** argv)
3180 {
3181 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3182 		kprintf("usage: %s [device]\n", argv[0]);
3183 		return 0;
3184 	}
3185 
3186 	// restrict dumped nodes to a certain device if requested
3187 	dev_t device = parse_expression(argv[1]);
3188 
3189 	struct hash_iterator iterator;
3190 	struct vnode* vnode;
3191 
3192 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3193 		"flags\n");
3194 
3195 	hash_open(sVnodeTable, &iterator);
3196 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3197 		if (vnode->device != device)
3198 			continue;
3199 
3200 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3201 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3202 			vnode->advisory_locking, vnode->remove ? "r" : "-",
3203 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3204 	}
3205 
3206 	hash_close(sVnodeTable, &iterator, false);
3207 	return 0;
3208 }
3209 
3210 
3211 static int
3212 dump_vnode_caches(int argc, char** argv)
3213 {
3214 	struct hash_iterator iterator;
3215 	struct vnode* vnode;
3216 
3217 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3218 		kprintf("usage: %s [device]\n", argv[0]);
3219 		return 0;
3220 	}
3221 
3222 	// restrict dumped nodes to a certain device if requested
3223 	dev_t device = -1;
3224 	if (argc > 1)
3225 		device = parse_expression(argv[1]);
3226 
3227 	kprintf("address    dev     inode cache          size   pages\n");
3228 
3229 	hash_open(sVnodeTable, &iterator);
3230 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3231 		if (vnode->cache == NULL)
3232 			continue;
3233 		if (device != -1 && vnode->device != device)
3234 			continue;
3235 
3236 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3237 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3238 				/ B_PAGE_SIZE, vnode->cache->page_count);
3239 	}
3240 
3241 	hash_close(sVnodeTable, &iterator, false);
3242 	return 0;
3243 }
3244 
3245 
3246 int
3247 dump_io_context(int argc, char** argv)
3248 {
3249 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3250 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3251 		return 0;
3252 	}
3253 
3254 	struct io_context* context = NULL;
3255 
3256 	if (argc > 1) {
3257 		uint32 num = parse_expression(argv[1]);
3258 		if (IS_KERNEL_ADDRESS(num))
3259 			context = (struct io_context*)num;
3260 		else {
3261 			struct team* team = team_get_team_struct_locked(num);
3262 			if (team == NULL) {
3263 				kprintf("could not find team with ID %ld\n", num);
3264 				return 0;
3265 			}
3266 			context = (struct io_context*)team->io_context;
3267 		}
3268 	} else
3269 		context = get_current_io_context(true);
3270 
3271 	kprintf("I/O CONTEXT: %p\n", context);
3272 	kprintf(" root vnode:\t%p\n", context->root);
3273 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3274 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3275 	kprintf(" max fds:\t%lu\n", context->table_size);
3276 
3277 	if (context->num_used_fds)
3278 		kprintf("   no. type     ops ref open mode        pos cookie\n");
3279 
3280 	for (uint32 i = 0; i < context->table_size; i++) {
3281 		struct file_descriptor* fd = context->fds[i];
3282 		if (fd == NULL)
3283 			continue;
3284 
3285 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
3286 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
3287 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
3288 			fd->u.vnode);
3289 	}
3290 
3291 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3292 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3293 
3294 	set_debug_variable("_cwd", (addr_t)context->cwd);
3295 
3296 	return 0;
3297 }
3298 
3299 
3300 int
3301 dump_vnode_usage(int argc, char** argv)
3302 {
3303 	if (argc != 1) {
3304 		kprintf("usage: %s\n", argv[0]);
3305 		return 0;
3306 	}
3307 
3308 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3309 		kMaxUnusedVnodes);
3310 
3311 	struct hash_iterator iterator;
3312 	hash_open(sVnodeTable, &iterator);
3313 
3314 	uint32 count = 0;
3315 	struct vnode* vnode;
3316 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3317 		count++;
3318 	}
3319 
3320 	hash_close(sVnodeTable, &iterator, false);
3321 
3322 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3323 	return 0;
3324 }
3325 
3326 #endif	// ADD_DEBUGGER_COMMANDS
3327 
3328 /*!	Clears an iovec array of physical pages.
3329 	Returns in \a _bytes the number of bytes successfully cleared.
3330 */
3331 static status_t
3332 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3333 {
3334 	size_t bytes = *_bytes;
3335 	size_t index = 0;
3336 
3337 	while (bytes > 0) {
3338 		size_t length = min_c(vecs[index].iov_len, bytes);
3339 
3340 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3341 			length);
3342 		if (status != B_OK) {
3343 			*_bytes -= bytes;
3344 			return status;
3345 		}
3346 
3347 		bytes -= length;
		index++;
			// advance to the next vec, or we would clear the same pages
			// over and over again
3348 	}
3349 
3350 	return B_OK;
3351 }
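
/*	Editor's note (not part of the original source): zero_pages() backs the
	"sparse" cases in common_file_io_vec_pages() below - a file_io_vec with
	a negative offset denotes a hole in the file, so instead of calling the
	file system's read_pages() hook, the target pages are simply cleared:

		if (fileVecs[0].offset < 0)
			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
*/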
3352 
3353 
3354 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3355 	and calls the file system hooks to read/write the request to disk.
3356 */
3357 static status_t
3358 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3359 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3360 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3361 	bool doWrite)
3362 {
3363 	if (fileVecCount == 0) {
3364 		// There are no file vecs at this offset, so we're obviously trying
3365 		// to access the file outside of its bounds
3366 		return B_BAD_VALUE;
3367 	}
3368 
3369 	size_t numBytes = *_numBytes;
3370 	uint32 fileVecIndex;
3371 	size_t vecOffset = *_vecOffset;
3372 	uint32 vecIndex = *_vecIndex;
3373 	status_t status;
3374 	size_t size;
3375 
3376 	if (!doWrite && vecOffset == 0) {
3377 		// now directly read the data from the device
3378 		// the first file_io_vec can be read directly
3379 
3380 		if (fileVecs[0].length < numBytes)
3381 			size = fileVecs[0].length;
3382 		else
3383 			size = numBytes;
3384 
3385 		if (fileVecs[0].offset >= 0) {
3386 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3387 				&vecs[vecIndex], vecCount - vecIndex, &size);
3388 		} else {
3389 			// sparse read
3390 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3391 		}
3392 		if (status < B_OK)
3393 			return status;
3394 
3395 		// TODO: this is a work-around for buggy device drivers!
3396 		//	When our own drivers honour the length, we can:
3397 		//	a) also use this direct I/O for writes (otherwise, it would
3398 		//	   overwrite precious data)
3399 		//	b) panic if the term below is true (at least for writes)
3400 		if (size > fileVecs[0].length) {
3401 			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
3402 			size = fileVecs[0].length;
3403 		}
3404 
3405 		ASSERT(size <= fileVecs[0].length);
3406 
3407 		// If the file portion was contiguous, we're already done now
3408 		if (size == numBytes)
3409 			return B_OK;
3410 
3411 		// if we reached the end of the file, we can return as well
3412 		if (size != fileVecs[0].length) {
3413 			*_numBytes = size;
3414 			return B_OK;
3415 		}
3416 
3417 		fileVecIndex = 1;
3418 
3419 		// first, find out where we have to continue in our iovecs
3420 		for (; vecIndex < vecCount; vecIndex++) {
3421 			if (size < vecs[vecIndex].iov_len)
3422 				break;
3423 
3424 			size -= vecs[vecIndex].iov_len;
3425 		}
3426 
3427 		vecOffset = size;
3428 	} else {
3429 		fileVecIndex = 0;
3430 		size = 0;
3431 	}
3432 
3433 	// Too bad, let's process the rest of the file_io_vecs
3434 
3435 	size_t totalSize = size;
3436 	size_t bytesLeft = numBytes - size;
3437 
3438 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3439 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3440 		off_t fileOffset = fileVec.offset;
3441 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3442 
3443 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3444 
3445 		// process the complete fileVec
3446 		while (fileLeft > 0) {
3447 			iovec tempVecs[MAX_TEMP_IO_VECS];
3448 			uint32 tempCount = 0;
3449 
3450 			// size tracks how much of what is left of the current fileVec
3451 			// (fileLeft) has been assigned to tempVecs
3452 			size = 0;
3453 
3454 			// assign what is left of the current fileVec to the tempVecs
3455 			for (size = 0; size < fileLeft && vecIndex < vecCount
3456 					&& tempCount < MAX_TEMP_IO_VECS;) {
3457 				// try to satisfy one iovec per iteration (or as much as
3458 				// possible)
3459 
3460 				// bytes left of the current iovec
3461 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3462 				if (vecLeft == 0) {
3463 					vecOffset = 0;
3464 					vecIndex++;
3465 					continue;
3466 				}
3467 
3468 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3469 					vecIndex, vecOffset, size));
3470 
3471 				// actually available bytes
3472 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3473 
3474 				tempVecs[tempCount].iov_base
3475 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3476 				tempVecs[tempCount].iov_len = tempVecSize;
3477 				tempCount++;
3478 
3479 				size += tempVecSize;
3480 				vecOffset += tempVecSize;
3481 			}
3482 
3483 			size_t bytes = size;
3484 
3485 			if (fileOffset == -1) {
3486 				if (doWrite) {
3487 					panic("sparse write attempt: vnode %p", vnode);
3488 					status = B_IO_ERROR;
3489 				} else {
3490 					// sparse read
3491 					status = zero_pages(tempVecs, tempCount, &bytes);
3492 				}
3493 			} else if (doWrite) {
3494 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3495 					tempVecs, tempCount, &bytes);
3496 			} else {
3497 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3498 					tempVecs, tempCount, &bytes);
3499 			}
3500 			if (status < B_OK)
3501 				return status;
3502 
3503 			totalSize += bytes;
3504 			bytesLeft -= size;
3505 			if (fileOffset >= 0)
3506 				fileOffset += size;
3507 			fileLeft -= size;
3508 			//dprintf("-> file left = %Lu\n", fileLeft);
3509 
3510 			if (size != bytes || vecIndex >= vecCount) {
3511 				// there are no more bytes or iovecs, let's bail out
3512 				*_numBytes = totalSize;
3513 				return B_OK;
3514 			}
3515 		}
3516 	}
3517 
3518 	*_vecIndex = vecIndex;
3519 	*_vecOffset = vecOffset;
3520 	*_numBytes = totalSize;
3521 	return B_OK;
3522 }
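
/*	Editor's worked example (not part of the original source): assume a read
	of 4096 bytes described by the two file vecs {offset 4096, length 2048}
	and {offset 16384, length 2048}, targeting a single 4096 byte iovec.
	The fast path reads the first 2048 bytes straight from disk offset 4096
	into the first half of the buffer; the main loop then builds temp vecs
	for the second half and reads it from offset 16384. On return,
	*_numBytes is 4096.
*/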
3523 
3524 
3525 //	#pragma mark - public API for file systems
3526 
3527 
3528 extern "C" status_t
3529 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3530 	fs_vnode_ops* ops)
3531 {
3532 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3533 		volume, volume->id, vnodeID, privateNode));
3534 
3535 	if (privateNode == NULL)
3536 		return B_BAD_VALUE;
3537 
3538 	mutex_lock(&sVnodeMutex);
3539 
3540 	// file system integrity check:
3541 	// test if the vnode already exists and bail out if this is the case!
3542 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3543 	if (vnode != NULL) {
3544 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3545 			volume->id, vnodeID, privateNode, vnode->private_node);
3546 	}
3547 
3548 	status_t status = create_new_vnode(&vnode, volume->id, vnodeID);
3549 	if (status == B_OK) {
3550 		vnode->private_node = privateNode;
3551 		vnode->ops = ops;
3552 		vnode->busy = true;
3553 		vnode->unpublished = true;
3554 	}
3555 
3556 	TRACE(("returns: %s\n", strerror(status)));
3557 
3558 	mutex_unlock(&sVnodeMutex);
3559 	return status;
3560 }
3561 
3562 
3563 extern "C" status_t
3564 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3565 	fs_vnode_ops* ops, int type, uint32 flags)
3566 {
3567 	FUNCTION(("publish_vnode()\n"));
3568 
3569 	MutexLocker locker(sVnodeMutex);
3570 
3571 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3572 	status_t status = B_OK;
3573 
3574 	if (vnode != NULL && vnode->busy && vnode->unpublished
3575 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3576 		// already known, but not published
3577 	} else if (vnode == NULL && privateNode != NULL) {
3578 		status = create_new_vnode(&vnode, volume->id, vnodeID);
3579 		if (status == B_OK) {
3580 			vnode->private_node = privateNode;
3581 			vnode->ops = ops;
3582 			vnode->busy = true;
3583 			vnode->unpublished = true;
3584 		}
3585 	} else
3586 		status = B_BAD_VALUE;
3587 
3588 	bool publishSpecialSubNode = false;
3589 
3590 	if (status == B_OK) {
3591 		vnode->type = type;
3592 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
3593 		publishSpecialSubNode = is_special_node_type(type)
3594 			&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3595 	}
3596 
3597 
3598 	// create sub vnodes, if necessary
3599 	if (status == B_OK
3600 			&& (volume->sub_volume != NULL || publishSpecialSubNode)) {
3601 		locker.Unlock();
3602 
3603 		fs_volume* subVolume = volume;
3604 		if (volume->sub_volume != NULL) {
3605 			while (status == B_OK && subVolume->sub_volume != NULL) {
3606 				subVolume = subVolume->sub_volume;
3607 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3608 					vnode);
3609 			}
3610 		}
3611 
3612 		if (status == B_OK && publishSpecialSubNode)
3613 			status = create_special_sub_node(vnode, flags);
3614 
3615 		if (status != B_OK) {
3616 			// error -- clean up the created sub vnodes
3617 			while (subVolume->super_volume != volume) {
3618 				subVolume = subVolume->super_volume;
3619 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3620 			}
3621 		}
3622 
3623 		locker.Lock();
3624 
3625 		if (status != B_OK) {
3626 			hash_remove(sVnodeTable, vnode);
3627 			remove_vnode_from_mount_list(vnode, vnode->mount);
3628 			free(vnode);
3629 		}
3630 	}
3631 
3632 	if (status == B_OK) {
3633 		vnode->busy = false;
3634 		vnode->unpublished = false;
3635 	}
3636 
3637 	TRACE(("returns: %s\n", strerror(status)));
3638 
3639 	return status;
3640 }
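
/*	Editor's sketch (not part of the original source): file systems usually
	pair the two calls when creating a node - new_vnode() makes the node
	known to the VFS (busy and unpublished), publish_vnode() makes it
	generally available. "inode" and "gInodeOps" are hypothetical FS-side
	names:

		status_t status = new_vnode(volume, inodeID, inode, &gInodeOps);
		if (status == B_OK) {
			status = publish_vnode(volume, inodeID, inode, &gInodeOps,
				S_IFREG | 0644, 0);
		}
*/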
3641 
3642 
3643 extern "C" status_t
3644 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3645 {
3646 	struct vnode* vnode;
3647 
3648 	if (volume == NULL)
3649 		return B_BAD_VALUE;
3650 
3651 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3652 	if (status != B_OK)
3653 		return status;
3654 
3655 	// If this is a layered FS, we need to get the node cookie for the requested
3656 	// layer.
3657 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3658 		fs_vnode resolvedNode;
3659 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3660 			&resolvedNode);
3661 		if (status != B_OK) {
3662 			panic("get_vnode(): Failed to get super node for vnode %p, "
3663 				"volume: %p", vnode, volume);
3664 			put_vnode(vnode);
3665 			return status;
3666 		}
3667 
3668 		if (_privateNode != NULL)
3669 			*_privateNode = resolvedNode.private_node;
3670 	} else if (_privateNode != NULL)
3671 		*_privateNode = vnode->private_node;
3672 
3673 	return B_OK;
3674 }
3675 
3676 
3677 extern "C" status_t
3678 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3679 {
3680 	struct vnode* vnode;
3681 
3682 	mutex_lock(&sVnodeMutex);
3683 	vnode = lookup_vnode(volume->id, vnodeID);
3684 	mutex_unlock(&sVnodeMutex);
3685 
3686 	if (vnode == NULL)
3687 		return B_BAD_VALUE;
3688 
3689 	inc_vnode_ref_count(vnode);
3690 	return B_OK;
3691 }
3692 
3693 
3694 extern "C" status_t
3695 put_vnode(fs_volume* volume, ino_t vnodeID)
3696 {
3697 	struct vnode* vnode;
3698 
3699 	mutex_lock(&sVnodeMutex);
3700 	vnode = lookup_vnode(volume->id, vnodeID);
3701 	mutex_unlock(&sVnodeMutex);
3702 
3703 	if (vnode == NULL)
3704 		return B_BAD_VALUE;
3705 
3706 	dec_vnode_ref_count(vnode, false, true);
3707 	return B_OK;
3708 }
3709 
3710 
3711 extern "C" status_t
3712 remove_vnode(fs_volume* volume, ino_t vnodeID)
3713 {
3714 	MutexLocker locker(sVnodeMutex);
3715 
3716 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3717 	if (vnode == NULL)
3718 		return B_ENTRY_NOT_FOUND;
3719 
3720 	if (vnode->covered_by != NULL) {
3721 		// this vnode is in use
3722 		return B_BUSY;
3723 	}
3724 
3725 	vnode->remove = true;
3726 	bool removeUnpublished = false;
3727 
3728 	if (vnode->unpublished) {
3729 		// prepare the vnode for deletion
3730 		removeUnpublished = true;
3731 		vnode->busy = true;
3732 	}
3733 
3734 	locker.Unlock();
3735 
3736 	if (removeUnpublished) {
3737 		// If the vnode hasn't been published yet, we delete it here
3738 		atomic_add(&vnode->ref_count, -1);
3739 		free_vnode(vnode, true);
3740 	}
3741 
3742 	return B_OK;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3748 {
3749 	struct vnode* vnode;
3750 
3751 	mutex_lock(&sVnodeMutex);
3752 
3753 	vnode = lookup_vnode(volume->id, vnodeID);
3754 	if (vnode)
3755 		vnode->remove = false;
3756 
3757 	mutex_unlock(&sVnodeMutex);
3758 	return B_OK;
3759 }
3760 
3761 
3762 extern "C" status_t
3763 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3764 {
3765 	MutexLocker _(sVnodeMutex);
3766 
3767 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3768 		if (_removed != NULL)
3769 			*_removed = vnode->remove;
3770 		return B_OK;
3771 	}
3772 
3773 	return B_BAD_VALUE;
3774 }
3775 
3776 
3777 extern "C" fs_volume*
3778 volume_for_vnode(fs_vnode* _vnode)
3779 {
3780 	if (_vnode == NULL)
3781 		return NULL;
3782 
3783 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3784 	return vnode->mount->volume;
3785 }
3786 
3787 
3788 extern "C" status_t
3789 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3790 	size_t* _numBytes)
3791 {
3792 	struct file_descriptor* descriptor;
3793 	struct vnode* vnode;
3794 
3795 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3796 	if (descriptor == NULL)
3797 		return B_FILE_ERROR;
3798 
3799 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3800 		count, 0, _numBytes);
3801 
3802 	put_fd(descriptor);
3803 	return status;
3804 }
3805 
3806 
3807 extern "C" status_t
3808 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3809 	size_t* _numBytes)
3810 {
3811 	struct file_descriptor* descriptor;
3812 	struct vnode* vnode;
3813 
3814 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3815 	if (descriptor == NULL)
3816 		return B_FILE_ERROR;
3817 
3818 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3819 		count, 0, _numBytes);
3820 
3821 	put_fd(descriptor);
3822 	return status;
3823 }
3824 
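// Editor's illustration (not in the original source, kept un-compiled):
// read_pages()/write_pages() take scatter/gather vectors; on return
// *_numBytes holds the bytes actually transferred. All values here are
// placeholders.
#if 0
static status_t
example_scattered_read(int fd, off_t pos, void* a, void* b, size_t chunk)
{
	iovec vecs[2];
	vecs[0].iov_base = a;
	vecs[0].iov_len = chunk;
	vecs[1].iov_base = b;
	vecs[1].iov_len = chunk;

	size_t numBytes = 2 * chunk;
	status_t status = read_pages(fd, pos, vecs, 2, &numBytes);
	// on success, numBytes <= 2 * chunk is the amount actually read
	return status;
}
#endif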
3825 
3826 extern "C" status_t
3827 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3828 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3829 	size_t* _bytes)
3830 {
3831 	struct file_descriptor* descriptor;
3832 	struct vnode* vnode;
3833 
3834 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3835 	if (descriptor == NULL)
3836 		return B_FILE_ERROR;
3837 
3838 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3839 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3840 		false);
3841 
3842 	put_fd(descriptor);
3843 	return status;
3844 }
3845 
3846 
3847 extern "C" status_t
3848 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3849 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3850 	size_t* _bytes)
3851 {
3852 	struct file_descriptor* descriptor;
3853 	struct vnode* vnode;
3854 
3855 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3856 	if (descriptor == NULL)
3857 		return B_FILE_ERROR;
3858 
3859 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3860 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3861 		true);
3862 
3863 	put_fd(descriptor);
3864 	return status;
3865 }
3866 
3867 
3868 extern "C" status_t
3869 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3870 {
3871 	// lookup mount -- the caller is required to make sure that the mount
3872 	// won't go away
3873 	MutexLocker locker(sMountMutex);
3874 	struct fs_mount* mount = find_mount(mountID);
3875 	if (mount == NULL)
3876 		return B_BAD_VALUE;
3877 	locker.Unlock();
3878 
3879 	return mount->entry_cache.Add(dirID, name, nodeID);
3880 }
3881 
3882 
3883 extern "C" status_t
3884 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3885 {
3886 	// lookup mount -- the caller is required to make sure that the mount
3887 	// won't go away
3888 	MutexLocker locker(sMountMutex);
3889 	struct fs_mount* mount = find_mount(mountID);
3890 	if (mount == NULL)
3891 		return B_BAD_VALUE;
3892 	locker.Unlock();
3893 
3894 	return mount->entry_cache.Remove(dirID, name);
3895 }
3896 
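// Editor's illustration (not in the original source, kept un-compiled): a
// file system would typically feed the entry cache from its lookup() hook
// and call entry_cache_remove() on unlink/rename. examplefs_resolve() and
// examplefs_inode_id() are hypothetical helpers.
#if 0
static status_t
examplefs_lookup(fs_volume* volume, fs_vnode* dir, const char* name,
	ino_t* _id)
{
	status_t status = examplefs_resolve(volume, dir, name, _id);
	if (status == B_OK)
		entry_cache_add(volume->id, examplefs_inode_id(dir), name, *_id);
	return status;
}
#endif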
3897 
3898 //	#pragma mark - private VFS API
3899 //	Functions the VFS exports for other parts of the kernel
3900 
3901 
3902 /*! Acquires another reference to the vnode that has to be released
3903 	by calling vfs_put_vnode().
3904 */
3905 void
3906 vfs_acquire_vnode(struct vnode* vnode)
3907 {
3908 	inc_vnode_ref_count(vnode);
3909 }
3910 
3911 
3912 /*! This is currently called from file_cache_create() only.
3913 	It's probably a temporary solution as long as devfs requires that
3914 	fs_read_pages()/fs_write_pages() are called with the standard
3915 	open cookie and not with a device cookie.
3916 	If that's done differently, remove this call; it has no other
3917 	purpose.
3918 */
3919 extern "C" status_t
3920 vfs_get_cookie_from_fd(int fd, void** _cookie)
3921 {
3922 	struct file_descriptor* descriptor;
3923 
3924 	descriptor = get_fd(get_current_io_context(true), fd);
3925 	if (descriptor == NULL)
3926 		return B_FILE_ERROR;
3927 
3928 	*_cookie = descriptor->cookie;
3929 	return B_OK;
3930 }
3931 
3932 
3933 extern "C" int
3934 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
3935 {
3936 	*vnode = get_vnode_from_fd(fd, kernel);
3937 
3938 	if (*vnode == NULL)
3939 		return B_FILE_ERROR;
3940 
3941 	return B_NO_ERROR;
3942 }
3943 
3944 
3945 extern "C" status_t
3946 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
3947 {
3948 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3949 		path, kernel));
3950 
3951 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3952 	if (pathBuffer.InitCheck() != B_OK)
3953 		return B_NO_MEMORY;
3954 
3955 	char* buffer = pathBuffer.LockBuffer();
3956 	strlcpy(buffer, path, pathBuffer.BufferSize());
3957 
3958 	struct vnode* vnode;
3959 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3960 	if (status < B_OK)
3961 		return status;
3962 
3963 	*_vnode = vnode;
3964 	return B_OK;
3965 }
3966 
3967 
3968 extern "C" status_t
3969 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
3970 {
3971 	struct vnode* vnode;
3972 
3973 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3974 	if (status < B_OK)
3975 		return status;
3976 
3977 	*_vnode = vnode;
3978 	return B_OK;
3979 }
3980 
3981 
3982 extern "C" status_t
3983 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
3984 	const char* name, struct vnode** _vnode)
3985 {
3986 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
3987 }
3988 
3989 
3990 extern "C" void
3991 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
3992 {
3993 	*_mountID = vnode->device;
3994 	*_vnodeID = vnode->id;
3995 }
3996 
3997 
3998 /*!	Looks up a vnode with the given mount and vnode ID.
3999 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4000 	to the node.
4001 	It's currently only used by file_cache_create().
4002 */
4003 extern "C" status_t
4004 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4005 {
4006 	mutex_lock(&sVnodeMutex);
4007 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4008 	mutex_unlock(&sVnodeMutex);
4009 
4010 	if (vnode == NULL)
4011 		return B_ERROR;
4012 
4013 	*_vnode = vnode;
4014 	return B_OK;
4015 }
4016 
4017 
4018 extern "C" status_t
4019 vfs_get_fs_node_from_path(fs_volume* volume, const char* path, bool kernel,
4020 	void** _node)
4021 {
4022 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4023 		volume, path, kernel));
4024 
4025 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4026 	if (pathBuffer.InitCheck() != B_OK)
4027 		return B_NO_MEMORY;
4028 
4029 	fs_mount* mount;
4030 	status_t status = get_mount(volume->id, &mount);
4031 	if (status < B_OK)
4032 		return status;
4033 
4034 	char* buffer = pathBuffer.LockBuffer();
4035 	strlcpy(buffer, path, pathBuffer.BufferSize());
4036 
4037 	struct vnode* vnode = mount->root_vnode;
4038 
4039 	if (buffer[0] == '/')
4040 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
4041 	else {
4042 		inc_vnode_ref_count(vnode);
4043 			// vnode_path_to_vnode() releases a reference to the starting vnode
4044 		status = vnode_path_to_vnode(vnode, buffer, true, 0, kernel, &vnode,
4045 			NULL);
4046 	}
4047 
4048 	put_mount(mount);
4049 
4050 	if (status < B_OK)
4051 		return status;
4052 
4053 	if (vnode->device != volume->id) {
4054 		// wrong mount ID - must not gain access to foreign file system nodes
4055 		put_vnode(vnode);
4056 		return B_BAD_VALUE;
4057 	}
4058 
4059 	// Use get_vnode() to resolve the cookie for the right layer.
4060 	status = get_vnode(volume, vnode->id, _node);
4061 	put_vnode(vnode);
4062 
4063 	return status;
4064 }
4065 
4066 
4067 status_t
4068 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4069 	struct stat* stat, bool kernel)
4070 {
4071 	status_t status;
4072 
4073 	if (path) {
4074 		// path given: get the stat of the node referred to by (fd, path)
4075 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4076 		if (pathBuffer.InitCheck() != B_OK)
4077 			return B_NO_MEMORY;
4078 
4079 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4080 			traverseLeafLink, stat, kernel);
4081 	} else {
4082 		// no path given: get the FD and use the FD operation
4083 		struct file_descriptor* descriptor
4084 			= get_fd(get_current_io_context(kernel), fd);
4085 		if (descriptor == NULL)
4086 			return B_FILE_ERROR;
4087 
4088 		if (descriptor->ops->fd_read_stat)
4089 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4090 		else
4091 			status = EOPNOTSUPP;
4092 
4093 		put_fd(descriptor);
4094 	}
4095 
4096 	return status;
4097 }
4098 
4099 
4100 /*!	Finds the full path to the file that contains the module \a moduleName,
4101 	puts it into \a pathBuffer, and returns B_OK for success.
4102 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4103 	or \c B_ENTRY_NOT_FOUND if no file could be found.
4104 	\a pathBuffer is clobbered in any case and must not be relied on if this
4105 	function returns unsuccessfully.
4106 	\a basePath and \a pathBuffer must not point to the same space.
4107 */
4108 status_t
4109 vfs_get_module_path(const char* basePath, const char* moduleName,
4110 	char* pathBuffer, size_t bufferSize)
4111 {
4112 	struct vnode* dir;
4113 	struct vnode* file;
4114 	status_t status;
4115 	size_t length;
4116 	char* path;
4117 
4118 	if (bufferSize == 0
4119 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4120 		return B_BUFFER_OVERFLOW;
4121 
4122 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4123 	if (status < B_OK)
4124 		return status;
4125 
4126 	// the path buffer had been clobbered by the above call
4127 	length = strlcpy(pathBuffer, basePath, bufferSize);
4128 	if (pathBuffer[length - 1] != '/')
4129 		pathBuffer[length++] = '/';
4130 
4131 	path = pathBuffer + length;
4132 	bufferSize -= length;
4133 
4134 	while (moduleName) {
4135 		char* nextPath = strchr(moduleName, '/');
4136 		if (nextPath == NULL)
4137 			length = strlen(moduleName);
4138 		else {
4139 			length = nextPath - moduleName;
4140 			nextPath++;
4141 		}
4142 
4143 		if (length + 1 >= bufferSize) {
4144 			status = B_BUFFER_OVERFLOW;
4145 			goto err;
4146 		}
4147 
4148 		memcpy(path, moduleName, length);
4149 		path[length] = '\0';
4150 		moduleName = nextPath;
4151 
4152 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4153 		if (status < B_OK) {
4154 			// vnode_path_to_vnode() has already released the reference to dir
4155 			return status;
4156 		}
4157 
4158 		if (S_ISDIR(file->type)) {
4159 			// go to the next directory
4160 			path[length] = '/';
4161 			path[length + 1] = '\0';
4162 			path += length + 1;
4163 			bufferSize -= length + 1;
4164 
4165 			dir = file;
4166 		} else if (S_ISREG(file->type)) {
4167 			// it's a file so it should be what we've searched for
4168 			put_vnode(file);
4169 
4170 			return B_OK;
4171 		} else {
4172 			TRACE(("vfs_get_module_path(): something is strange here: 0x%08lx...\n",
4173 				file->type));
4174 			status = B_ERROR;
4175 			dir = file;
4176 			goto err;
4177 		}
4178 	}
4179 
4180 	// if we got here, the moduleName just pointed to a directory, not to
4181 	// a real module - what should we do in this case?
4182 	status = B_ENTRY_NOT_FOUND;
4183 
4184 err:
4185 	put_vnode(dir);
4186 	return status;
4187 }
4188 
4189 
4190 /*!	\brief Normalizes a given path.
4191 
4192 	The path must refer to an existing or non-existing entry in an existing
4193 	directory; that is, after chopping off the leaf component, the remaining
4194 	path must refer to an existing directory.
4195 
4196 	The returned path will be canonical in that it will be absolute, will not
4197 	contain any "." or ".." components or duplicate occurrences of '/'s,
4198 	and none of the directory components will be symbolic links.
4199 
4200 	Any two paths referring to the same entry will result in the same
4201 	normalized path (well, that is pretty much the definition of `normalized',
4202 	isn't it :-).
4203 
4204 	\param path The path to be normalized.
4205 	\param buffer The buffer into which the normalized path will be written.
4206 		   May be the same one as \a path.
4207 	\param bufferSize The size of \a buffer.
4208 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4209 	\param kernel \c true, if the IO context of the kernel shall be used,
4210 		   otherwise that of the team this thread belongs to. Only relevant,
4211 		   if the path is relative (to get the CWD).
4212 	\return \c B_OK if everything went fine, another error code otherwise.
4213 */
4214 status_t
4215 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4216 	bool traverseLink, bool kernel)
4217 {
4218 	if (!path || !buffer || bufferSize < 1)
4219 		return B_BAD_VALUE;
4220 
4221 	if (path != buffer) {
4222 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4223 			return B_BUFFER_OVERFLOW;
4224 	}
4225 
4226 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4227 }
4228 
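// Editor's illustration (not in the original source, kept un-compiled):
// since \a buffer may alias \a path, normalization can be done in place.
// The example path and the resulting string assume no symlinks are involved.
#if 0
static void
example_normalize(void)
{
	char path[B_PATH_NAME_LENGTH] = "/boot/./home/../home//Desktop";
	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
		// path now reads "/boot/home/Desktop"
	}
}
#endif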
4229 
4230 /*!	\brief Creates a special node in the file system.
4231 
4232 	The caller gets a reference to the newly created node (which is passed
4233 	back through \a _createdVnode) and is responsible for releasing it.
4234 
4235 	\param path The path where to create the entry for the node. Can be \c NULL,
4236 		in which case the node is created without an entry in the root FS -- it
4237 		will automatically be deleted when the last reference has been released.
4238 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4239 		the target file system will just create the node with its standard
4240 		operations. Depending on the type of the node a subnode might be created
4241 		automatically, though.
4242 	\param mode The type and permissions for the node to be created.
4243 	\param flags Flags to be passed to the creating FS.
4244 	\param kernel \c true, if called in the kernel context (relevant only if
4245 		\a path is not \c NULL and not absolute).
4246 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4247 		file system creating the node, with the private data pointer and
4248 		operations for the super node. Can be \c NULL.
4249 	\param _createdVnode Pointer to pre-allocated storage where to store the
4250 		pointer to the newly created node.
4251 	\return \c B_OK, if everything went fine, another error code otherwise.
4252 */
4253 status_t
4254 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4255 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4256 	struct vnode** _createdVnode)
4257 {
4258 	struct vnode* dirNode;
4259 	char _leaf[B_FILE_NAME_LENGTH];
4260 	char* leaf = NULL;
4261 
4262 	if (path) {
4263 		// We've got a path. Get the dir vnode and the leaf name.
4264 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4265 		if (tmpPathBuffer.InitCheck() != B_OK)
4266 			return B_NO_MEMORY;
4267 
4268 		char* tmpPath = tmpPathBuffer.LockBuffer();
4269 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4270 			return B_NAME_TOO_LONG;
4271 
4272 		// get the dir vnode and the leaf name
4273 		leaf = _leaf;
4274 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4275 		if (error != B_OK)
4276 			return error;
4277 	} else {
4278 		// No path. Create the node in the root FS.
4279 		dirNode = sRoot;
4280 		inc_vnode_ref_count(dirNode);
4281 	}
4282 
4283 	VNodePutter _(dirNode);
4284 
4285 	// check support for creating special nodes
4286 	if (!HAS_FS_CALL(dirNode, create_special_node))
4287 		return B_UNSUPPORTED;
4288 
4289 	// create the node
4290 	fs_vnode superVnode;
4291 	ino_t nodeID;
4292 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4293 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4294 	if (status != B_OK)
4295 		return status;
4296 
4297 	// lookup the node
4298 	mutex_lock(&sVnodeMutex);
4299 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4300 	mutex_unlock(&sVnodeMutex);
4301 
4302 	if (*_createdVnode == NULL) {
4303 		panic("vfs_create_special_node(): lookup of node failed");
4304 		return B_ERROR;
4305 	}
4306 
4307 	return B_OK;
4308 }
4309 
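// Editor's illustration (not in the original source, kept un-compiled):
// creating an anonymous FIFO-like node in the root FS. With path == NULL
// the node gets no entry and vanishes with its last reference; S_IFIFO is
// just an example mode.
#if 0
static status_t
example_create_anonymous_fifo(struct vnode** _vnode)
{
	status_t status = vfs_create_special_node(NULL, NULL, S_IFIFO | 0600, 0,
		true, NULL, _vnode);
	// on success the caller owns a reference and must vfs_put_vnode() it
	return status;
}
#endif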
4310 
4311 extern "C" void
4312 vfs_put_vnode(struct vnode* vnode)
4313 {
4314 	put_vnode(vnode);
4315 }
4316 
4317 
4318 extern "C" status_t
4319 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4320 {
4321 	// Get current working directory from io context
4322 	struct io_context* context = get_current_io_context(false);
4323 	status_t status = B_OK;
4324 
4325 	mutex_lock(&context->io_mutex);
4326 
4327 	if (context->cwd != NULL) {
4328 		*_mountID = context->cwd->device;
4329 		*_vnodeID = context->cwd->id;
4330 	} else
4331 		status = B_ERROR;
4332 
4333 	mutex_unlock(&context->io_mutex);
4334 	return status;
4335 }
4336 
4337 
4338 status_t
4339 vfs_unmount(dev_t mountID, uint32 flags)
4340 {
4341 	return fs_unmount(NULL, mountID, flags, true);
4342 }
4343 
4344 
4345 extern "C" status_t
4346 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4347 {
4348 	struct vnode* vnode;
4349 
4350 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4351 	if (status < B_OK)
4352 		return status;
4353 
4354 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4355 	put_vnode(vnode);
4356 	return B_OK;
4357 }
4358 
4359 
4360 extern "C" void
4361 vfs_free_unused_vnodes(int32 level)
4362 {
4363 	vnode_low_resource_handler(NULL,
4364 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, level);
4365 }
4366 
4367 
4368 extern "C" bool
4369 vfs_can_page(struct vnode* vnode, void* cookie)
4370 {
4371 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4372 
4373 	if (HAS_FS_CALL(vnode, can_page))
4374 		return FS_CALL(vnode, can_page, cookie);
4375 	return false;
4376 }
4377 
4378 
4379 extern "C" status_t
4380 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4381 	size_t count, uint32 flags, size_t* _numBytes)
4382 {
4383 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4384 		pos));
4385 
4386 #if VFS_PAGES_IO_TRACING
4387 	size_t bytesRequested = *_numBytes;
4388 #endif
4389 
4390 	IORequest request;
4391 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4392 	if (status == B_OK) {
4393 		status = vfs_vnode_io(vnode, cookie, &request);
4394 		if (status == B_OK)
4395 			status = request.Wait();
4396 		*_numBytes = request.TransferredBytes();
4397 	}
4398 
4399 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4400 		status, *_numBytes));
4401 
4402 	return status;
4403 }
4404 
4405 
4406 extern "C" status_t
4407 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4408 	size_t count, uint32 flags, size_t* _numBytes)
4409 {
4410 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4411 		pos));
4412 
4413 #if VFS_PAGES_IO_TRACING
4414 	size_t bytesRequested = *_numBytes;
4415 #endif
4416 
4417 	IORequest request;
4418 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4419 	if (status == B_OK) {
4420 		status = vfs_vnode_io(vnode, cookie, &request);
4421 		if (status == B_OK)
4422 			status = request.Wait();
4423 		*_numBytes = request.TransferredBytes();
4424 	}
4425 
4426 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4427 		status, *_numBytes));
4428 
4429 	return status;
4430 }
4431 
4432 
4433 /*!	Gets the vnode's vm_cache object. If the vnode didn't have one yet, it
4434 	will be created, but only if \a allocate is \c true.
4435 	On success, an additional reference to the returned cache is acquired
4436 	on behalf of the caller.
4437 */
4438 extern "C" status_t
4439 vfs_get_vnode_cache(struct vnode* vnode, vm_cache** _cache, bool allocate)
4440 {
4441 	if (vnode->cache != NULL) {
4442 		vnode->cache->AcquireRef();
4443 		*_cache = vnode->cache;
4444 		return B_OK;
4445 	}
4446 
4447 	mutex_lock(&sVnodeMutex);
4448 
4449 	status_t status = B_OK;
4450 
4451 	// The cache could have been created in the meantime
4452 	if (vnode->cache == NULL) {
4453 		if (allocate) {
4454 			// TODO: actually the vnode needs to be busy already here, or
4455 			//	else this won't work...
4456 			bool wasBusy = vnode->busy;
4457 			vnode->busy = true;
4458 			mutex_unlock(&sVnodeMutex);
4459 
4460 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4461 
4462 			mutex_lock(&sVnodeMutex);
4463 			vnode->busy = wasBusy;
4464 		} else
4465 			status = B_BAD_VALUE;
4466 	}
4467 
4468 	mutex_unlock(&sVnodeMutex);
4469 
4470 	if (status == B_OK) {
4471 		vnode->cache->AcquireRef();
4472 		*_cache = vnode->cache;
4473 	}
4474 
4475 	return status;
4476 }
4477 
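// Editor's illustration (not in the original source, kept un-compiled): the
// returned cache carries an extra reference that the caller must drop again.
// This sketch assumes vm_cache exposes ReleaseRef() as the counterpart of
// the AcquireRef() used above.
#if 0
static status_t
example_with_cache(struct vnode* vnode)
{
	vm_cache* cache;
	status_t status = vfs_get_vnode_cache(vnode, &cache, true);
	if (status != B_OK)
		return status;

	// ... use the cache ...

	cache->ReleaseRef();
	return B_OK;
}
#endif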
4478 
4479 status_t
4480 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4481 	file_io_vec* vecs, size_t* _count)
4482 {
4483 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
4484 
4485 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4486 }
4487 
4488 
4489 status_t
4490 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4491 {
4492 	status_t status = FS_CALL(vnode, read_stat, stat);
4493 
4494 	// fill in the st_dev and st_ino fields
4495 	if (status == B_OK) {
4496 		stat->st_dev = vnode->device;
4497 		stat->st_ino = vnode->id;
4498 		stat->st_rdev = -1;
4499 	}
4500 
4501 	return status;
4502 }
4503 
4504 
4505 status_t
4506 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4507 {
4508 	struct vnode* vnode;
4509 	status_t status = get_vnode(device, inode, &vnode, true, false);
4510 	if (status < B_OK)
4511 		return status;
4512 
4513 	status = FS_CALL(vnode, read_stat, stat);
4514 
4515 	// fill in the st_dev and st_ino fields
4516 	if (status == B_OK) {
4517 		stat->st_dev = vnode->device;
4518 		stat->st_ino = vnode->id;
4519 		stat->st_rdev = -1;
4520 	}
4521 
4522 	put_vnode(vnode);
4523 	return status;
4524 }
4525 
4526 
4527 status_t
4528 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4529 {
4530 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4531 }
4532 
4533 
4534 status_t
4535 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4536 	char* path, size_t pathLength)
4537 {
4538 	struct vnode* vnode;
4539 	status_t status;
4540 
4541 	// filter invalid leaf names
4542 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4543 		return B_BAD_VALUE;
4544 
4545 	// get the vnode matching the dir's node_ref
4546 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4547 		// special cases "." and "..": we can directly get the vnode of the
4548 		// referenced directory
4549 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4550 		leaf = NULL;
4551 	} else
4552 		status = get_vnode(device, inode, &vnode, true, false);
4553 	if (status < B_OK)
4554 		return status;
4555 
4556 	// get the directory path
4557 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4558 	put_vnode(vnode);
4559 		// we don't need the vnode anymore
4560 	if (status < B_OK)
4561 		return status;
4562 
4563 	// append the leaf name
4564 	if (leaf) {
4565 		// insert a directory separator if this is not the file system root
4566 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4567 				>= pathLength)
4568 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4569 			return B_NAME_TOO_LONG;
4570 		}
4571 	}
4572 
4573 	return B_OK;
4574 }
4575 
4576 
4577 /*!	If the given descriptor locked its vnode, that lock will be released. */
4578 void
4579 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4580 {
4581 	struct vnode* vnode = fd_vnode(descriptor);
4582 
4583 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4584 		vnode->mandatory_locked_by = NULL;
4585 }
4586 
4587 
4588 /*!	Closes all file descriptors of the specified I/O context that
4589 	have the O_CLOEXEC flag set.
4590 */
4591 void
4592 vfs_exec_io_context(io_context* context)
4593 {
4594 	uint32 i;
4595 
4596 	for (i = 0; i < context->table_size; i++) {
4597 		mutex_lock(&context->io_mutex);
4598 
4599 		struct file_descriptor* descriptor = context->fds[i];
4600 		bool remove = false;
4601 
4602 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4603 			context->fds[i] = NULL;
4604 			context->num_used_fds--;
4605 
4606 			remove = true;
4607 		}
4608 
4609 		mutex_unlock(&context->io_mutex);
4610 
4611 		if (remove) {
4612 			close_fd(descriptor);
4613 			put_fd(descriptor);
4614 		}
4615 	}
4616 }
4617 
4618 
4619 /*! Sets up a new io_context structure, and inherits the properties
4620 	of the parent io_context if one is given.
4621 */
4622 io_context*
4623 vfs_new_io_context(io_context* parentContext)
4624 {
4625 	size_t tableSize;
4626 	struct io_context* context;
4627 
4628 	context = (io_context*)malloc(sizeof(struct io_context));
4629 	if (context == NULL)
4630 		return NULL;
4631 
4632 	memset(context, 0, sizeof(struct io_context));
4633 	context->ref_count = 1;
4634 
4635 	MutexLocker parentLocker;
4636 	if (parentContext) {
4637 		parentLocker.SetTo(parentContext->io_mutex, false);
4638 		tableSize = parentContext->table_size;
4639 	} else
4640 		tableSize = DEFAULT_FD_TABLE_SIZE;
4641 
4642 	// allocate space for FDs, select infos, and the close-on-exec flags
4643 	context->fds = (file_descriptor**)malloc(
4644 		sizeof(struct file_descriptor*) * tableSize
4645 		+ sizeof(struct select_sync*) * tableSize
4646 		+ (tableSize + 7) / 8);
4647 	if (context->fds == NULL) {
4648 		free(context);
4649 		return NULL;
4650 	}
4651 
4652 	context->select_infos = (select_info**)(context->fds + tableSize);
4653 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4654 
4655 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4656 		+ sizeof(struct select_sync*) * tableSize
4657 		+ (tableSize + 7) / 8);
4658 
4659 	mutex_init(&context->io_mutex, "I/O context");
4660 
4661 	// Copy all parent file descriptors
4662 
4663 	if (parentContext) {
4664 		size_t i;
4665 
4666 		mutex_lock(&sIOContextRootLock);
4667 		context->root = parentContext->root;
4668 		if (context->root)
4669 			inc_vnode_ref_count(context->root);
4670 		mutex_unlock(&sIOContextRootLock);
4671 
4672 		context->cwd = parentContext->cwd;
4673 		if (context->cwd)
4674 			inc_vnode_ref_count(context->cwd);
4675 
4676 		for (i = 0; i < tableSize; i++) {
4677 			struct file_descriptor* descriptor = parentContext->fds[i];
4678 
4679 			if (descriptor != NULL) {
4680 				context->fds[i] = descriptor;
4681 				context->num_used_fds++;
4682 				atomic_add(&descriptor->ref_count, 1);
4683 				atomic_add(&descriptor->open_count, 1);
4684 
4685 				if (fd_close_on_exec(parentContext, i))
4686 					fd_set_close_on_exec(context, i, true);
4687 			}
4688 		}
4689 
4690 		parentLocker.Unlock();
4691 	} else {
4692 		context->root = sRoot;
4693 		context->cwd = sRoot;
4694 
4695 		if (context->root)
4696 			inc_vnode_ref_count(context->root);
4697 
4698 		if (context->cwd)
4699 			inc_vnode_ref_count(context->cwd);
4700 	}
4701 
4702 	context->table_size = tableSize;
4703 
4704 	list_init(&context->node_monitors);
4705 	context->max_monitors = DEFAULT_NODE_MONITORS;
4706 
4707 	return context;
4708 }
4709 
4710 
4711 static status_t
4712 vfs_free_io_context(io_context* context)
4713 {
4714 	uint32 i;
4715 
4716 	if (context->root)
4717 		put_vnode(context->root);
4718 
4719 	if (context->cwd)
4720 		put_vnode(context->cwd);
4721 
4722 	mutex_lock(&context->io_mutex);
4723 
4724 	for (i = 0; i < context->table_size; i++) {
4725 		if (struct file_descriptor* descriptor = context->fds[i]) {
4726 			close_fd(descriptor);
4727 			put_fd(descriptor);
4728 		}
4729 	}
4730 
4731 	mutex_destroy(&context->io_mutex);
4732 
4733 	remove_node_monitors(context);
4734 	free(context->fds);
4735 	free(context);
4736 
4737 	return B_OK;
4738 }
4739 
4740 
4741 void
4742 vfs_get_io_context(io_context* context)
4743 {
4744 	atomic_add(&context->ref_count, 1);
4745 }
4746 
4747 
4748 void
4749 vfs_put_io_context(io_context* context)
4750 {
4751 	if (atomic_add(&context->ref_count, -1) == 1)
4752 		vfs_free_io_context(context);
4753 }
4754 
4755 
4756 static status_t
4757 vfs_resize_fd_table(struct io_context* context, const int newSize)
4758 {
4759 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4760 		return EINVAL;
4761 
4762 	MutexLocker _(context->io_mutex);
4763 
4764 	int oldSize = context->table_size;
4765 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4766 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4767 
4768 	// If the tables shrink, make sure none of the fds being dropped are in use.
4769 	if (newSize < oldSize) {
4770 		for (int i = oldSize; i-- > newSize;) {
4771 			if (context->fds[i])
4772 				return EBUSY;
4773 		}
4774 	}
4775 
4776 	// store pointers to the old tables
4777 	file_descriptor** oldFDs = context->fds;
4778 	select_info** oldSelectInfos = context->select_infos;
4779 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4780 
4781 	// allocate new tables
4782 	file_descriptor** newFDs = (file_descriptor**)malloc(
4783 		sizeof(struct file_descriptor*) * newSize
4784 		+ sizeof(struct select_sync*) * newSize
4785 		+ newCloseOnExitBitmapSize);
4786 	if (newFDs == NULL)
4787 		return ENOMEM;
4788 
4789 	context->fds = newFDs;
4790 	context->select_infos = (select_info**)(context->fds + newSize);
4791 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4792 	context->table_size = newSize;
4793 
4794 	// copy entries from old tables
4795 	int toCopy = min_c(oldSize, newSize);
4796 
4797 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4798 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4799 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4800 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4801 
4802 	// clear additional entries, if the tables grow
4803 	if (newSize > oldSize) {
4804 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4805 		memset(context->select_infos + oldSize, 0,
4806 			sizeof(void*) * (newSize - oldSize));
4807 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4808 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4809 	}
4810 
4811 	free(oldFDs);
4812 
4813 	return B_OK;
4814 }
4815 
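// Editor's note on the layout used above (and in vfs_new_io_context()): the
// FD table, the select-info table, and the close-on-exec bitmap share one
// allocation. E.g. for newSize == 128 with 4-byte pointers that is
// 128 * 4 + 128 * 4 + (128 + 7) / 8 = 512 + 512 + 16 = 1040 bytes.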
4816 
4817 static status_t
4818 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4819 {
4820 	int	status = B_OK;
4821 
4822 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4823 		return EINVAL;
4824 
4825 	mutex_lock(&context->io_mutex);
4826 
4827 	if ((size_t)newSize < context->num_monitors) {
4828 		status = EBUSY;
4829 		goto out;
4830 	}
4831 	context->max_monitors = newSize;
4832 
4833 out:
4834 	mutex_unlock(&context->io_mutex);
4835 	return status;
4836 }
4837 
4838 
4839 int
4840 vfs_getrlimit(int resource, struct rlimit* rlp)
4841 {
4842 	if (!rlp)
4843 		return B_BAD_ADDRESS;
4844 
4845 	switch (resource) {
4846 		case RLIMIT_NOFILE:
4847 		{
4848 			struct io_context* context = get_current_io_context(false);
4849 			MutexLocker _(context->io_mutex);
4850 
4851 			rlp->rlim_cur = context->table_size;
4852 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4853 			return 0;
4854 		}
4855 
4856 		case RLIMIT_NOVMON:
4857 		{
4858 			struct io_context* context = get_current_io_context(false);
4859 			MutexLocker _(context->io_mutex);
4860 
4861 			rlp->rlim_cur = context->max_monitors;
4862 			rlp->rlim_max = MAX_NODE_MONITORS;
4863 			return 0;
4864 		}
4865 
4866 		default:
4867 			return B_BAD_VALUE;
4868 	}
4869 }
4870 
4871 
4872 int
4873 vfs_setrlimit(int resource, const struct rlimit* rlp)
4874 {
4875 	if (!rlp)
4876 		return B_BAD_ADDRESS;
4877 
4878 	switch (resource) {
4879 		case RLIMIT_NOFILE:
4880 			/* TODO: check getuid() */
4881 			if (rlp->rlim_max != RLIM_SAVED_MAX
4882 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4883 				return B_NOT_ALLOWED;
4884 
4885 			return vfs_resize_fd_table(get_current_io_context(false),
4886 				rlp->rlim_cur);
4887 
4888 		case RLIMIT_NOVMON:
4889 			/* TODO: check getuid() */
4890 			if (rlp->rlim_max != RLIM_SAVED_MAX
4891 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4892 				return B_NOT_ALLOWED;
4893 
4894 			return vfs_resize_monitor_table(get_current_io_context(false),
4895 				rlp->rlim_cur);
4896 
4897 		default:
4898 			return B_BAD_VALUE;
4899 	}
4900 }
4901 
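// Editor's illustration (not in the original source, kept un-compiled): from
// userland these limits are reached through the standard POSIX calls, e.g.
// to grow the FD table:
#if 0
static int
example_raise_fd_limit(rlim_t wanted)
{
	struct rlimit rl;
	if (getrlimit(RLIMIT_NOFILE, &rl) != 0)
		return -1;

	rl.rlim_cur = wanted;
		// must not exceed rl.rlim_max
	return setrlimit(RLIMIT_NOFILE, &rl);
}
#endif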
4902 
4903 status_t
4904 vfs_init(kernel_args* args)
4905 {
4906 	struct vnode dummyVnode;
4907 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
4908 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
4909 	if (sVnodeTable == NULL)
4910 		panic("vfs_init: error creating vnode hash table\n");
4911 
4912 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
4913 
4914 	struct fs_mount dummyMount;
4915 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
4916 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
4917 	if (sMountsTable == NULL)
4918 		panic("vfs_init: error creating mounts hash table\n");
4919 
4920 	node_monitor_init();
4921 
4922 	sRoot = NULL;
4923 
4924 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
4925 
4926 	if (block_cache_init() != B_OK)
4927 		return B_ERROR;
4928 
4929 #ifdef ADD_DEBUGGER_COMMANDS
4930 	// add some debugger commands
4931 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
4932 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
4933 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
4934 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
4935 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
4936 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
4937 	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
4938 #endif
4939 
4940 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
4941 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
4942 
4943 	file_map_init();
4944 
4945 	return file_cache_init();
4946 }
4947 
4948 
4949 //	#pragma mark - fd_ops implementations
4950 
4951 
4952 /*!
4953 	Calls fs_open() on the given vnode and returns a new
4954 	file descriptor for it
4955 */
4956 static int
4957 open_vnode(struct vnode* vnode, int openMode, bool kernel)
4958 {
4959 	void* cookie;
4960 	int status;
4961 
4962 	status = FS_CALL(vnode, open, openMode, &cookie);
4963 	if (status < 0)
4964 		return status;
4965 
4966 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4967 	if (status < 0) {
4968 		FS_CALL(vnode, close, cookie);
4969 		FS_CALL(vnode, free_cookie, cookie);
4970 	}
4971 	return status;
4972 }
4973 
4974 
4975 /*!
4976 	Opens the entry \a name in the given directory, creating it first
4977 	if necessary, and returns a new file descriptor for it
4978 */
4979 static int
4980 create_vnode(struct vnode* directory, const char* name, int openMode,
4981 	int perms, bool kernel)
4982 {
4983 	void* cookie;
4984 	ino_t newID;
4985 	int status = B_ERROR;
4986 	struct vnode* vnode;
4987 
4988 	// This is somewhat tricky: If the entry already exists, the FS responsible
4989 	// for the directory might not necessarily be the one also responsible for
4990 	// the node the entry refers to. So we can actually never call the create()
4991 	// hook without O_EXCL. Instead we try to look the entry up first. If it
4992 	// already exists, we just open the node (unless O_EXCL), otherwise we call
4993 	// create() with O_EXCL. This introduces a race condition, since someone
4994 	// else might have created the entry in the meantime. We hope the
4995 	// respective FS returns the correct error code and retry (up to 3 times).
4996 
4997 	for (int i = 0; i < 3 && status != B_OK; i++) {
4998 		// look the node up
4999 		status = lookup_dir_entry(directory, name, &vnode);
5000 		if (status == B_OK) {
5001 			VNodePutter putter(vnode);
5002 
5003 			if ((openMode & O_EXCL) != 0)
5004 				return B_FILE_EXISTS;
5005 
5006 			// If the node is a symlink, we have to follow it, unless
5007 			// O_NOTRAVERSE is set.
5008 			if (S_ISLNK(vnode->type) && (openMode & O_NOTRAVERSE) == 0) {
5009 				putter.Put();
5010 				char clonedName[B_FILE_NAME_LENGTH + 1];
5011 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5012 						>= B_FILE_NAME_LENGTH) {
5013 					return B_NAME_TOO_LONG;
5014 				}
5015 
5016 				inc_vnode_ref_count(directory);
5017 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5018 					kernel, &vnode, NULL);
5019 				if (status != B_OK)
5020 					return status;
5021 
5022 				putter.SetTo(vnode);
5023 			}
5024 
5025 			status = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5026 			// on success keep the vnode reference for the FD
5027 			if (status >= 0)
5028 				putter.Detach();
5029 
5030 			return status;
5031 		}
5032 
5033 		// it doesn't exist yet -- try to create it
5034 
5035 		if (!HAS_FS_CALL(directory, create))
5036 			return EROFS;
5037 
5038 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5039 			&cookie, &newID);
5040 		if (status != B_OK
5041 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5042 			return status;
5043 		}
5044 	}
5045 
5046 	if (status != B_OK)
5047 		return status;
5048 
5049 	// the node has been created successfully
5050 
5051 	mutex_lock(&sVnodeMutex);
5052 	vnode = lookup_vnode(directory->device, newID);
5053 	mutex_unlock(&sVnodeMutex);
5054 
5055 	if (vnode == NULL) {
5056 		panic("vfs: fs_create() returned success but there is no vnode, "
5057 			"mount ID %ld!\n", directory->device);
5058 		return B_BAD_VALUE;
5059 	}
5060 
5061 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
5062 		return status;
5063 
5064 	// something went wrong, clean up
5065 
5066 	FS_CALL(vnode, close, cookie);
5067 	FS_CALL(vnode, free_cookie, cookie);
5068 	put_vnode(vnode);
5069 
5070 	FS_CALL(directory, unlink, name);
5071 
5072 	return status;
5073 }
5074 
5075 
5076 /*! Calls fs open_dir() on the given vnode and returns a new
5077 	file descriptor for it
5078 */
5079 static int
5080 open_dir_vnode(struct vnode* vnode, bool kernel)
5081 {
5082 	void* cookie;
5083 	int status;
5084 
5085 	status = FS_CALL(vnode, open_dir, &cookie);
5086 	if (status < B_OK)
5087 		return status;
5088 
5089 	// file is opened, create an fd
5090 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
5091 	if (status >= 0)
5092 		return status;
5093 
5094 	FS_CALL(vnode, close_dir, cookie);
5095 	FS_CALL(vnode, free_dir_cookie, cookie);
5096 
5097 	return status;
5098 }
5099 
5100 
5101 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5102 	file descriptor for it.
5103 	Used by attr_dir_open() and attr_dir_open_fd().
5104 */
5105 static int
5106 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5107 {
5108 	void* cookie;
5109 	int status;
5110 
5111 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5112 		return EOPNOTSUPP;
5113 
5114 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5115 	if (status < 0)
5116 		return status;
5117 
5118 	// file is opened, create an fd
5119 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
5120 	if (status >= 0)
5121 		return status;
5122 
5123 	FS_CALL(vnode, close_attr_dir, cookie);
5124 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5125 
5126 	return status;
5127 }
5128 
5129 
5130 static int
5131 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5132 	int openMode, int perms, bool kernel)
5133 {
5134 	struct vnode* directory;
5135 	int status;
5136 
5137 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
5138 
5139 	// get directory to put the new file in
5140 	status = get_vnode(mountID, directoryID, &directory, true, false);
5141 	if (status < B_OK)
5142 		return status;
5143 
5144 	status = create_vnode(directory, name, openMode, perms, kernel);
5145 	put_vnode(directory);
5146 
5147 	return status;
5148 }
5149 
5150 
5151 static int
5152 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5153 {
5154 	char name[B_FILE_NAME_LENGTH];
5155 	struct vnode* directory;
5156 	int status;
5157 
5158 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
5159 
5160 	// get directory to put the new file in
5161 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5162 	if (status < 0)
5163 		return status;
5164 
5165 	status = create_vnode(directory, name, openMode, perms, kernel);
5166 
5167 	put_vnode(directory);
5168 	return status;
5169 }
5170 
5171 
5172 static int
5173 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5174 	int openMode, bool kernel)
5175 {
5176 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
5177 	struct vnode* vnode;
5178 	int status;
5179 
5180 	if (name == NULL || *name == '\0')
5181 		return B_BAD_VALUE;
5182 
5183 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5184 		mountID, directoryID, name, openMode));
5185 
5186 	// get the vnode matching the entry_ref
5187 	status = entry_ref_to_vnode(mountID, directoryID, name, traverse, kernel,
5188 		&vnode);
5189 	if (status < B_OK)
5190 		return status;
5191 
5192 	status = open_vnode(vnode, openMode, kernel);
5193 	if (status < B_OK)
5194 		put_vnode(vnode);
5195 
5196 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5197 		vnode->id, name);
5198 	return status;
5199 }
5200 
5201 
5202 static int
5203 file_open(int fd, char* path, int openMode, bool kernel)
5204 {
5205 	int status = B_OK;
5206 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
5207 
5208 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5209 		fd, path, openMode, kernel));
5210 
5211 	// get the vnode matching the vnode + path combination
5212 	struct vnode* vnode = NULL;
5213 	ino_t parentID;
5214 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
5215 	if (status != B_OK)
5216 		return status;
5217 
5218 	// open the vnode
5219 	status = open_vnode(vnode, openMode, kernel);
5220 	// put only on error -- otherwise our reference was transferred to the FD
5221 	if (status < B_OK)
5222 		put_vnode(vnode);
5223 
5224 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5225 		vnode->device, parentID, vnode->id, NULL);
5226 
5227 	return status;
5228 }
5229 
5230 
5231 static status_t
5232 file_close(struct file_descriptor* descriptor)
5233 {
5234 	struct vnode* vnode = descriptor->u.vnode;
5235 	status_t status = B_OK;
5236 
5237 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5238 
5239 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
5240 	if (HAS_FS_CALL(vnode, close)) {
5241 		status = FS_CALL(vnode, close, descriptor->cookie);
5242 	}
5243 
5244 	if (status == B_OK) {
5245 		// remove all outstanding locks for this team
5246 		release_advisory_lock(vnode, NULL);
5247 	}
5248 	return status;
5249 }
5250 
5251 
5252 static void
5253 file_free_fd(struct file_descriptor* descriptor)
5254 {
5255 	struct vnode* vnode = descriptor->u.vnode;
5256 
5257 	if (vnode != NULL) {
5258 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5259 		put_vnode(vnode);
5260 	}
5261 }
5262 
5263 
5264 static status_t
5265 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5266 	size_t* length)
5267 {
5268 	struct vnode* vnode = descriptor->u.vnode;
5269 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
5270 
5271 	if (S_ISDIR(vnode->type))
5272 		return B_IS_A_DIRECTORY;
5273 
5274 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5275 }
5276 
5277 
5278 static status_t
5279 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5280 	size_t* length)
5281 {
5282 	struct vnode* vnode = descriptor->u.vnode;
5283 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5284 
5285 	if (S_ISDIR(vnode->type))
5286 		return B_IS_A_DIRECTORY;
5287 	if (!HAS_FS_CALL(vnode, write))
5288 		return EROFS;
5289 
5290 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5291 }
5292 
5293 
5294 static off_t
5295 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5296 {
5297 	struct vnode* vnode = descriptor->u.vnode;
5298 	off_t offset;
5299 
5300 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5301 
5302 	// some kinds of files are not seekable
5303 	switch (vnode->type & S_IFMT) {
5304 		case S_IFIFO:
5305 		case S_IFSOCK:
5306 			return ESPIPE;
5307 
5308 		// The Open Group Base Specs single out only pipes, FIFOs, and sockets
5309 		// as not seekable, so we allow seeking all other file types.
5310 		case S_IFREG:
5311 		case S_IFBLK:
5312 		case S_IFDIR:
5313 		case S_IFLNK:
5314 		case S_IFCHR:
5315 			break;
5316 	}
5317 
5318 	switch (seekType) {
5319 		case SEEK_SET:
5320 			offset = 0;
5321 			break;
5322 		case SEEK_CUR:
5323 			offset = descriptor->pos;
5324 			break;
5325 		case SEEK_END:
5326 		{
5327 			// stat() the node
5328 			if (!HAS_FS_CALL(vnode, read_stat))
5329 				return EOPNOTSUPP;
5330 
5331 			struct stat stat;
5332 			status_t status = FS_CALL(vnode, read_stat, &stat);
5333 			if (status < B_OK)
5334 				return status;
5335 
5336 			offset = stat.st_size;
5337 			break;
5338 		}
5339 		default:
5340 			return B_BAD_VALUE;
5341 	}
5342 
5343 	// assumes off_t is 64 bits wide
5344 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5345 		return EOVERFLOW;
5346 
5347 	pos += offset;
5348 	if (pos < 0)
5349 		return B_BAD_VALUE;
5350 
5351 	return descriptor->pos = pos;
5352 }
5353 
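// Editor's worked example for the EOVERFLOW guard above: with pos == 10 and
// SEEK_END on a file of size LONGLONG_MAX - 5, we get
// LONGLONG_MAX - offset == 5 < pos, so the addition would wrap around and
// EOVERFLOW is returned instead.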
5354 
5355 static status_t
5356 file_select(struct file_descriptor* descriptor, uint8 event,
5357 	struct selectsync* sync)
5358 {
5359 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5360 
5361 	struct vnode* vnode = descriptor->u.vnode;
5362 
5363 	// If the FS has no select() hook, notify select() now.
5364 	if (!HAS_FS_CALL(vnode, select))
5365 		return notify_select_event(sync, event);
5366 
5367 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5368 }
5369 
5370 
5371 static status_t
5372 file_deselect(struct file_descriptor* descriptor, uint8 event,
5373 	struct selectsync* sync)
5374 {
5375 	struct vnode* vnode = descriptor->u.vnode;
5376 
5377 	if (!HAS_FS_CALL(vnode, deselect))
5378 		return B_OK;
5379 
5380 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5381 }
5382 
5383 
5384 static status_t
5385 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5386 	bool kernel)
5387 {
5388 	struct vnode* vnode;
5389 	status_t status;
5390 
5391 	if (name == NULL || *name == '\0')
5392 		return B_BAD_VALUE;
5393 
5394 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5395 		"perms = %d)\n", mountID, parentID, name, perms));
5396 
5397 	status = get_vnode(mountID, parentID, &vnode, true, false);
5398 	if (status < B_OK)
5399 		return status;
5400 
5401 	if (HAS_FS_CALL(vnode, create_dir))
5402 		status = FS_CALL(vnode, create_dir, name, perms);
5403 	else
5404 		status = EROFS;
5405 
5406 	put_vnode(vnode);
5407 	return status;
5408 }
5409 
5410 
5411 static status_t
5412 dir_create(int fd, char* path, int perms, bool kernel)
5413 {
5414 	char filename[B_FILE_NAME_LENGTH];
5415 	struct vnode* vnode;
5416 	status_t status;
5417 
5418 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
5419 
5420 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5421 	if (status < 0)
5422 		return status;
5423 
5424 	if (HAS_FS_CALL(vnode, create_dir)) {
5425 		status = FS_CALL(vnode, create_dir, filename, perms);
5426 	} else
5427 		status = EROFS;
5428 
5429 	put_vnode(vnode);
5430 	return status;
5431 }
5432 
5433 
5434 static int
5435 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5436 {
5437 	struct vnode* vnode;
5438 	int status;
5439 
5440 	FUNCTION(("dir_open_entry_ref()\n"));
5441 
5442 	if (name && *name == '\0')
5443 		return B_BAD_VALUE;
5444 
5445 	// get the vnode matching the entry_ref/node_ref
5446 	if (name) {
5447 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5448 			&vnode);
5449 	} else
5450 		status = get_vnode(mountID, parentID, &vnode, true, false);
5451 	if (status < B_OK)
5452 		return status;
5453 
5454 	status = open_dir_vnode(vnode, kernel);
5455 	if (status < B_OK)
5456 		put_vnode(vnode);
5457 
5458 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5459 		vnode->id, name);
5460 	return status;
5461 }
5462 
5463 
5464 static int
5465 dir_open(int fd, char* path, bool kernel)
5466 {
5467 	int status = B_OK;
5468 
5469 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
5470 
5471 	// get the vnode matching the vnode + path combination
5472 	struct vnode* vnode = NULL;
5473 	ino_t parentID;
5474 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
5475 	if (status != B_OK)
5476 		return status;
5477 
5478 	// open the dir
5479 	status = open_dir_vnode(vnode, kernel);
5480 	if (status < B_OK)
5481 		put_vnode(vnode);
5482 
5483 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
5484 	return status;
5485 }
5486 
5487 
5488 static status_t
5489 dir_close(struct file_descriptor* descriptor)
5490 {
5491 	struct vnode* vnode = descriptor->u.vnode;
5492 
5493 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5494 
5495 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
5496 	if (HAS_FS_CALL(vnode, close_dir))
5497 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5498 
5499 	return B_OK;
5500 }
5501 
5502 
5503 static void
5504 dir_free_fd(struct file_descriptor* descriptor)
5505 {
5506 	struct vnode* vnode = descriptor->u.vnode;
5507 
5508 	if (vnode != NULL) {
5509 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5510 		put_vnode(vnode);
5511 	}
5512 }
5513 
5514 
5515 static status_t
5516 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5517 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5518 {
5519 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5520 		bufferSize, _count);
5521 }
5522 
5523 
5524 static status_t
5525 fix_dirent(struct vnode* parent, struct dirent* userEntry,
5526 	struct io_context* ioContext, uint32* _length)
5527 {
5528 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
5529 	struct dirent* entry;
5530 
5531 	if (IS_USER_ADDRESS(userEntry)) {
5532 		entry = (struct dirent*)buffer;
5533 		if (user_memcpy(entry, userEntry, sizeof(struct dirent) - 1) != B_OK)
5534 			return B_BAD_ADDRESS;
5535 
5536 		ASSERT(entry->d_reclen >= sizeof(struct dirent));
5537 
5538 		if (user_memcpy(entry->d_name, userEntry->d_name,
5539 				entry->d_reclen - sizeof(struct dirent)) != B_OK)
5540 			return B_BAD_ADDRESS;
5541 	} else
5542 		entry = userEntry;
5543 
5544 	*_length = entry->d_reclen;
5545 
5546 	// set d_pdev and d_pino
5547 	entry->d_pdev = parent->device;
5548 	entry->d_pino = parent->id;
5549 
5550 	// If this is the ".." entry and the directory is the root of a FS,
5551 	// we need to replace d_dev and d_ino with the actual values.
5552 	if (strcmp(entry->d_name, "..") == 0
5553 		&& parent->mount->root_vnode == parent
5554 		&& parent->mount->covers_vnode) {
5555 		inc_vnode_ref_count(parent);
5556 			// vnode_path_to_vnode() puts the node
5557 
5558 		// Make sure the IO context root is not bypassed.
5559 		if (parent == ioContext->root) {
5560 			entry->d_dev = parent->device;
5561 			entry->d_ino = parent->id;
5562 		} else {
5563 			// ".." is guaranteed not to be clobbered by this call
5564 			struct vnode* vnode;
5565 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5566 				ioContext, &vnode, NULL);
5567 
5568 			if (status == B_OK) {
5569 				entry->d_dev = vnode->device;
5570 				entry->d_ino = vnode->id;
5571 			}
5572 		}
5573 	} else {
5574 		// resolve mount points
5575 		struct vnode* vnode = NULL;
5576 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, true,
5577 			false);
5578 		if (status != B_OK)
5579 			return status;
5580 
5581 		mutex_lock(&sVnodeCoveredByMutex);
5582 		if (vnode->covered_by) {
5583 			entry->d_dev = vnode->covered_by->device;
5584 			entry->d_ino = vnode->covered_by->id;
5585 		}
5586 		mutex_unlock(&sVnodeCoveredByMutex);
5587 
5588 		put_vnode(vnode);
5589 	}
5590 
5591 	// copy back from userland buffer if needed
5592 	if (entry != userEntry)
5593 		return user_memcpy(userEntry, entry, sizeof(struct dirent) - 1);
5594 
5595 	return B_OK;
5596 }
5597 
5598 
5599 static status_t
5600 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5601 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5602 {
5603 	if (!HAS_FS_CALL(vnode, read_dir))
5604 		return EOPNOTSUPP;
5605 
5606 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5607 		_count);
5608 	if (error != B_OK)
5609 		return error;
5610 
5611 	// we need to adjust the read dirents
5612 	uint32 count = *_count;
5613 	if (count > 0) {
5614 		for (uint32 i = 0; i < count; i++) {
5615 			uint32 length;
5616 			error = fix_dirent(vnode, buffer, ioContext, &length);
5617 			if (error != B_OK)
5618 				return error;
5619 
5620 			buffer = (struct dirent*)((uint8*)buffer + length);
5621 		}
5622 	}
5623 
5624 	return error;
5625 }
5626 
5627 
5628 static status_t
5629 dir_rewind(struct file_descriptor* descriptor)
5630 {
5631 	struct vnode* vnode = descriptor->u.vnode;
5632 
5633 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5634 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5635 	}
5636 
5637 	return EOPNOTSUPP;
5638 }
5639 
5640 
5641 static status_t
5642 dir_remove(int fd, char* path, bool kernel)
5643 {
5644 	char name[B_FILE_NAME_LENGTH];
5645 	struct vnode* directory;
5646 	status_t status;
5647 
5648 	if (path != NULL) {
5649 		// we need to make sure our path name doesn't end in "/", ".", or ".."
5650 		char* lastSlash = strrchr(path, '/');
5651 		if (lastSlash != NULL) {
5652 			char* leaf = lastSlash + 1;
5653 			if (!strcmp(leaf, ".."))
5654 				return B_NOT_ALLOWED;
5655 
5656 			// omit multiple slashes
5657 			while (lastSlash > path && lastSlash[-1] == '/') {
5658 				lastSlash--;
5659 			}
5660 
5661 			if (!leaf[0]
5662 				|| !strcmp(leaf, ".")) {
5663 				// "name/" -> "name", or "name/." -> "name"
5664 				lastSlash[0] = '\0';
5665 			}
5666 		}
5667 
5668 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5669 			return B_NOT_ALLOWED;
5670 	}
5671 
5672 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5673 	if (status < B_OK)
5674 		return status;
5675 
5676 	if (HAS_FS_CALL(directory, remove_dir))
5677 		status = FS_CALL(directory, remove_dir, name);
5678 	else
5679 		status = EROFS;
5680 
5681 	put_vnode(directory);
5682 	return status;
5683 }
5684 
5685 
5686 static status_t
5687 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5688 	size_t length)
5689 {
5690 	struct vnode* vnode = descriptor->u.vnode;
5691 
5692 	if (HAS_FS_CALL(vnode, ioctl))
5693 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5694 
5695 	return EOPNOTSUPP;
5696 }
5697 
5698 
5699 static status_t
5700 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5701 {
5702 	struct flock flock;
5703 
5704 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5705 		fd, op, argument, kernel ? "kernel" : "user"));
5706 
5707 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5708 		fd);
5709 	if (descriptor == NULL)
5710 		return B_FILE_ERROR;
5711 
5712 	struct vnode* vnode = fd_vnode(descriptor);
5713 
5714 	status_t status = B_OK;
5715 
5716 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5717 		if (descriptor->type != FDTYPE_FILE)
5718 			status = B_BAD_VALUE;
5719 		else if (user_memcpy(&flock, (struct flock*)argument,
5720 				sizeof(struct flock)) < B_OK)
5721 			status = B_BAD_ADDRESS;
5722 
5723 		if (status != B_OK) {
5724 			put_fd(descriptor);
5725 			return status;
5726 		}
5727 	}
5728 
5729 	switch (op) {
5730 		case F_SETFD:
5731 		{
5732 			struct io_context* context = get_current_io_context(kernel);
5733 			// Set file descriptor flags
5734 
			// FD_CLOEXEC is the only flag available at this time
5736 			mutex_lock(&context->io_mutex);
5737 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5738 			mutex_unlock(&context->io_mutex);
5739 
5740 			status = B_OK;
5741 			break;
5742 		}
5743 
5744 		case F_GETFD:
5745 		{
5746 			struct io_context* context = get_current_io_context(kernel);
5747 
5748 			// Get file descriptor flags
5749 			mutex_lock(&context->io_mutex);
5750 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5751 			mutex_unlock(&context->io_mutex);
5752 			break;
5753 		}
5754 
5755 		case F_SETFL:
5756 			// Set file descriptor open mode
5757 
5758 			// we only accept changes to O_APPEND and O_NONBLOCK
5759 			argument &= O_APPEND | O_NONBLOCK;
5760 			if (descriptor->ops->fd_set_flags != NULL) {
5761 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5762 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5763 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5764 					(int)argument);
5765 			} else
5766 				status = EOPNOTSUPP;
5767 
5768 			if (status == B_OK) {
5769 				// update this descriptor's open_mode field
5770 				descriptor->open_mode = (descriptor->open_mode
5771 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5772 			}
5773 
5774 			break;
5775 
5776 		case F_GETFL:
5777 			// Get file descriptor open mode
5778 			status = descriptor->open_mode;
5779 			break;
5780 
5781 		case F_DUPFD:
5782 		{
5783 			struct io_context* context = get_current_io_context(kernel);
5784 
5785 			status = new_fd_etc(context, descriptor, (int)argument);
5786 			if (status >= 0) {
5787 				mutex_lock(&context->io_mutex);
				fd_set_close_on_exec(context, status, false);
5789 				mutex_unlock(&context->io_mutex);
5790 
5791 				atomic_add(&descriptor->ref_count, 1);
5792 			}
5793 			break;
5794 		}
5795 
5796 		case F_GETLK:
5797 			if (vnode != NULL) {
5798 				status = get_advisory_lock(vnode, &flock);
5799 				if (status == B_OK) {
5800 					// copy back flock structure
5801 					status = user_memcpy((struct flock*)argument, &flock,
5802 						sizeof(struct flock));
5803 				}
5804 			} else
5805 				status = B_BAD_VALUE;
5806 			break;
5807 
5808 		case F_SETLK:
5809 		case F_SETLKW:
5810 			status = normalize_flock(descriptor, &flock);
5811 			if (status < B_OK)
5812 				break;
5813 
5814 			if (vnode == NULL) {
5815 				status = B_BAD_VALUE;
5816 			} else if (flock.l_type == F_UNLCK) {
5817 				status = release_advisory_lock(vnode, &flock);
5818 			} else {
5819 				// the open mode must match the lock type
5820 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5821 						&& flock.l_type == F_WRLCK)
5822 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5823 						&& flock.l_type == F_RDLCK))
5824 					status = B_FILE_ERROR;
5825 				else {
5826 					status = acquire_advisory_lock(vnode, -1,
5827 						&flock, op == F_SETLKW);
5828 				}
5829 			}
5830 			break;
5831 
5832 		// ToDo: add support for more ops?
5833 
5834 		default:
5835 			status = B_BAD_VALUE;
5836 	}
5837 
5838 	put_fd(descriptor);
5839 	return status;
5840 }
5841 
5842 
5843 static status_t
5844 common_sync(int fd, bool kernel)
5845 {
5846 	struct file_descriptor* descriptor;
5847 	struct vnode* vnode;
5848 	status_t status;
5849 
5850 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
5851 
5852 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5853 	if (descriptor == NULL)
5854 		return B_FILE_ERROR;
5855 
5856 	if (HAS_FS_CALL(vnode, fsync))
5857 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5858 	else
5859 		status = EOPNOTSUPP;
5860 
5861 	put_fd(descriptor);
5862 	return status;
5863 }
5864 
5865 
5866 static status_t
5867 common_lock_node(int fd, bool kernel)
5868 {
5869 	struct file_descriptor* descriptor;
5870 	struct vnode* vnode;
5871 
5872 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5873 	if (descriptor == NULL)
5874 		return B_FILE_ERROR;
5875 
5876 	status_t status = B_OK;
5877 
	// We need to set the lock atomically - someone
	// else might set one at the same time
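	// atomic_pointer_test_and_set() stores our descriptor only if the field
	// still contains NULL and returns the previous value; a non-NULL result
	// means another descriptor already holds the node lock.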
5880 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
5881 			(file_descriptor*)NULL) != NULL)
5882 		status = B_BUSY;
5883 
5884 	put_fd(descriptor);
5885 	return status;
5886 }
5887 
5888 
5889 static status_t
5890 common_unlock_node(int fd, bool kernel)
5891 {
5892 	struct file_descriptor* descriptor;
5893 	struct vnode* vnode;
5894 
5895 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5896 	if (descriptor == NULL)
5897 		return B_FILE_ERROR;
5898 
5899 	status_t status = B_OK;
5900 
	// We need to clear the lock atomically - someone
	// else might set or clear it at the same time
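	// NULL is swapped in only if this descriptor is the current holder;
	// any other value means the caller doesn't own the lock and we return
	// B_BAD_VALUE.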
5903 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
5904 			(file_descriptor*)NULL, descriptor) != descriptor)
5905 		status = B_BAD_VALUE;
5906 
5907 	put_fd(descriptor);
5908 	return status;
5909 }
5910 
5911 
5912 static status_t
5913 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
5914 	bool kernel)
5915 {
5916 	struct vnode* vnode;
5917 	status_t status;
5918 
5919 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
5920 	if (status < B_OK)
5921 		return status;
5922 
5923 	if (HAS_FS_CALL(vnode, read_symlink)) {
5924 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
5925 	} else
5926 		status = B_BAD_VALUE;
5927 
5928 	put_vnode(vnode);
5929 	return status;
5930 }
5931 
5932 
5933 static status_t
5934 common_create_symlink(int fd, char* path, const char* toPath, int mode,
5935 	bool kernel)
5936 {
5937 	// path validity checks have to be in the calling function!
5938 	char name[B_FILE_NAME_LENGTH];
5939 	struct vnode* vnode;
5940 	status_t status;
5941 
5942 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
5943 
5944 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
5945 	if (status < B_OK)
5946 		return status;
5947 
5948 	if (HAS_FS_CALL(vnode, create_symlink))
5949 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
5950 	else
5951 		status = EROFS;
5952 
5953 	put_vnode(vnode);
5954 
5955 	return status;
5956 }
5957 
5958 
5959 static status_t
5960 common_create_link(char* path, char* toPath, bool kernel)
5961 {
5962 	// path validity checks have to be in the calling function!
5963 	char name[B_FILE_NAME_LENGTH];
5964 	struct vnode* directory;
5965 	struct vnode* vnode;
5966 	status_t status;
5967 
5968 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
5969 		toPath, kernel));
5970 
5971 	status = path_to_dir_vnode(path, &directory, name, kernel);
5972 	if (status < B_OK)
5973 		return status;
5974 
5975 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
5976 	if (status < B_OK)
5977 		goto err;
5978 
5979 	if (directory->mount != vnode->mount) {
5980 		status = B_CROSS_DEVICE_LINK;
5981 		goto err1;
5982 	}
5983 
5984 	if (HAS_FS_CALL(directory, link))
5985 		status = FS_CALL(directory, link, name, vnode);
5986 	else
5987 		status = EROFS;
5988 
5989 err1:
5990 	put_vnode(vnode);
5991 err:
5992 	put_vnode(directory);
5993 
5994 	return status;
5995 }
5996 
5997 
5998 static status_t
5999 common_unlink(int fd, char* path, bool kernel)
6000 {
6001 	char filename[B_FILE_NAME_LENGTH];
6002 	struct vnode* vnode;
6003 	status_t status;
6004 
6005 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
6006 
6007 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6008 	if (status < 0)
6009 		return status;
6010 
6011 	if (HAS_FS_CALL(vnode, unlink))
6012 		status = FS_CALL(vnode, unlink, filename);
6013 	else
6014 		status = EROFS;
6015 
6016 	put_vnode(vnode);
6017 
6018 	return status;
6019 }
6020 
6021 
6022 static status_t
6023 common_access(char* path, int mode, bool kernel)
6024 {
6025 	struct vnode* vnode;
6026 	status_t status;
6027 
6028 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
6029 	if (status < B_OK)
6030 		return status;
6031 
6032 	if (HAS_FS_CALL(vnode, access))
6033 		status = FS_CALL(vnode, access, mode);
6034 	else
6035 		status = B_OK;
6036 
6037 	put_vnode(vnode);
6038 
6039 	return status;
6040 }
6041 
6042 
6043 static status_t
6044 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6045 {
6046 	struct vnode* fromVnode;
6047 	struct vnode* toVnode;
6048 	char fromName[B_FILE_NAME_LENGTH];
6049 	char toName[B_FILE_NAME_LENGTH];
6050 	status_t status;
6051 
6052 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6053 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6054 
6055 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6056 	if (status < B_OK)
6057 		return status;
6058 
6059 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6060 	if (status < B_OK)
6061 		goto err1;
6062 
6063 	if (fromVnode->device != toVnode->device) {
6064 		status = B_CROSS_DEVICE_LINK;
6065 		goto err2;
6066 	}
6067 
6068 	if (HAS_FS_CALL(fromVnode, rename))
6069 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6070 	else
6071 		status = EROFS;
6072 
6073 err2:
6074 	put_vnode(toVnode);
6075 err1:
6076 	put_vnode(fromVnode);
6077 
6078 	return status;
6079 }
6080 
6081 
6082 static status_t
6083 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6084 {
6085 	struct vnode* vnode = descriptor->u.vnode;
6086 
6087 	FUNCTION(("common_read_stat: stat %p\n", stat));
6088 
6089 	status_t status = FS_CALL(vnode, read_stat, stat);
6090 
6091 	// fill in the st_dev and st_ino fields
6092 	if (status == B_OK) {
6093 		stat->st_dev = vnode->device;
6094 		stat->st_ino = vnode->id;
6095 		stat->st_rdev = -1;
6096 	}
6097 
6098 	return status;
6099 }
6100 
6101 
6102 static status_t
6103 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6104 	int statMask)
6105 {
6106 	struct vnode* vnode = descriptor->u.vnode;
6107 
6108 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode,
6109 		stat, statMask));
6110 
6111 	if (!HAS_FS_CALL(vnode, write_stat))
6112 		return EROFS;
6113 
6114 	return FS_CALL(vnode, write_stat, stat, statMask);
6115 }
6116 
6117 
6118 static status_t
6119 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6120 	struct stat* stat, bool kernel)
6121 {
6122 	struct vnode* vnode;
6123 	status_t status;
6124 
6125 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6126 		stat));
6127 
6128 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6129 		kernel);
6130 	if (status < 0)
6131 		return status;
6132 
6133 	status = FS_CALL(vnode, read_stat, stat);
6134 
6135 	// fill in the st_dev and st_ino fields
6136 	if (status == B_OK) {
6137 		stat->st_dev = vnode->device;
6138 		stat->st_ino = vnode->id;
6139 		stat->st_rdev = -1;
6140 	}
6141 
6142 	put_vnode(vnode);
6143 	return status;
6144 }
6145 
6146 
6147 static status_t
6148 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6149 	const struct stat* stat, int statMask, bool kernel)
6150 {
6151 	struct vnode* vnode;
6152 	status_t status;
6153 
6154 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6155 
6156 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
6157 	if (status < 0)
6158 		return status;
6159 
6160 	if (HAS_FS_CALL(vnode, write_stat))
6161 		status = FS_CALL(vnode, write_stat, stat, statMask);
6162 	else
6163 		status = EROFS;
6164 
6165 	put_vnode(vnode);
6166 
6167 	return status;
6168 }
6169 
6170 
6171 static int
6172 attr_dir_open(int fd, char* path, bool kernel)
6173 {
6174 	struct vnode* vnode;
6175 	int status;
6176 
6177 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
6178 
6179 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6180 	if (status < B_OK)
6181 		return status;
6182 
6183 	status = open_attr_dir_vnode(vnode, kernel);
6184 	if (status < 0)
6185 		put_vnode(vnode);
6186 
6187 	return status;
6188 }
6189 
6190 
6191 static status_t
6192 attr_dir_close(struct file_descriptor* descriptor)
6193 {
6194 	struct vnode* vnode = descriptor->u.vnode;
6195 
6196 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6197 
6198 	if (HAS_FS_CALL(vnode, close_attr_dir))
6199 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6200 
6201 	return B_OK;
6202 }
6203 
6204 
6205 static void
6206 attr_dir_free_fd(struct file_descriptor* descriptor)
6207 {
6208 	struct vnode* vnode = descriptor->u.vnode;
6209 
6210 	if (vnode != NULL) {
6211 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6212 		put_vnode(vnode);
6213 	}
6214 }
6215 
6216 
6217 static status_t
6218 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6219 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6220 {
6221 	struct vnode* vnode = descriptor->u.vnode;
6222 
6223 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6224 
6225 	if (HAS_FS_CALL(vnode, read_attr_dir))
6226 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6227 			bufferSize, _count);
6228 
6229 	return EOPNOTSUPP;
6230 }
6231 
6232 
6233 static status_t
6234 attr_dir_rewind(struct file_descriptor* descriptor)
6235 {
6236 	struct vnode* vnode = descriptor->u.vnode;
6237 
6238 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6239 
6240 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6241 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6242 
6243 	return EOPNOTSUPP;
6244 }
6245 
6246 
6247 static int
6248 attr_create(int fd, const char* name, uint32 type, int openMode, bool kernel)
6249 {
6250 	struct vnode* vnode;
6251 	void* cookie;
6252 	int status;
6253 
6254 	if (name == NULL || *name == '\0')
6255 		return B_BAD_VALUE;
6256 
6257 	vnode = get_vnode_from_fd(fd, kernel);
6258 	if (vnode == NULL)
6259 		return B_FILE_ERROR;
6260 
6261 	if (!HAS_FS_CALL(vnode, create_attr)) {
6262 		status = EROFS;
6263 		goto err;
6264 	}
6265 
6266 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6267 	if (status < B_OK)
6268 		goto err;
6269 
6270 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
6271 		return status;
6272 
6273 	FS_CALL(vnode, close_attr, cookie);
6274 	FS_CALL(vnode, free_attr_cookie, cookie);
6275 
6276 	FS_CALL(vnode, remove_attr, name);
6277 
6278 err:
6279 	put_vnode(vnode);
6280 
6281 	return status;
6282 }
6283 
6284 
6285 static int
6286 attr_open(int fd, const char* name, int openMode, bool kernel)
6287 {
6288 	struct vnode* vnode;
6289 	void* cookie;
6290 	int status;
6291 
6292 	if (name == NULL || *name == '\0')
6293 		return B_BAD_VALUE;
6294 
6295 	vnode = get_vnode_from_fd(fd, kernel);
6296 	if (vnode == NULL)
6297 		return B_FILE_ERROR;
6298 
6299 	if (!HAS_FS_CALL(vnode, open_attr)) {
6300 		status = EOPNOTSUPP;
6301 		goto err;
6302 	}
6303 
6304 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6305 	if (status < B_OK)
6306 		goto err;
6307 
6308 	// now we only need a file descriptor for this attribute and we're done
6309 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
6310 		return status;
6311 
6312 	FS_CALL(vnode, close_attr, cookie);
6313 	FS_CALL(vnode, free_attr_cookie, cookie);
6314 
6315 err:
6316 	put_vnode(vnode);
6317 
6318 	return status;
6319 }
6320 
6321 
6322 static status_t
6323 attr_close(struct file_descriptor* descriptor)
6324 {
6325 	struct vnode* vnode = descriptor->u.vnode;
6326 
6327 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6328 
6329 	if (HAS_FS_CALL(vnode, close_attr))
6330 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6331 
6332 	return B_OK;
6333 }
6334 
6335 
6336 static void
6337 attr_free_fd(struct file_descriptor* descriptor)
6338 {
6339 	struct vnode* vnode = descriptor->u.vnode;
6340 
6341 	if (vnode != NULL) {
6342 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6343 		put_vnode(vnode);
6344 	}
6345 }
6346 
6347 
6348 static status_t
6349 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6350 	size_t* length)
6351 {
6352 	struct vnode* vnode = descriptor->u.vnode;
6353 
6354 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6355 		*length));
6356 
6357 	if (!HAS_FS_CALL(vnode, read_attr))
6358 		return EOPNOTSUPP;
6359 
6360 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6361 }
6362 
6363 
6364 static status_t
6365 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6366 	size_t* length)
6367 {
6368 	struct vnode* vnode = descriptor->u.vnode;
6369 
6370 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6371 	if (!HAS_FS_CALL(vnode, write_attr))
6372 		return EOPNOTSUPP;
6373 
6374 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6375 }
6376 
6377 
6378 static off_t
6379 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6380 {
6381 	off_t offset;
6382 
6383 	switch (seekType) {
6384 		case SEEK_SET:
6385 			offset = 0;
6386 			break;
6387 		case SEEK_CUR:
6388 			offset = descriptor->pos;
6389 			break;
6390 		case SEEK_END:
6391 		{
6392 			struct vnode* vnode = descriptor->u.vnode;
			if (!HAS_FS_CALL(vnode, read_attr_stat))
6394 				return EOPNOTSUPP;
6395 
6396 			struct stat stat;
6397 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6398 				&stat);
6399 			if (status < B_OK)
6400 				return status;
6401 
6402 			offset = stat.st_size;
6403 			break;
6404 		}
6405 		default:
6406 			return B_BAD_VALUE;
6407 	}
6408 
6409 	// assumes off_t is 64 bits wide
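	// (i.e. offset + pos would exceed LONGLONG_MAX and wrap negative)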
6410 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6411 		return EOVERFLOW;
6412 
6413 	pos += offset;
6414 	if (pos < 0)
6415 		return B_BAD_VALUE;
6416 
6417 	return descriptor->pos = pos;
6418 }
6419 
6420 
6421 static status_t
6422 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6423 {
6424 	struct vnode* vnode = descriptor->u.vnode;
6425 
6426 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6427 
6428 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6429 		return EOPNOTSUPP;
6430 
6431 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6432 }
6433 
6434 
6435 static status_t
6436 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6437 	int statMask)
6438 {
6439 	struct vnode* vnode = descriptor->u.vnode;
6440 
6441 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6442 
6443 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6444 		return EROFS;
6445 
6446 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6447 }
6448 
6449 
6450 static status_t
6451 attr_remove(int fd, const char* name, bool kernel)
6452 {
6453 	struct file_descriptor* descriptor;
6454 	struct vnode* vnode;
6455 	status_t status;
6456 
6457 	if (name == NULL || *name == '\0')
6458 		return B_BAD_VALUE;
6459 
6460 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6461 		kernel));
6462 
6463 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6464 	if (descriptor == NULL)
6465 		return B_FILE_ERROR;
6466 
6467 	if (HAS_FS_CALL(vnode, remove_attr))
6468 		status = FS_CALL(vnode, remove_attr, name);
6469 	else
6470 		status = EROFS;
6471 
6472 	put_fd(descriptor);
6473 
6474 	return status;
6475 }
6476 
6477 
6478 static status_t
6479 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6480 	bool kernel)
6481 {
6482 	struct file_descriptor* fromDescriptor;
6483 	struct file_descriptor* toDescriptor;
6484 	struct vnode* fromVnode;
6485 	struct vnode* toVnode;
6486 	status_t status;
6487 
6488 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6489 		|| *toName == '\0')
6490 		return B_BAD_VALUE;
6491 
6492 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6493 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6494 
6495 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6496 	if (fromDescriptor == NULL)
6497 		return B_FILE_ERROR;
6498 
6499 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6500 	if (toDescriptor == NULL) {
6501 		status = B_FILE_ERROR;
6502 		goto err;
6503 	}
6504 
6505 	// are the files on the same volume?
6506 	if (fromVnode->device != toVnode->device) {
6507 		status = B_CROSS_DEVICE_LINK;
6508 		goto err1;
6509 	}
6510 
6511 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6512 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6513 	} else
6514 		status = EROFS;
6515 
6516 err1:
6517 	put_fd(toDescriptor);
6518 err:
6519 	put_fd(fromDescriptor);
6520 
6521 	return status;
6522 }
6523 
6524 
6525 static status_t
6526 index_dir_open(dev_t mountID, bool kernel)
6527 {
6528 	struct fs_mount* mount;
6529 	void* cookie;
6530 
6531 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6532 
6533 	status_t status = get_mount(mountID, &mount);
6534 	if (status < B_OK)
6535 		return status;
6536 
6537 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6538 		status = EOPNOTSUPP;
6539 		goto error;
6540 	}
6541 
6542 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6543 	if (status < B_OK)
6544 		goto error;
6545 
6546 	// get fd for the index directory
6547 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
6548 	if (status < 0) {
6549 		// something went wrong
6550 		FS_MOUNT_CALL(mount, close_index_dir, cookie);
6551 		FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6552 
6553 error:
6554 		put_mount(mount);
6555 	}
6556 	return status;
6557 }
6558 
6559 
6560 static status_t
6561 index_dir_close(struct file_descriptor* descriptor)
6562 {
6563 	struct fs_mount* mount = descriptor->u.mount;
6564 
6565 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6566 
6567 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6568 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6569 
6570 	return B_OK;
6571 }
6572 
6573 
6574 static void
6575 index_dir_free_fd(struct file_descriptor* descriptor)
6576 {
6577 	struct fs_mount* mount = descriptor->u.mount;
6578 
6579 	if (mount != NULL) {
6580 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6581 		put_mount(mount);
6582 	}
6583 }
6584 
6585 
6586 static status_t
6587 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6588 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6589 {
6590 	struct fs_mount* mount = descriptor->u.mount;
6591 
6592 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6593 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6594 			bufferSize, _count);
6595 	}
6596 
6597 	return EOPNOTSUPP;
6598 }
6599 
6600 
6601 static status_t
6602 index_dir_rewind(struct file_descriptor* descriptor)
6603 {
6604 	struct fs_mount* mount = descriptor->u.mount;
6605 
6606 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6607 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6608 
6609 	return EOPNOTSUPP;
6610 }
6611 
6612 
6613 static status_t
6614 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6615 	bool kernel)
6616 {
6617 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6618 		name, kernel));
6619 
6620 	struct fs_mount* mount;
6621 	status_t status = get_mount(mountID, &mount);
6622 	if (status < B_OK)
6623 		return status;
6624 
6625 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6626 		status = EROFS;
6627 		goto out;
6628 	}
6629 
6630 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6631 
6632 out:
6633 	put_mount(mount);
6634 	return status;
6635 }
6636 
6637 
6638 #if 0
6639 static status_t
6640 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6641 {
6642 	struct vnode* vnode = descriptor->u.vnode;
6643 
6644 	// ToDo: currently unused!
6645 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6646 	if (!HAS_FS_CALL(vnode, read_index_stat))
6647 		return EOPNOTSUPP;
6648 
6649 	return EOPNOTSUPP;
6650 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6651 }
6652 
6653 
6654 static void
6655 index_free_fd(struct file_descriptor* descriptor)
6656 {
6657 	struct vnode* vnode = descriptor->u.vnode;
6658 
6659 	if (vnode != NULL) {
6660 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6661 		put_vnode(vnode);
6662 	}
6663 }
6664 #endif
6665 
6666 
6667 static status_t
6668 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6669 	bool kernel)
6670 {
6671 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6672 		name, kernel));
6673 
6674 	struct fs_mount* mount;
6675 	status_t status = get_mount(mountID, &mount);
6676 	if (status < B_OK)
6677 		return status;
6678 
6679 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6680 		status = EOPNOTSUPP;
6681 		goto out;
6682 	}
6683 
6684 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6685 
6686 out:
6687 	put_mount(mount);
6688 	return status;
6689 }
6690 
6691 
6692 static status_t
6693 index_remove(dev_t mountID, const char* name, bool kernel)
6694 {
6695 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6696 		name, kernel));
6697 
6698 	struct fs_mount* mount;
6699 	status_t status = get_mount(mountID, &mount);
6700 	if (status < B_OK)
6701 		return status;
6702 
6703 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6704 		status = EROFS;
6705 		goto out;
6706 	}
6707 
6708 	status = FS_MOUNT_CALL(mount, remove_index, name);
6709 
6710 out:
6711 	put_mount(mount);
6712 	return status;
6713 }
6714 
6715 
/*!	TODO: the query FS API is still pretty much the same as in R5.
		It would be nice if queries got some more kernel support;
		for example, query parsing should be moved into the kernel.
6720 */
6721 static int
6722 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6723 	int32 token, bool kernel)
6724 {
6725 	struct fs_mount* mount;
6726 	void* cookie;
6727 
6728 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6729 		query, kernel));
6730 
6731 	status_t status = get_mount(device, &mount);
6732 	if (status < B_OK)
6733 		return status;
6734 
6735 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6736 		status = EOPNOTSUPP;
6737 		goto error;
6738 	}
6739 
6740 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6741 		&cookie);
6742 	if (status < B_OK)
6743 		goto error;
6744 
	// get fd for the query
6746 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
6747 	if (status < 0) {
6748 		// something went wrong
6749 		FS_MOUNT_CALL(mount, close_query, cookie);
6750 		FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6751 
6752 error:
6753 		put_mount(mount);
6754 	}
6755 	return status;
6756 }
6757 
6758 
6759 static status_t
6760 query_close(struct file_descriptor* descriptor)
6761 {
6762 	struct fs_mount* mount = descriptor->u.mount;
6763 
6764 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6765 
6766 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6767 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6768 
6769 	return B_OK;
6770 }
6771 
6772 
6773 static void
6774 query_free_fd(struct file_descriptor* descriptor)
6775 {
6776 	struct fs_mount* mount = descriptor->u.mount;
6777 
6778 	if (mount != NULL) {
6779 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6780 		put_mount(mount);
6781 	}
6782 }
6783 
6784 
6785 static status_t
6786 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6787 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6788 {
6789 	struct fs_mount* mount = descriptor->u.mount;
6790 
6791 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6792 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6793 			bufferSize, _count);
6794 	}
6795 
6796 	return EOPNOTSUPP;
6797 }
6798 
6799 
6800 static status_t
6801 query_rewind(struct file_descriptor* descriptor)
6802 {
6803 	struct fs_mount* mount = descriptor->u.mount;
6804 
6805 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6806 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6807 
6808 	return EOPNOTSUPP;
6809 }
6810 
6811 
6812 //	#pragma mark - General File System functions
6813 
6814 
6815 static dev_t
6816 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6817 	const char* args, bool kernel)
6818 {
6819 	struct ::fs_mount* mount;
6820 	status_t status = B_OK;
6821 	fs_volume* volume = NULL;
6822 	int32 layer = 0;
6823 
6824 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6825 
	// The path is always safe, we just have to make sure that fsName is
	// at least superficially valid - we can't make any assumptions about
	// args, though. A NULL fsName is OK, if a device was given and the FS
	// is not virtual; we'll get the name from the DDM later.
6830 	if (fsName == NULL) {
6831 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6832 			return B_BAD_VALUE;
6833 	} else if (fsName[0] == '\0')
6834 		return B_BAD_VALUE;
6835 
6836 	RecursiveLocker mountOpLocker(sMountOpLock);
6837 
6838 	// Helper to delete a newly created file device on failure.
6839 	// Not exactly beautiful, but helps to keep the code below cleaner.
6840 	struct FileDeviceDeleter {
6841 		FileDeviceDeleter() : id(-1) {}
6842 		~FileDeviceDeleter()
6843 		{
6844 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
6845 		}
6846 
6847 		partition_id id;
6848 	} fileDeviceDeleter;
6849 
6850 	// If the file system is not a "virtual" one, the device argument should
6851 	// point to a real file/device (if given at all).
6852 	// get the partition
6853 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6854 	KPartition* partition = NULL;
6855 	KPath normalizedDevice;
6856 	bool newlyCreatedFileDevice = false;
6857 
6858 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
6859 		// normalize the device path
6860 		status = normalizedDevice.SetTo(device, true);
6861 		if (status != B_OK)
6862 			return status;
6863 
6864 		// get a corresponding partition from the DDM
6865 		partition = ddm->RegisterPartition(normalizedDevice.Path());
6866 
6867 		if (!partition) {
			// Partition not found: This either means that the user supplied
			// an invalid path, or that the path refers to an image file. We
			// try to let the DDM create a file device for the path.
6871 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
6872 				&newlyCreatedFileDevice);
6873 			if (deviceID >= 0) {
6874 				partition = ddm->RegisterPartition(deviceID);
6875 				if (newlyCreatedFileDevice)
6876 					fileDeviceDeleter.id = deviceID;
6877 			}
6878 		}
6879 
6880 		if (!partition) {
6881 			TRACE(("fs_mount(): Partition `%s' not found.\n",
6882 				normalizedDevice.Path()));
6883 			return B_ENTRY_NOT_FOUND;
6884 		}
6885 
6886 		device = normalizedDevice.Path();
6887 			// correct path to file device
6888 	}
6889 	PartitionRegistrar partitionRegistrar(partition, true);
6890 
	// Write lock the partition's device. For the time being, we keep the lock
	// until we're done mounting -- not nice, but it ensures that no one is
	// interfering.
6894 	// TODO: Just mark the partition busy while mounting!
6895 	KDiskDevice* diskDevice = NULL;
6896 	if (partition) {
6897 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6898 		if (!diskDevice) {
6899 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
6900 			return B_ERROR;
6901 		}
6902 	}
6903 
6904 	DeviceWriteLocker writeLocker(diskDevice, true);
6905 		// this takes over the write lock acquired before
6906 
6907 	if (partition) {
		// make sure that the partition is not busy
6909 		if (partition->IsBusy()) {
6910 			TRACE(("fs_mount(): Partition is busy.\n"));
6911 			return B_BUSY;
6912 		}
6913 
6914 		// if no FS name had been supplied, we get it from the partition
6915 		if (!fsName) {
6916 			KDiskSystem* diskSystem = partition->DiskSystem();
6917 			if (!diskSystem) {
6918 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
6919 					"recognize it.\n"));
6920 				return B_BAD_VALUE;
6921 			}
6922 
6923 			if (!diskSystem->IsFileSystem()) {
6924 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
6925 					"partitioning system.\n"));
6926 				return B_BAD_VALUE;
6927 			}
6928 
6929 			// The disk system name will not change, and the KDiskSystem
6930 			// object will not go away while the disk device is locked (and
6931 			// the partition has a reference to it), so this is safe.
6932 			fsName = diskSystem->Name();
6933 		}
6934 	}
6935 
6936 	mount = new(std::nothrow) (struct ::fs_mount);
6937 	if (mount == NULL)
6938 		return B_NO_MEMORY;
6939 
6940 	mount->device_name = strdup(device);
6941 		// "device" can be NULL
6942 
6943 	status = mount->entry_cache.Init();
6944 	if (status != B_OK)
6945 		goto err1;
6946 
6947 	// initialize structure
6948 	mount->id = sNextMountID++;
6949 	mount->partition = NULL;
6950 	mount->root_vnode = NULL;
6951 	mount->covers_vnode = NULL;
6952 	mount->unmounting = false;
6953 	mount->owns_file_device = false;
6954 	mount->volume = NULL;
6955 
6956 	// build up the volume(s)
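	// Layered FS names produce one fs_volume per layer; the volumes are
	// chained via super_volume/sub_volume, and mount->volume ends up
	// pointing at the most recently created (top) layer.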
6957 	while (true) {
6958 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
6959 		if (layerFSName == NULL) {
6960 			if (layer == 0) {
6961 				status = B_NO_MEMORY;
6962 				goto err1;
6963 			}
6964 
6965 			break;
6966 		}
6967 
6968 		volume = (fs_volume*)malloc(sizeof(fs_volume));
6969 		if (volume == NULL) {
6970 			status = B_NO_MEMORY;
6971 			free(layerFSName);
6972 			goto err1;
6973 		}
6974 
6975 		volume->id = mount->id;
6976 		volume->partition = partition != NULL ? partition->ID() : -1;
6977 		volume->layer = layer++;
6978 		volume->private_volume = NULL;
6979 		volume->ops = NULL;
6980 		volume->sub_volume = NULL;
6981 		volume->super_volume = NULL;
6982 		volume->file_system = NULL;
6983 		volume->file_system_name = NULL;
6984 
6985 		volume->file_system_name = get_file_system_name(layerFSName);
6986 		if (volume->file_system_name == NULL) {
6987 			status = B_NO_MEMORY;
6988 			free(layerFSName);
6989 			free(volume);
6990 			goto err1;
6991 		}
6992 
6993 		volume->file_system = get_file_system(layerFSName);
6994 		if (volume->file_system == NULL) {
6995 			status = ENODEV;
6996 			free(layerFSName);
6997 			free(volume->file_system_name);
6998 			free(volume);
6999 			goto err1;
7000 		}
7001 
7002 		if (mount->volume == NULL)
7003 			mount->volume = volume;
7004 		else {
7005 			volume->super_volume = mount->volume;
7006 			mount->volume->sub_volume = volume;
7007 			mount->volume = volume;
7008 		}
7009 	}
7010 
7011 	// insert mount struct into list before we call FS's mount() function
7012 	// so that vnodes can be created for this mount
7013 	mutex_lock(&sMountMutex);
7014 	hash_insert(sMountsTable, mount);
7015 	mutex_unlock(&sMountMutex);
7016 
7017 	ino_t rootID;
7018 
7019 	if (!sRoot) {
7020 		// we haven't mounted anything yet
7021 		if (strcmp(path, "/") != 0) {
7022 			status = B_ERROR;
7023 			goto err2;
7024 		}
7025 
7026 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7027 			args, &rootID);
7028 		if (status < 0) {
7029 			// ToDo: why should we hide the error code from the file system here?
7030 			//status = ERR_VFS_GENERAL;
7031 			goto err2;
7032 		}
7033 	} else {
7034 		struct vnode* coveredVnode;
7035 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
7036 		if (status < B_OK)
7037 			goto err2;
7038 
		// make sure coveredVnode is a directory
7040 		if (!S_ISDIR(coveredVnode->type)) {
7041 			status = B_NOT_A_DIRECTORY;
7042 			goto err2;
7043 		}
7044 
7045 		if (coveredVnode->mount->root_vnode == coveredVnode) {
7046 			// this is already a mount point
7047 			status = B_BUSY;
7048 			goto err2;
7049 		}
7050 
7051 		mount->covers_vnode = coveredVnode;
7052 
7053 		// mount it/them
7054 		fs_volume* volume = mount->volume;
7055 		while (volume) {
7056 			status = volume->file_system->mount(volume, device, flags, args,
7057 				&rootID);
7058 			if (status < B_OK) {
7059 				if (volume->sub_volume)
7060 					goto err4;
7061 				goto err3;
7062 			}
7063 
7064 			volume = volume->super_volume;
7065 		}
7066 
7067 		volume = mount->volume;
7068 		while (volume) {
7069 			if (volume->ops->all_layers_mounted != NULL)
7070 				volume->ops->all_layers_mounted(volume);
7071 			volume = volume->super_volume;
7072 		}
7073 	}
7074 
7075 	// the root node is supposed to be owned by the file system - it must
7076 	// exist at this point
7077 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7078 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7079 		panic("fs_mount: file system does not own its root node!\n");
7080 		status = B_ERROR;
7081 		goto err4;
7082 	}
7083 
7084 	// No race here, since fs_mount() is the only function changing
7085 	// covers_vnode (and holds sMountOpLock at that time).
7086 	mutex_lock(&sVnodeCoveredByMutex);
7087 	if (mount->covers_vnode)
7088 		mount->covers_vnode->covered_by = mount->root_vnode;
7089 	mutex_unlock(&sVnodeCoveredByMutex);
7090 
7091 	if (!sRoot) {
7092 		sRoot = mount->root_vnode;
7093 		mutex_lock(&sIOContextRootLock);
7094 		get_current_io_context(true)->root = sRoot;
7095 		mutex_unlock(&sIOContextRootLock);
7096 		inc_vnode_ref_count(sRoot);
7097 	}
7098 
7099 	// supply the partition (if any) with the mount cookie and mark it mounted
7100 	if (partition) {
7101 		partition->SetMountCookie(mount->volume->private_volume);
7102 		partition->SetVolumeID(mount->id);
7103 
7104 		// keep a partition reference as long as the partition is mounted
7105 		partitionRegistrar.Detach();
7106 		mount->partition = partition;
7107 		mount->owns_file_device = newlyCreatedFileDevice;
7108 		fileDeviceDeleter.id = -1;
7109 	}
7110 
7111 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
7112 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7113 
7114 	return mount->id;
7115 
7116 err4:
7117 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7118 err3:
7119 	if (mount->covers_vnode)
7120 		put_vnode(mount->covers_vnode);
7121 err2:
7122 	mutex_lock(&sMountMutex);
7123 	hash_remove(sMountsTable, mount);
7124 	mutex_unlock(&sMountMutex);
7125 err1:
7126 	delete mount;
7127 
7128 	return status;
7129 }
7130 
7131 
7132 static status_t
7133 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7134 {
7135 	struct vnode* vnode = NULL;
7136 	struct fs_mount* mount;
7137 	status_t err;
7138 
7139 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7140 		kernel));
7141 
7142 	if (path != NULL) {
7143 		err = path_to_vnode(path, true, &vnode, NULL, kernel);
7144 		if (err != B_OK)
7145 			return B_ENTRY_NOT_FOUND;
7146 	}
7147 
7148 	RecursiveLocker mountOpLocker(sMountOpLock);
7149 
	// This lock is not strictly necessary, but it is taken in the KDEBUG
	// case to keep the ASSERT in find_mount() working.
7152 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7153 	mount = find_mount(path != NULL ? vnode->device : mountID);
7154 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7155 	if (mount == NULL) {
7156 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7157 			vnode);
7158 	}
7159 
7160 	if (path != NULL) {
7161 		put_vnode(vnode);
7162 
7163 		if (mount->root_vnode != vnode) {
			// not a mount point
7165 			return B_BAD_VALUE;
7166 		}
7167 	}
7168 
7169 	// if the volume is associated with a partition, lock the device of the
7170 	// partition as long as we are unmounting
7171 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7172 	KPartition* partition = mount->partition;
7173 	KDiskDevice* diskDevice = NULL;
7174 	if (partition) {
7175 		if (partition->Device() == NULL) {
7176 			dprintf("fs_unmount(): There is no device!\n");
7177 			return B_ERROR;
7178 		}
7179 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7180 		if (!diskDevice) {
7181 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7182 			return B_ERROR;
7183 		}
7184 	}
7185 	DeviceWriteLocker writeLocker(diskDevice, true);
7186 
	// make sure that the partition is not busy
7188 	if (partition) {
7189 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7190 			TRACE(("fs_unmount(): Partition is busy.\n"));
7191 			return B_BUSY;
7192 		}
7193 	}
7194 
7195 	// grab the vnode master mutex to keep someone from creating
7196 	// a vnode while we're figuring out if we can continue
7197 	mutex_lock(&sVnodeMutex);
7198 
7199 	bool disconnectedDescriptors = false;
7200 
7201 	while (true) {
7202 		bool busy = false;
7203 
		// cycle through the list of vnodes associated with this mount and
		// make sure none of them is busy or still referenced
7206 		vnode = NULL;
7207 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7208 		while (iterator.HasNext()) {
7209 			vnode = iterator.Next();
7210 
7211 			// The root vnode ref_count needs to be 1 here (the mount has a
7212 			// reference).
7213 			if (vnode->busy
7214 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7215 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7216 				// there are still vnodes in use on this mount, so we cannot
7217 				// unmount yet
7218 				busy = true;
7219 				break;
7220 			}
7221 		}
7222 
7223 		if (!busy)
7224 			break;
7225 
7226 		if ((flags & B_FORCE_UNMOUNT) == 0) {
7227 			mutex_unlock(&sVnodeMutex);
7228 
7229 			return B_BUSY;
7230 		}
7231 
7232 		if (disconnectedDescriptors) {
7233 			// wait a bit until the last access is finished, and then try again
7234 			mutex_unlock(&sVnodeMutex);
7235 			snooze(100000);
7236 			// TODO: if there is some kind of bug that prevents the ref counts
7237 			//	from getting back to zero, this will fall into an endless loop...
7238 			mutex_lock(&sVnodeMutex);
7239 			continue;
7240 		}
7241 
7242 		// the file system is still busy - but we're forced to unmount it,
7243 		// so let's disconnect all open file descriptors
7244 
7245 		mount->unmounting = true;
7246 			// prevent new vnodes from being created
7247 
7248 		mutex_unlock(&sVnodeMutex);
7249 
7250 		disconnect_mount_or_vnode_fds(mount, NULL);
7251 		disconnectedDescriptors = true;
7252 
7253 		mutex_lock(&sVnodeMutex);
7254 	}
7255 
	// we can safely continue: mark all of the vnodes busy and put this
	// mount structure into the unmounting state
7258 	mount->unmounting = true;
7259 
7260 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7261 	while (iterator.HasNext()) {
7262 		vnode = iterator.Next();
7263 		vnode->busy = true;
7264 
7265 		if (vnode->ref_count == 0) {
7266 			// this vnode has been unused before
7267 			list_remove_item(&sUnusedVnodeList, vnode);
7268 			sUnusedVnodes--;
7269 		}
7270 	}
7271 
	// The ref_count of the root node is 1 at this point; see above for why.
7273 	mount->root_vnode->ref_count--;
7274 
7275 	mutex_unlock(&sVnodeMutex);
7276 
7277 	mutex_lock(&sVnodeCoveredByMutex);
7278 	mount->covers_vnode->covered_by = NULL;
7279 	mutex_unlock(&sVnodeCoveredByMutex);
7280 	put_vnode(mount->covers_vnode);
7281 
	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so
	// we don't have to do that ourselves.
7285 	while ((vnode = mount->vnodes.Head()) != NULL) {
7286 		free_vnode(vnode, false);
7287 	}
7288 
7289 	// remove the mount structure from the hash table
7290 	mutex_lock(&sMountMutex);
7291 	hash_remove(sMountsTable, mount);
7292 	mutex_unlock(&sMountMutex);
7293 
7294 	mountOpLocker.Unlock();
7295 
7296 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7297 	notify_unmount(mount->id);
7298 
7299 	// dereference the partition and mark it unmounted
7300 	if (partition) {
7301 		partition->SetVolumeID(-1);
7302 		partition->SetMountCookie(NULL);
7303 
7304 		if (mount->owns_file_device)
7305 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7306 		partition->Unregister();
7307 	}
7308 
7309 	delete mount;
7310 	return B_OK;
7311 }
7312 
7313 
7314 static status_t
7315 fs_sync(dev_t device)
7316 {
7317 	struct fs_mount* mount;
7318 	status_t status = get_mount(device, &mount);
7319 	if (status < B_OK)
7320 		return status;
7321 
7322 	struct vnode marker;
7323 	marker.remove = true;
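	// The marker is a dummy vnode we insert into the mount's vnode list
	// behind each vnode we process, so that the iteration can be resumed
	// after all locks have been dropped; its "remove" flag doubles as the
	// "marker is currently not in the list" indicator.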
7324 
7325 	// First, synchronize all file caches
7326 
7327 	while (true) {
7328 		MutexLocker locker(sVnodeMutex);
7329 
7330 		// synchronize access to vnode list
7331 		recursive_lock_lock(&mount->rlock);
7332 
7333 		struct vnode* vnode;
7334 		if (!marker.remove) {
7335 			vnode = mount->vnodes.GetNext(&marker);
7336 			mount->vnodes.Remove(&marker);
			marker.remove = true;
7338 		} else
7339 			vnode = mount->vnodes.First();
7340 
7341 		while (vnode != NULL && (vnode->cache == NULL
7342 			|| vnode->remove || vnode->busy)) {
7343 			// TODO: we could track writes (and writable mapped vnodes)
7344 			//	and have a simple flag that we could test for here
7345 			vnode = mount->vnodes.GetNext(vnode);
7346 		}
7347 
7348 		if (vnode != NULL) {
7349 			// insert marker vnode again
7350 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7351 			marker.remove = false;
7352 		}
7353 
7354 		recursive_lock_unlock(&mount->rlock);
7355 
7356 		if (vnode == NULL)
7357 			break;
7358 
7359 		vnode = lookup_vnode(mount->id, vnode->id);
7360 		if (vnode == NULL || vnode->busy)
7361 			continue;
7362 
7363 		if (vnode->ref_count == 0) {
7364 			// this vnode has been unused before
7365 			list_remove_item(&sUnusedVnodeList, vnode);
7366 			sUnusedVnodes--;
7367 		}
7368 		inc_vnode_ref_count(vnode);
7369 
7370 		locker.Unlock();
7371 
7372 		if (vnode->cache != NULL && !vnode->remove)
7373 			vnode->cache->WriteModified();
7374 
7375 		put_vnode(vnode);
7376 	}
7377 
7378 	// And then, let the file systems do their synchronizing work
7379 
7380 	if (HAS_FS_MOUNT_CALL(mount, sync))
7381 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7382 
7383 	put_mount(mount);
7384 	return status;
7385 }
7386 
7387 
7388 static status_t
7389 fs_read_info(dev_t device, struct fs_info* info)
7390 {
7391 	struct fs_mount* mount;
7392 	status_t status = get_mount(device, &mount);
7393 	if (status < B_OK)
7394 		return status;
7395 
7396 	memset(info, 0, sizeof(struct fs_info));
7397 
7398 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7399 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7400 
7401 	// fill in info the file system doesn't (have to) know about
7402 	if (status == B_OK) {
7403 		info->dev = mount->id;
7404 		info->root = mount->root_vnode->id;
7405 
7406 		fs_volume* volume = mount->volume;
7407 		while (volume->super_volume != NULL)
7408 			volume = volume->super_volume;
7409 
7410 		strlcpy(info->fsh_name, volume->file_system_name, sizeof(info->fsh_name));
7411 		if (mount->device_name != NULL) {
7412 			strlcpy(info->device_name, mount->device_name,
7413 				sizeof(info->device_name));
7414 		}
7415 	}
7416 
	// even if the call is not supported by the file system, we still
	// return the parts that we filled out ourselves
7419 
7420 	put_mount(mount);
7421 	return status;
7422 }
7423 
7424 
7425 static status_t
7426 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7427 {
7428 	struct fs_mount* mount;
7429 	status_t status = get_mount(device, &mount);
7430 	if (status < B_OK)
7431 		return status;
7432 
7433 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7434 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7435 	else
7436 		status = EROFS;
7437 
7438 	put_mount(mount);
7439 	return status;
7440 }
7441 
7442 
7443 static dev_t
7444 fs_next_device(int32* _cookie)
7445 {
7446 	struct fs_mount* mount = NULL;
7447 	dev_t device = *_cookie;
7448 
7449 	mutex_lock(&sMountMutex);
7450 
	// Since device IDs are assigned sequentially, this algorithm
	// works well enough. It makes sure that the device list
	// returned is sorted, and that no device is skipped when an
	// already visited device gets unmounted.
7455 
7456 	while (device < sNextMountID) {
7457 		mount = find_mount(device++);
7458 		if (mount != NULL && mount->volume->private_volume != NULL)
7459 			break;
7460 	}
7461 
7462 	*_cookie = device;
7463 
7464 	if (mount != NULL)
7465 		device = mount->id;
7466 	else
7467 		device = B_BAD_VALUE;
7468 
7469 	mutex_unlock(&sMountMutex);
7470 
7471 	return device;
7472 }
7473 
7474 
7475 static status_t
7476 get_cwd(char* buffer, size_t size, bool kernel)
7477 {
7478 	// Get current working directory from io context
7479 	struct io_context* context = get_current_io_context(kernel);
7480 	status_t status;
7481 
7482 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7483 
7484 	mutex_lock(&context->io_mutex);
7485 
7486 	struct vnode* vnode = context->cwd;
7487 	if (vnode)
7488 		inc_vnode_ref_count(vnode);
7489 
7490 	mutex_unlock(&context->io_mutex);
7491 
7492 	if (vnode) {
7493 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7494 		put_vnode(vnode);
7495 	} else
7496 		status = B_ERROR;
7497 
7498 	return status;
7499 }
7500 
7501 
7502 static status_t
7503 set_cwd(int fd, char* path, bool kernel)
7504 {
7505 	struct io_context* context;
7506 	struct vnode* vnode = NULL;
7507 	struct vnode* oldDirectory;
7508 	status_t status;
7509 
7510 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7511 
7512 	// Get vnode for passed path, and bail if it failed
7513 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7514 	if (status < 0)
7515 		return status;
7516 
7517 	if (!S_ISDIR(vnode->type)) {
7518 		// nope, can't cwd to here
7519 		status = B_NOT_A_DIRECTORY;
7520 		goto err;
7521 	}
7522 
7523 	// Get current io context and lock
7524 	context = get_current_io_context(kernel);
7525 	mutex_lock(&context->io_mutex);
7526 
7527 	// save the old current working directory first
7528 	oldDirectory = context->cwd;
7529 	context->cwd = vnode;
7530 
7531 	mutex_unlock(&context->io_mutex);
7532 
7533 	if (oldDirectory)
7534 		put_vnode(oldDirectory);
7535 
7536 	return B_NO_ERROR;
7537 
7538 err:
7539 	put_vnode(vnode);
7540 	return status;
7541 }
7542 
7543 
7544 //	#pragma mark - kernel mirrored syscalls
7545 
7546 
7547 dev_t
7548 _kern_mount(const char* path, const char* device, const char* fsName,
7549 	uint32 flags, const char* args, size_t argsLength)
7550 {
7551 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7552 	if (pathBuffer.InitCheck() != B_OK)
7553 		return B_NO_MEMORY;
7554 
7555 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7556 }
7557 
7558 
7559 status_t
7560 _kern_unmount(const char* path, uint32 flags)
7561 {
7562 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7563 	if (pathBuffer.InitCheck() != B_OK)
7564 		return B_NO_MEMORY;
7565 
7566 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7567 }
7568 
7569 
7570 status_t
7571 _kern_read_fs_info(dev_t device, struct fs_info* info)
7572 {
7573 	if (info == NULL)
7574 		return B_BAD_VALUE;
7575 
7576 	return fs_read_info(device, info);
7577 }
7578 
7579 
7580 status_t
7581 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7582 {
7583 	if (info == NULL)
7584 		return B_BAD_VALUE;
7585 
7586 	return fs_write_info(device, info, mask);
7587 }
7588 
7589 
7590 status_t
7591 _kern_sync(void)
7592 {
7593 	// Note: _kern_sync() is also called from _user_sync()
7594 	int32 cookie = 0;
7595 	dev_t device;
7596 	while ((device = next_dev(&cookie)) >= 0) {
7597 		status_t status = fs_sync(device);
7598 		if (status != B_OK && status != B_BAD_VALUE)
7599 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
7600 	}
7601 
7602 	return B_OK;
7603 }
7604 
7605 
7606 dev_t
7607 _kern_next_device(int32* _cookie)
7608 {
7609 	return fs_next_device(_cookie);
7610 }
7611 
7612 
7613 status_t
7614 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7615 	size_t infoSize)
7616 {
7617 	if (infoSize != sizeof(fd_info))
7618 		return B_BAD_VALUE;
7619 
7620 	struct io_context* context = NULL;
7621 	struct team* team = NULL;
7622 
7623 	cpu_status state = disable_interrupts();
7624 	GRAB_TEAM_LOCK();
7625 
7626 	bool contextLocked = false;
7627 	team = team_get_team_struct_locked(teamID);
7628 	if (team) {
		// We cannot lock the IO context while holding the team lock, nor can
		// we just drop the team lock, since the team might be deleted in the
		// meantime. team_remove_team() acquires the thread lock when removing
		// the team from the team hash table, though. Hence we switch to the
		// thread lock and use mutex_lock_threads_locked().
7634 		context = (io_context*)team->io_context;
7635 
7636 		GRAB_THREAD_LOCK();
7637 		RELEASE_TEAM_LOCK();
7638 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7639 		RELEASE_THREAD_LOCK();
7640 	} else
7641 		RELEASE_TEAM_LOCK();
7642 
7643 	restore_interrupts(state);
7644 
7645 	if (!contextLocked) {
		// team doesn't exist or seems to be gone
7647 		return B_BAD_TEAM_ID;
7648 	}
7649 
	// the team cannot be deleted completely while we own its io_context
	// mutex, so we can safely play with it now
7652 
7653 	uint32 slot = *_cookie;
7654 
7655 	struct file_descriptor* descriptor;
7656 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
7657 		slot++;
7658 
7659 	if (slot >= context->table_size) {
7660 		mutex_unlock(&context->io_mutex);
7661 		return B_ENTRY_NOT_FOUND;
7662 	}
7663 
7664 	info->number = slot;
7665 	info->open_mode = descriptor->open_mode;
7666 
7667 	struct vnode* vnode = fd_vnode(descriptor);
7668 	if (vnode != NULL) {
7669 		info->device = vnode->device;
7670 		info->node = vnode->id;
7671 	} else if (descriptor->u.mount != NULL) {
7672 		info->device = descriptor->u.mount->id;
7673 		info->node = -1;
7674 	}
7675 
7676 	mutex_unlock(&context->io_mutex);
7677 
7678 	*_cookie = slot + 1;
7679 	return B_OK;
7680 }
7681 
7682 
7683 int
7684 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7685 	int perms)
7686 {
7687 	if (openMode & O_CREAT)
7688 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
7689 
7690 	return file_open_entry_ref(device, inode, name, openMode, true);
7691 }
7692 
7693 
7694 /*!	\brief Opens a node specified by a FD + path pair.
7695 
7696 	At least one of \a fd and \a path must be specified.
7697 	If only \a fd is given, the function opens the node identified by this
7698 	FD. If only a path is given, this path is opened. If both are given and
7699 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7700 	of the directory (!) identified by \a fd.
7701 
7702 	\param fd The FD. May be < 0.
7703 	\param path The absolute or relative path. May be \c NULL.
7704 	\param openMode The open mode.
7705 	\return A FD referring to the newly opened node, or an error code,
7706 			if an error occurs.
7707 */
7708 int
7709 _kern_open(int fd, const char* path, int openMode, int perms)
7710 {
7711 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7712 	if (pathBuffer.InitCheck() != B_OK)
7713 		return B_NO_MEMORY;
7714 
7715 	if (openMode & O_CREAT)
7716 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7717 
7718 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7719 }
7720 
7721 
7722 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7723 
7724 	The supplied name may be \c NULL, in which case the directory identified
7725 	by \a device and \a inode will be opened. Otherwise \a device and
7726 	\a inode identify the parent directory of the directory to be opened
7727 	and \a name its entry name.
7728 
7729 	\param device If \a name is specified the ID of the device the parent
7730 		   directory of the directory to be opened resides on, otherwise
7731 		   the device of the directory itself.
7732 	\param inode If \a name is specified the node ID of the parent
7733 		   directory of the directory to be opened, otherwise node ID of the
7734 		   directory itself.
7735 	\param name The entry name of the directory to be opened. If \c NULL,
7736 		   the \a device + \a inode pair identify the node to be opened.
7737 	\return The FD of the newly opened directory or an error code, if
7738 			something went wrong.
7739 */
7740 int
7741 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7742 {
7743 	return dir_open_entry_ref(device, inode, name, true);
7744 }
7745 
7746 
7747 /*!	\brief Opens a directory specified by a FD + path pair.
7748 
7749 	At least one of \a fd and \a path must be specified.
7750 	If only \a fd is given, the function opens the directory identified by this
7751 	FD. If only a path is given, this path is opened. If both are given and
7752 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7753 	of the directory (!) identified by \a fd.
7754 
7755 	\param fd The FD. May be < 0.
7756 	\param path The absolute or relative path. May be \c NULL.
7757 	\return A FD referring to the newly opened directory, or an error code,
7758 			if an error occurs.
7759 */
7760 int
7761 _kern_open_dir(int fd, const char* path)
7762 {
7763 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7764 	if (pathBuffer.InitCheck() != B_OK)
7765 		return B_NO_MEMORY;
7766 
7767 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7768 }
7769 
7770 
7771 status_t
7772 _kern_fcntl(int fd, int op, uint32 argument)
7773 {
7774 	return common_fcntl(fd, op, argument, true);
7775 }
7776 
7777 
7778 status_t
7779 _kern_fsync(int fd)
7780 {
7781 	return common_sync(fd, true);
7782 }
7783 
7784 
7785 status_t
7786 _kern_lock_node(int fd)
7787 {
7788 	return common_lock_node(fd, true);
7789 }
7790 
7791 
7792 status_t
7793 _kern_unlock_node(int fd)
7794 {
7795 	return common_unlock_node(fd, true);
7796 }
7797 
7798 
7799 status_t
7800 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7801 	int perms)
7802 {
7803 	return dir_create_entry_ref(device, inode, name, perms, true);
7804 }
7805 
7806 
7807 /*!	\brief Creates a directory specified by a FD + path pair.
7808 
7809 	\a path must always be specified (it contains the name of the new directory
7810 	at least). If only a path is given, this path identifies the location at
7811 	which the directory shall be created. If both \a fd and \a path are given and
7812 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7813 	of the directory (!) identified by \a fd.
7814 
7815 	\param fd The FD. May be < 0.
7816 	\param path The absolute or relative path. Must not be \c NULL.
7817 	\param perms The access permissions the new directory shall have.
7818 	\return \c B_OK, if the directory has been created successfully, another
7819 			error code otherwise.
7820 */
7821 status_t
7822 _kern_create_dir(int fd, const char* path, int perms)
7823 {
7824 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7825 	if (pathBuffer.InitCheck() != B_OK)
7826 		return B_NO_MEMORY;
7827 
7828 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7829 }
7830 
7831 
7832 status_t
7833 _kern_remove_dir(int fd, const char* path)
7834 {
7835 	if (path) {
7836 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7837 		if (pathBuffer.InitCheck() != B_OK)
7838 			return B_NO_MEMORY;
7839 
7840 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7841 	}
7842 
7843 	return dir_remove(fd, NULL, true);
7844 }
7845 
7846 
7847 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7848 
7849 	At least one of \a fd and \a path must be specified.
7850 	If only \a fd is given, the symlink to be read is the node
7851 	identified by this FD. If only a path is given, this path identifies the
7852 	symlink to be read. If both are given and the path is absolute, \a fd is
7853 	ignored; a relative path is reckoned off of the directory (!) identified
7854 	by \a fd.
7855 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7856 	will still be updated to reflect the required buffer size.
7857 
7858 	\param fd The FD. May be < 0.
7859 	\param path The absolute or relative path. May be \c NULL.
7860 	\param buffer The buffer into which the contents of the symlink shall be
7861 		   written.
7862 	\param _bufferSize A pointer to the size of the supplied buffer.
7863 	\return The length of the link on success or an appropriate error code.
7864 */
7865 status_t
7866 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
7867 {
7868 	if (path) {
7869 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7870 		if (pathBuffer.InitCheck() != B_OK)
7871 			return B_NO_MEMORY;
7872 
7873 		return common_read_link(fd, pathBuffer.LockBuffer(),
7874 			buffer, _bufferSize, true);
7875 	}
7876 
7877 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
7878 }
7879 
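// Illustrative sketch (not part of the original source): since _bufferSize
// is updated even on B_BUFFER_OVERFLOW, a caller can retry with a buffer of
// the reported size; "/tmp/link" is a hypothetical symlink.
//
//	char small[4];
//	size_t size = sizeof(small);
//	status_t error = _kern_read_link(-1, "/tmp/link", small, &size);
//	if (error == B_BUFFER_OVERFLOW) {
//		// "size" now holds the required buffer size
//		char* buffer = (char*)malloc(size);
//		if (buffer != NULL) {
//			error = _kern_read_link(-1, "/tmp/link", buffer, &size);
//			free(buffer);
//		}
//	}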
7880 
7881 /*!	\brief Creates a symlink specified by a FD + path pair.
7882 
7883 	\a path must always be specified (it contains the name of the new symlink
7884 	at least). If only a path is given, this path identifies the location at
7885 	which the symlink shall be created. If both \a fd and \a path are given and
7886 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7887 	of the directory (!) identified by \a fd.
7888 
7889 	\param fd The FD. May be < 0.
7890 	\param path The absolute or relative path. Must not be \c NULL.
7891 	\param mode The access permissions the new symlink shall have.
7892 	\return \c B_OK, if the symlink has been created successfully, another
7893 			error code otherwise.
7894 */
7895 status_t
7896 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
7897 {
7898 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7899 	if (pathBuffer.InitCheck() != B_OK)
7900 		return B_NO_MEMORY;
7901 
7902 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
7903 		toPath, mode, true);
7904 }
7905 
7906 
7907 status_t
7908 _kern_create_link(const char* path, const char* toPath)
7909 {
7910 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7911 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
7912 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7913 		return B_NO_MEMORY;
7914 
7915 	return common_create_link(pathBuffer.LockBuffer(),
7916 		toPathBuffer.LockBuffer(), true);
7917 }
7918 
7919 
7920 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
7921 
7922 	\a path must always be specified (it contains at least the name of the entry
7923 	to be deleted). If only a path is given, this path identifies the entry
7924 	directly. If both \a fd and \a path are given and the path is absolute,
7925 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7926 	identified by \a fd.
7927 
7928 	\param fd The FD. May be < 0.
7929 	\param path The absolute or relative path. Must not be \c NULL.
7930 	\return \c B_OK, if the entry has been removed successfully, another
7931 			error code otherwise.
7932 */
7933 status_t
7934 _kern_unlink(int fd, const char* path)
7935 {
7936 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7937 	if (pathBuffer.InitCheck() != B_OK)
7938 		return B_NO_MEMORY;
7939 
7940 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
7941 }
7942 
7943 
7944 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
7945 		   by another FD + path pair.
7946 
7947 	\a oldPath and \a newPath must always be specified (they contain at least
7948 	the name of the entry). If only a path is given, this path identifies the
7949 	entry directly. If both a FD and a path are given and the path is absolute,
7950 	the FD is ignored; a relative path is reckoned off of the directory (!)
7951 	identified by the respective FD.
7952 
7953 	\param oldFD The FD of the old location. May be < 0.
7954 	\param oldPath The absolute or relative path of the old location. Must not
7955 		   be \c NULL.
7956 	\param newFD The FD of the new location. May be < 0.
7957 	\param newPath The absolute or relative path of the new location. Must not
7958 		   be \c NULL.
7959 	\return \c B_OK, if the entry has been moved successfully, another
7960 			error code otherwise.
7961 */
7962 status_t
7963 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
7964 {
7965 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
7966 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
7967 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7968 		return B_NO_MEMORY;
7969 
7970 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
7971 		newFD, newPathBuffer.LockBuffer(), true);
7972 }
7973 
7974 
7975 status_t
7976 _kern_access(const char* path, int mode)
7977 {
7978 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7979 	if (pathBuffer.InitCheck() != B_OK)
7980 		return B_NO_MEMORY;
7981 
7982 	return common_access(pathBuffer.LockBuffer(), mode, true);
7983 }
7984 
7985 
7986 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
7987 
7988 	If only \a fd is given, the stat operation associated with the type
7989 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
7990 	given, this path identifies the entry for whose node to retrieve the
7991 	stat data. If both \a fd and \a path are given and the path is absolute,
7992 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7993 	identified by \a fd and specifies the entry whose stat data shall be
7994 	retrieved.
7995 
7996 	\param fd The FD. May be < 0.
7997 	\param path The absolute or relative path. Must not be \c NULL.
7998 	\param traverseLeafLink If \a path is given, \c true specifies that the
7999 		   function shall not stick to symlinks, but traverse them.
8000 	\param stat The buffer the stat data shall be written into.
8001 	\param statSize The size of the supplied stat buffer.
8002 	\return \c B_OK, if the stat data have been read successfully, another
8003 			error code otherwise.
8004 */
8005 status_t
8006 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8007 	struct stat* stat, size_t statSize)
8008 {
8009 	struct stat completeStat;
8010 	struct stat* originalStat = NULL;
8011 	status_t status;
8012 
8013 	if (statSize > sizeof(struct stat))
8014 		return B_BAD_VALUE;
8015 
8016 	// this supports different stat extensions
8017 	if (statSize < sizeof(struct stat)) {
8018 		originalStat = stat;
8019 		stat = &completeStat;
8020 	}
8021 
8022 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8023 
8024 	if (status == B_OK && originalStat != NULL)
8025 		memcpy(originalStat, stat, statSize);
8026 
8027 	return status;
8028 }
8029 
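// Illustrative sketch (not part of the original source): the statSize
// mechanism keeps binaries that were built against an older, smaller
// struct stat working -- the kernel fills a complete stat and copies back
// only the first statSize bytes. "old_stat" is a hypothetical legacy
// layout that must be a prefix of the current struct stat.
//
//	struct old_stat legacyStat;
//	status_t error = _kern_read_stat(fd, NULL, false,
//		(struct stat*)&legacyStat, sizeof(legacyStat));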
8030 
8031 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8032 
8033 	If only \a fd is given, the stat operation associated with the type
8034 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8035 	given, this path identifies the entry for whose node to write the
8036 	stat data. If both \a fd and \a path are given and the path is absolute,
8037 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8038 	identified by \a fd and specifies the entry whose stat data shall be
8039 	written.
8040 
8041 	\param fd The FD. May be < 0.
8042 	\param path The absolute or relative path. Must not be \c NULL.
8043 	\param traverseLeafLink If \a path is given, \c true specifies that the
8044 		   function shall not stick to symlinks, but traverse them.
8045 	\param stat The buffer containing the stat data to be written.
8046 	\param statSize The size of the supplied stat buffer.
8047 	\param statMask A mask specifying which parts of the stat data shall be
8048 		   written.
8049 	\return \c B_OK, if the stat data have been written successfully,
8050 			another error code otherwise.
8051 */
8052 status_t
8053 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8054 	const struct stat* stat, size_t statSize, int statMask)
8055 {
8056 	struct stat completeStat;
8057 
8058 	if (statSize > sizeof(struct stat))
8059 		return B_BAD_VALUE;
8060 
8061 	// this supports different stat extensions
8062 	if (statSize < sizeof(struct stat)) {
8063 		memset((uint8*)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
8064 		memcpy(&completeStat, stat, statSize);
8065 		stat = &completeStat;
8066 	}
8067 
8068 	status_t status;
8069 
8070 	if (path) {
8071 		// path given: write the stat of the node referred to by (fd, path)
8072 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8073 		if (pathBuffer.InitCheck() != B_OK)
8074 			return B_NO_MEMORY;
8075 
8076 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8077 			traverseLeafLink, stat, statMask, true);
8078 	} else {
8079 		// no path given: get the FD and use the FD operation
8080 		struct file_descriptor* descriptor
8081 			= get_fd(get_current_io_context(true), fd);
8082 		if (descriptor == NULL)
8083 			return B_FILE_ERROR;
8084 
8085 		if (descriptor->ops->fd_write_stat)
8086 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8087 		else
8088 			status = EOPNOTSUPP;
8089 
8090 		put_fd(descriptor);
8091 	}
8092 
8093 	return status;
8094 }
8095 
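// Illustrative sketch (not part of the original source): statMask selects
// which fields are written, so a chmod()-style update only needs st_mode to
// be valid (B_STAT_MODE is the matching mask bit from <NodeMonitor.h>).
//
//	struct stat stat;
//	stat.st_mode = 0640;
//	status_t error = _kern_write_stat(fd, NULL, false, &stat, sizeof(stat),
//		B_STAT_MODE);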
8096 
8097 int
8098 _kern_open_attr_dir(int fd, const char* path)
8099 {
8100 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8101 	if (pathBuffer.InitCheck() != B_OK)
8102 		return B_NO_MEMORY;
8103 
8104 	if (path != NULL && pathBuffer.SetTo(path) != B_OK)
8105 		return B_BUFFER_OVERFLOW;
8106 
8107 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8108 }
8109 
8110 
8111 int
8112 _kern_create_attr(int fd, const char* name, uint32 type, int openMode)
8113 {
8114 	return attr_create(fd, name, type, openMode, true);
8115 }
8116 
8117 
8118 int
8119 _kern_open_attr(int fd, const char* name, int openMode)
8120 {
8121 	return attr_open(fd, name, openMode, true);
8122 }
8123 
8124 
8125 status_t
8126 _kern_remove_attr(int fd, const char* name)
8127 {
8128 	return attr_remove(fd, name, true);
8129 }
8130 
8131 
8132 status_t
8133 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8134 	const char* toName)
8135 {
8136 	return attr_rename(fromFile, fromName, toFile, toName, true);
8137 }
8138 
8139 
8140 int
8141 _kern_open_index_dir(dev_t device)
8142 {
8143 	return index_dir_open(device, true);
8144 }
8145 
8146 
8147 status_t
8148 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8149 {
8150 	return index_create(device, name, type, flags, true);
8151 }
8152 
8153 
8154 status_t
8155 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8156 {
8157 	return index_name_read_stat(device, name, stat, true);
8158 }
8159 
8160 
8161 status_t
8162 _kern_remove_index(dev_t device, const char* name)
8163 {
8164 	return index_remove(device, name, true);
8165 }
8166 
8167 
8168 status_t
8169 _kern_getcwd(char* buffer, size_t size)
8170 {
8171 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8172 
8173 	// Call vfs to get current working directory
8174 	return get_cwd(buffer, size, true);
8175 }
8176 
8177 
8178 status_t
8179 _kern_setcwd(int fd, const char* path)
8180 {
8181 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8182 	if (pathBuffer.InitCheck() != B_OK)
8183 		return B_NO_MEMORY;
8184 
8185 	if (path != NULL && pathBuffer.SetTo(path) != B_OK)
8186 		return B_BUFFER_OVERFLOW;
8187 
8188 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8189 }
8190 
8191 
8192 //	#pragma mark - userland syscalls
8193 
8194 
8195 dev_t
8196 _user_mount(const char* userPath, const char* userDevice,
8197 	const char* userFileSystem, uint32 flags, const char* userArgs,
8198 	size_t argsLength)
8199 {
8200 	char fileSystem[B_FILE_NAME_LENGTH];
8201 	KPath path, device;
8202 	char* args = NULL;
8203 	status_t status;
8204 
8205 	if (!IS_USER_ADDRESS(userPath)
8206 		|| !IS_USER_ADDRESS(userFileSystem)
8207 		|| !IS_USER_ADDRESS(userDevice))
8208 		return B_BAD_ADDRESS;
8209 
8210 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8211 		return B_NO_MEMORY;
8212 
8213 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8214 		return B_BAD_ADDRESS;
8215 
8216 	if (userFileSystem != NULL
8217 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8218 		return B_BAD_ADDRESS;
8219 
8220 	if (userDevice != NULL
8221 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
8222 		return B_BAD_ADDRESS;
8223 
8224 	if (userArgs != NULL && argsLength > 0) {
8225 		// this is a safety restriction
8226 		if (argsLength >= 65536)
8227 			return B_NAME_TOO_LONG;
8228 
8229 		args = (char*)malloc(argsLength + 1);
8230 		if (args == NULL)
8231 			return B_NO_MEMORY;
8232 
8233 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8234 			free(args);
8235 			return B_BAD_ADDRESS;
8236 		}
8237 	}
8238 	path.UnlockBuffer();
8239 	device.UnlockBuffer();
8240 
8241 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
8242 		userFileSystem ? fileSystem : NULL, flags, args, false);
8243 
8244 	free(args);
8245 	return status;
8246 }
8247 
8248 
8249 status_t
8250 _user_unmount(const char* userPath, uint32 flags)
8251 {
8252 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8253 	if (pathBuffer.InitCheck() != B_OK)
8254 		return B_NO_MEMORY;
8255 
8256 	char* path = pathBuffer.LockBuffer();
8257 
8258 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8259 		return B_BAD_ADDRESS;
8260 
8261 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8262 }
8263 
8264 
8265 status_t
8266 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8267 {
8268 	struct fs_info info;
8269 	status_t status;
8270 
8271 	if (userInfo == NULL)
8272 		return B_BAD_VALUE;
8273 
8274 	if (!IS_USER_ADDRESS(userInfo))
8275 		return B_BAD_ADDRESS;
8276 
8277 	status = fs_read_info(device, &info);
8278 	if (status != B_OK)
8279 		return status;
8280 
8281 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
8282 		return B_BAD_ADDRESS;
8283 
8284 	return B_OK;
8285 }
8286 
8287 
8288 status_t
8289 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8290 {
8291 	struct fs_info info;
8292 
8293 	if (userInfo == NULL)
8294 		return B_BAD_VALUE;
8295 
8296 	if (!IS_USER_ADDRESS(userInfo)
8297 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
8298 		return B_BAD_ADDRESS;
8299 
8300 	return fs_write_info(device, &info, mask);
8301 }
8302 
8303 
8304 dev_t
8305 _user_next_device(int32* _userCookie)
8306 {
8307 	int32 cookie;
8308 	dev_t device;
8309 
8310 	if (!IS_USER_ADDRESS(_userCookie)
8311 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
8312 		return B_BAD_ADDRESS;
8313 
8314 	device = fs_next_device(&cookie);
8315 
8316 	if (device >= B_OK) {
8317 		// update user cookie
8318 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
8319 			return B_BAD_ADDRESS;
8320 	}
8321 
8322 	return device;
8323 }
8324 
8325 
8326 status_t
8327 _user_sync(void)
8328 {
8329 	return _kern_sync();
8330 }
8331 
8332 
8333 status_t
8334 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8335 	size_t infoSize)
8336 {
8337 	struct fd_info info;
8338 	uint32 cookie;
8339 
8340 	// only root can do this (or should root's group be enough?)
8341 	if (geteuid() != 0)
8342 		return B_NOT_ALLOWED;
8343 
8344 	if (infoSize != sizeof(fd_info))
8345 		return B_BAD_VALUE;
8346 
8347 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8348 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
8349 		return B_BAD_ADDRESS;
8350 
8351 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8352 	if (status < B_OK)
8353 		return status;
8354 
8355 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
8356 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
8357 		return B_BAD_ADDRESS;
8358 
8359 	return status;
8360 }
8361 
8362 
8363 status_t
8364 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8365 	char* userPath, size_t pathLength)
8366 {
8367 	if (!IS_USER_ADDRESS(userPath))
8368 		return B_BAD_ADDRESS;
8369 
8370 	KPath path(B_PATH_NAME_LENGTH + 1);
8371 	if (path.InitCheck() != B_OK)
8372 		return B_NO_MEMORY;
8373 
8374 	// copy the leaf name onto the stack
8375 	char stackLeaf[B_FILE_NAME_LENGTH];
8376 	if (leaf) {
8377 		if (!IS_USER_ADDRESS(leaf))
8378 			return B_BAD_ADDRESS;
8379 
8380 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8381 		if (length < 0)
8382 			return length;
8383 		if (length >= B_FILE_NAME_LENGTH)
8384 			return B_NAME_TOO_LONG;
8385 
8386 		leaf = stackLeaf;
8387 	}
8388 
8389 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8390 		path.LockBuffer(), path.BufferSize());
8391 	if (status < B_OK)
8392 		return status;
8393 
8394 	path.UnlockBuffer();
8395 
8396 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8397 	if (length < 0)
8398 		return length;
8399 	if (length >= (int)pathLength)
8400 		return B_BUFFER_OVERFLOW;
8401 
8402 	return B_OK;
8403 }
8404 
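// Illustrative sketch (not part of the original source): userland reaches
// this handler through the _kern_entry_ref_to_path() syscall to turn an
// entry_ref-style triple back into an absolute path; the values here are
// hypothetical.
//
//	char path[B_PATH_NAME_LENGTH];
//	status_t error = _kern_entry_ref_to_path(volume, directoryNode, "leaf",
//		path, sizeof(path));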
8405 
8406 status_t
8407 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8408 {
8409 	if (userPath == NULL || buffer == NULL)
8410 		return B_BAD_VALUE;
8411 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8412 		return B_BAD_ADDRESS;
8413 
8414 	// copy path from userland
8415 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8416 	if (pathBuffer.InitCheck() != B_OK)
8417 		return B_NO_MEMORY;
8418 	char* path = pathBuffer.LockBuffer();
8419 
8420 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8421 		return B_BAD_ADDRESS;
8422 
8423 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8424 		false);
8425 	if (error != B_OK)
8426 		return error;
8427 
8428 	// copy back to userland
8429 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8430 	if (len < 0)
8431 		return len;
8432 	if (len >= B_PATH_NAME_LENGTH)
8433 		return B_BUFFER_OVERFLOW;
8434 
8435 	return B_OK;
8436 }
8437 
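// Illustrative sketch (not part of the original source): userland reaches
// this handler through the _kern_normalize_path() syscall. A path like
// "/boot/home/../home/./Desktop" would be returned as "/boot/home/Desktop";
// traverseLink decides whether a trailing symlink is resolved.
//
//	char normalized[B_PATH_NAME_LENGTH];
//	status_t error = _kern_normalize_path("/boot/home/../home/./Desktop",
//		true, normalized);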
8438 
8439 int
8440 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8441 	int openMode, int perms)
8442 {
8443 	char name[B_FILE_NAME_LENGTH];
8444 
8445 	if (userName == NULL || device < 0 || inode < 0)
8446 		return B_BAD_VALUE;
8447 	if (!IS_USER_ADDRESS(userName)
8448 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8449 		return B_BAD_ADDRESS;
8450 
8451 	if (openMode & O_CREAT)
8452 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
8453 
8454 	return file_open_entry_ref(device, inode, name, openMode, false);
8455 }
8456 
8457 
8458 int
8459 _user_open(int fd, const char* userPath, int openMode, int perms)
8460 {
8461 	KPath path(B_PATH_NAME_LENGTH + 1);
8462 	if (path.InitCheck() != B_OK)
8463 		return B_NO_MEMORY;
8464 
8465 	char* buffer = path.LockBuffer();
8466 
8467 	if (!IS_USER_ADDRESS(userPath)
8468 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8469 		return B_BAD_ADDRESS;
8470 
8471 	if (openMode & O_CREAT)
8472 		return file_create(fd, buffer, openMode, perms, false);
8473 
8474 	return file_open(fd, buffer, openMode, false);
8475 }
8476 
8477 
8478 int
8479 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8480 {
8481 	if (userName != NULL) {
8482 		char name[B_FILE_NAME_LENGTH];
8483 
8484 		if (!IS_USER_ADDRESS(userName)
8485 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8486 			return B_BAD_ADDRESS;
8487 
8488 		return dir_open_entry_ref(device, inode, name, false);
8489 	}
8490 	return dir_open_entry_ref(device, inode, NULL, false);
8491 }
8492 
8493 
8494 int
8495 _user_open_dir(int fd, const char* userPath)
8496 {
8497 	KPath path(B_PATH_NAME_LENGTH + 1);
8498 	if (path.InitCheck() != B_OK)
8499 		return B_NO_MEMORY;
8500 
8501 	char* buffer = path.LockBuffer();
8502 
8503 	if (!IS_USER_ADDRESS(userPath)
8504 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8505 		return B_BAD_ADDRESS;
8506 
8507 	return dir_open(fd, buffer, false);
8508 }
8509 
8510 
8511 /*!	\brief Opens a directory's parent directory and returns the entry name
8512 		   of the former.
8513 
8514 	Aside from the fact that it returns the directory's entry name, this
8515 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
8516 	really is equivalent if \a userName is \c NULL.
8517 
8518 	If a name buffer is supplied and the name does not fit the buffer, the
8519 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8520 
8521 	\param fd A FD referring to a directory.
8522 	\param userName Buffer the directory's entry name shall be written into.
8523 		   May be \c NULL.
8524 	\param nameLength Size of the name buffer.
8525 	\return The file descriptor of the opened parent directory, if everything
8526 			went fine, an error code otherwise.
8527 */
8528 int
8529 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8530 {
8531 	bool kernel = false;
8532 
8533 	if (userName && !IS_USER_ADDRESS(userName))
8534 		return B_BAD_ADDRESS;
8535 
8536 	// open the parent dir
8537 	int parentFD = dir_open(fd, (char*)"..", kernel);
8538 	if (parentFD < 0)
8539 		return parentFD;
8540 	FDCloser fdCloser(parentFD, kernel);
8541 
8542 	if (userName) {
8543 		// get the vnodes
8544 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8545 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8546 		VNodePutter parentVNodePutter(parentVNode);
8547 		VNodePutter dirVNodePutter(dirVNode);
8548 		if (!parentVNode || !dirVNode)
8549 			return B_FILE_ERROR;
8550 
8551 		// get the vnode name
8552 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8553 		struct dirent* buffer = (struct dirent*)_buffer;
8554 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8555 			sizeof(_buffer), get_current_io_context(false));
8556 		if (status != B_OK)
8557 			return status;
8558 
8559 		// copy the name to the userland buffer
8560 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8561 		if (len < 0)
8562 			return len;
8563 		if (len >= (int)nameLength)
8564 			return B_BUFFER_OVERFLOW;
8565 	}
8566 
8567 	return fdCloser.Detach();
8568 }
8569 
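// Illustrative sketch (not part of the original source): via the
// _kern_open_parent_dir() syscall userland can obtain a directory's parent
// and its own entry name in one step; "dirFD" is assumed to be an open
// directory FD.
//
//	char name[B_FILE_NAME_LENGTH];
//	int parentFD = _kern_open_parent_dir(dirFD, name, sizeof(name));
//	if (parentFD >= 0) {
//		// "name" now holds the entry name of dirFD within parentFD
//		_kern_close(parentFD);
//	}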
8570 
8571 status_t
8572 _user_fcntl(int fd, int op, uint32 argument)
8573 {
8574 	status_t status = common_fcntl(fd, op, argument, false);
8575 	if (op == F_SETLKW)
8576 		syscall_restart_handle_post(status);
8577 
8578 	return status;
8579 }
8580 
8581 
8582 status_t
8583 _user_fsync(int fd)
8584 {
8585 	return common_sync(fd, false);
8586 }
8587 
8588 
8589 status_t
8590 _user_flock(int fd, int operation)
8591 {
8592 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
8593 
8594 	// Check if the operation is valid
8595 	switch (operation & ~LOCK_NB) {
8596 		case LOCK_UN:
8597 		case LOCK_SH:
8598 		case LOCK_EX:
8599 			break;
8600 
8601 		default:
8602 			return B_BAD_VALUE;
8603 	}
8604 
8605 	struct file_descriptor* descriptor;
8606 	struct vnode* vnode;
8607 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8608 	if (descriptor == NULL)
8609 		return B_FILE_ERROR;
8610 
8611 	if (descriptor->type != FDTYPE_FILE) {
8612 		put_fd(descriptor);
8613 		return B_BAD_VALUE;
8614 	}
8615 
8616 	struct flock flock;
8617 	flock.l_start = 0;
8618 	flock.l_len = OFF_MAX;
8619 	flock.l_whence = 0;
8620 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8621 
8622 	status_t status;
8623 	if ((operation & LOCK_UN) != 0)
8624 		status = release_advisory_lock(vnode, &flock);
8625 	else {
8626 		status = acquire_advisory_lock(vnode,
8627 			thread_get_current_thread()->team->session_id, &flock,
8628 			(operation & LOCK_NB) == 0);
8629 	}
8630 
8631 	syscall_restart_handle_post(status);
8632 
8633 	put_fd(descriptor);
8634 	return status;
8635 }
8636 
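// Illustrative sketch (not part of the original source): the BSD-style
// flock() operations are thus mapped onto one whole-file advisory lock, so
// from userland
//
//	flock(fd, LOCK_EX | LOCK_NB);
//
// acts like an fcntl() F_SETLK request with l_type = F_WRLCK covering the
// entire file, returning immediately instead of blocking because LOCK_NB
// is set.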
8637 
8638 status_t
8639 _user_lock_node(int fd)
8640 {
8641 	return common_lock_node(fd, false);
8642 }
8643 
8644 
8645 status_t
8646 _user_unlock_node(int fd)
8647 {
8648 	return common_unlock_node(fd, false);
8649 }
8650 
8651 
8652 status_t
8653 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8654 	int perms)
8655 {
8656 	char name[B_FILE_NAME_LENGTH];
8657 	status_t status;
8658 
8659 	if (!IS_USER_ADDRESS(userName))
8660 		return B_BAD_ADDRESS;
8661 
8662 	status = user_strlcpy(name, userName, sizeof(name));
8663 	if (status < 0)
8664 		return status;
8665 
8666 	return dir_create_entry_ref(device, inode, name, perms, false);
8667 }
8668 
8669 
8670 status_t
8671 _user_create_dir(int fd, const char* userPath, int perms)
8672 {
8673 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8674 	if (pathBuffer.InitCheck() != B_OK)
8675 		return B_NO_MEMORY;
8676 
8677 	char* path = pathBuffer.LockBuffer();
8678 
8679 	if (!IS_USER_ADDRESS(userPath)
8680 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8681 		return B_BAD_ADDRESS;
8682 
8683 	return dir_create(fd, path, perms, false);
8684 }
8685 
8686 
8687 status_t
8688 _user_remove_dir(int fd, const char* userPath)
8689 {
8690 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8691 	if (pathBuffer.InitCheck() != B_OK)
8692 		return B_NO_MEMORY;
8693 
8694 	char* path = pathBuffer.LockBuffer();
8695 
8696 	if (userPath != NULL) {
8697 		if (!IS_USER_ADDRESS(userPath)
8698 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8699 			return B_BAD_ADDRESS;
8700 	}
8701 
8702 	return dir_remove(fd, userPath ? path : NULL, false);
8703 }
8704 
8705 
8706 status_t
8707 _user_read_link(int fd, const char* userPath, char* userBuffer,
8708 	size_t* userBufferSize)
8709 {
8710 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8711 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8712 		return B_NO_MEMORY;
8713 
8714 	size_t bufferSize;
8715 
8716 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8717 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
8718 		return B_BAD_ADDRESS;
8719 
8720 	char* path = pathBuffer.LockBuffer();
8721 	char* buffer = linkBuffer.LockBuffer();
8722 
8723 	if (userPath) {
8724 		if (!IS_USER_ADDRESS(userPath)
8725 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8726 			return B_BAD_ADDRESS;
8727 
8728 		if (bufferSize > B_PATH_NAME_LENGTH)
8729 			bufferSize = B_PATH_NAME_LENGTH;
8730 	}
8731 
8732 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8733 		&bufferSize, false);
8734 
8735 	// we also update the bufferSize in case of errors
8736 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8737 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
8738 		return B_BAD_ADDRESS;
8739 
8740 	if (status < B_OK)
8741 		return status;
8742 
8743 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8744 		return B_BAD_ADDRESS;
8745 
8746 	return B_OK;
8747 }
8748 
8749 
8750 status_t
8751 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8752 	int mode)
8753 {
8754 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8755 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8756 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8757 		return B_NO_MEMORY;
8758 
8759 	char* path = pathBuffer.LockBuffer();
8760 	char* toPath = toPathBuffer.LockBuffer();
8761 
8762 	if (!IS_USER_ADDRESS(userPath)
8763 		|| !IS_USER_ADDRESS(userToPath)
8764 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8765 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8766 		return B_BAD_ADDRESS;
8767 
8768 	return common_create_symlink(fd, path, toPath, mode, false);
8769 }
8770 
8771 
8772 status_t
8773 _user_create_link(const char* userPath, const char* userToPath)
8774 {
8775 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8776 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8777 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8778 		return B_NO_MEMORY;
8779 
8780 	char* path = pathBuffer.LockBuffer();
8781 	char* toPath = toPathBuffer.LockBuffer();
8782 
8783 	if (!IS_USER_ADDRESS(userPath)
8784 		|| !IS_USER_ADDRESS(userToPath)
8785 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8786 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8787 		return B_BAD_ADDRESS;
8788 
8789 	status_t status = check_path(toPath);
8790 	if (status < B_OK)
8791 		return status;
8792 
8793 	return common_create_link(path, toPath, false);
8794 }
8795 
8796 
8797 status_t
8798 _user_unlink(int fd, const char* userPath)
8799 {
8800 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8801 	if (pathBuffer.InitCheck() != B_OK)
8802 		return B_NO_MEMORY;
8803 
8804 	char* path = pathBuffer.LockBuffer();
8805 
8806 	if (!IS_USER_ADDRESS(userPath)
8807 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8808 		return B_BAD_ADDRESS;
8809 
8810 	return common_unlink(fd, path, false);
8811 }
8812 
8813 
8814 status_t
8815 _user_rename(int oldFD, const char* userOldPath, int newFD,
8816 	const char* userNewPath)
8817 {
8818 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8819 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8820 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8821 		return B_NO_MEMORY;
8822 
8823 	char* oldPath = oldPathBuffer.LockBuffer();
8824 	char* newPath = newPathBuffer.LockBuffer();
8825 
8826 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8827 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8828 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8829 		return B_BAD_ADDRESS;
8830 
8831 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8832 }
8833 
8834 
8835 status_t
8836 _user_create_fifo(const char* userPath, mode_t perms)
8837 {
8838 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8839 	if (pathBuffer.InitCheck() != B_OK)
8840 		return B_NO_MEMORY;
8841 
8842 	char* path = pathBuffer.LockBuffer();
8843 
8844 	if (!IS_USER_ADDRESS(userPath)
8845 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
8846 		return B_BAD_ADDRESS;
8847 	}
8848 
8849 	// split into directory vnode and filename path
8850 	char filename[B_FILE_NAME_LENGTH];
8851 	struct vnode* dir;
8852 	status_t status = path_to_dir_vnode(path, &dir, filename, false);
8853 	if (status != B_OK)
8854 		return status;
8855 
8856 	VNodePutter _(dir);
8857 
8858 	// the underlying FS needs to support creating FIFOs
8859 	if (!HAS_FS_CALL(dir, create_special_node))
8860 		return B_UNSUPPORTED;
8861 
8862 	// create the entry	-- the FIFO sub node is set up automatically
8863 	fs_vnode superVnode;
8864 	ino_t nodeID;
8865 	status = FS_CALL(dir, create_special_node, filename, NULL,
8866 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
8867 
8868 	// create_special_node() acquired a reference for us that we don't need.
8869 	if (status == B_OK)
8870 		put_vnode(dir->mount->volume, nodeID);
8871 
8872 	return status;
8873 }
8874 
8875 
8876 status_t
8877 _user_create_pipe(int* userFDs)
8878 {
8879 	// rootfs should support creating FIFOs, but let's be sure
8880 	if (!HAS_FS_CALL(sRoot, create_special_node))
8881 		return B_UNSUPPORTED;
8882 
8883 	// create the node	-- the FIFO sub node is set up automatically
8884 	fs_vnode superVnode;
8885 	ino_t nodeID;
8886 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
8887 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
8888 	if (status != B_OK)
8889 		return status;
8890 
8891 	// We've got one reference to the node and need another one.
8892 	struct vnode* vnode;
8893 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
8894 	if (status != B_OK) {
8895 		// that should not happen
8896 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
8897 			sRoot->mount->id, nodeID);
8898 		return status;
8899 	}
8900 
8901 	// Everything looks good so far. Open two FDs, one for reading and one
8902 	// for writing.
8903 	int fds[2];
8904 	fds[0] = open_vnode(vnode, O_RDONLY, false);
8905 	fds[1] = open_vnode(vnode, O_WRONLY, false);
8906 
8907 	FDCloser closer0(fds[0], false);
8908 	FDCloser closer1(fds[1], false);
8909 
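	// Fold the two open results into a single status: adopt the first
	// open_vnode() error, or B_OK if both FDs were created.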
8910 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
8911 
8912 	// copy FDs to userland
8913 	if (status == B_OK) {
8914 		if (!IS_USER_ADDRESS(userFDs)
8915 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
8916 			status = B_BAD_ADDRESS;
8917 		}
8918 	}
8919 
8920 	// keep FDs, if everything went fine
8921 	if (status == B_OK) {
8922 		closer0.Detach();
8923 		closer1.Detach();
8924 	}
8925 
8926 	return status;
8927 }
8928 
8929 
8930 status_t
8931 _user_access(const char* userPath, int mode)
8932 {
8933 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8934 	if (pathBuffer.InitCheck() != B_OK)
8935 		return B_NO_MEMORY;
8936 
8937 	char* path = pathBuffer.LockBuffer();
8938 
8939 	if (!IS_USER_ADDRESS(userPath)
8940 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8941 		return B_BAD_ADDRESS;
8942 
8943 	return common_access(path, mode, false);
8944 }
8945 
8946 
8947 status_t
8948 _user_read_stat(int fd, const char* userPath, bool traverseLink,
8949 	struct stat* userStat, size_t statSize)
8950 {
8951 	struct stat stat;
8952 	status_t status;
8953 
8954 	if (statSize > sizeof(struct stat))
8955 		return B_BAD_VALUE;
8956 
8957 	if (!IS_USER_ADDRESS(userStat))
8958 		return B_BAD_ADDRESS;
8959 
8960 	if (userPath) {
8961 		// path given: get the stat of the node referred to by (fd, path)
8962 		if (!IS_USER_ADDRESS(userPath))
8963 			return B_BAD_ADDRESS;
8964 
8965 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8966 		if (pathBuffer.InitCheck() != B_OK)
8967 			return B_NO_MEMORY;
8968 
8969 		char* path = pathBuffer.LockBuffer();
8970 
8971 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
8972 		if (length < B_OK)
8973 			return length;
8974 		if (length >= B_PATH_NAME_LENGTH)
8975 			return B_NAME_TOO_LONG;
8976 
8977 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
8978 	} else {
8979 		// no path given: get the FD and use the FD operation
8980 		struct file_descriptor* descriptor
8981 			= get_fd(get_current_io_context(false), fd);
8982 		if (descriptor == NULL)
8983 			return B_FILE_ERROR;
8984 
8985 		if (descriptor->ops->fd_read_stat)
8986 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
8987 		else
8988 			status = EOPNOTSUPP;
8989 
8990 		put_fd(descriptor);
8991 	}
8992 
8993 	if (status < B_OK)
8994 		return status;
8995 
8996 	return user_memcpy(userStat, &stat, statSize);
8997 }
8998 
8999 
9000 status_t
9001 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9002 	const struct stat* userStat, size_t statSize, int statMask)
9003 {
9004 	if (statSize > sizeof(struct stat))
9005 		return B_BAD_VALUE;
9006 
9007 	struct stat stat;
9008 
9009 	if (!IS_USER_ADDRESS(userStat)
9010 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9011 		return B_BAD_ADDRESS;
9012 
9013 	// clear additional stat fields
9014 	if (statSize < sizeof(struct stat))
9015 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9016 
9017 	status_t status;
9018 
9019 	if (userPath) {
9020 		// path given: write the stat of the node referred to by (fd, path)
9021 		if (!IS_USER_ADDRESS(userPath))
9022 			return B_BAD_ADDRESS;
9023 
9024 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9025 		if (pathBuffer.InitCheck() != B_OK)
9026 			return B_NO_MEMORY;
9027 
9028 		char* path = pathBuffer.LockBuffer();
9029 
9030 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9031 		if (length < B_OK)
9032 			return length;
9033 		if (length >= B_PATH_NAME_LENGTH)
9034 			return B_NAME_TOO_LONG;
9035 
9036 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9037 			statMask, false);
9038 	} else {
9039 		// no path given: get the FD and use the FD operation
9040 		struct file_descriptor* descriptor
9041 			= get_fd(get_current_io_context(false), fd);
9042 		if (descriptor == NULL)
9043 			return B_FILE_ERROR;
9044 
9045 		if (descriptor->ops->fd_write_stat)
9046 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
9047 		else
9048 			status = EOPNOTSUPP;
9049 
9050 		put_fd(descriptor);
9051 	}
9052 
9053 	return status;
9054 }
9055 
9056 
9057 int
9058 _user_open_attr_dir(int fd, const char* userPath)
9059 {
9060 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9061 	if (pathBuffer.InitCheck() != B_OK)
9062 		return B_NO_MEMORY;
9063 
9064 	char* path = pathBuffer.LockBuffer();
9065 
9066 	if (userPath != NULL) {
9067 		if (!IS_USER_ADDRESS(userPath)
9068 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9069 			return B_BAD_ADDRESS;
9070 	}
9071 
9072 	return attr_dir_open(fd, userPath ? path : NULL, false);
9073 }
9074 
9075 
9076 int
9077 _user_create_attr(int fd, const char* userName, uint32 type, int openMode)
9078 {
9079 	char name[B_FILE_NAME_LENGTH];
9080 
9081 	if (!IS_USER_ADDRESS(userName)
9082 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9083 		return B_BAD_ADDRESS;
9084 
9085 	return attr_create(fd, name, type, openMode, false);
9086 }
9087 
9088 
9089 int
9090 _user_open_attr(int fd, const char* userName, int openMode)
9091 {
9092 	char name[B_FILE_NAME_LENGTH];
9093 
9094 	if (!IS_USER_ADDRESS(userName)
9095 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9096 		return B_BAD_ADDRESS;
9097 
9098 	return attr_open(fd, name, openMode, false);
9099 }
9100 
9101 
9102 status_t
9103 _user_remove_attr(int fd, const char* userName)
9104 {
9105 	char name[B_FILE_NAME_LENGTH];
9106 
9107 	if (!IS_USER_ADDRESS(userName)
9108 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9109 		return B_BAD_ADDRESS;
9110 
9111 	return attr_remove(fd, name, false);
9112 }
9113 
9114 
9115 status_t
9116 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9117 	const char* userToName)
9118 {
9119 	if (!IS_USER_ADDRESS(userFromName)
9120 		|| !IS_USER_ADDRESS(userToName))
9121 		return B_BAD_ADDRESS;
9122 
9123 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9124 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9125 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9126 		return B_NO_MEMORY;
9127 
9128 	char* fromName = fromNameBuffer.LockBuffer();
9129 	char* toName = toNameBuffer.LockBuffer();
9130 
9131 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9132 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9133 		return B_BAD_ADDRESS;
9134 
9135 	return attr_rename(fromFile, fromName, toFile, toName, false);
9136 }
9137 
9138 
9139 int
9140 _user_open_index_dir(dev_t device)
9141 {
9142 	return index_dir_open(device, false);
9143 }
9144 
9145 
9146 status_t
9147 _user_create_index(dev_t device, const char* userName, uint32 type, uint32 flags)
9148 {
9149 	char name[B_FILE_NAME_LENGTH];
9150 
9151 	if (!IS_USER_ADDRESS(userName)
9152 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9153 		return B_BAD_ADDRESS;
9154 
9155 	return index_create(device, name, type, flags, false);
9156 }
9157 
9158 
9159 status_t
9160 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9161 {
9162 	char name[B_FILE_NAME_LENGTH];
9163 	struct stat stat;
9164 	status_t status;
9165 
9166 	if (!IS_USER_ADDRESS(userName)
9167 		|| !IS_USER_ADDRESS(userStat)
9168 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9169 		return B_BAD_ADDRESS;
9170 
9171 	status = index_name_read_stat(device, name, &stat, false);
9172 	if (status == B_OK) {
9173 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
9174 			return B_BAD_ADDRESS;
9175 	}
9176 
9177 	return status;
9178 }
9179 
9180 
9181 status_t
9182 _user_remove_index(dev_t device, const char* userName)
9183 {
9184 	char name[B_FILE_NAME_LENGTH];
9185 
9186 	if (!IS_USER_ADDRESS(userName)
9187 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9188 		return B_BAD_ADDRESS;
9189 
9190 	return index_remove(device, name, false);
9191 }
9192 
9193 
9194 status_t
9195 _user_getcwd(char* userBuffer, size_t size)
9196 {
9197 	if (!IS_USER_ADDRESS(userBuffer))
9198 		return B_BAD_ADDRESS;
9199 
9200 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9201 	if (pathBuffer.InitCheck() != B_OK)
9202 		return B_NO_MEMORY;
9203 
9204 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9205 
9206 	if (size > B_PATH_NAME_LENGTH)
9207 		size = B_PATH_NAME_LENGTH;
9208 
9209 	char* path = pathBuffer.LockBuffer();
9210 
9211 	status_t status = get_cwd(path, size, false);
9212 	if (status < B_OK)
9213 		return status;
9214 
9215 	// Copy back the result
9216 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9217 		return B_BAD_ADDRESS;
9218 
9219 	return status;
9220 }
9221 
9222 
9223 status_t
9224 _user_setcwd(int fd, const char* userPath)
9225 {
9226 	TRACE(("user_setcwd: path = %p\n", userPath));
9227 
9228 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9229 	if (pathBuffer.InitCheck() != B_OK)
9230 		return B_NO_MEMORY;
9231 
9232 	char* path = pathBuffer.LockBuffer();
9233 
9234 	if (userPath != NULL) {
9235 		if (!IS_USER_ADDRESS(userPath)
9236 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9237 			return B_BAD_ADDRESS;
9238 	}
9239 
9240 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9241 }
9242 
9243 
9244 status_t
9245 _user_change_root(const char* userPath)
9246 {
9247 	// only root is allowed to chroot()
9248 	if (geteuid() != 0)
9249 		return EPERM;
9250 
9251 	// alloc path buffer
9252 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9253 	if (pathBuffer.InitCheck() != B_OK)
9254 		return B_NO_MEMORY;
9255 
9256 	// copy userland path to kernel
9257 	char* path = pathBuffer.LockBuffer();
9258 	if (userPath != NULL) {
9259 		if (!IS_USER_ADDRESS(userPath)
9260 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9261 			return B_BAD_ADDRESS;
9262 	}
9263 
9264 	// get the vnode
9265 	struct vnode* vnode;
9266 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9267 	if (status != B_OK)
9268 		return status;
9269 
9270 	// set the new root
9271 	struct io_context* context = get_current_io_context(false);
9272 	mutex_lock(&sIOContextRootLock);
9273 	struct vnode* oldRoot = context->root;
9274 	context->root = vnode;
9275 	mutex_unlock(&sIOContextRootLock);
9276 
9277 	put_vnode(oldRoot);
9278 
9279 	return B_OK;
9280 }
9281 
9282 
9283 int
9284 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9285 	uint32 flags, port_id port, int32 token)
9286 {
9287 	char* query;
9288 
9289 	if (device < 0 || userQuery == NULL || queryLength == 0)
9290 		return B_BAD_VALUE;
9291 
9292 	// this is a safety restriction
9293 	if (queryLength >= 65536)
9294 		return B_NAME_TOO_LONG;
9295 
9296 	query = (char*)malloc(queryLength + 1);
9297 	if (query == NULL)
9298 		return B_NO_MEMORY;
9299 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9300 		free(query);
9301 		return B_BAD_ADDRESS;
9302 	}
9303 
9304 	int fd = query_open(device, query, flags, port, token, false);
9305 
9306 	free(query);
9307 	return fd;
9308 }
9309 
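// Illustrative sketch (not part of the original source): userland reaches
// this handler through the _kern_open_query() syscall. The query string is
// hypothetical; for a one-shot (non-live) query no flags, port, or token
// are needed.
//
//	const char* query = "(name==\"*.cpp\")";
//	int fd = _kern_open_query(volume, query, strlen(query), 0, -1, -1);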
9310 
9311 #include "vfs_request_io.cpp"
9312