xref: /haiku/src/system/kernel/fs/vfs.cpp (revision b671e9bbdbd10268a042b4f4cc4317ccd03d105e)
1 /*
2  * Copyright 2005-2008, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <disk_device_manager/KDiskDevice.h>
36 #include <disk_device_manager/KDiskDeviceManager.h>
37 #include <disk_device_manager/KDiskDeviceUtils.h>
38 #include <disk_device_manager/KDiskSystem.h>
39 #include <fd.h>
40 #include <file_cache.h>
41 #include <fs/node_monitor.h>
42 #include <khash.h>
43 #include <KPath.h>
44 #include <lock.h>
45 #include <low_resource_manager.h>
46 #include <syscalls.h>
47 #include <syscall_restart.h>
48 #include <tracing.h>
49 #include <util/atomic.h>
50 #include <util/AutoLock.h>
51 #include <util/DoublyLinkedList.h>
52 #include <util/OpenHashTable.h>
53 #include <vfs.h>
54 #include <vm.h>
55 #include <vm_cache.h>
56 
57 #include "fifo.h"
58 #include "IORequest.h"
59 
60 
61 //#define TRACE_VFS
62 #ifdef TRACE_VFS
63 #	define TRACE(x) dprintf x
64 #	define FUNCTION(x) dprintf x
65 #else
66 #	define TRACE(x) ;
67 #	define FUNCTION(x) ;
68 #endif
69 
70 #define ADD_DEBUGGER_COMMANDS
71 
72 
73 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
74 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
75 
76 #if KDEBUG
77 #	define FS_CALL(vnode, op, params...) \
78 		( HAS_FS_CALL(vnode, op) ? \
79 			vnode->ops->op(vnode->mount->volume, vnode, params) \
80 			: (panic("FS_CALL op " #op " is NULL"), 0))
81 #	define FS_CALL_NO_PARAMS(vnode, op) \
82 		( HAS_FS_CALL(vnode, op) ? \
83 			vnode->ops->op(vnode->mount->volume, vnode) \
84 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
85 #	define FS_MOUNT_CALL(mount, op, params...) \
86 		( HAS_FS_MOUNT_CALL(mount, op) ? \
87 			mount->volume->ops->op(mount->volume, params) \
88 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
89 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
90 		( HAS_FS_MOUNT_CALL(mount, op) ? \
91 			mount->volume->ops->op(mount->volume) \
92 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
93 #else
94 #	define FS_CALL(vnode, op, params...) \
95 			vnode->ops->op(vnode->mount->volume, vnode, params)
96 #	define FS_CALL_NO_PARAMS(vnode, op) \
97 			vnode->ops->op(vnode->mount->volume, vnode)
98 #	define FS_MOUNT_CALL(mount, op, params...) \
99 			mount->volume->ops->op(mount->volume, params)
100 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
101 			mount->volume->ops->op(mount->volume)
102 #endif
103 
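// Illustrative sketch (comment only, not part of the build): FS_CALL
// dispatches through the vnode's fs_vnode_ops table, passing the volume and
// the vnode first. A hypothetical call such as
//
//     status_t status = FS_CALL(vnode, read_stat, &stat);
//
// expands (in the non-KDEBUG case) to
//
//     vnode->ops->read_stat(vnode->mount->volume, vnode, &stat);
//
// With KDEBUG enabled, the same call panics instead of crashing if the file
// system left the read_stat hook NULL.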
104 
105 const static uint32 kMaxUnusedVnodes = 8192;
106 	// This is the maximum number of unused vnodes that the system
107 	// will keep around (a soft limit: if there is enough memory left,
108 	// they won't get flushed even when hitting that limit).
109 	// It may be chosen with respect to the available memory or enhanced
110 	// by some timestamp/frequency heuristic.
111 
112 const static uint32 kMaxEntryCacheEntryCount = 8192;
113 	// Maximum number of entries per entry cache. It's a hard limit ATM.
114 
115 struct EntryCacheKey {
116 	EntryCacheKey(ino_t dirID, const char* name)
117 		:
118 		dir_id(dirID),
119 		name(name)
120 	{
121 	}
122 
123 	ino_t		dir_id;
124 	const char*	name;
125 };
126 
127 
128 struct EntryCacheEntry : DoublyLinkedListLinkImpl<EntryCacheEntry> {
129 	EntryCacheEntry*	hash_link;
130 	ino_t				node_id;
131 	ino_t				dir_id;
132 	char				name[1];
133 };
134 
135 
136 struct EntryCacheHashDefinition {
137 	typedef EntryCacheKey	KeyType;
138 	typedef EntryCacheEntry	ValueType;
139 
140 	uint32 HashKey(const EntryCacheKey& key) const
141 	{
142 		return (uint32)key.dir_id ^ (uint32)(key.dir_id >> 32)
143 			^ hash_hash_string(key.name);
144 	}
145 
146 	size_t Hash(const EntryCacheEntry* value) const
147 	{
148 		return (uint32)value->dir_id ^ (uint32)(value->dir_id >> 32)
149 			^ hash_hash_string(value->name);
150 	}
151 
152 	bool Compare(const EntryCacheKey& key, const EntryCacheEntry* value) const
153 	{
154 		return value->dir_id == key.dir_id
155 			&& strcmp(value->name, key.name) == 0;
156 	}
157 
158 	EntryCacheEntry*& GetLink(EntryCacheEntry* value) const
159 	{
160 		return value->hash_link;
161 	}
162 };
163 
164 
165 class EntryCache {
166 public:
167 	EntryCache()
168 	{
169 		mutex_init(&fLock, "entry cache");
170 
171 		new(&fEntries) EntryTable;
172 		new(&fUsedEntries) EntryList;
173 		fEntryCount = 0;
174 	}
175 
176 	~EntryCache()
177 	{
178 		while (EntryCacheEntry* entry = fUsedEntries.Head())
179 			_Remove(entry);
180 
181 		mutex_destroy(&fLock);
182 	}
183 
184 	status_t Init()
185 	{
186 		return fEntries.Init();
187 	}
188 
189 	status_t Add(ino_t dirID, const char* name, ino_t nodeID)
190 	{
191 		MutexLocker _(fLock);
192 
193 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
194 		if (entry != NULL) {
195 			entry->node_id = nodeID;
196 			return B_OK;
197 		}
198 
199 		if (fEntryCount >= kMaxEntryCacheEntryCount)
200 			_Remove(fUsedEntries.Head());
201 
202 		entry = (EntryCacheEntry*)malloc(sizeof(EntryCacheEntry)
203 			+ strlen(name));
204 		if (entry == NULL)
205 			return B_NO_MEMORY;
206 
207 		entry->node_id = nodeID;
208 		entry->dir_id = dirID;
209 		strcpy(entry->name, name);
210 
211 		fEntries.Insert(entry);
212 		fUsedEntries.Add(entry);
213 		fEntryCount++;
214 
215 		return B_OK;
216 	}
217 
218 	status_t Remove(ino_t dirID, const char* name)
219 	{
220 		MutexLocker _(fLock);
221 
222 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
223 		if (entry == NULL)
224 			return B_ENTRY_NOT_FOUND;
225 
226 		_Remove(entry);
227 
228 		return B_OK;
229 	}
230 
231 	bool Lookup(ino_t dirID, const char* name, ino_t& nodeID)
232 	{
233 		MutexLocker _(fLock);
234 
235 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
236 		if (entry == NULL)
237 			return false;
238 
239 		// requeue at the end
240 		fUsedEntries.Remove(entry);
241 		fUsedEntries.Add(entry);
242 
243 		nodeID = entry->node_id;
244 		return true;
245 	}
246 
247 	void _Remove(EntryCacheEntry* entry)
248 	{
249 		fEntries.Remove(entry);
250 		fUsedEntries.Remove(entry);
251 		free(entry);
252 		fEntryCount--;
253 	}
254 
255 private:
256 	typedef BOpenHashTable<EntryCacheHashDefinition> EntryTable;
257 	typedef DoublyLinkedList<EntryCacheEntry> EntryList;
258 
259 	mutex		fLock;
260 	EntryTable	fEntries;
261 	EntryList	fUsedEntries;	// LRU queue (LRU entry at the head)
262 	uint32		fEntryCount;
263 };
264 
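// A minimal usage sketch (not part of the build; the IDs are hypothetical):
// each fs_mount owns one EntryCache, mapping (directory ID, entry name)
// pairs to node IDs, with LRU eviction once kMaxEntryCacheEntryCount
// entries are reached.
#if 0
static void
entry_cache_example(EntryCache& cache)
{
	// remember that "passwd" in directory 42 resolves to node 4711
	if (cache.Add(42, "passwd", 4711) != B_OK)
		return;

	ino_t nodeID;
	if (cache.Lookup(42, "passwd", nodeID)) {
		// hit: nodeID == 4711; the entry was requeued as most recently used
	}

	cache.Remove(42, "passwd");
}
#endif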
265 
266 struct vnode : fs_vnode, DoublyLinkedListLinkImpl<vnode> {
267 	struct vnode*	next;
268 	vm_cache*		cache;
269 	dev_t			device;
270 	list_link		unused_link;
271 	ino_t			id;
272 	struct fs_mount* mount;
273 	struct vnode*	covered_by;
274 	int32			ref_count;
275 	uint32			type : 29;
276 						// TODO: S_INDEX_DIR actually needs another bit.
277 						// Better combine this field with the following ones.
278 	uint32			remove : 1;
279 	uint32			busy : 1;
280 	uint32			unpublished : 1;
281 	struct advisory_locking* advisory_locking;
282 	struct file_descriptor* mandatory_locked_by;
283 };
284 
285 struct vnode_hash_key {
286 	dev_t	device;
287 	ino_t	vnode;
288 };
289 
290 typedef DoublyLinkedList<vnode> VnodeList;
291 
292 /*!	\brief Structure to manage a mounted file system
293 
294 	Note: The root_vnode and covers_vnode fields (what others?) are
295 	initialized in fs_mount() and not changed afterwards. That is, as soon
296 	as the mount is mounted and it is made sure it won't be unmounted
297 	(e.g. by holding a reference to a vnode of that mount), (read) access
298 	to those fields is always safe, even without additional locking.
299 	Moreover, while mounted, the mount holds a reference to covers_vnode,
300 	thus making the access path vnode->mount->covers_vnode->mount->... safe
301 	if a reference to vnode is held (note that for the root mount
302 	covers_vnode is NULL, though).
303 */
304 struct fs_mount {
305 	fs_mount()
306 		:
307 		volume(NULL),
308 		device_name(NULL)
309 	{
310 		recursive_lock_init(&rlock, "mount rlock");
311 	}
312 
313 	~fs_mount()
314 	{
315 		recursive_lock_destroy(&rlock);
316 		free(device_name);
317 
318 		while (volume) {
319 			fs_volume* superVolume = volume->super_volume;
320 
321 			if (volume->file_system != NULL)
322 				put_module(volume->file_system->info.name);
323 
324 			free(volume->file_system_name);
325 			free(volume);
326 			volume = superVolume;
327 		}
328 	}
329 
330 	struct fs_mount* next;
331 	dev_t			id;
332 	fs_volume*		volume;
333 	char*			device_name;
334 	recursive_lock	rlock;	// guards the vnodes list
335 	struct vnode*	root_vnode;
336 	struct vnode*	covers_vnode;
337 	KPartition*		partition;
338 	VnodeList		vnodes;
339 	EntryCache		entry_cache;
340 	bool			unmounting;
341 	bool			owns_file_device;
342 };
343 
344 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
345 	list_link		link;
346 	team_id			team;
347 	pid_t			session;
348 	off_t			start;
349 	off_t			end;
350 	bool			shared;
351 };
352 
353 typedef DoublyLinkedList<advisory_lock> LockList;
354 
355 struct advisory_locking {
356 	sem_id			lock;
357 	sem_id			wait_sem;
358 	LockList		locks;
359 
360 	advisory_locking()
361 		:
362 		lock(-1),
363 		wait_sem(-1)
364 	{
365 	}
366 
367 	~advisory_locking()
368 	{
369 		if (lock >= 0)
370 			delete_sem(lock);
371 		if (wait_sem >= 0)
372 			delete_sem(wait_sem);
373 	}
374 };
375 
376 /*!	\brief Guards sMountsTable.
377 
378 	The holder is allowed to read/write access the sMountsTable.
379 	Manipulation of the fs_mount structures themselves
380 	(and their destruction) requires different locks though.
381 */
382 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
383 
384 /*!	\brief Guards mount/unmount operations.
385 
386 	fs_mount() and fs_unmount() hold the lock for the duration of their
387 	operation. That is, locking the lock ensures that no FS is mounted or
388 	unmounted. In particular this means that
389 	- sMountsTable will not be modified,
390 	- the fields immutable after initialization of the fs_mount structures in
391 	  sMountsTable will not be modified,
392 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
393 
394 	The thread trying to lock the lock must not hold sVnodeMutex or
395 	sMountMutex.
396 */
397 static recursive_lock sMountOpLock;
398 
399 /*!	\brief Guards the vnode::covered_by field of any vnode
400 
401 	The holder is allowed to read access the vnode::covered_by field of any
402 	vnode. Additionally holding sMountOpLock allows for write access.
403 
404 	The thread trying to lock the mutex must not hold sVnodeMutex.
405 */
406 static mutex sVnodeCoveredByMutex
407 	= MUTEX_INITIALIZER("vfs_vnode_covered_by_lock");
408 
409 /*!	\brief Guards sVnodeTable.
410 
411 	The holder is allowed read/write access to sVnodeTable and to
412 	any unbusy vnode in that table, save for the immutable fields (device,
413 	id, private_node, mount), to which only read-only access is allowed,
414 	and for the covered_by field, which is guarded by sMountOpLock and
415 	sVnodeCoveredByMutex.
416 
417 	The thread trying to lock the mutex must not hold sMountMutex.
418 	You must not have this mutex held when calling create_sem(), as this
419 	might call vfs_free_unused_vnodes().
420 */
421 static mutex sVnodeMutex = MUTEX_INITIALIZER("vfs_vnode_lock");
422 
423 /*!	\brief Guards io_context::root.
424 
425 	Must be held when setting or getting the io_context::root field.
426 	The only operation allowed while holding this lock besides getting or
427 	setting the field is inc_vnode_ref_count() on io_context::root.
428 */
429 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
430 
431 #define VNODE_HASH_TABLE_SIZE 1024
432 static hash_table* sVnodeTable;
433 static list sUnusedVnodeList;
434 static uint32 sUnusedVnodes = 0;
435 static struct vnode* sRoot;
436 
437 #define MOUNTS_HASH_TABLE_SIZE 16
438 static hash_table* sMountsTable;
439 static dev_t sNextMountID = 1;
440 
441 #define MAX_TEMP_IO_VECS 8
442 
443 mode_t __gUmask = 022;
444 
445 /* function declarations */
446 
447 // file descriptor operation prototypes
448 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
449 	void* buffer, size_t* _bytes);
450 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
451 	const void* buffer, size_t* _bytes);
452 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
453 	int seekType);
454 static void file_free_fd(struct file_descriptor* descriptor);
455 static status_t file_close(struct file_descriptor* descriptor);
456 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
457 	struct selectsync* sync);
458 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
459 	struct selectsync* sync);
460 static status_t dir_read(struct io_context* context,
461 	struct file_descriptor* descriptor, struct dirent* buffer,
462 	size_t bufferSize, uint32* _count);
463 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
464 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
465 static status_t dir_rewind(struct file_descriptor* descriptor);
466 static void dir_free_fd(struct file_descriptor* descriptor);
467 static status_t dir_close(struct file_descriptor* descriptor);
468 static status_t attr_dir_read(struct io_context* context,
469 	struct file_descriptor* descriptor, struct dirent* buffer,
470 	size_t bufferSize, uint32* _count);
471 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
472 static void attr_dir_free_fd(struct file_descriptor* descriptor);
473 static status_t attr_dir_close(struct file_descriptor* descriptor);
474 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
475 	void* buffer, size_t* _bytes);
476 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
477 	const void* buffer, size_t* _bytes);
478 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
479 	int seekType);
480 static void attr_free_fd(struct file_descriptor* descriptor);
481 static status_t attr_close(struct file_descriptor* descriptor);
482 static status_t attr_read_stat(struct file_descriptor* descriptor,
483 	struct stat* statData);
484 static status_t attr_write_stat(struct file_descriptor* descriptor,
485 	const struct stat* stat, int statMask);
486 static status_t index_dir_read(struct io_context* context,
487 	struct file_descriptor* descriptor, struct dirent* buffer,
488 	size_t bufferSize, uint32* _count);
489 static status_t index_dir_rewind(struct file_descriptor* descriptor);
490 static void index_dir_free_fd(struct file_descriptor* descriptor);
491 static status_t index_dir_close(struct file_descriptor* descriptor);
492 static status_t query_read(struct io_context* context,
493 	struct file_descriptor* descriptor, struct dirent* buffer,
494 	size_t bufferSize, uint32* _count);
495 static status_t query_rewind(struct file_descriptor* descriptor);
496 static void query_free_fd(struct file_descriptor* descriptor);
497 static status_t query_close(struct file_descriptor* descriptor);
498 
499 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
500 	void* buffer, size_t length);
501 static status_t common_read_stat(struct file_descriptor* descriptor,
502 	struct stat* statData);
503 static status_t common_write_stat(struct file_descriptor* descriptor,
504 	const struct stat* statData, int statMask);
505 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
506 	struct stat* stat, bool kernel);
507 
508 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
509 	bool traverseLeafLink, int count, bool kernel,
510 	struct vnode** _vnode, ino_t* _parentID);
511 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
512 	size_t bufferSize, bool kernel);
513 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
514 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
515 static void inc_vnode_ref_count(struct vnode* vnode);
516 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
517 	bool reenter);
518 static inline void put_vnode(struct vnode* vnode);
519 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
520 	bool kernel);
521 
522 
523 static struct fd_ops sFileOps = {
524 	file_read,
525 	file_write,
526 	file_seek,
527 	common_ioctl,
528 	NULL,		// set_flags
529 	file_select,
530 	file_deselect,
531 	NULL,		// read_dir()
532 	NULL,		// rewind_dir()
533 	common_read_stat,
534 	common_write_stat,
535 	file_close,
536 	file_free_fd
537 };
538 
539 static struct fd_ops sDirectoryOps = {
540 	NULL,		// read()
541 	NULL,		// write()
542 	NULL,		// seek()
543 	common_ioctl,
544 	NULL,		// set_flags
545 	NULL,		// select()
546 	NULL,		// deselect()
547 	dir_read,
548 	dir_rewind,
549 	common_read_stat,
550 	common_write_stat,
551 	dir_close,
552 	dir_free_fd
553 };
554 
555 static struct fd_ops sAttributeDirectoryOps = {
556 	NULL,		// read()
557 	NULL,		// write()
558 	NULL,		// seek()
559 	common_ioctl,
560 	NULL,		// set_flags
561 	NULL,		// select()
562 	NULL,		// deselect()
563 	attr_dir_read,
564 	attr_dir_rewind,
565 	common_read_stat,
566 	common_write_stat,
567 	attr_dir_close,
568 	attr_dir_free_fd
569 };
570 
571 static struct fd_ops sAttributeOps = {
572 	attr_read,
573 	attr_write,
574 	attr_seek,
575 	common_ioctl,
576 	NULL,		// set_flags
577 	NULL,		// select()
578 	NULL,		// deselect()
579 	NULL,		// read_dir()
580 	NULL,		// rewind_dir()
581 	attr_read_stat,
582 	attr_write_stat,
583 	attr_close,
584 	attr_free_fd
585 };
586 
587 static struct fd_ops sIndexDirectoryOps = {
588 	NULL,		// read()
589 	NULL,		// write()
590 	NULL,		// seek()
591 	NULL,		// ioctl()
592 	NULL,		// set_flags
593 	NULL,		// select()
594 	NULL,		// deselect()
595 	index_dir_read,
596 	index_dir_rewind,
597 	NULL,		// read_stat()
598 	NULL,		// write_stat()
599 	index_dir_close,
600 	index_dir_free_fd
601 };
602 
603 #if 0
604 static struct fd_ops sIndexOps = {
605 	NULL,		// read()
606 	NULL,		// write()
607 	NULL,		// seek()
608 	NULL,		// ioctl()
609 	NULL,		// set_flags
610 	NULL,		// select()
611 	NULL,		// deselect()
612 	NULL,		// dir_read()
613 	NULL,		// dir_rewind()
614 	index_read_stat,	// read_stat()
615 	NULL,		// write_stat()
616 	NULL,		// dir_close()
617 	NULL		// free_fd()
618 };
619 #endif
620 
621 static struct fd_ops sQueryOps = {
622 	NULL,		// read()
623 	NULL,		// write()
624 	NULL,		// seek()
625 	NULL,		// ioctl()
626 	NULL,		// set_flags
627 	NULL,		// select()
628 	NULL,		// deselect()
629 	query_read,
630 	query_rewind,
631 	NULL,		// read_stat()
632 	NULL,		// write_stat()
633 	query_close,
634 	query_free_fd
635 };
636 
637 
638 // VNodePutter
639 class VNodePutter {
640 public:
641 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
642 
643 	~VNodePutter()
644 	{
645 		Put();
646 	}
647 
648 	void SetTo(struct vnode* vnode)
649 	{
650 		Put();
651 		fVNode = vnode;
652 	}
653 
654 	void Put()
655 	{
656 		if (fVNode) {
657 			put_vnode(fVNode);
658 			fVNode = NULL;
659 		}
660 	}
661 
662 	struct vnode* Detach()
663 	{
664 		struct vnode* vnode = fVNode;
665 		fVNode = NULL;
666 		return vnode;
667 	}
668 
669 private:
670 	struct vnode* fVNode;
671 };
672 
673 
674 class FDCloser {
675 public:
676 	FDCloser() : fFD(-1), fKernel(true) {}
677 
678 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
679 
680 	~FDCloser()
681 	{
682 		Close();
683 	}
684 
685 	void SetTo(int fd, bool kernel)
686 	{
687 		Close();
688 		fFD = fd;
689 		fKernel = kernel;
690 	}
691 
692 	void Close()
693 	{
694 		if (fFD >= 0) {
695 			if (fKernel)
696 				_kern_close(fFD);
697 			else
698 				_user_close(fFD);
699 			fFD = -1;
700 		}
701 	}
702 
703 	int Detach()
704 	{
705 		int fd = fFD;
706 		fFD = -1;
707 		return fd;
708 	}
709 
710 private:
711 	int		fFD;
712 	bool	fKernel;
713 };
714 
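// A sketch (not part of the build; do_something() is a hypothetical
// placeholder) of how VNodePutter and FDCloser are typically used: the
// vnode reference and the file descriptor are released automatically on
// every early return, and Detach() is called once ownership passes on.
#if 0
static status_t
raii_example(struct vnode* vnode, int fd, bool kernel)
{
	VNodePutter putter(vnode);
		// put_vnode(vnode) runs when putter goes out of scope
	FDCloser fdCloser(fd, kernel);
		// likewise, the descriptor is closed on destruction

	if (do_something(vnode) != B_OK)
		return B_ERROR;
			// both the reference and the FD are released here

	fdCloser.Detach();
		// success: the caller now owns the descriptor
	return B_OK;
}
#endif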
715 
716 #if VFS_PAGES_IO_TRACING
717 
718 namespace VFSPagesIOTracing {
719 
720 class PagesIOTraceEntry : public AbstractTraceEntry {
721 protected:
722 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
723 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
724 		status_t status, size_t bytesTransferred)
725 		:
726 		fVnode(vnode),
727 		fMountID(vnode->mount->id),
728 		fNodeID(vnode->id),
729 		fCookie(cookie),
730 		fPos(pos),
731 		fCount(count),
732 		fFlags(flags),
733 		fBytesRequested(bytesRequested),
734 		fStatus(status),
735 		fBytesTransferred(bytesTransferred)
736 	{
737 		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
738 			false);
739 	}
740 
741 	void AddDump(TraceOutput& out, const char* mode)
742 	{
743 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
744 			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
745 			fCookie, fPos, fBytesRequested);
746 
747 		if (fVecs != NULL) {
748 			for (uint32 i = 0; i < fCount; i++) {
749 				if (i > 0)
750 					out.Print(", ");
751 				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
752 			}
753 		}
754 
755 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
756 			fFlags, fStatus, fBytesTransferred);
757 	}
758 
759 protected:
760 	struct vnode*	fVnode;
761 	dev_t			fMountID;
762 	ino_t			fNodeID;
763 	void*			fCookie;
764 	off_t			fPos;
765 	iovec*			fVecs;
766 	uint32			fCount;
767 	uint32			fFlags;
768 	size_t			fBytesRequested;
769 	status_t		fStatus;
770 	size_t			fBytesTransferred;
771 };
772 
773 
774 class ReadPages : public PagesIOTraceEntry {
775 public:
776 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
777 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
778 		status_t status, size_t bytesTransferred)
779 		:
780 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
781 			bytesRequested, status, bytesTransferred)
782 	{
783 		Initialized();
784 	}
785 
786 	virtual void AddDump(TraceOutput& out)
787 	{
788 		PagesIOTraceEntry::AddDump(out, "read");
789 	}
790 };
791 
792 
793 class WritePages : public PagesIOTraceEntry {
794 public:
795 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
796 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
797 		status_t status, size_t bytesTransferred)
798 		:
799 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
800 			bytesRequested, status, bytesTransferred)
801 	{
802 		Initialized();
803 	}
804 
805 	virtual void AddDump(TraceOutput& out)
806 	{
807 		PagesIOTraceEntry::AddDump(out, "write");
808 	}
809 };
810 
811 }	// namespace VFSPagesIOTracing
812 
813 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
814 #else
815 #	define TPIO(x) ;
816 #endif	// VFS_PAGES_IO_TRACING
817 
818 
819 static int
820 mount_compare(void* _m, const void* _key)
821 {
822 	struct fs_mount* mount = (fs_mount*)_m;
823 	const dev_t* id = (dev_t*)_key;
824 
825 	if (mount->id == *id)
826 		return 0;
827 
828 	return -1;
829 }
830 
831 
832 static uint32
833 mount_hash(void* _m, const void* _key, uint32 range)
834 {
835 	struct fs_mount* mount = (fs_mount*)_m;
836 	const dev_t* id = (dev_t*)_key;
837 
838 	if (mount)
839 		return mount->id % range;
840 
841 	return (uint32)*id % range;
842 }
843 
844 
845 /*! Finds the mounted device (the fs_mount structure) with the given ID.
846 	Note, you must hold the sMountMutex lock when you call this function.
847 */
848 static struct fs_mount*
849 find_mount(dev_t id)
850 {
851 	ASSERT_LOCKED_MUTEX(&sMountMutex);
852 
853 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
854 }
855 
856 
857 static status_t
858 get_mount(dev_t id, struct fs_mount** _mount)
859 {
860 	struct fs_mount* mount;
861 
862 	MutexLocker nodeLocker(sVnodeMutex);
863 	MutexLocker mountLocker(sMountMutex);
864 
865 	mount = find_mount(id);
866 	if (mount == NULL)
867 		return B_BAD_VALUE;
868 
869 	struct vnode* rootNode = mount->root_vnode;
870 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
871 		// might have been called during a mount/unmount operation
872 		return B_BUSY;
873 	}
874 
875 	inc_vnode_ref_count(mount->root_vnode);
876 	*_mount = mount;
877 	return B_OK;
878 }
879 
880 
881 static void
882 put_mount(struct fs_mount* mount)
883 {
884 	if (mount)
885 		put_vnode(mount->root_vnode);
886 }
887 
888 
889 /*!	Tries to open the specified file system module.
890 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
891 	Returns a pointer to the file system module interface, or NULL if it
892 	could not open the module.
893 */
894 static file_system_module_info*
895 get_file_system(const char* fsName)
896 {
897 	char name[B_FILE_NAME_LENGTH];
898 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
899 		// construct module name if we didn't get one
900 		// (we currently support only one API)
901 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
902 		fsName = NULL;
903 	}
904 
905 	file_system_module_info* info;
906 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
907 		return NULL;
908 
909 	return info;
910 }
911 
912 
913 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
914 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
915 	The name is allocated for you, and you have to free() it when you're
916 	done with it.
917 	Returns NULL if the required memory is not available.
918 */
919 static char*
920 get_file_system_name(const char* fsName)
921 {
922 	const size_t length = strlen("file_systems/");
923 
924 	if (strncmp(fsName, "file_systems/", length)) {
925 		// the name already seems to be the module's file name
926 		return strdup(fsName);
927 	}
928 
929 	fsName += length;
930 	const char* end = strchr(fsName, '/');
931 	if (end == NULL) {
932 		// this doesn't seem to be a valid name, but well...
933 		return strdup(fsName);
934 	}
935 
936 	// cut off the trailing /v1
937 
938 	char* name = (char*)malloc(end + 1 - fsName);
939 	if (name == NULL)
940 		return NULL;
941 
942 	strlcpy(name, fsName, end + 1 - fsName);
943 	return name;
944 }
945 
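// Illustration (hypothetical inputs, not part of the build):
//
//     get_file_system_name("bfs")                 -> strdup of "bfs"
//     get_file_system_name("file_systems/bfs/v1") -> "bfs" (the module path
//                                                    prefix and the "/v1"
//                                                    API suffix are cut off)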
946 
947 /*!	Accepts a list of file system names separated by a colon, one for each
948 	layer, and returns the file system name for the specified layer.
949 	The name is allocated for you, and you have to free() it when you're
950 	done with it.
951 	Returns NULL if the required memory is not available or if there is no
952 	name for the specified layer.
953 */
954 static char*
955 get_file_system_name_for_layer(const char* fsNames, int32 layer)
956 {
957 	while (layer >= 0) {
958 		const char* end = strchr(fsNames, ':');
959 		if (end == NULL) {
960 			if (layer == 0)
961 				return strdup(fsNames);
962 			return NULL;
963 		}
964 
965 		if (layer == 0) {
966 			size_t length = end - fsNames + 1;
967 			char* result = (char*)malloc(length);
968 			if (result == NULL)
969 				return NULL;
970 			strlcpy(result, fsNames, length);
971 			return result;
972 		}
971 
972 		fsNames = end + 1;
973 		layer--;
974 	}
975 
976 	return NULL;
977 }
978 
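// Illustration (hypothetical layered FS names, not part of the build):
//
//     get_file_system_name_for_layer("zfs:encryption", 0) -> "zfs"
//     get_file_system_name_for_layer("zfs:encryption", 1) -> "encryption"
//     get_file_system_name_for_layer("zfs:encryption", 2) -> NULL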
979 
980 static int
981 vnode_compare(void* _vnode, const void* _key)
982 {
983 	struct vnode* vnode = (struct vnode*)_vnode;
984 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
985 
986 	if (vnode->device == key->device && vnode->id == key->vnode)
987 		return 0;
988 
989 	return -1;
990 }
991 
992 
993 static uint32
994 vnode_hash(void* _vnode, const void* _key, uint32 range)
995 {
996 	struct vnode* vnode = (struct vnode*)_vnode;
997 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
998 
999 #define VHASH(mountid, vnodeid) \
1000 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
1001 
1002 	if (vnode != NULL)
1003 		return VHASH(vnode->device, vnode->id) % range;
1004 
1005 	return VHASH(key->device, key->vnode) % range;
1006 
1007 #undef VHASH
1008 }
1009 
1010 
1011 static void
1012 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
1013 {
1014 	RecursiveLocker _(mount->rlock);
1015 	mount->vnodes.Add(vnode);
1016 }
1017 
1018 
1019 static void
1020 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
1021 {
1022 	RecursiveLocker _(mount->rlock);
1023 	mount->vnodes.Remove(vnode);
1024 }
1025 
1026 
1027 static status_t
1028 create_new_vnode(struct vnode** _vnode, dev_t mountID, ino_t vnodeID)
1029 {
1030 	FUNCTION(("create_new_vnode()\n"));
1031 
1032 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
1033 	if (vnode == NULL)
1034 		return B_NO_MEMORY;
1035 
1036 	// initialize basic values
1037 	memset(vnode, 0, sizeof(struct vnode));
1038 	vnode->device = mountID;
1039 	vnode->id = vnodeID;
1040 
1041 	// add the vnode to the mount structure
1042 	mutex_lock(&sMountMutex);
1043 	vnode->mount = find_mount(mountID);
1044 	if (!vnode->mount || vnode->mount->unmounting) {
1045 		mutex_unlock(&sMountMutex);
1046 		free(vnode);
1047 		return B_ENTRY_NOT_FOUND;
1048 	}
1049 
1050 	hash_insert(sVnodeTable, vnode);
1051 	add_vnode_to_mount_list(vnode, vnode->mount);
1052 
1053 	mutex_unlock(&sMountMutex);
1054 
1055 	vnode->ref_count = 1;
1056 	*_vnode = vnode;
1057 
1058 	return B_OK;
1059 }
1060 
1061 
1062 /*!	Frees the vnode and all resources it has acquired, and removes
1063 	it from the vnode hash as well as from its mount structure.
1064 	Will also make sure that any cache modifications are written back.
1065 */
1066 static void
1067 free_vnode(struct vnode* vnode, bool reenter)
1068 {
1069 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);
1070 
1071 	// write back any changes in this vnode's cache -- but only
1072 	// if the vnode won't be deleted, in which case the changes
1073 	// will be discarded
1074 
1075 	if (!vnode->remove && HAS_FS_CALL(vnode, fsync))
1076 		FS_CALL_NO_PARAMS(vnode, fsync);
1077 
1078 	// Note: If this vnode has a cache attached, there will still be two
1079 	// references to that cache at this point. The last one belongs to the vnode
1080 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1081 	// cache. Each but the last reference to a cache also includes a reference
1082 	// to the vnode. The file cache, however, released its reference (cf.
1083 	// file_cache_create()), so that this vnode's ref count has the chance to
1084 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
1085 	// cache reference to be released, which will also release a (no longer
1086 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1087 	// count, so that it will neither become negative nor 0.
1088 	vnode->ref_count = 2;
1089 
1090 	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
1091 	// cache either (i.e. no one can get a cache reference while we're deleting
1092 	// the vnode). This is, however, not the case for the page daemon. It gets
1093 	// its cache references via the pages it scans, so it can in fact get a
1094 	// vnode reference while we're deleting the vnode.
1095 
1096 	if (!vnode->unpublished) {
1097 		if (vnode->remove)
1098 			FS_CALL(vnode, remove_vnode, reenter);
1099 		else
1100 			FS_CALL(vnode, put_vnode, reenter);
1101 	}
1102 
1103 	// The file system has removed the resources of the vnode now, so we can
1104 	// make it available again (and remove the busy vnode from the hash)
1105 	mutex_lock(&sVnodeMutex);
1106 	hash_remove(sVnodeTable, vnode);
1107 	mutex_unlock(&sVnodeMutex);
1108 
1109 	// if we have a vm_cache attached, remove it
1110 	if (vnode->cache)
1111 		vnode->cache->ReleaseRef();
1112 
1113 	vnode->cache = NULL;
1114 
1115 	remove_vnode_from_mount_list(vnode, vnode->mount);
1116 
1117 	free(vnode);
1118 }
1119 
1120 
1121 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1122 	if the counter dropped to 0.
1123 
1124 	The caller must, of course, own a reference to the vnode to call this
1125 	function.
1126 	The caller must not hold the sVnodeMutex or the sMountMutex.
1127 
1128 	\param vnode the vnode.
1129 	\param alwaysFree don't move this vnode into the unused list, but really
1130 		   delete it if possible.
1131 	\param reenter \c true, if this function is called (indirectly) from within
1132 		   a file system. This will be passed to file system hooks only.
1133 	\return \c B_OK, if everything went fine, an error code otherwise.
1134 */
1135 static status_t
1136 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1137 {
1138 	MutexLocker locker(sVnodeMutex);
1139 
1140 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1141 
1142 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1143 
1144 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1145 		vnode->ref_count));
1146 
1147 	if (oldRefCount != 1)
1148 		return B_OK;
1149 
1150 	if (vnode->busy)
1151 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1152 
1153 	bool freeNode = false;
1154 
1155 	// Just insert the vnode into an unused list if we don't need
1156 	// to delete it
1157 	if (vnode->remove || alwaysFree) {
1158 		vnode->busy = true;
1159 		freeNode = true;
1160 	} else {
1161 		list_add_item(&sUnusedVnodeList, vnode);
1162 		if (++sUnusedVnodes > kMaxUnusedVnodes
1163 			&& low_resource_state(
1164 				B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)
1165 					!= B_NO_LOW_RESOURCE) {
1166 			// there are too many unused vnodes so we free the oldest one
1167 			// TODO: evaluate this mechanism
1168 			vnode = (struct vnode*)list_remove_head_item(&sUnusedVnodeList);
1169 			vnode->busy = true;
1170 			freeNode = true;
1171 			sUnusedVnodes--;
1172 		}
1173 	}
1174 
1175 	locker.Unlock();
1176 
1177 	if (freeNode)
1178 		free_vnode(vnode, reenter);
1179 
1180 	return B_OK;
1181 }
1182 
1183 
1184 /*!	\brief Increments the reference counter of the given vnode.
1185 
1186 	The caller must either already have a reference to the vnode or hold
1187 	the sVnodeMutex.
1188 
1189 	\param vnode the vnode.
1190 */
1191 static void
1192 inc_vnode_ref_count(struct vnode* vnode)
1193 {
1194 	atomic_add(&vnode->ref_count, 1);
1195 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1196 		vnode->ref_count));
1197 }
1198 
1199 
1200 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
1201 
1202 	The caller must hold the sVnodeMutex.
1203 
1204 	\param mountID the mount ID.
1205 	\param vnodeID the node ID.
1206 
1207 	\return The vnode structure, if it was found in the hash table, \c NULL
1208 			otherwise.
1209 */
1210 static struct vnode*
1211 lookup_vnode(dev_t mountID, ino_t vnodeID)
1212 {
1213 	struct vnode_hash_key key;
1214 
1215 	key.device = mountID;
1216 	key.vnode = vnodeID;
1217 
1218 	return (vnode*)hash_lookup(sVnodeTable, &key);
1219 }
1220 
1221 
1222 static bool
1223 is_special_node_type(int type)
1224 {
1225 	// at the moment only FIFOs are supported
1226 	return S_ISFIFO(type);
1227 }
1228 
1229 
1230 static status_t
1231 create_special_sub_node(struct vnode* vnode, uint32 flags)
1232 {
1233 	if (S_ISFIFO(vnode->type))
1234 		return create_fifo_vnode(vnode->mount->volume, vnode);
1235 
1236 	return B_BAD_VALUE;
1237 }
1238 
1239 
1240 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1241 
1242 	If the node is not yet in memory, it will be loaded.
1243 
1244 	The caller must not hold the sVnodeMutex or the sMountMutex.
1245 
1246 	\param mountID the mount ID.
1247 	\param vnodeID the node ID.
1248 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1249 		   retrieved vnode structure shall be written.
1250 	\param reenter \c true, if this function is called (indirectly) from within
1251 		   a file system.
1252 	\return \c B_OK, if everything went fine, an error code otherwise.
1253 */
1254 static status_t
1255 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1256 	int reenter)
1257 {
1258 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1259 		_vnode));
1260 
1261 	mutex_lock(&sVnodeMutex);
1262 
1263 	int32 tries = 1000;
1264 		// try for 10 secs
1265 restart:
1266 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1267 	if (vnode && vnode->busy) {
1268 		mutex_unlock(&sVnodeMutex);
1269 		if (!canWait || --tries < 0) {
1270 			// vnode doesn't seem to become unbusy
1271 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1272 				vnodeID);
1273 			return B_BUSY;
1274 		}
1275 		snooze(10000); // 10 ms
1276 		mutex_lock(&sVnodeMutex);
1277 		goto restart;
1278 	}
1279 
1280 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1281 
1282 	status_t status;
1283 
1284 	if (vnode) {
1285 		if (vnode->ref_count == 0) {
1286 			// this vnode has been unused before
1287 			list_remove_item(&sUnusedVnodeList, vnode);
1288 			sUnusedVnodes--;
1289 		}
1290 		inc_vnode_ref_count(vnode);
1291 	} else {
1292 		// we need to create a new vnode and read it in
1293 		status = create_new_vnode(&vnode, mountID, vnodeID);
1294 		if (status != B_OK)
1295 			goto err;
1296 
1297 		vnode->busy = true;
1298 		mutex_unlock(&sVnodeMutex);
1299 
1300 		int type;
1301 		uint32 flags;
1302 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1303 			&flags, reenter);
1304 		if (status == B_OK && vnode->private_node == NULL)
1305 			status = B_BAD_VALUE;
1306 
1307 		bool gotNode = status == B_OK;
1308 		bool publishSpecialSubNode = false;
1309 		if (gotNode) {
1310 			vnode->type = type;
1311 			publishSpecialSubNode = is_special_node_type(type)
1312 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1313 		}
1314 
1315 		if (gotNode && publishSpecialSubNode)
1316 			status = create_special_sub_node(vnode, flags);
1317 
1318 		mutex_lock(&sVnodeMutex);
1319 
1320 		if (status != B_OK) {
1321 			if (gotNode)
1322 				FS_CALL(vnode, put_vnode, reenter);
1323 
1324 			goto err1;
1325 		}
1326 
1327 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
1328 		vnode->busy = false;
1329 	}
1330 
1331 	mutex_unlock(&sVnodeMutex);
1332 
1333 	TRACE(("get_vnode: returning %p\n", vnode));
1334 
1335 	*_vnode = vnode;
1336 	return B_OK;
1337 
1338 err1:
1339 	hash_remove(sVnodeTable, vnode);
1340 	remove_vnode_from_mount_list(vnode, vnode->mount);
1341 err:
1342 	mutex_unlock(&sVnodeMutex);
1343 	if (vnode)
1344 		free(vnode);
1345 
1346 	return status;
1347 }
1348 
1349 
1350 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1351 	if the counter dropped to 0.
1352 
1353 	The caller must, of course, own a reference to the vnode to call this
1354 	function.
1355 	The caller must not hold the sVnodeMutex or the sMountMutex.
1356 
1357 	\param vnode the vnode.
1358 */
1359 static inline void
1360 put_vnode(struct vnode* vnode)
1361 {
1362 	dec_vnode_ref_count(vnode, false, false);
1363 }
1364 
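// A minimal sketch (not part of the build) of the reference protocol around
// get_vnode()/put_vnode(): every successful get_vnode() must be balanced by
// exactly one put_vnode(), which may in turn move the node to the unused
// list or free it.
#if 0
static status_t
vnode_ref_example(dev_t mountID, ino_t vnodeID)
{
	struct vnode* vnode;
	status_t status = get_vnode(mountID, vnodeID, &vnode, true, false);
		// canWait == true, reenter == false (not called from a file system)
	if (status != B_OK)
		return status;

	// ... use the vnode; it cannot be freed while we hold the reference ...

	put_vnode(vnode);
	return B_OK;
}
#endif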
1365 
1366 static void
1367 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1368 {
1369 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1370 
1371 	uint32 count = 1;
1372 	switch (level) {
1373 		case B_NO_LOW_RESOURCE:
1374 			return;
1375 		case B_LOW_RESOURCE_NOTE:
1376 			count = sUnusedVnodes / 100;
1377 			break;
1378 		case B_LOW_RESOURCE_WARNING:
1379 			count = sUnusedVnodes / 10;
1380 			break;
1381 		case B_LOW_RESOURCE_CRITICAL:
1382 			count = sUnusedVnodes;
1383 			break;
1384 	}
1385 
1386 	if (count > sUnusedVnodes)
1387 		count = sUnusedVnodes;
1388 
1389 	// Write back the modified pages of some unused vnodes and free them
1390 
1391 	for (uint32 i = 0; i < count; i++) {
1392 		mutex_lock(&sVnodeMutex);
1393 		struct vnode* vnode = (struct vnode*)list_remove_head_item(
1394 			&sUnusedVnodeList);
1395 		if (vnode == NULL) {
1396 			mutex_unlock(&sVnodeMutex);
1397 			break;
1398 		}
1399 
1400 		inc_vnode_ref_count(vnode);
1401 		sUnusedVnodes--;
1402 
1403 		mutex_unlock(&sVnodeMutex);
1404 
1405 		if (vnode->cache != NULL)
1406 			vnode->cache->WriteModified();
1407 
1408 		dec_vnode_ref_count(vnode, true, false);
1409 			// this should free the vnode when it's still unused
1410 	}
1411 }
1412 
1413 
1414 static inline void
1415 put_advisory_locking(struct advisory_locking* locking)
1416 {
1417 	release_sem(locking->lock);
1418 }
1419 
1420 
1421 /*!	Returns the advisory_locking object of the \a vnode in case it
1422 	has one, and locks it.
1423 	You have to call put_advisory_locking() when you're done with
1424 	it.
1425 	Note, you must not have the vnode mutex locked when calling
1426 	this function.
1427 */
1428 static struct advisory_locking*
1429 get_advisory_locking(struct vnode* vnode)
1430 {
1431 	mutex_lock(&sVnodeMutex);
1432 
1433 	struct advisory_locking* locking = vnode->advisory_locking;
1434 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1435 
1436 	mutex_unlock(&sVnodeMutex);
1437 
1438 	if (lock >= 0)
1439 		lock = acquire_sem(lock);
1440 	if (lock < 0) {
1441 		// This means the locking has been deleted in the meantime
1442 		// or had never existed in the first place - otherwise, we
1443 		// would get the lock at some point.
1444 		return NULL;
1445 	}
1446 
1447 	return locking;
1448 }
1449 
1450 
1451 /*!	Creates a locked advisory_locking object, and attaches it to the
1452 	given \a vnode.
1453 	Returns B_OK in case of success - even if the vnode got such an
1454 	object from someone else in the meantime; you'll still get this
1455 	one locked then.
1456 */
1457 static status_t
1458 create_advisory_locking(struct vnode* vnode)
1459 {
1460 	if (vnode == NULL)
1461 		return B_FILE_ERROR;
1462 
1463 	ObjectDeleter<advisory_locking> lockingDeleter;
1464 	struct advisory_locking* locking = NULL;
1465 
1466 	while (get_advisory_locking(vnode) == NULL) {
1467 		// no locking object set on the vnode yet, create one
1468 		if (locking == NULL) {
1469 			locking = new(std::nothrow) advisory_locking;
1470 			if (locking == NULL)
1471 				return B_NO_MEMORY;
1472 			lockingDeleter.SetTo(locking);
1473 
1474 			locking->wait_sem = create_sem(0, "advisory lock");
1475 			if (locking->wait_sem < 0)
1476 				return locking->wait_sem;
1477 
1478 			locking->lock = create_sem(0, "advisory locking");
1479 			if (locking->lock < 0)
1480 				return locking->lock;
1481 		}
1482 
1483 		// set our newly created locking object
1484 		MutexLocker _(sVnodeMutex);
1485 		if (vnode->advisory_locking == NULL) {
1486 			vnode->advisory_locking = locking;
1487 			lockingDeleter.Detach();
1488 			return B_OK;
1489 		}
1490 	}
1491 
1492 	// The vnode already had a locking object. That's just as well.
1493 
1494 	return B_OK;
1495 }
1496 
1497 
1498 /*!	Retrieves the first lock that has been set by the current team.
1499 */
1500 static status_t
1501 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1502 {
1503 	struct advisory_locking* locking = get_advisory_locking(vnode);
1504 	if (locking == NULL)
1505 		return B_BAD_VALUE;
1506 
1507 	// TODO: this should probably get the flock by its file descriptor!
1508 	team_id team = team_get_current_team_id();
1509 	status_t status = B_BAD_VALUE;
1510 
1511 	LockList::Iterator iterator = locking->locks.GetIterator();
1512 	while (iterator.HasNext()) {
1513 		struct advisory_lock* lock = iterator.Next();
1514 
1515 		if (lock->team == team) {
1516 			flock->l_start = lock->start;
1517 			flock->l_len = lock->end - lock->start + 1;
1518 			status = B_OK;
1519 			break;
1520 		}
1521 	}
1522 
1523 	put_advisory_locking(locking);
1524 	return status;
1525 }
1526 
1527 
1528 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1529 	with the advisory_lock \a lock.
1530 */
1531 static bool
1532 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1533 {
1534 	if (flock == NULL)
1535 		return true;
1536 
1537 	return lock->start <= flock->l_start - 1 + flock->l_len
1538 		&& lock->end >= flock->l_start;
1539 }
1540 
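// Worked example (hypothetical values): with l_start == 100 and l_len == 50
// the flock covers the inclusive byte range [100, 149]. A lock with
// start == 140, end == 200 intersects (140 <= 149 && 200 >= 100), while one
// with start == 150, end == 200 does not (150 > 149).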
1541 
1542 /*!	Removes the specified lock, or all locks of the calling team
1543 	if \a flock is NULL.
1544 */
1545 static status_t
1546 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1547 {
1548 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1549 
1550 	struct advisory_locking* locking = get_advisory_locking(vnode);
1551 	if (locking == NULL)
1552 		return B_OK;
1553 
1554 	// TODO: use the thread ID instead??
1555 	team_id team = team_get_current_team_id();
1556 	pid_t session = thread_get_current_thread()->team->session_id;
1557 
1558 	// find matching lock entries
1559 
1560 	LockList::Iterator iterator = locking->locks.GetIterator();
1561 	while (iterator.HasNext()) {
1562 		struct advisory_lock* lock = iterator.Next();
1563 		bool removeLock = false;
1564 
1565 		if (lock->session == session)
1566 			removeLock = true;
1567 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1568 			bool endsBeyond = false;
1569 			bool startsBefore = false;
1570 			if (flock != NULL) {
1571 				startsBefore = lock->start < flock->l_start;
1572 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1573 			}
1574 
1575 			if (!startsBefore && !endsBeyond) {
1576 				// lock is completely contained in flock
1577 				removeLock = true;
1578 			} else if (startsBefore && !endsBeyond) {
1579 				// cut the end of the lock
1580 				lock->end = flock->l_start - 1;
1581 			} else if (!startsBefore && endsBeyond) {
1582 				// cut the start of the lock
1583 				lock->start = flock->l_start + flock->l_len;
1584 			} else {
1585 				// divide the lock into two locks
1586 				struct advisory_lock* secondLock = new advisory_lock;
1587 				if (secondLock == NULL) {
1588 					// TODO: we should probably revert the locks we already
1589 					// changed... (ie. allocate upfront)
1590 					put_advisory_locking(locking);
1591 					return B_NO_MEMORY;
1592 				}
1593 
1594 				secondLock->team = lock->team;
1595 				secondLock->session = lock->session;
1596 				// values must already be normalized when getting here
1597 				secondLock->start = flock->l_start + flock->l_len;
1598 				secondLock->end = lock->end;
1599 				secondLock->shared = lock->shared;
1600 
1601 				// only cut the original lock after its old end has been
1602 				// copied into secondLock
1603 				lock->end = flock->l_start - 1;
1602 
1603 				locking->locks.Add(secondLock);
1604 			}
1605 		}
1606 
1607 		if (removeLock) {
1608 			// this lock is no longer used
1609 			iterator.Remove();
1610 			free(lock);
1611 		}
1612 	}
1613 
1614 	bool removeLocking = locking->locks.IsEmpty();
1615 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1616 
1617 	put_advisory_locking(locking);
1618 
1619 	if (removeLocking) {
1620 		// We can remove the whole advisory locking structure; it's no
1621 		// longer used
1622 		locking = get_advisory_locking(vnode);
1623 		if (locking != NULL) {
1624 			MutexLocker locker(sVnodeMutex);
1625 
1626 			// the locking could have been changed in the meantime
1627 			if (locking->locks.IsEmpty()) {
1628 				vnode->advisory_locking = NULL;
1629 				locker.Unlock();
1630 
1631 				// we've detached the locking from the vnode, so we can
1632 				// safely delete it
1633 				delete_sem(locking->lock);
1634 				delete_sem(locking->wait_sem);
1635 				delete locking;
1636 			} else {
1637 				// the locking is in use again
1638 				locker.Unlock();
1639 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1640 			}
1641 		}
1642 	}
1643 
1644 	return B_OK;
1645 }
1646 
1647 
1648 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1649 	will wait for the lock to become available, if there are any collisions
1650 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1651 
1652 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1653 	BSD flock() semantics are used, that is, all children can unlock the file
1654 	in question (we even allow parents to remove the lock, but that
1655 	seems to be in line with what the BSDs are doing).
1656 */
1657 static status_t
1658 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1659 	bool wait)
1660 {
1661 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1662 		vnode, flock, wait ? "yes" : "no"));
1663 
1664 	bool shared = flock->l_type == F_RDLCK;
1665 	status_t status = B_OK;
1666 
1667 	// TODO: do deadlock detection!
1668 
1669 	struct advisory_locking* locking;
1670 	sem_id waitForLock;
1671 
1672 	while (true) {
1673 		// if this vnode has an advisory_locking structure attached,
1674 		// lock that one and search for any colliding file lock
1675 		status = create_advisory_locking(vnode);
1676 		if (status != B_OK)
1677 			return status;
1678 
1679 		locking = vnode->advisory_locking;
1680 		team_id team = team_get_current_team_id();
1681 		waitForLock = -1;
1682 
1683 		// test for collisions
1684 		LockList::Iterator iterator = locking->locks.GetIterator();
1685 		while (iterator.HasNext()) {
1686 			struct advisory_lock* lock = iterator.Next();
1687 
1688 			// TODO: locks from the same team might be joinable!
1689 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1690 				// locks do overlap
1691 				if (!shared || !lock->shared) {
1692 					// we need to wait
1693 					waitForLock = locking->wait_sem;
1694 					break;
1695 				}
1696 			}
1697 		}
1698 
1699 		if (waitForLock < 0)
1700 			break;
1701 
1702 		// We need to wait. Do that or fail now, if we've been asked not to.
1703 
1704 		if (!wait) {
1705 			put_advisory_locking(locking);
1706 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1707 		}
1708 
1709 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1710 			B_CAN_INTERRUPT, 0);
1711 		if (status != B_OK && status != B_BAD_SEM_ID)
1712 			return status;
1713 
1714 		// We have been notified, but we need to re-lock the locking object. So
1715 		// go another round...
1716 	}
1717 
1718 	// install new lock
1719 
1720 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1721 		sizeof(struct advisory_lock));
1722 	if (lock == NULL) {
1723 		if (waitForLock >= B_OK)
1724 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1725 		release_sem(locking->lock);
1726 		return B_NO_MEMORY;
1727 	}
1728 
1729 	lock->team = team_get_current_team_id();
1730 	lock->session = session;
1731 	// values must already be normalized when getting here
1732 	lock->start = flock->l_start;
1733 	lock->end = flock->l_start - 1 + flock->l_len;
1734 	lock->shared = shared;
1735 
1736 	locking->locks.Add(lock);
1737 	put_advisory_locking(locking);
1738 
1739 	return status;
1740 }
1741 
1742 
1743 /*!	Normalizes the \a flock structure to make it easier to compare the
1744 	structure with others. The l_start and l_len fields are set to absolute
1745 	values according to the l_whence field.
1746 */
1747 static status_t
1748 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1749 {
1750 	switch (flock->l_whence) {
1751 		case SEEK_SET:
1752 			break;
1753 		case SEEK_CUR:
1754 			flock->l_start += descriptor->pos;
1755 			break;
1756 		case SEEK_END:
1757 		{
1758 			struct vnode* vnode = descriptor->u.vnode;
1759 			struct stat stat;
1760 			status_t status;
1761 
1762 			if (!HAS_FS_CALL(vnode, read_stat))
1763 				return EOPNOTSUPP;
1764 
1765 			status = FS_CALL(vnode, read_stat, &stat);
1766 			if (status != B_OK)
1767 				return status;
1768 
1769 			flock->l_start += stat.st_size;
1770 			break;
1771 		}
1772 		default:
1773 			return B_BAD_VALUE;
1774 	}
1775 
1776 	if (flock->l_start < 0)
1777 		flock->l_start = 0;
1778 	if (flock->l_len == 0)
1779 		flock->l_len = OFF_MAX;
1780 
1781 	// don't let the offset and length overflow
1782 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1783 		flock->l_len = OFF_MAX - flock->l_start;
1784 
1785 	if (flock->l_len < 0) {
1786 		// a negative length reverses the region
1787 		flock->l_start += flock->l_len;
1788 		flock->l_len = -flock->l_len;
1789 	}
1790 
1791 	return B_OK;
1792 }
1793 
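// Worked example (hypothetical values): for a descriptor at pos == 1000, a
// flock with l_whence == SEEK_CUR, l_start == 200, and l_len == -100 is
// first shifted to the absolute l_start 1200; the negative length then
// reverses the region, yielding l_start == 1100 and l_len == 100, i.e. the
// inclusive byte range [1100, 1199].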
1794 
1795 static void
1796 replace_vnode_if_disconnected(struct fs_mount* mount,
1797 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1798 	struct vnode* fallBack, bool lockRootLock)
1799 {
1800 	if (lockRootLock)
1801 		mutex_lock(&sIOContextRootLock);
1802 
1803 	struct vnode* obsoleteVnode = NULL;
1804 
1805 	if (vnode != NULL && vnode->mount == mount
1806 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1807 		obsoleteVnode = vnode;
1808 
1809 		if (vnode == mount->root_vnode) {
1810 			// redirect the vnode to the covered vnode
1811 			vnode = mount->covers_vnode;
1812 		} else
1813 			vnode = fallBack;
1814 
1815 		if (vnode != NULL)
1816 			inc_vnode_ref_count(vnode);
1817 	}
1818 
1819 	if (lockRootLock)
1820 		mutex_unlock(&sIOContextRootLock);
1821 
1822 	if (obsoleteVnode != NULL)
1823 		put_vnode(obsoleteVnode);
1824 }
1825 
1826 
1827 /*!	Disconnects all file descriptors that are associated with the
1828 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1829 	\a mount object.
1830 
1831 	Note, after you've called this function, there might still be ongoing
1832 	accesses - they won't be interrupted if they were already in progress.
1833 	However, any subsequent access will fail.
1834 
1835 	This is not a cheap function and should be used with care and rarely.
1836 	TODO: there is currently no means to stop a blocking read/write!
1837 */
1838 void
1839 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1840 	struct vnode* vnodeToDisconnect)
1841 {
1842 	// iterate over all teams and peek into their file descriptors
1843 	int32 nextTeamID = 0;
1844 
1845 	while (true) {
1846 		struct io_context* context = NULL;
1847 		bool contextLocked = false;
1848 		struct team* team = NULL;
1849 		team_id lastTeamID;
1850 
1851 		cpu_status state = disable_interrupts();
1852 		SpinLocker teamsLock(gTeamSpinlock);
1853 
1854 		lastTeamID = peek_next_thread_id();
1855 		if (nextTeamID < lastTeamID) {
1856 			// get next valid team
1857 			while (nextTeamID < lastTeamID
1858 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1859 				nextTeamID++;
1860 			}
1861 
1862 			if (team) {
1863 				context = (io_context*)team->io_context;
1864 
1865 				// Some acrobatics to lock the context in a safe way
1866 				// (cf. _kern_get_next_fd_info() for details).
1867 				GRAB_THREAD_LOCK();
1868 				teamsLock.Unlock();
1869 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1870 					== B_OK;
1871 				RELEASE_THREAD_LOCK();
1872 
1873 				nextTeamID++;
1874 			}
1875 		}
1876 
1877 		teamsLock.Unlock();
1878 		restore_interrupts(state);
1879 
1880 		if (context == NULL)
1881 			break;
1882 
1883 		// we now have a context - since we couldn't lock it while having
1884 		// safe access to the team structure, we now need to lock the mutex
1885 		// manually
1886 
1887 		if (!contextLocked) {
1888 			// team seems to be gone, go over to the next team
1889 			continue;
1890 		}
1891 
1892 		// the team cannot be deleted completely while we're owning its
1893 		// io_context mutex, so we can safely play with it now
1894 
1895 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1896 			sRoot, true);
1897 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1898 			sRoot, false);
1899 
1900 		for (uint32 i = 0; i < context->table_size; i++) {
1901 			if (struct file_descriptor* descriptor = context->fds[i]) {
1902 				inc_fd_ref_count(descriptor);
1903 
1904 				// if this descriptor points at this mount, we
1905 				// need to disconnect it to be able to unmount
1906 				struct vnode* vnode = fd_vnode(descriptor);
1907 				if (vnodeToDisconnect != NULL) {
1908 					if (vnode == vnodeToDisconnect)
1909 						disconnect_fd(descriptor);
1910 				} else if ((vnode != NULL && vnode->mount == mount)
1911 					|| (vnode == NULL && descriptor->u.mount == mount))
1912 					disconnect_fd(descriptor);
1913 
1914 				put_fd(descriptor);
1915 			}
1916 		}
1917 
1918 		mutex_unlock(&context->io_mutex);
1919 	}
1920 }
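
/*	Illustrative sketch (not from the original source): the typical caller
	is the unmount path, which severs all FDs still referring to the mount
	before tearing it down:

		// make any subsequent access through open FDs fail
		disconnect_mount_or_vnode_fds(mount, NULL);

	Passing a non-NULL \a vnodeToDisconnect instead restricts the disconnect
	to FDs referring to that single vnode.
*/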
1921 
1922 
1923 /*!	\brief Gets the root node of the current IO context.
1924 	If \a kernel is \c true, the kernel IO context will be used.
1925 	The caller obtains a reference to the returned node.
1926 */
1927 struct vnode*
1928 get_root_vnode(bool kernel)
1929 {
1930 	if (!kernel) {
1931 		// Get the root vnode from the current team's IO context
1932 		struct io_context* context = get_current_io_context(kernel);
1933 
1934 		mutex_lock(&sIOContextRootLock);
1935 
1936 		struct vnode* root = context->root;
1937 		if (root != NULL)
1938 			inc_vnode_ref_count(root);
1939 
1940 		mutex_unlock(&sIOContextRootLock);
1941 
1942 		if (root != NULL)
1943 			return root;
1944 
1945 		// That should never happen.
1946 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1947 			"root\n", team_get_current_team_id());
1948 	}
1949 
1950 	inc_vnode_ref_count(sRoot);
1951 	return sRoot;
1952 }
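
/*	Usage sketch (illustrative): the returned reference must be released
	with put_vnode() when no longer needed:

		struct vnode* root = get_root_vnode(false);
			// the calling team's root (may differ from "/" after chroot)
		// ... resolve paths against root ...
		put_vnode(root);
*/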
1953 
1954 
1955 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1956 		   by.
1957 
1958 	Given an arbitrary vnode, the function checks whether the node is covered
1959 	by the root of a volume. If it is, the function obtains a reference to the
1960 	volume root node and returns it.
1961 
1962 	\param vnode The vnode in question.
1963 	\return The volume root vnode the given vnode is covered by, if it is
1964 			indeed a mount point, or \c NULL otherwise.
1965 */
1966 static struct vnode*
1967 resolve_mount_point_to_volume_root(struct vnode* vnode)
1968 {
1969 	if (!vnode)
1970 		return NULL;
1971 
1972 	struct vnode* volumeRoot = NULL;
1973 
1974 	mutex_lock(&sVnodeCoveredByMutex);
1975 	if (vnode->covered_by) {
1976 		volumeRoot = vnode->covered_by;
1977 		inc_vnode_ref_count(volumeRoot);
1978 	}
1979 	mutex_unlock(&sVnodeCoveredByMutex);
1980 
1981 	return volumeRoot;
1982 }
1983 
1984 
1985 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1986 		   by.
1987 
1988 	Given an arbitrary vnode (identified by mount and node ID), the function
1989 	checks whether the node is covered by the root of a volume. If it is, the
1990 	function returns the mount and node ID of the volume root node. Otherwise
1991 	it simply returns the supplied mount and node ID.
1992 
1993 	In case of error (e.g. the supplied node could not be found) the variables
1994 	for storing the resolved mount and node ID remain untouched and an error
1995 	code is returned.
1996 
1997 	\param mountID The mount ID of the vnode in question.
1998 	\param nodeID The node ID of the vnode in question.
1999 	\param resolvedMountID Pointer to storage for the resolved mount ID.
2000 	\param resolvedNodeID Pointer to storage for the resolved node ID.
2001 	\return
2002 	- \c B_OK, if everything went fine,
2003 	- another error code, if something went wrong.
2004 */
2005 status_t
2006 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
2007 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
2008 {
2009 	// get the node
2010 	struct vnode* node;
2011 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
2012 	if (error != B_OK)
2013 		return error;
2014 
2015 	// resolve the node
2016 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
2017 	if (resolvedNode) {
2018 		put_vnode(node);
2019 		node = resolvedNode;
2020 	}
2021 
2022 	// set the return values
2023 	*resolvedMountID = node->device;
2024 	*resolvedNodeID = node->id;
2025 
2026 	put_vnode(node);
2027 
2028 	return B_OK;
2029 }
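
/*	Worked example (hypothetical IDs): if a volume with ID 5 is mounted on
	the directory (dev 3, node 42), then

		dev_t mountID;
		ino_t nodeID;
		resolve_mount_point_to_volume_root(3, 42, &mountID, &nodeID);

	yields mountID == 5 and nodeID == the ID of volume 5's root node. For a
	node that is not a mount point, the supplied IDs are returned unchanged.
*/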
2030 
2031 
2032 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
2033 
2034 	Given an arbitrary vnode, the function checks whether the node is the
2035 	root of a volume. If it is (and if it is not "/"), the function obtains
2036 	a reference to the underlying mount point node and returns it.
2037 
2038 	\param vnode The vnode in question (caller must have a reference).
2039 	\return The mount point vnode the vnode covers, if it is indeed a volume
2040 			root and not "/", or \c NULL otherwise.
2041 */
2042 static struct vnode*
2043 resolve_volume_root_to_mount_point(struct vnode* vnode)
2044 {
2045 	if (!vnode)
2046 		return NULL;
2047 
2048 	struct vnode* mountPoint = NULL;
2049 
2050 	struct fs_mount* mount = vnode->mount;
2051 	if (vnode == mount->root_vnode && mount->covers_vnode) {
2052 		mountPoint = mount->covers_vnode;
2053 		inc_vnode_ref_count(mountPoint);
2054 	}
2055 
2056 	return mountPoint;
2057 }
2058 
2059 
2060 /*!	\brief Gets the directory path and leaf name for a given path.
2061 
2062 	The supplied \a path is transformed to refer to the directory part of
2063 	the entry identified by the original path, and into the buffer \a filename
2064 	the leaf name of the original entry is written.
2065 	Neither the returned path nor the leaf name can be expected to be
2066 	canonical.
2067 
2068 	\param path The path to be analyzed. Must be able to store at least one
2069 		   additional character.
2070 	\param filename The buffer into which the leaf name will be written.
2071 		   Must be of size B_FILE_NAME_LENGTH at least.
2072 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2073 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2074 		   if the given path name is empty.
2075 */
2076 static status_t
2077 get_dir_path_and_leaf(char* path, char* filename)
2078 {
2079 	if (*path == '\0')
2080 		return B_ENTRY_NOT_FOUND;
2081 
2082 	char* p = strrchr(path, '/');
2083 		// '/' are not allowed in file names!
2084 
2085 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2086 
2087 	if (!p) {
2088 		// this path is a single segment with no '/' in it
2089 		// ex. "foo"
2090 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2091 			return B_NAME_TOO_LONG;
2092 		strcpy(path, ".");
2093 	} else {
2094 		p++;
2095 		if (p[0] == '\0') {
2096 			// special case: the path ends in one or more '/' - remove them
2097 			while (*--p == '/' && p != path);
2098 			p[1] = '\0';
2099 
2100 			if (p == path && p[0] == '/') {
2101 				// This path points to the root of the file system
2102 				strcpy(filename, ".");
2103 				return B_OK;
2104 			}
2105 			for (; p != path && *(p - 1) != '/'; p--);
2106 				// rewind to the start of the leaf before the '/'
2107 		}
2108 
2109 		// normal leaf: replace the leaf portion of the path with a '.'
2110 		if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
2111 				>= B_FILE_NAME_LENGTH) {
2112 			return B_NAME_TOO_LONG;
2113 		}
2114 		p[0] = '.';
2115 		p[1] = '\0';
2116 	}
2117 	return B_OK;
2118 }
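
/*	Worked examples (illustrative) of the in-place transformation:

		"foo"             -> path ".",            filename "foo"
		"/boot/home/bar"  -> path "/boot/home/.", filename "bar"
		"/boot/home/"     -> path "/boot/.",      filename "home"
		"/"               -> path "/",            filename "."
*/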
2119 
2120 
2121 static status_t
2122 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2123 	bool traverse, bool kernel, struct vnode** _vnode)
2124 {
2125 	char clonedName[B_FILE_NAME_LENGTH + 1];
2126 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2127 		return B_NAME_TOO_LONG;
2128 
2129 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2130 	struct vnode* directory;
2131 
2132 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2133 	if (status < 0)
2134 		return status;
2135 
2136 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2137 		_vnode, NULL);
2138 }
2139 
2140 
2141 static status_t
2142 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2143 {
2144 	ino_t id;
2145 
2146 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2147 		return get_vnode(dir->device, id, _vnode, true, false);
2148 
2149 	status_t status = FS_CALL(dir, lookup, name, &id);
2150 	if (status != B_OK)
2151 		return status;
2152 
2153 	mutex_lock(&sVnodeMutex);
2154 	*_vnode = lookup_vnode(dir->device, id);
2155 	mutex_unlock(&sVnodeMutex);
2156 
2157 	if (*_vnode == NULL) {
2158 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2159 			"0x%Lx)\n", dir->device, id);
2160 		return B_ENTRY_NOT_FOUND;
2161 	}
2162 
2163 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2164 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2165 //		(*_vnode)->mount->id, (*_vnode)->id);
2166 
2167 	return B_OK;
2168 }
2169 
2170 
2171 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2172 	\a path must not be NULL.
2173 	If it returns successfully, \a path contains the name of the last path
2174 	component. This function clobbers the buffer pointed to by \a path only
2175 	if it does contain more than one component.
2176 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2177 	it is successful or not!
2178 */
2179 static status_t
2180 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2181 	int count, struct io_context* ioContext, struct vnode** _vnode,
2182 	ino_t* _parentID)
2183 {
2184 	status_t status = B_OK;
2185 	ino_t lastParentID = vnode->id;
2186 
2187 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2188 
2189 	if (path == NULL) {
2190 		put_vnode(vnode);
2191 		return B_BAD_VALUE;
2192 	}
2193 
2194 	if (*path == '\0') {
2195 		put_vnode(vnode);
2196 		return B_ENTRY_NOT_FOUND;
2197 	}
2198 
2199 	while (true) {
2200 		struct vnode* nextVnode;
2201 		char* nextPath;
2202 
2203 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2204 			path));
2205 
2206 		// done?
2207 		if (path[0] == '\0')
2208 			break;
2209 
2210 		// walk to find the next path component ("path" will point to a single
2211 		// path component), and filter out multiple slashes
2212 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2213 				nextPath++);
2214 
2215 		if (*nextPath == '/') {
2216 			*nextPath = '\0';
2217 			do
2218 				nextPath++;
2219 			while (*nextPath == '/');
2220 		}
2221 
2222 		// See if the '..' is at the root of a mount and move to the covered
2223 		// vnode so we pass the '..' path to the underlying filesystem.
2224 		// Also prevent breaking the root of the IO context.
2225 		if (strcmp("..", path) == 0) {
2226 			if (vnode == ioContext->root) {
2227 				// Attempted prison break! Keep it contained.
2228 				path = nextPath;
2229 				continue;
2230 			} else if (vnode->mount->root_vnode == vnode
2231 				&& vnode->mount->covers_vnode) {
2232 				nextVnode = vnode->mount->covers_vnode;
2233 				inc_vnode_ref_count(nextVnode);
2234 				put_vnode(vnode);
2235 				vnode = nextVnode;
2236 			}
2237 		}
2238 
2239 		// check if vnode is really a directory
2240 		if (status == B_OK && !S_ISDIR(vnode->type))
2241 			status = B_NOT_A_DIRECTORY;
2242 
2243 		// Check if we have the right to search the current directory vnode.
2244 		// If a file system doesn't have the access() function, we assume that
2245 		// searching a directory is always allowed
2246 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2247 			status = FS_CALL(vnode, access, X_OK);
2248 
2249 		// Tell the filesystem to get the vnode of this path component (if we
2250 		// got the permission from the call above)
2251 		if (status == B_OK)
2252 			status = lookup_dir_entry(vnode, path, &nextVnode);
2253 
2254 		if (status != B_OK) {
2255 			put_vnode(vnode);
2256 			return status;
2257 		}
2258 
2259 		// If the new node is a symbolic link, resolve it (if we've been told
2260 		// to do it)
2261 		if (S_ISLNK(nextVnode->type)
2262 			&& !(!traverseLeafLink && nextPath[0] == '\0')) {
2263 			size_t bufferSize;
2264 			char* buffer;
2265 
2266 			TRACE(("traverse link\n"));
2267 
2268 			// it's not exactly nice style using goto in this way, but hey,
2269 			// it works :-/
2270 			if (count + 1 > B_MAX_SYMLINKS) {
2271 				status = B_LINK_LIMIT;
2272 				goto resolve_link_error;
2273 			}
2274 
2275 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2276 			if (buffer == NULL) {
2277 				status = B_NO_MEMORY;
2278 				goto resolve_link_error;
2279 			}
2280 
2281 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2282 				bufferSize--;
2283 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2284 				// null-terminate
2285 				if (status >= 0)
2286 					buffer[bufferSize] = '\0';
2287 			} else
2288 				status = B_BAD_VALUE;
2289 
2290 			if (status != B_OK) {
2291 				free(buffer);
2292 
2293 		resolve_link_error:
2294 				put_vnode(vnode);
2295 				put_vnode(nextVnode);
2296 
2297 				return status;
2298 			}
2299 			put_vnode(nextVnode);
2300 
2301 			// Check if we start from the root directory or the current
2302 			// directory ("vnode" still points to that one).
2303 			// Cut off all leading slashes if it's the root directory
2304 			path = buffer;
2305 			bool absoluteSymlink = false;
2306 			if (path[0] == '/') {
2307 				// we don't need the old directory anymore
2308 				put_vnode(vnode);
2309 
2310 				while (*++path == '/')
2311 					;
2312 
2313 				mutex_lock(&sIOContextRootLock);
2314 				vnode = ioContext->root;
2315 				inc_vnode_ref_count(vnode);
2316 				mutex_unlock(&sIOContextRootLock);
2317 
2318 				absoluteSymlink = true;
2319 			}
2320 
2321 			inc_vnode_ref_count(vnode);
2322 				// balance the next recursion - we will decrement the
2323 				// ref_count of the vnode, no matter if we succeeded or not
2324 
2325 			if (absoluteSymlink && *path == '\0') {
2326 				// symlink was just "/"
2327 				nextVnode = vnode;
2328 			} else {
2329 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2330 					ioContext, &nextVnode, &lastParentID);
2331 			}
2332 
2333 			free(buffer);
2334 
2335 			if (status != B_OK) {
2336 				put_vnode(vnode);
2337 				return status;
2338 			}
2339 		} else
2340 			lastParentID = vnode->id;
2341 
2342 		// decrease the ref count on the old dir we just looked up into
2343 		put_vnode(vnode);
2344 
2345 		path = nextPath;
2346 		vnode = nextVnode;
2347 
2348 		// see if we hit a mount point
2349 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2350 		if (mountPoint) {
2351 			put_vnode(vnode);
2352 			vnode = mountPoint;
2353 		}
2354 	}
2355 
2356 	*_vnode = vnode;
2357 	if (_parentID)
2358 		*_parentID = lastParentID;
2359 
2360 	return B_OK;
2361 }
2362 
2363 
2364 static status_t
2365 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2366 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2367 {
2368 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2369 		get_current_io_context(kernel), _vnode, _parentID);
2370 }
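
/*	Usage sketch (illustrative): since vnode_path_to_vnode() consumes a
	reference to the starting vnode, callers that want to keep their own
	reference pair the call with an inc_vnode_ref_count():

		inc_vnode_ref_count(dir);
			// consumed by vnode_path_to_vnode()
		struct vnode* entry;
		status_t status = vnode_path_to_vnode(dir, pathBuffer, true, 0,
			kernel, &entry, NULL);
		if (status == B_OK)
			put_vnode(entry);
*/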
2371 
2372 
2373 static status_t
2374 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2375 	ino_t* _parentID, bool kernel)
2376 {
2377 	struct vnode* start = NULL;
2378 
2379 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2380 
2381 	if (!path)
2382 		return B_BAD_VALUE;
2383 
2384 	if (*path == '\0')
2385 		return B_ENTRY_NOT_FOUND;
2386 
2387 	// figure out if we need to start at root or at cwd
2388 	if (*path == '/') {
2389 		if (sRoot == NULL) {
2390 			// we're a bit early, aren't we?
2391 			return B_ERROR;
2392 		}
2393 
2394 		while (*++path == '/')
2395 			;
2396 		start = get_root_vnode(kernel);
2397 
2398 		if (*path == '\0') {
2399 			*_vnode = start;
2400 			return B_OK;
2401 		}
2402 
2403 	} else {
2404 		struct io_context* context = get_current_io_context(kernel);
2405 
2406 		mutex_lock(&context->io_mutex);
2407 		start = context->cwd;
2408 		if (start != NULL)
2409 			inc_vnode_ref_count(start);
2410 		mutex_unlock(&context->io_mutex);
2411 
2412 		if (start == NULL)
2413 			return B_ERROR;
2414 	}
2415 
2416 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2417 		_parentID);
2418 }
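
/*	Usage sketch (illustrative) - note that the buffer is clobbered:

		char buffer[B_PATH_NAME_LENGTH];
		strlcpy(buffer, "/boot/home", sizeof(buffer));

		struct vnode* vnode;
		status_t status = path_to_vnode(buffer, true, &vnode, NULL, true);
		if (status == B_OK)
			put_vnode(vnode);
*/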
2419 
2420 
2421 /*! Returns the vnode for the next to last segment of the path, and returns
2422 	the last path component in \a filename.
2423 	The path buffer must be able to store at least one additional character.
2424 */
2425 static status_t
2426 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2427 	bool kernel)
2428 {
2429 	status_t status = get_dir_path_and_leaf(path, filename);
2430 	if (status != B_OK)
2431 		return status;
2432 
2433 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2434 }
2435 
2436 
2437 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2438 		   to by a FD + path pair.
2439 
2440 	\a path must be given in either case. \a fd might be omitted, in which
2441 	case \a path is either an absolute path or one relative to the current
2442 	directory. If both are supplied and \a path is relative, it is reckoned
2443 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2444 	is ignored.
2445 
2446 	The caller has the responsibility to call put_vnode() on the returned
2447 	directory vnode.
2448 
2449 	\param fd The FD. May be < 0.
2450 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2451 	       is modified by this function. It must have at least room for a
2452 	       string one character longer than the path it contains.
2453 	\param _vnode A pointer to a variable the directory vnode shall be written
2454 		   into.
2455 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2456 		   the leaf name of the specified entry will be written.
2457 	\param kernel \c true, if invoked from inside the kernel, \c false if
2458 		   invoked from userland.
2459 	\return \c B_OK, if everything went fine, another error code otherwise.
2460 */
2461 static status_t
2462 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2463 	char* filename, bool kernel)
2464 {
2465 	if (!path)
2466 		return B_BAD_VALUE;
2467 	if (*path == '\0')
2468 		return B_ENTRY_NOT_FOUND;
2469 	if (fd < 0)
2470 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2471 
2472 	status_t status = get_dir_path_and_leaf(path, filename);
2473 	if (status != B_OK)
2474 		return status;
2475 
2476 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2477 }
2478 
2479 
2480 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2481 		   to by a vnode + path pair.
2482 
2483 	\a path must be given in either case. \a vnode might be omitted, in which
2484 	case \a path is either an absolute path or one relative to the current
2485 	directory. If both are supplied and \a path is relative, it is reckoned
2486 	off of the directory referred to by \a vnode. If \a path is absolute,
2487 	\a vnode is ignored.
2488 
2489 	The caller has the responsibility to call put_vnode() on the returned
2490 	directory vnode.
2491 
2492 	\param vnode The vnode. May be \c NULL.
2493 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2494 	       is modified by this function. It must have at least room for a
2495 	       string one character longer than the path it contains.
2496 	\param _vnode A pointer to a variable the directory vnode shall be written
2497 		   into.
2498 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2499 		   the leaf name of the specified entry will be written.
2500 	\param kernel \c true, if invoked from inside the kernel, \c false if
2501 		   invoked from userland.
2502 	\return \c B_OK, if everything went fine, another error code otherwise.
2503 */
2504 static status_t
2505 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2506 	struct vnode** _vnode, char* filename, bool kernel)
2507 {
2508 	if (!path)
2509 		return B_BAD_VALUE;
2510 	if (*path == '\0')
2511 		return B_ENTRY_NOT_FOUND;
2512 	if (vnode == NULL || path[0] == '/')
2513 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2514 
2515 	status_t status = get_dir_path_and_leaf(path, filename);
2516 	if (status != B_OK)
2517 		return status;
2518 
2519 	inc_vnode_ref_count(vnode);
2520 		// vnode_path_to_vnode() always decrements the ref count
2521 
2522 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2523 }
2524 
2525 
2526 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2527 */
2528 static status_t
2529 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2530 	size_t bufferSize, struct io_context* ioContext)
2531 {
2532 	if (bufferSize < sizeof(struct dirent))
2533 		return B_BAD_VALUE;
2534 
2535 	// See if vnode is the root of a mount and move to the covered
2536 	// vnode so we get the underlying file system
2537 	VNodePutter vnodePutter;
2538 	if (vnode->mount->root_vnode == vnode
2539 		&& vnode->mount->covers_vnode != NULL) {
2540 		vnode = vnode->mount->covers_vnode;
2541 		inc_vnode_ref_count(vnode);
2542 		vnodePutter.SetTo(vnode);
2543 	}
2544 
2545 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2546 		// The FS supports getting the name of a vnode.
2547 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2548 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2549 			return B_OK;
2550 	}
2551 
2552 	// The FS doesn't support getting the name of a vnode. So we search the
2553 	// parent directory for the vnode, if the caller let us.
2554 
2555 	if (parent == NULL)
2556 		return EOPNOTSUPP;
2557 
2558 	void* cookie;
2559 
2560 	status_t status = FS_CALL(parent, open_dir, &cookie);
2561 	if (status >= B_OK) {
2562 		while (true) {
2563 			uint32 num = 1;
2564 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2565 				&num);
2566 			if (status != B_OK)
2567 				break;
2568 			if (num == 0) {
2569 				status = B_ENTRY_NOT_FOUND;
2570 				break;
2571 			}
2572 
2573 			if (vnode->id == buffer->d_ino) {
2574 				// found correct entry!
2575 				break;
2576 			}
2577 		}
2578 
2579 		FS_CALL(parent, close_dir, cookie);
2580 		FS_CALL(parent, free_dir_cookie, cookie);
			// the cookie was created by parent's open_dir(), so it must be
			// closed and freed on parent, not on vnode
2581 	}
2582 	return status;
2583 }
2584 
2585 
2586 static status_t
2587 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2588 	size_t nameSize, bool kernel)
2589 {
2590 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2591 	struct dirent* dirent = (struct dirent*)buffer;
2592 
2593 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2594 		get_current_io_context(kernel));
2595 	if (status != B_OK)
2596 		return status;
2597 
2598 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2599 		return B_BUFFER_OVERFLOW;
2600 
2601 	return B_OK;
2602 }
2603 
2604 
2605 /*!	Gets the full path to a given directory vnode.
2606 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2607 	file system doesn't support this call, it will fall back to iterating
2608 	through the parent directory to get the name of the child.
2609 
2610 	To protect against circular loops, it supports a maximum tree depth
2611 	of 256 levels.
2612 
2613 	Note that the path may no longer be correct by the time this function
2614 	returns! No locking is used to keep it valid, as paths aren't safe
2615 	anyway: the path to a file can change at any time.
2616 
2617 	It might be a good idea, though, to check whether the returned path
2618 	exists in the calling function (not done here for efficiency reasons).
2619 */
2620 static status_t
2621 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2622 	bool kernel)
2623 {
2624 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2625 
2626 	if (vnode == NULL || buffer == NULL)
2627 		return B_BAD_VALUE;
2628 
2629 	if (!S_ISDIR(vnode->type))
2630 		return B_NOT_A_DIRECTORY;
2631 
2632 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
2633 	KPath pathBuffer;
2634 	if (pathBuffer.InitCheck() != B_OK)
2635 		return B_NO_MEMORY;
2636 
2637 	char* path = pathBuffer.LockBuffer();
2638 	int32 insert = pathBuffer.BufferSize();
2639 	int32 maxLevel = 256;
2640 	int32 length;
2641 	status_t status;
2642 
2643 	// we don't use get_vnode() here because this call is more
2644 	// efficient and does all we need from get_vnode()
2645 	inc_vnode_ref_count(vnode);
2646 
2647 	// resolve a volume root to its mount point
2648 	struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2649 	if (mountPoint) {
2650 		put_vnode(vnode);
2651 		vnode = mountPoint;
2652 	}
2653 
2654 	path[--insert] = '\0';
2655 
2656 	struct io_context* ioContext = get_current_io_context(kernel);
2657 
2658 	while (true) {
2659 		// the name buffer is also used for fs_read_dir()
2660 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2661 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2662 		struct vnode* parentVnode;
2663 		ino_t parentID;
2664 
2665 		// lookup the parent vnode
2666 		if (vnode == ioContext->root) {
2667 			// we hit the IO context root
2668 			parentVnode = vnode;
2669 			inc_vnode_ref_count(vnode);
2670 		} else {
2671 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2672 			if (status != B_OK)
2673 				goto out;
2674 		}
2675 
2676 		// get the node's name
2677 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2678 			sizeof(nameBuffer), ioContext);
2679 
2680 		// resolve a volume root to its mount point
2681 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
2682 		if (mountPoint) {
2683 			put_vnode(parentVnode);
2684 			parentVnode = mountPoint;
2685 			parentID = parentVnode->id;
2686 		}
2687 
2688 		bool hitRoot = (parentVnode == vnode);
2689 
2690 		// release the current vnode, we only need its parent from now on
2691 		put_vnode(vnode);
2692 		vnode = parentVnode;
2693 
2694 		if (status != B_OK)
2695 			goto out;
2696 
2697 		if (hitRoot) {
2698 			// we have reached "/", which means we have constructed the full
2699 			// path
2700 			break;
2701 		}
2702 
2703 		// ToDo: add an explicit check for loops in about 10 levels to do
2704 		// real loop detection
2705 
2706 		// don't go deeper than 'maxLevel' to prevent circular loops
2707 		if (maxLevel-- < 0) {
2708 			status = ELOOP;
2709 			goto out;
2710 		}
2711 
2712 		// add the name in front of the current path
2713 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2714 		length = strlen(name);
2715 		insert -= length;
2716 		if (insert <= 0) {
2717 			status = ENOBUFS;
2718 			goto out;
2719 		}
2720 		memcpy(path + insert, name, length);
2721 		path[--insert] = '/';
2722 	}
2723 
2724 	// the root dir will result in an empty path: fix it
2725 	if (path[insert] == '\0')
2726 		path[--insert] = '/';
2727 
2728 	TRACE(("  path is: %s\n", path + insert));
2729 
2730 	// copy the path to the output buffer
2731 	length = pathBuffer.BufferSize() - insert;
2732 	if (length <= (int)bufferSize)
2733 		memcpy(buffer, path + insert, length);
2734 	else
2735 		status = ENOBUFS;
2736 
2737 out:
2738 	put_vnode(vnode);
2739 	return status;
2740 }
2741 
2742 
2743 /*!	Checks the length of every path component, and adds a '.'
2744 	if the path ends in a slash.
2745 	The given path buffer must be able to store at least one
2746 	additional character.
2747 */
2748 static status_t
2749 check_path(char* to)
2750 {
2751 	int32 length = 0;
2752 
2753 	// check length of every path component
2754 
2755 	while (*to) {
2756 		char* begin;
2757 		if (*to == '/')
2758 			to++, length++;
2759 
2760 		begin = to;
2761 		while (*to != '/' && *to)
2762 			to++, length++;
2763 
2764 		if (to - begin > B_FILE_NAME_LENGTH)
2765 			return B_NAME_TOO_LONG;
2766 	}
2767 
2768 	if (length == 0)
2769 		return B_ENTRY_NOT_FOUND;
2770 
2771 	// complete path if there is a slash at the end
2772 
2773 	if (*(to - 1) == '/') {
2774 		if (length > B_PATH_NAME_LENGTH - 2)
2775 			return B_NAME_TOO_LONG;
2776 
2777 		to[0] = '.';
2778 		to[1] = '\0';
2779 	}
2780 
2781 	return B_OK;
2782 }
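
/*	Worked examples (illustrative): "/boot/home/" becomes "/boot/home/."
	in place, "/boot/home" passes through unmodified, and any component
	longer than B_FILE_NAME_LENGTH characters fails with B_NAME_TOO_LONG.
*/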
2783 
2784 
2785 static struct file_descriptor*
2786 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2787 {
2788 	struct file_descriptor* descriptor
2789 		= get_fd(get_current_io_context(kernel), fd);
2790 	if (descriptor == NULL)
2791 		return NULL;
2792 
2793 	struct vnode* vnode = fd_vnode(descriptor);
2794 	if (vnode == NULL) {
2795 		put_fd(descriptor);
2796 		return NULL;
2797 	}
2798 
2799 	// ToDo: when we can close a file descriptor at any point, investigate
2800 	//	if this is still valid to do (accessing the vnode without ref_count
2801 	//	or locking)
2802 	*_vnode = vnode;
2803 	return descriptor;
2804 }
2805 
2806 
2807 static struct vnode*
2808 get_vnode_from_fd(int fd, bool kernel)
2809 {
2810 	struct file_descriptor* descriptor;
2811 	struct vnode* vnode;
2812 
2813 	descriptor = get_fd(get_current_io_context(kernel), fd);
2814 	if (descriptor == NULL)
2815 		return NULL;
2816 
2817 	vnode = fd_vnode(descriptor);
2818 	if (vnode != NULL)
2819 		inc_vnode_ref_count(vnode);
2820 
2821 	put_fd(descriptor);
2822 	return vnode;
2823 }
2824 
2825 
2826 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2827 	only the path will be considered. In this case, the \a path must not be
2828 	NULL.
2829 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2830 	and should be NULL for files.
2831 */
2832 static status_t
2833 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2834 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2835 {
2836 	if (fd < 0 && !path)
2837 		return B_BAD_VALUE;
2838 
2839 	if (path != NULL && *path == '\0')
2840 		return B_ENTRY_NOT_FOUND;
2841 
2842 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2843 		// no FD or absolute path
2844 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2845 	}
2846 
2847 	// FD only, or FD + relative path
2848 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2849 	if (!vnode)
2850 		return B_FILE_ERROR;
2851 
2852 	if (path != NULL) {
2853 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2854 			_vnode, _parentID);
2855 	}
2856 
2857 	// there is no relative path to take into account
2858 
2859 	*_vnode = vnode;
2860 	if (_parentID)
2861 		*_parentID = -1;
2862 
2863 	return B_OK;
2864 }
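
/*	The three call modes (illustrative; the buffers are hypothetical):

		fd_and_path_to_vnode(-1, absoluteBuffer, true, &vnode, NULL, kernel);
			// no FD - absolute path (or one relative to the cwd)
		fd_and_path_to_vnode(fd, relativeBuffer, true, &vnode, NULL, kernel);
			// path is resolved relative to the directory behind fd
		fd_and_path_to_vnode(fd, NULL, true, &vnode, NULL, kernel);
			// no path - yields the FD's vnode itself (_parentID is -1)
*/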
2865 
2866 
2867 static int
2868 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2869 	void* cookie, int openMode, bool kernel)
2870 {
2871 	struct file_descriptor* descriptor;
2872 	int fd;
2873 
2874 	// If the vnode is locked, we don't allow creating a new file/directory
2875 	// file_descriptor for it
2876 	if (vnode && vnode->mandatory_locked_by != NULL
2877 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2878 		return B_BUSY;
2879 
2880 	descriptor = alloc_fd();
2881 	if (!descriptor)
2882 		return B_NO_MEMORY;
2883 
2884 	if (vnode)
2885 		descriptor->u.vnode = vnode;
2886 	else
2887 		descriptor->u.mount = mount;
2888 	descriptor->cookie = cookie;
2889 
2890 	switch (type) {
2891 		// vnode types
2892 		case FDTYPE_FILE:
2893 			descriptor->ops = &sFileOps;
2894 			break;
2895 		case FDTYPE_DIR:
2896 			descriptor->ops = &sDirectoryOps;
2897 			break;
2898 		case FDTYPE_ATTR:
2899 			descriptor->ops = &sAttributeOps;
2900 			break;
2901 		case FDTYPE_ATTR_DIR:
2902 			descriptor->ops = &sAttributeDirectoryOps;
2903 			break;
2904 
2905 		// mount types
2906 		case FDTYPE_INDEX_DIR:
2907 			descriptor->ops = &sIndexDirectoryOps;
2908 			break;
2909 		case FDTYPE_QUERY:
2910 			descriptor->ops = &sQueryOps;
2911 			break;
2912 
2913 		default:
2914 			panic("get_new_fd() called with unknown type %d\n", type);
2915 			break;
2916 	}
2917 	descriptor->type = type;
2918 	descriptor->open_mode = openMode;
2919 
2920 	fd = new_fd(get_current_io_context(kernel), descriptor);
2921 	if (fd < 0) {
2922 		free(descriptor);
2923 		return B_NO_MORE_FDS;
2924 	}
2925 
2926 	return fd;
2927 }
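
/*	Usage sketch (illustrative): open-style functions first create the FS
	cookie and then wrap it into a descriptor:

		void* cookie;
		status_t status = FS_CALL(vnode, open, openMode, &cookie);
		if (status == B_OK) {
			int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
				kernel);
			// if fd < 0, the caller must close/free the cookie again
		}
*/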
2928 
2929 
2930 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2931 	vfs_normalize_path(). See there for more documentation.
2932 */
2933 static status_t
2934 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2935 {
2936 	VNodePutter dirPutter;
2937 	struct vnode* dir = NULL;
2938 	status_t error;
2939 
2940 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2941 		// get dir vnode + leaf name
2942 		struct vnode* nextDir;
2943 		char leaf[B_FILE_NAME_LENGTH];
2944 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2945 		if (error != B_OK)
2946 			return error;
2947 
2948 		dir = nextDir;
2949 		strcpy(path, leaf);
2950 		dirPutter.SetTo(dir);
2951 
2952 		// get file vnode, if we shall resolve links
2953 		bool fileExists = false;
2954 		struct vnode* fileVnode;
2955 		VNodePutter fileVnodePutter;
2956 		if (traverseLink) {
2957 			inc_vnode_ref_count(dir);
2958 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2959 					NULL) == B_OK) {
2960 				fileVnodePutter.SetTo(fileVnode);
2961 				fileExists = true;
2962 			}
2963 		}
2964 
2965 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->type)) {
2966 			// we're done -- construct the path
2967 			bool hasLeaf = true;
2968 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2969 				// special cases "." and ".." -- get the dir, forget the leaf
2970 				inc_vnode_ref_count(dir);
2971 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2972 					&nextDir, NULL);
2973 				if (error != B_OK)
2974 					return error;
2975 				dir = nextDir;
2976 				dirPutter.SetTo(dir);
2977 				hasLeaf = false;
2978 			}
2979 
2980 			// get the directory path
2981 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2982 			if (error != B_OK)
2983 				return error;
2984 
2985 			// append the leaf name
2986 			if (hasLeaf) {
2987 				// insert a directory separator if this is not the file system
2988 				// root
2989 				if ((strcmp(path, "/") != 0
2990 					&& strlcat(path, "/", pathSize) >= pathSize)
2991 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2992 					return B_NAME_TOO_LONG;
2993 				}
2994 			}
2995 
2996 			return B_OK;
2997 		}
2998 
2999 		// read link
3000 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
3001 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
3002 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
3003 			if (error != B_OK)
3004 				return error;
3005 			path[bufferSize] = '\0';
3006 		} else
3007 			return B_BAD_VALUE;
3008 	}
3009 
3010 	return B_LINK_LIMIT;
3011 }
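
/*	Worked example (illustrative): with a current directory of
	"/boot/home", normalize_path() rewrites "foo/../Desktop/" in place to
	"/boot/home/Desktop", resolving "." and ".." entries and - if
	\a traverseLink is \c true - any symlinks encountered on the way (up to
	B_MAX_SYMLINKS of them).
*/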
3012 
3013 
3014 #ifdef ADD_DEBUGGER_COMMANDS
3015 
3016 
3017 static void
3018 _dump_advisory_locking(advisory_locking* locking)
3019 {
3020 	if (locking == NULL)
3021 		return;
3022 
3023 	kprintf("   lock:        %ld\n", locking->lock);
3024 	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
3025 
3026 	int32 index = 0;
3027 	LockList::Iterator iterator = locking->locks.GetIterator();
3028 	while (iterator.HasNext()) {
3029 		struct advisory_lock* lock = iterator.Next();
3030 
3031 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
3032 		kprintf("        start:  %Ld\n", lock->start);
3033 		kprintf("        end:    %Ld\n", lock->end);
3034 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3035 	}
3036 }
3037 
3038 
3039 static void
3040 _dump_mount(struct fs_mount* mount)
3041 {
3042 	kprintf("MOUNT: %p\n", mount);
3043 	kprintf(" id:            %ld\n", mount->id);
3044 	kprintf(" device_name:   %s\n", mount->device_name);
3045 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3046 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
3047 	kprintf(" partition:     %p\n", mount->partition);
3048 	kprintf(" lock:          %p\n", &mount->rlock);
3049 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3050 		mount->owns_file_device ? " owns_file_device" : "");
3051 
3052 	fs_volume* volume = mount->volume;
3053 	while (volume != NULL) {
3054 		kprintf(" volume %p:\n", volume);
3055 		kprintf("  layer:            %ld\n", volume->layer);
3056 		kprintf("  private_volume:   %p\n", volume->private_volume);
3057 		kprintf("  ops:              %p\n", volume->ops);
3058 		kprintf("  file_system:      %p\n", volume->file_system);
3059 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3060 		volume = volume->super_volume;
3061 	}
3062 
3063 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3064 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3065 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3066 	set_debug_variable("_partition", (addr_t)mount->partition);
3067 }
3068 
3069 
3070 static void
3071 _dump_vnode(struct vnode* vnode)
3072 {
3073 	kprintf("VNODE: %p\n", vnode);
3074 	kprintf(" device:        %ld\n", vnode->device);
3075 	kprintf(" id:            %Ld\n", vnode->id);
3076 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3077 	kprintf(" private_node:  %p\n", vnode->private_node);
3078 	kprintf(" mount:         %p\n", vnode->mount);
3079 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3080 	kprintf(" cache:         %p\n", vnode->cache);
3081 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
3082 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3083 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3084 
3085 	_dump_advisory_locking(vnode->advisory_locking);
3086 
3087 	set_debug_variable("_node", (addr_t)vnode->private_node);
3088 	set_debug_variable("_mount", (addr_t)vnode->mount);
3089 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3090 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3091 }
3092 
3093 
3094 static int
3095 dump_mount(int argc, char** argv)
3096 {
3097 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3098 		kprintf("usage: %s [id|address]\n", argv[0]);
3099 		return 0;
3100 	}
3101 
3102 	uint32 id = parse_expression(argv[1]);
3103 	struct fs_mount* mount = NULL;
3104 
3105 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3106 	if (mount == NULL) {
3107 		if (IS_USER_ADDRESS(id)) {
3108 			kprintf("fs_mount not found\n");
3109 			return 0;
3110 		}
3111 		mount = (fs_mount*)id;
3112 	}
3113 
3114 	_dump_mount(mount);
3115 	return 0;
3116 }
3117 
3118 
3119 static int
3120 dump_mounts(int argc, char** argv)
3121 {
3122 	if (argc != 1) {
3123 		kprintf("usage: %s\n", argv[0]);
3124 		return 0;
3125 	}
3126 
3127 	kprintf("address     id root       covers     cookie     fs_name\n");
3128 
3129 	struct hash_iterator iterator;
3130 	struct fs_mount* mount;
3131 
3132 	hash_open(sMountsTable, &iterator);
3133 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3134 			!= NULL) {
3135 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3136 			mount->covers_vnode, mount->volume->private_volume,
3137 			mount->volume->file_system_name);
3138 
3139 		fs_volume* volume = mount->volume;
3140 		while (volume->super_volume != NULL) {
3141 			volume = volume->super_volume;
3142 			kprintf("                                     %p %s\n",
3143 				volume->private_volume, volume->file_system_name);
3144 		}
3145 	}
3146 
3147 	hash_close(sMountsTable, &iterator, false);
3148 	return 0;
3149 }
3150 
3151 
3152 static int
3153 dump_vnode(int argc, char** argv)
3154 {
3155 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
3156 		kprintf("usage: %s <device> <id>\n"
3157 			"   or: %s <address>\n", argv[0], argv[0]);
3158 		return 0;
3159 	}
3160 
3161 	struct vnode* vnode = NULL;
3162 
3163 	if (argc == 2) {
3164 		vnode = (struct vnode*)parse_expression(argv[1]);
3165 		if (IS_USER_ADDRESS(vnode)) {
3166 			kprintf("invalid vnode address\n");
3167 			return 0;
3168 		}
3169 		_dump_vnode(vnode);
3170 		return 0;
3171 	}
3172 
3173 	struct hash_iterator iterator;
3174 	dev_t device = parse_expression(argv[1]);
3175 	ino_t id = parse_expression(argv[2]);
3176 
3177 	hash_open(sVnodeTable, &iterator);
3178 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3179 		if (vnode->id != id || vnode->device != device)
3180 			continue;
3181 
3182 		_dump_vnode(vnode);
3183 	}
3184 
3185 	hash_close(sVnodeTable, &iterator, false);
3186 	return 0;
3187 }
3188 
3189 
3190 static int
3191 dump_vnodes(int argc, char** argv)
3192 {
3193 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3194 		kprintf("usage: %s [device]\n", argv[0]);
3195 		return 0;
3196 	}
3197 
3198 	// restrict dumped nodes to a certain device if requested
3199 	dev_t device = parse_expression(argv[1]);
3200 
3201 	struct hash_iterator iterator;
3202 	struct vnode* vnode;
3203 
3204 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3205 		"flags\n");
3206 
3207 	hash_open(sVnodeTable, &iterator);
3208 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3209 		if (vnode->device != device)
3210 			continue;
3211 
3212 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3213 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3214 			vnode->advisory_locking, vnode->remove ? "r" : "-",
3215 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3216 	}
3217 
3218 	hash_close(sVnodeTable, &iterator, false);
3219 	return 0;
3220 }
3221 
3222 
3223 static int
3224 dump_vnode_caches(int argc, char** argv)
3225 {
3226 	struct hash_iterator iterator;
3227 	struct vnode* vnode;
3228 
3229 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3230 		kprintf("usage: %s [device]\n", argv[0]);
3231 		return 0;
3232 	}
3233 
3234 	// restrict dumped nodes to a certain device if requested
3235 	dev_t device = -1;
3236 	if (argc > 1)
3237 		device = parse_expression(argv[1]);
3238 
3239 	kprintf("address    dev     inode cache          size   pages\n");
3240 
3241 	hash_open(sVnodeTable, &iterator);
3242 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3243 		if (vnode->cache == NULL)
3244 			continue;
3245 		if (device != -1 && vnode->device != device)
3246 			continue;
3247 
3248 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3249 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3250 				/ B_PAGE_SIZE, vnode->cache->page_count);
3251 	}
3252 
3253 	hash_close(sVnodeTable, &iterator, false);
3254 	return 0;
3255 }
3256 
3257 
3258 int
3259 dump_io_context(int argc, char** argv)
3260 {
3261 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3262 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3263 		return 0;
3264 	}
3265 
3266 	struct io_context* context = NULL;
3267 
3268 	if (argc > 1) {
3269 		uint32 num = parse_expression(argv[1]);
3270 		if (IS_KERNEL_ADDRESS(num))
3271 			context = (struct io_context*)num;
3272 		else {
3273 			struct team* team = team_get_team_struct_locked(num);
3274 			if (team == NULL) {
3275 				kprintf("could not find team with ID %ld\n", num);
3276 				return 0;
3277 			}
3278 			context = (struct io_context*)team->io_context;
3279 		}
3280 	} else
3281 		context = get_current_io_context(true);
3282 
3283 	kprintf("I/O CONTEXT: %p\n", context);
3284 	kprintf(" root vnode:\t%p\n", context->root);
3285 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3286 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3287 	kprintf(" max fds:\t%lu\n", context->table_size);
3288 
3289 	if (context->num_used_fds)
3290 		kprintf("   no. type     ops ref open mode        pos cookie\n");
3291 
3292 	for (uint32 i = 0; i < context->table_size; i++) {
3293 		struct file_descriptor* fd = context->fds[i];
3294 		if (fd == NULL)
3295 			continue;
3296 
3297 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type,
3298 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode, fd->pos,
3299 			fd->cookie, fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3300 				? "mount" : "vnode",
3301 			fd->u.vnode);
3302 	}
3303 
3304 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3305 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3306 
3307 	set_debug_variable("_cwd", (addr_t)context->cwd);
3308 
3309 	return 0;
3310 }
3311 
3312 
3313 int
3314 dump_vnode_usage(int argc, char** argv)
3315 {
3316 	if (argc != 1) {
3317 		kprintf("usage: %s\n", argv[0]);
3318 		return 0;
3319 	}
3320 
3321 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3322 		kMaxUnusedVnodes);
3323 
3324 	struct hash_iterator iterator;
3325 	hash_open(sVnodeTable, &iterator);
3326 
3327 	uint32 count = 0;
3328 	struct vnode* vnode;
3329 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3330 		count++;
3331 	}
3332 
3333 	hash_close(sVnodeTable, &iterator, false);
3334 
3335 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3336 	return 0;
3337 }
3338 
3339 #endif	// ADD_DEBUGGER_COMMANDS
3340 
3341 /*!	Clears an iovec array of physical pages.
3342 	Returns in \a _bytes the number of bytes successfully cleared.
3343 */
3344 static status_t
3345 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3346 {
3347 	size_t bytes = *_bytes;
3348 	size_t index = 0;
3349 
3350 	while (bytes > 0) {
3351 		size_t length = min_c(vecs[index].iov_len, bytes);
3352 
3353 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3354 			length);
3355 		if (status != B_OK) {
3356 			*_bytes -= bytes;
3357 			return status;
3358 		}
3359 
3360 		bytes -= length;
		index++;
			// advance to the next vec - without this, the same vec would
			// be zeroed over and over again
3361 	}
3362 
3363 	return B_OK;
3364 }
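
/*	Usage sketch (illustrative): clearing the first 512 bytes described by
	a set of physical iovecs, e.g. for a sparse extent:

		size_t bytes = 512;
		status_t status = zero_pages(vecs, vecCount, &bytes);
		// on error, bytes contains the number of bytes actually cleared
*/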
3365 
3366 
3367 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3368 	and calls the file system hooks to read/write the request to disk.
3369 */
3370 static status_t
3371 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3372 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3373 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3374 	bool doWrite)
3375 {
3376 	if (fileVecCount == 0) {
3377 		// There are no file vecs at this offset, so we're obviously trying
3378 		// to access the file outside of its bounds
3379 		return B_BAD_VALUE;
3380 	}
3381 
3382 	size_t numBytes = *_numBytes;
3383 	uint32 fileVecIndex;
3384 	size_t vecOffset = *_vecOffset;
3385 	uint32 vecIndex = *_vecIndex;
3386 	status_t status;
3387 	size_t size;
3388 
3389 	if (!doWrite && vecOffset == 0) {
3390 		// now directly read the data from the device
3391 		// the first file_io_vec can be read directly
3392 
3393 		if (fileVecs[0].length < numBytes)
3394 			size = fileVecs[0].length;
3395 		else
3396 			size = numBytes;
3397 
3398 		if (fileVecs[0].offset >= 0) {
3399 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3400 				&vecs[vecIndex], vecCount - vecIndex, &size);
3401 		} else {
3402 			// sparse read
3403 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3404 		}
3405 		if (status != B_OK)
3406 			return status;
3407 
3408 		// TODO: this is a work-around for buggy device drivers!
3409 		//	When our own drivers honour the length, we can:
3410 		//	a) also use this direct I/O for writes (otherwise, it would
3411 		//	   overwrite precious data)
3412 		//	b) panic if the term below is true (at least for writes)
3413 		if (size > fileVecs[0].length) {
3414 			//dprintf("warning: device driver %p doesn't respect total length "
3415 			//	"in read_pages() call!\n", ref->device);
3416 			size = fileVecs[0].length;
3417 		}
3418 
3419 		ASSERT(size <= fileVecs[0].length);
3420 
3421 		// If the file portion was contiguous, we're already done now
3422 		if (size == numBytes)
3423 			return B_OK;
3424 
3425 		// if we reached the end of the file, we can return as well
3426 		if (size != fileVecs[0].length) {
3427 			*_numBytes = size;
3428 			return B_OK;
3429 		}
3430 
3431 		fileVecIndex = 1;
3432 
3433 		// first, find out where we have to continue in our iovecs
3434 		for (; vecIndex < vecCount; vecIndex++) {
3435 			if (size < vecs[vecIndex].iov_len)
3436 				break;
3437 
3438 			size -= vecs[vecIndex].iov_len;
3439 		}
3440 
3441 		vecOffset = size;
3442 	} else {
3443 		fileVecIndex = 0;
3444 		size = 0;
3445 	}
3446 
3447 	// Too bad, let's process the rest of the file_io_vecs
3448 
3449 	size_t totalSize = size;
3450 	size_t bytesLeft = numBytes - size;
3451 
3452 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3453 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3454 		off_t fileOffset = fileVec.offset;
3455 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3456 
3457 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3458 
3459 		// process the complete fileVec
3460 		while (fileLeft > 0) {
3461 			iovec tempVecs[MAX_TEMP_IO_VECS];
3462 			uint32 tempCount = 0;
3463 
3464 			// size tracks how much of what is left of the current fileVec
3465 			// (fileLeft) has been assigned to tempVecs
3466 			size = 0;
3467 
3468 			// assign what is left of the current fileVec to the tempVecs
3469 			for (size = 0; size < fileLeft && vecIndex < vecCount
3470 					&& tempCount < MAX_TEMP_IO_VECS;) {
3471 				// try to satisfy one iovec per iteration (or as much as
3472 				// possible)
3473 
3474 				// bytes left of the current iovec
3475 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3476 				if (vecLeft == 0) {
3477 					vecOffset = 0;
3478 					vecIndex++;
3479 					continue;
3480 				}
3481 
3482 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3483 					vecIndex, vecOffset, size));
3484 
3485 				// actually available bytes
3486 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3487 
3488 				tempVecs[tempCount].iov_base
3489 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3490 				tempVecs[tempCount].iov_len = tempVecSize;
3491 				tempCount++;
3492 
3493 				size += tempVecSize;
3494 				vecOffset += tempVecSize;
3495 			}
3496 
3497 			size_t bytes = size;
3498 
3499 			if (fileOffset == -1) {
3500 				if (doWrite) {
3501 					panic("sparse write attempt: vnode %p", vnode);
3502 					status = B_IO_ERROR;
3503 				} else {
3504 					// sparse read
3505 					status = zero_pages(tempVecs, tempCount, &bytes);
3506 				}
3507 			} else if (doWrite) {
3508 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3509 					tempVecs, tempCount, &bytes);
3510 			} else {
3511 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3512 					tempVecs, tempCount, &bytes);
3513 			}
3514 			if (status != B_OK)
3515 				return status;
3516 
3517 			totalSize += bytes;
3518 			bytesLeft -= size;
3519 			if (fileOffset >= 0)
3520 				fileOffset += size;
3521 			fileLeft -= size;
3522 			//dprintf("-> file left = %Lu\n", fileLeft);
3523 
3524 			if (size != bytes || vecIndex >= vecCount) {
3525 				// there are no more bytes or iovecs, let's bail out
3526 				*_numBytes = totalSize;
3527 				return B_OK;
3528 			}
3529 		}
3530 	}
3531 
3532 	*_vecIndex = vecIndex;
3533 	*_vecOffset = vecOffset;
3534 	*_numBytes = totalSize;
3535 	return B_OK;
3536 }
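
/*	Worked example (illustrative numbers): a 16 KB read backed by two
	extents, fileVecs = { (offset 1 MB, length 12 KB),
	(offset 4 MB, length 4 KB) }, is issued as one read_pages() call of
	12 KB at 1 MB and one of 4 KB at 4 MB, with the iovecs split at the
	extent boundary via tempVecs. A file vec with offset -1 denotes a
	sparse extent and is zero-filled on reads.
*/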
3537 
3538 
3539 //	#pragma mark - public API for file systems
3540 
3541 
3542 extern "C" status_t
3543 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3544 	fs_vnode_ops* ops)
3545 {
3546 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3547 		volume, volume->id, vnodeID, privateNode));
3548 
3549 	if (privateNode == NULL)
3550 		return B_BAD_VALUE;
3551 
3552 	mutex_lock(&sVnodeMutex);
3553 
3554 	// file system integrity check:
3555 	// test if the vnode already exists and bail out if this is the case!
3556 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3557 	if (vnode != NULL) {
3558 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3559 			volume->id, vnodeID, privateNode, vnode->private_node);
3560 	}
3561 
3562 	status_t status = create_new_vnode(&vnode, volume->id, vnodeID);
3563 	if (status == B_OK) {
3564 		vnode->private_node = privateNode;
3565 		vnode->ops = ops;
3566 		vnode->busy = true;
3567 		vnode->unpublished = true;
3568 	}
3569 
3570 	TRACE(("returns: %s\n", strerror(status)));
3571 
3572 	mutex_unlock(&sVnodeMutex);
3573 	return status;
3574 }
3575 
3576 
3577 extern "C" status_t
3578 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3579 	fs_vnode_ops* ops, int type, uint32 flags)
3580 {
3581 	FUNCTION(("publish_vnode()\n"));
3582 
3583 	MutexLocker locker(sVnodeMutex);
3584 
3585 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3586 	status_t status = B_OK;
3587 
3588 	if (vnode != NULL && vnode->busy && vnode->unpublished
3589 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3590 		// already known, but not published
3591 	} else if (vnode == NULL && privateNode != NULL) {
3592 		status = create_new_vnode(&vnode, volume->id, vnodeID);
3593 		if (status == B_OK) {
3594 			vnode->private_node = privateNode;
3595 			vnode->ops = ops;
3596 			vnode->busy = true;
3597 			vnode->unpublished = true;
3598 		}
3599 	} else
3600 		status = B_BAD_VALUE;
3601 
3602 	bool publishSpecialSubNode = false;
3603 
3604 	if (status == B_OK) {
3605 		vnode->type = type;
3606 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
3607 		publishSpecialSubNode = is_special_node_type(type)
3608 			&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3609 	}
3610 
3611 
3612 	// create sub vnodes, if necessary
3613 	if (status == B_OK
3614 			&& (volume->sub_volume != NULL || publishSpecialSubNode)) {
3615 		locker.Unlock();
3616 
3617 		fs_volume* subVolume = volume;
3618 		if (volume->sub_volume != NULL) {
3619 			while (status == B_OK && subVolume->sub_volume != NULL) {
3620 				subVolume = subVolume->sub_volume;
3621 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3622 					vnode);
3623 			}
3624 		}
3625 
3626 		if (status == B_OK && publishSpecialSubNode)
3627 			status = create_special_sub_node(vnode, flags);
3628 
3629 		if (status != B_OK) {
3630 			// error -- clean up the created sub vnodes
3631 			while (subVolume->super_volume != volume) {
3632 				subVolume = subVolume->super_volume;
3633 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3634 			}
3635 		}
3636 
3637 		locker.Lock();
3638 
3639 		if (status != B_OK) {
3640 			hash_remove(sVnodeTable, vnode);
3641 			remove_vnode_from_mount_list(vnode, vnode->mount);
3642 			free(vnode);
3643 		}
3644 	}
3645 
3646 	if (status == B_OK) {
3647 		vnode->busy = false;
3648 		vnode->unpublished = false;
3649 	}
3650 
3651 	TRACE(("returns: %s\n", strerror(status)));
3652 
3653 	return status;
3654 }
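
/*	Usage sketch (illustrative; gMyVnodeOps and inode are hypothetical): a
	file system creating a new node either calls new_vnode() and publishes
	later, or publishes directly:

		status_t status = publish_vnode(volume, inodeID, inode,
			&gMyVnodeOps, S_IFREG, 0);
		// on success, the vnode is no longer busy/unpublished

	Nodes created via new_vnode() remain busy and unpublished until
	publish_vnode() is called for them.
*/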
3655 
3656 
3657 extern "C" status_t
3658 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3659 {
3660 	struct vnode* vnode;
3661 
3662 	if (volume == NULL)
3663 		return B_BAD_VALUE;
3664 
3665 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3666 	if (status != B_OK)
3667 		return status;
3668 
3669 	// If this is a layered FS, we need to get the node cookie for the requested
3670 	// layer.
3671 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3672 		fs_vnode resolvedNode;
3673 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3674 			&resolvedNode);
3675 		if (status != B_OK) {
3676 			panic("get_vnode(): Failed to get super node for vnode %p, "
3677 				"volume: %p", vnode, volume);
3678 			put_vnode(vnode);
3679 			return status;
3680 		}
3681 
3682 		if (_privateNode != NULL)
3683 			*_privateNode = resolvedNode.private_node;
3684 	} else if (_privateNode != NULL)
3685 		*_privateNode = vnode->private_node;
3686 
3687 	return B_OK;
3688 }
3689 
3690 
3691 extern "C" status_t
3692 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3693 {
3694 	struct vnode* vnode;
3695 
3696 	mutex_lock(&sVnodeMutex);
3697 	vnode = lookup_vnode(volume->id, vnodeID);
3698 	mutex_unlock(&sVnodeMutex);
3699 
3700 	if (vnode == NULL)
3701 		return B_BAD_VALUE;
3702 
3703 	inc_vnode_ref_count(vnode);
3704 	return B_OK;
3705 }
3706 
3707 
3708 extern "C" status_t
3709 put_vnode(fs_volume* volume, ino_t vnodeID)
3710 {
3711 	struct vnode* vnode;
3712 
3713 	mutex_lock(&sVnodeMutex);
3714 	vnode = lookup_vnode(volume->id, vnodeID);
3715 	mutex_unlock(&sVnodeMutex);
3716 
3717 	if (vnode == NULL)
3718 		return B_BAD_VALUE;
3719 
3720 	dec_vnode_ref_count(vnode, false, true);
3721 	return B_OK;
3722 }
3723 
3724 
3725 extern "C" status_t
3726 remove_vnode(fs_volume* volume, ino_t vnodeID)
3727 {
3728 	MutexLocker locker(sVnodeMutex);
3729 
3730 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3731 	if (vnode == NULL)
3732 		return B_ENTRY_NOT_FOUND;
3733 
3734 	if (vnode->covered_by != NULL) {
3735 		// this vnode is in use
3736 		return B_BUSY;
3737 	}
3738 
3739 	vnode->remove = true;
3740 	bool removeUnpublished = false;
3741 
3742 	if (vnode->unpublished) {
3743 		// prepare the vnode for deletion
3744 		removeUnpublished = true;
3745 		vnode->busy = true;
3746 	}
3747 
3748 	locker.Unlock();
3749 
3750 	if (removeUnpublished) {
3751 		// If the vnode hasn't been published yet, we delete it here
3752 		atomic_add(&vnode->ref_count, -1);
3753 		free_vnode(vnode, true);
3754 	}
3755 
3756 	return B_OK;
3757 }
3758 
3759 
3760 extern "C" status_t
3761 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3762 {
3763 	struct vnode* vnode;
3764 
3765 	mutex_lock(&sVnodeMutex);
3766 
3767 	vnode = lookup_vnode(volume->id, vnodeID);
3768 	if (vnode)
3769 		vnode->remove = false;
3770 
3771 	mutex_unlock(&sVnodeMutex);
3772 	return B_OK;
3773 }
3774 
3775 
3776 extern "C" status_t
3777 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3778 {
3779 	MutexLocker _(sVnodeMutex);
3780 
3781 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3782 		if (_removed != NULL)
3783 			*_removed = vnode->remove;
3784 		return B_OK;
3785 	}
3786 
3787 	return B_BAD_VALUE;
3788 }
3789 
3790 
3791 extern "C" fs_volume*
3792 volume_for_vnode(fs_vnode* _vnode)
3793 {
3794 	if (_vnode == NULL)
3795 		return NULL;
3796 
3797 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3798 	return vnode->mount->volume;
3799 }
3800 
3801 
3802 extern "C" status_t
3803 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3804 	size_t* _numBytes)
3805 {
3806 	struct file_descriptor* descriptor;
3807 	struct vnode* vnode;
3808 
3809 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3810 	if (descriptor == NULL)
3811 		return B_FILE_ERROR;
3812 
3813 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3814 		count, 0, _numBytes);
3815 
3816 	put_fd(descriptor);
3817 	return status;
3818 }
3819 
3820 
3821 extern "C" status_t
3822 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3823 	size_t* _numBytes)
3824 {
3825 	struct file_descriptor* descriptor;
3826 	struct vnode* vnode;
3827 
3828 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3829 	if (descriptor == NULL)
3830 		return B_FILE_ERROR;
3831 
3832 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3833 		count, 0, _numBytes);
3834 
3835 	put_fd(descriptor);
3836 	return status;
3837 }
3838 
3839 
3840 extern "C" status_t
3841 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3842 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3843 	size_t* _bytes)
3844 {
3845 	struct file_descriptor* descriptor;
3846 	struct vnode* vnode;
3847 
3848 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3849 	if (descriptor == NULL)
3850 		return B_FILE_ERROR;
3851 
3852 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3853 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3854 		false);
3855 
3856 	put_fd(descriptor);
3857 	return status;
3858 }
3859 
3860 
3861 extern "C" status_t
3862 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3863 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3864 	size_t* _bytes)
3865 {
3866 	struct file_descriptor* descriptor;
3867 	struct vnode* vnode;
3868 
3869 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3870 	if (descriptor == NULL)
3871 		return B_FILE_ERROR;
3872 
3873 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3874 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3875 		true);
3876 
3877 	put_fd(descriptor);
3878 	return status;
3879 }
3880 
3881 
3882 extern "C" status_t
3883 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3884 {
3885 	// lookup mount -- the caller is required to make sure that the mount
3886 	// won't go away
3887 	MutexLocker locker(sMountMutex);
3888 	struct fs_mount* mount = find_mount(mountID);
3889 	if (mount == NULL)
3890 		return B_BAD_VALUE;
3891 	locker.Unlock();
3892 
3893 	return mount->entry_cache.Add(dirID, name, nodeID);
3894 }
3895 
3896 
3897 extern "C" status_t
3898 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3899 {
3900 	// lookup mount -- the caller is required to make sure that the mount
3901 	// won't go away
3902 	MutexLocker locker(sMountMutex);
3903 	struct fs_mount* mount = find_mount(mountID);
3904 	if (mount == NULL)
3905 		return B_BAD_VALUE;
3906 	locker.Unlock();
3907 
3908 	return mount->entry_cache.Remove(dirID, name);
3909 }
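
/* A minimal usage sketch (illustrative; the IDs and the name are
	hypothetical): a file system may seed the entry cache when its lookup
	hook resolves a name, and must invalidate the entry again on unlink:

		entry_cache_add(volume->id, directoryID, "file.txt", nodeID);
		// ... later, when the entry is removed:
		entry_cache_remove(volume->id, directoryID, "file.txt");
*/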
3910 
3911 
3912 //	#pragma mark - private VFS API
3913 //	Functions the VFS exports for other parts of the kernel
3914 
3915 
3916 /*! Acquires another reference to the vnode; it has to be released
3917 	by calling vfs_put_vnode().
3918 */
3919 void
3920 vfs_acquire_vnode(struct vnode* vnode)
3921 {
3922 	inc_vnode_ref_count(vnode);
3923 }
3924 
3925 
3926 /*! This is currently called from file_cache_create() only.
3927 	It's probably a temporary solution as long as devfs requires that
3928 	fs_read_pages()/fs_write_pages() are called with the standard
3929 	open cookie and not with a device cookie.
3930 	If that's done differently, remove this call; it has no other
3931 	purpose.
3932 */
3933 extern "C" status_t
3934 vfs_get_cookie_from_fd(int fd, void** _cookie)
3935 {
3936 	struct file_descriptor* descriptor;
3937 
3938 	descriptor = get_fd(get_current_io_context(true), fd);
3939 	if (descriptor == NULL)
3940 		return B_FILE_ERROR;
3941 
3942 	*_cookie = descriptor->cookie;
3943 	return B_OK;
3944 }
3945 
3946 
3947 extern "C" int
3948 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
3949 {
3950 	*vnode = get_vnode_from_fd(fd, kernel);
3951 
3952 	if (*vnode == NULL)
3953 		return B_FILE_ERROR;
3954 
3955 	return B_NO_ERROR;
3956 }
3957 
3958 
3959 extern "C" status_t
3960 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
3961 {
3962 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3963 		path, kernel));
3964 
3965 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3966 	if (pathBuffer.InitCheck() != B_OK)
3967 		return B_NO_MEMORY;
3968 
3969 	char* buffer = pathBuffer.LockBuffer();
3970 	strlcpy(buffer, path, pathBuffer.BufferSize());
3971 
3972 	struct vnode* vnode;
3973 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3974 	if (status != B_OK)
3975 		return status;
3976 
3977 	*_vnode = vnode;
3978 	return B_OK;
3979 }
3980 
3981 
3982 extern "C" status_t
3983 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
3984 {
3985 	struct vnode* vnode;
3986 
3987 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3988 	if (status != B_OK)
3989 		return status;
3990 
3991 	*_vnode = vnode;
3992 	return B_OK;
3993 }
3994 
3995 
3996 extern "C" status_t
3997 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
3998 	const char* name, struct vnode** _vnode)
3999 {
4000 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4001 }
4002 
4003 
4004 extern "C" void
4005 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4006 {
4007 	*_mountID = vnode->device;
4008 	*_vnodeID = vnode->id;
4009 }
4010 
4011 
4012 /*!	Looks up a vnode with the given mount and vnode ID.
4013 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4014 	to the node.
4015 	It's currently only used by file_cache_create().
4016 */
4017 extern "C" status_t
4018 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4019 {
4020 	mutex_lock(&sVnodeMutex);
4021 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4022 	mutex_unlock(&sVnodeMutex);
4023 
4024 	if (vnode == NULL)
4025 		return B_ERROR;
4026 
4027 	*_vnode = vnode;
4028 	return B_OK;
4029 }
4030 
4031 
4032 extern "C" status_t
4033 vfs_get_fs_node_from_path(fs_volume* volume, const char* path, bool kernel,
4034 	void** _node)
4035 {
4036 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4037 		volume, path, kernel));
4038 
4039 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4040 	if (pathBuffer.InitCheck() != B_OK)
4041 		return B_NO_MEMORY;
4042 
4043 	fs_mount* mount;
4044 	status_t status = get_mount(volume->id, &mount);
4045 	if (status != B_OK)
4046 		return status;
4047 
4048 	char* buffer = pathBuffer.LockBuffer();
4049 	strlcpy(buffer, path, pathBuffer.BufferSize());
4050 
4051 	struct vnode* vnode = mount->root_vnode;
4052 
4053 	if (buffer[0] == '/')
4054 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
4055 	else {
4056 		inc_vnode_ref_count(vnode);
4057 			// vnode_path_to_vnode() releases a reference to the starting vnode
4058 		status = vnode_path_to_vnode(vnode, buffer, true, 0, kernel, &vnode,
4059 			NULL);
4060 	}
4061 
4062 	put_mount(mount);
4063 
4064 	if (status != B_OK)
4065 		return status;
4066 
4067 	if (vnode->device != volume->id) {
4068 		// wrong mount ID -- must not gain access to foreign file system nodes
4069 		put_vnode(vnode);
4070 		return B_BAD_VALUE;
4071 	}
4072 
4073 	// Use get_vnode() to resolve the cookie for the right layer.
4074 	status = get_vnode(volume, vnode->id, _node);
4075 	put_vnode(vnode);
4076 
4077 	return status;
4078 }
4079 
4080 
4081 status_t
4082 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4083 	struct stat* stat, bool kernel)
4084 {
4085 	status_t status;
4086 
4087 	if (path) {
4088 		// path given: get the stat of the node referred to by (fd, path)
4089 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4090 		if (pathBuffer.InitCheck() != B_OK)
4091 			return B_NO_MEMORY;
4092 
4093 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4094 			traverseLeafLink, stat, kernel);
4095 	} else {
4096 		// no path given: get the FD and use the FD operation
4097 		struct file_descriptor* descriptor
4098 			= get_fd(get_current_io_context(kernel), fd);
4099 		if (descriptor == NULL)
4100 			return B_FILE_ERROR;
4101 
4102 		if (descriptor->ops->fd_read_stat)
4103 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4104 		else
4105 			status = EOPNOTSUPP;
4106 
4107 		put_fd(descriptor);
4108 	}
4109 
4110 	return status;
4111 }
4112 
4113 
4114 /*!	Finds the full path to the file that contains the module \a moduleName,
4115 	puts it into \a pathBuffer, and returns B_OK for success.
4116 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4117 	\c B_ENTRY_NOT_FOUND if no file could be found.
4118 	\a pathBuffer is clobbered in any case and must not be relied on if this
4119 	function returns unsuccessfully.
4120 	\a basePath and \a pathBuffer must not point to the same space.
4121 */
4122 status_t
4123 vfs_get_module_path(const char* basePath, const char* moduleName,
4124 	char* pathBuffer, size_t bufferSize)
4125 {
4126 	struct vnode* dir;
4127 	struct vnode* file;
4128 	status_t status;
4129 	size_t length;
4130 	char* path;
4131 
4132 	if (bufferSize == 0
4133 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4134 		return B_BUFFER_OVERFLOW;
4135 
4136 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4137 	if (status != B_OK)
4138 		return status;
4139 
4140 	// the path buffer has been clobbered by the above call
4141 	length = strlcpy(pathBuffer, basePath, bufferSize);
4142 	if (pathBuffer[length - 1] != '/')
4143 		pathBuffer[length++] = '/';
4144 
4145 	path = pathBuffer + length;
4146 	bufferSize -= length;
4147 
4148 	while (moduleName) {
4149 		char* nextPath = strchr(moduleName, '/');
4150 		if (nextPath == NULL)
4151 			length = strlen(moduleName);
4152 		else {
4153 			length = nextPath - moduleName;
4154 			nextPath++;
4155 		}
4156 
4157 		if (length + 1 >= bufferSize) {
4158 			status = B_BUFFER_OVERFLOW;
4159 			goto err;
4160 		}
4161 
4162 		memcpy(path, moduleName, length);
4163 		path[length] = '\0';
4164 		moduleName = nextPath;
4165 
4166 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4167 		if (status != B_OK) {
4168 			// vnode_path_to_vnode() has already released the reference to dir
4169 			return status;
4170 		}
4171 
4172 		if (S_ISDIR(file->type)) {
4173 			// descend into the next directory
4174 			path[length] = '/';
4175 			path[length + 1] = '\0';
4176 			path += length + 1;
4177 			bufferSize -= length + 1;
4178 
4179 			dir = file;
4180 		} else if (S_ISREG(file->type)) {
4181 			// it's a file so it should be what we've searched for
4182 			put_vnode(file);
4183 
4184 			return B_OK;
4185 		} else {
4186 			TRACE(("vfs_get_module_path(): something is strange here: "
4187 				"0x%08lx...\n", file->type));
4188 			status = B_ERROR;
4189 			dir = file;
4190 			goto err;
4191 		}
4192 	}
4193 
4194 	// if we got here, the moduleName just pointed to a directory, not to
4195 	// a real module - what should we do in this case?
4196 	status = B_ENTRY_NOT_FOUND;
4197 
4198 err:
4199 	put_vnode(dir);
4200 	return status;
4201 }
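
/* A minimal usage sketch (illustrative; the base path and module name are
	hypothetical): the function appends the module name components to the
	base path one by one and succeeds as soon as a regular file is hit:

		char path[B_PATH_NAME_LENGTH];
		status_t error = vfs_get_module_path("/boot/system/add-ons/kernel",
			"bus_managers/pci/v1", path, sizeof(path));
*/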
4202 
4203 
4204 /*!	\brief Normalizes a given path.
4205 
4206 	The path must refer to an existing or non-existing entry in an existing
4207 	directory; that is, after chopping off the leaf component, the remaining
4208 	path must refer to an existing directory.
4209 
4210 	The returned path will be canonical in that it will be absolute, will not
4211 	contain any "." or ".." components or duplicate occurrences of '/',
4212 	and none of the directory components will be symbolic links.
4213 
4214 	Any two paths referring to the same entry will result in the same
4215 	normalized path (well, that is pretty much the definition of `normalized',
4216 	isn't it :-).
4217 
4218 	\param path The path to be normalized.
4219 	\param buffer The buffer into which the normalized path will be written.
4220 		   May be the same one as \a path.
4221 	\param bufferSize The size of \a buffer.
4222 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4223 	\param kernel \c true, if the IO context of the kernel shall be used,
4224 		   otherwise that of the team this thread belongs to. Only relevant,
4225 		   if the path is relative (to get the CWD).
4226 	\return \c B_OK if everything went fine, another error code otherwise.
4227 */
4228 status_t
4229 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4230 	bool traverseLink, bool kernel)
4231 {
4232 	if (!path || !buffer || bufferSize < 1)
4233 		return B_BAD_VALUE;
4234 
4235 	if (path != buffer) {
4236 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4237 			return B_BUFFER_OVERFLOW;
4238 	}
4239 
4240 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4241 }
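
/* A minimal usage sketch (illustrative; the input path is hypothetical and
	assumed to contain no symlinks):

		char buffer[B_PATH_NAME_LENGTH];
		status_t error = vfs_normalize_path("/boot/./home//Desktop", buffer,
			sizeof(buffer), true, true);
		// on success, buffer now holds "/boot/home/Desktop"
*/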
4242 
4243 
4244 /*!	\brief Creates a special node in the file system.
4245 
4246 	The caller gets a reference to the newly created node (which is passed
4247 	back through \a _createdVnode) and is responsible for releasing it.
4248 
4249 	\param path The path where to create the entry for the node. Can be \c NULL,
4250 		in which case the node is created without an entry in the root FS -- it
4251 		will automatically be deleted when the last reference has been released.
4252 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4253 		the target file system will just create the node with its standard
4254 		operations. Depending on the type of the node a subnode might be created
4255 		automatically, though.
4256 	\param mode The type and permissions for the node to be created.
4257 	\param flags Flags to be passed to the creating FS.
4258 	\param kernel \c true, if called in the kernel context (relevant only if
4259 		\a path is not \c NULL and not absolute).
4260 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4261 		file system creating the node, with the private data pointer and
4262 		operations for the super node. Can be \c NULL.
4263 	\param _createdVnode Pointer to pre-allocated storage in which to store
4264 		the pointer to the newly created node.
4265 	\return \c B_OK, if everything went fine, another error code otherwise.
4266 */
4267 status_t
4268 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4269 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4270 	struct vnode** _createdVnode)
4271 {
4272 	struct vnode* dirNode;
4273 	char _leaf[B_FILE_NAME_LENGTH];
4274 	char* leaf = NULL;
4275 
4276 	if (path) {
4277 		// We've got a path. Get the dir vnode and the leaf name.
4278 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4279 		if (tmpPathBuffer.InitCheck() != B_OK)
4280 			return B_NO_MEMORY;
4281 
4282 		char* tmpPath = tmpPathBuffer.LockBuffer();
4283 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4284 			return B_NAME_TOO_LONG;
4285 
4286 		// get the dir vnode and the leaf name
4287 		leaf = _leaf;
4288 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4289 		if (error != B_OK)
4290 			return error;
4291 	} else {
4292 		// No path. Create the node in the root FS.
4293 		dirNode = sRoot;
4294 		inc_vnode_ref_count(dirNode);
4295 	}
4296 
4297 	VNodePutter _(dirNode);
4298 
4299 	// check support for creating special nodes
4300 	if (!HAS_FS_CALL(dirNode, create_special_node))
4301 		return B_UNSUPPORTED;
4302 
4303 	// create the node
4304 	fs_vnode superVnode;
4305 	ino_t nodeID;
4306 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4307 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4308 	if (status != B_OK)
4309 		return status;
4310 
4311 	// lookup the node
4312 	mutex_lock(&sVnodeMutex);
4313 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4314 	mutex_unlock(&sVnodeMutex);
4315 
4316 	if (*_createdVnode == NULL) {
4317 		panic("vfs_create_special_node(): lookup of node failed");
4318 		return B_ERROR;
4319 	}
4320 
4321 	return B_OK;
4322 }
4323 
4324 
4325 extern "C" void
4326 vfs_put_vnode(struct vnode* vnode)
4327 {
4328 	put_vnode(vnode);
4329 }
4330 
4331 
4332 extern "C" status_t
4333 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4334 {
4335 	// Get current working directory from io context
4336 	struct io_context* context = get_current_io_context(false);
4337 	status_t status = B_OK;
4338 
4339 	mutex_lock(&context->io_mutex);
4340 
4341 	if (context->cwd != NULL) {
4342 		*_mountID = context->cwd->device;
4343 		*_vnodeID = context->cwd->id;
4344 	} else
4345 		status = B_ERROR;
4346 
4347 	mutex_unlock(&context->io_mutex);
4348 	return status;
4349 }
4350 
4351 
4352 status_t
4353 vfs_unmount(dev_t mountID, uint32 flags)
4354 {
4355 	return fs_unmount(NULL, mountID, flags, true);
4356 }
4357 
4358 
4359 extern "C" status_t
4360 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4361 {
4362 	struct vnode* vnode;
4363 
4364 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4365 	if (status != B_OK)
4366 		return status;
4367 
4368 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4369 	put_vnode(vnode);
4370 	return B_OK;
4371 }
4372 
4373 
4374 extern "C" void
4375 vfs_free_unused_vnodes(int32 level)
4376 {
4377 	vnode_low_resource_handler(NULL,
4378 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, level);
4379 }
4380 
4381 
4382 extern "C" bool
4383 vfs_can_page(struct vnode* vnode, void* cookie)
4384 {
4385 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4386 
4387 	if (HAS_FS_CALL(vnode, can_page))
4388 		return FS_CALL(vnode, can_page, cookie);
4389 	return false;
4390 }
4391 
4392 
4393 extern "C" status_t
4394 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4395 	size_t count, uint32 flags, size_t* _numBytes)
4396 {
4397 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4398 		pos));
4399 
4400 #if VFS_PAGES_IO_TRACING
4401 	size_t bytesRequested = *_numBytes;
4402 #endif
4403 
4404 	IORequest request;
4405 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4406 	if (status == B_OK) {
4407 		status = vfs_vnode_io(vnode, cookie, &request);
4408 		if (status == B_OK)
4409 			status = request.Wait();
4410 		*_numBytes = request.TransferredBytes();
4411 	}
4412 
4413 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4414 		status, *_numBytes));
4415 
4416 	return status;
4417 }
4418 
4419 
4420 extern "C" status_t
4421 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4422 	size_t count, uint32 flags, size_t* _numBytes)
4423 {
4424 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4425 		pos));
4426 
4427 #if VFS_PAGES_IO_TRACING
4428 	size_t bytesRequested = *_numBytes;
4429 #endif
4430 
4431 	IORequest request;
4432 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4433 	if (status == B_OK) {
4434 		status = vfs_vnode_io(vnode, cookie, &request);
4435 		if (status == B_OK)
4436 			status = request.Wait();
4437 		*_numBytes = request.TransferredBytes();
4438 	}
4439 
4440 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4441 		status, *_numBytes));
4442 
4443 	return status;
4444 }
4445 
4446 
4447 /*!	Gets the vnode's vm_cache object. If the vnode doesn't have one yet,
4448 	it will be created if \a allocate is \c true.
4449 	On success, this also grabs a reference to the cache it returns; the
4450 	caller is responsible for releasing it.
4451 */
4452 extern "C" status_t
4453 vfs_get_vnode_cache(struct vnode* vnode, vm_cache** _cache, bool allocate)
4454 {
4455 	if (vnode->cache != NULL) {
4456 		vnode->cache->AcquireRef();
4457 		*_cache = vnode->cache;
4458 		return B_OK;
4459 	}
4460 
4461 	mutex_lock(&sVnodeMutex);
4462 
4463 	status_t status = B_OK;
4464 
4465 	// The cache could have been created in the meantime
4466 	if (vnode->cache == NULL) {
4467 		if (allocate) {
4468 			// TODO: actually the vnode needs to be busy already here, or
4469 			//	else this won't work...
4470 			bool wasBusy = vnode->busy;
4471 			vnode->busy = true;
4472 			mutex_unlock(&sVnodeMutex);
4473 
4474 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4475 
4476 			mutex_lock(&sVnodeMutex);
4477 			vnode->busy = wasBusy;
4478 		} else
4479 			status = B_BAD_VALUE;
4480 	}
4481 
4482 	mutex_unlock(&sVnodeMutex);
4483 
4484 	if (status == B_OK) {
4485 		vnode->cache->AcquireRef();
4486 		*_cache = vnode->cache;
4487 	}
4488 
4489 	return status;
4490 }
4491 
4492 
4493 status_t
4494 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4495 	file_io_vec* vecs, size_t* _count)
4496 {
4497 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4498 		vnode, vecs, offset, size));
4499 
4500 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4501 }
4502 
4503 
4504 status_t
4505 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4506 {
4507 	status_t status = FS_CALL(vnode, read_stat, stat);
4508 
4509 	// fill in the st_dev and st_ino fields
4510 	if (status == B_OK) {
4511 		stat->st_dev = vnode->device;
4512 		stat->st_ino = vnode->id;
4513 		stat->st_rdev = -1;
4514 	}
4515 
4516 	return status;
4517 }
4518 
4519 
4520 status_t
4521 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4522 {
4523 	struct vnode* vnode;
4524 	status_t status = get_vnode(device, inode, &vnode, true, false);
4525 	if (status != B_OK)
4526 		return status;
4527 
4528 	status = FS_CALL(vnode, read_stat, stat);
4529 
4530 	// fill in the st_dev and st_ino fields
4531 	if (status == B_OK) {
4532 		stat->st_dev = vnode->device;
4533 		stat->st_ino = vnode->id;
4534 		stat->st_rdev = -1;
4535 	}
4536 
4537 	put_vnode(vnode);
4538 	return status;
4539 }
4540 
4541 
4542 status_t
4543 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4544 {
4545 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4546 }
4547 
4548 
4549 status_t
4550 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4551 	char* path, size_t pathLength)
4552 {
4553 	struct vnode* vnode;
4554 	status_t status;
4555 
4556 	// filter invalid leaf names
4557 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4558 		return B_BAD_VALUE;
4559 
4560 	// get the vnode matching the dir's node_ref
4561 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4562 		// special cases "." and "..": we can directly get the vnode of the
4563 		// referenced directory
4564 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4565 		leaf = NULL;
4566 	} else
4567 		status = get_vnode(device, inode, &vnode, true, false);
4568 	if (status != B_OK)
4569 		return status;
4570 
4571 	// get the directory path
4572 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4573 	put_vnode(vnode);
4574 		// we don't need the vnode anymore
4575 	if (status != B_OK)
4576 		return status;
4577 
4578 	// append the leaf name
4579 	if (leaf) {
4580 		// insert a directory separator if this is not the file system root
4581 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4582 				>= pathLength)
4583 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4584 			return B_NAME_TOO_LONG;
4585 		}
4586 	}
4587 
4588 	return B_OK;
4589 }
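
/* A minimal usage sketch (illustrative; the device, inode, and resulting
	path are hypothetical):

		char path[B_PATH_NAME_LENGTH];
		status_t error = vfs_entry_ref_to_path(device, directoryInode,
			"file.txt", path, sizeof(path));
		// on success, path might hold e.g. "/boot/home/file.txt"
*/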
4590 
4591 
4592 /*!	If the given descriptor locked its vnode, that lock will be released. */
4593 void
4594 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4595 {
4596 	struct vnode* vnode = fd_vnode(descriptor);
4597 
4598 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4599 		vnode->mandatory_locked_by = NULL;
4600 }
4601 
4602 
4603 /*!	Closes all file descriptors of the specified I/O context that
4604 	have the O_CLOEXEC flag set.
4605 */
4606 void
4607 vfs_exec_io_context(io_context* context)
4608 {
4609 	uint32 i;
4610 
4611 	for (i = 0; i < context->table_size; i++) {
4612 		mutex_lock(&context->io_mutex);
4613 
4614 		struct file_descriptor* descriptor = context->fds[i];
4615 		bool remove = false;
4616 
4617 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4618 			context->fds[i] = NULL;
4619 			context->num_used_fds--;
4620 
4621 			remove = true;
4622 		}
4623 
4624 		mutex_unlock(&context->io_mutex);
4625 
4626 		if (remove) {
4627 			close_fd(descriptor);
4628 			put_fd(descriptor);
4629 		}
4630 	}
4631 }
4632 
4633 
4634 /*! Sets up a new io_context structure, and inherits the properties
4635 	of the parent io_context if it is given.
4636 */
4637 io_context*
4638 vfs_new_io_context(io_context* parentContext)
4639 {
4640 	size_t tableSize;
4641 	struct io_context* context;
4642 
4643 	context = (io_context*)malloc(sizeof(struct io_context));
4644 	if (context == NULL)
4645 		return NULL;
4646 
4647 	memset(context, 0, sizeof(struct io_context));
4648 	context->ref_count = 1;
4649 
4650 	MutexLocker parentLocker;
4651 	if (parentContext) {
4652 		parentLocker.SetTo(parentContext->io_mutex, false);
4653 		tableSize = parentContext->table_size;
4654 	} else
4655 		tableSize = DEFAULT_FD_TABLE_SIZE;
4656 
4657 	// allocate space for FDs, select infos, and the close-on-exec bitmap
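	// The layout of the single allocation is:
	//   [file_descriptor* x tableSize][select_info* x tableSize]
	//   [close-on-exec bitmap, one bit per FD, rounded up to whole bytes]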
4658 	context->fds = (file_descriptor**)malloc(
4659 		sizeof(struct file_descriptor*) * tableSize
4660 		+ sizeof(struct select_sync*) * tableSize
4661 		+ (tableSize + 7) / 8);
4662 	if (context->fds == NULL) {
4663 		free(context);
4664 		return NULL;
4665 	}
4666 
4667 	context->select_infos = (select_info**)(context->fds + tableSize);
4668 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4669 
4670 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4671 		+ sizeof(struct select_sync*) * tableSize
4672 		+ (tableSize + 7) / 8);
4673 
4674 	mutex_init(&context->io_mutex, "I/O context");
4675 
4676 	// Copy all parent file descriptors
4677 
4678 	if (parentContext) {
4679 		size_t i;
4680 
4681 		mutex_lock(&sIOContextRootLock);
4682 		context->root = parentContext->root;
4683 		if (context->root)
4684 			inc_vnode_ref_count(context->root);
4685 		mutex_unlock(&sIOContextRootLock);
4686 
4687 		context->cwd = parentContext->cwd;
4688 		if (context->cwd)
4689 			inc_vnode_ref_count(context->cwd);
4690 
4691 		for (i = 0; i < tableSize; i++) {
4692 			struct file_descriptor* descriptor = parentContext->fds[i];
4693 
4694 			if (descriptor != NULL) {
4695 				context->fds[i] = descriptor;
4696 				context->num_used_fds++;
4697 				atomic_add(&descriptor->ref_count, 1);
4698 				atomic_add(&descriptor->open_count, 1);
4699 
4700 				if (fd_close_on_exec(parentContext, i))
4701 					fd_set_close_on_exec(context, i, true);
4702 			}
4703 		}
4704 
4705 		parentLocker.Unlock();
4706 	} else {
4707 		context->root = sRoot;
4708 		context->cwd = sRoot;
4709 
4710 		if (context->root)
4711 			inc_vnode_ref_count(context->root);
4712 
4713 		if (context->cwd)
4714 			inc_vnode_ref_count(context->cwd);
4715 	}
4716 
4717 	context->table_size = tableSize;
4718 
4719 	list_init(&context->node_monitors);
4720 	context->max_monitors = DEFAULT_NODE_MONITORS;
4721 
4722 	return context;
4723 }
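
/* A usage sketch (illustrative; "parentTeam" is a hypothetical team
	structure): on fork(), the child team inherits root, CWD, and all open
	descriptors by passing the parent's context:

		io_context* childContext = vfs_new_io_context(parentTeam->io_context);
*/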
4724 
4725 
4726 static status_t
4727 vfs_free_io_context(io_context* context)
4728 {
4729 	uint32 i;
4730 
4731 	if (context->root)
4732 		put_vnode(context->root);
4733 
4734 	if (context->cwd)
4735 		put_vnode(context->cwd);
4736 
4737 	mutex_lock(&context->io_mutex);
4738 
4739 	for (i = 0; i < context->table_size; i++) {
4740 		if (struct file_descriptor* descriptor = context->fds[i]) {
4741 			close_fd(descriptor);
4742 			put_fd(descriptor);
4743 		}
4744 	}
4745 
4746 	mutex_destroy(&context->io_mutex);
4747 
4748 	remove_node_monitors(context);
4749 	free(context->fds);
4750 	free(context);
4751 
4752 	return B_OK;
4753 }
4754 
4755 
4756 void
4757 vfs_get_io_context(io_context* context)
4758 {
4759 	atomic_add(&context->ref_count, 1);
4760 }
4761 
4762 
4763 void
4764 vfs_put_io_context(io_context* context)
4765 {
4766 	if (atomic_add(&context->ref_count, -1) == 1)
4767 		vfs_free_io_context(context);
4768 }
4769 
4770 
4771 static status_t
4772 vfs_resize_fd_table(struct io_context* context, const int newSize)
4773 {
4774 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4775 		return EINVAL;
4776 
4777 	MutexLocker _(context->io_mutex);
4778 
4779 	int oldSize = context->table_size;
4780 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4781 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4782 
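	// (size + 7) / 8 rounds the bitmap up to whole bytes; e.g. 129 FDs
	// need a 17 byte close-on-exec bitmap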
4783 	// If the tables shrink, make sure none of the fds being dropped are in use.
4784 	if (newSize < oldSize) {
4785 		for (int i = oldSize; i-- > newSize;) {
4786 			if (context->fds[i])
4787 				return EBUSY;
4788 		}
4789 	}
4790 
4791 	// store pointers to the old tables
4792 	file_descriptor** oldFDs = context->fds;
4793 	select_info** oldSelectInfos = context->select_infos;
4794 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4795 
4796 	// allocate new tables
4797 	file_descriptor** newFDs = (file_descriptor**)malloc(
4798 		sizeof(struct file_descriptor*) * newSize
4799 		+ sizeof(struct select_sync*) * newSize
4800 		+ newCloseOnExitBitmapSize);
4801 	if (newFDs == NULL)
4802 		return ENOMEM;
4803 
4804 	context->fds = newFDs;
4805 	context->select_infos = (select_info**)(context->fds + newSize);
4806 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4807 	context->table_size = newSize;
4808 
4809 	// copy entries from old tables
4810 	int toCopy = min_c(oldSize, newSize);
4811 
4812 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4813 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4814 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4815 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4816 
4817 	// clear additional entries, if the tables grow
4818 	if (newSize > oldSize) {
4819 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4820 		memset(context->select_infos + oldSize, 0,
4821 			sizeof(void*) * (newSize - oldSize));
4822 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4823 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4824 	}
4825 
4826 	free(oldFDs);
4827 
4828 	return B_OK;
4829 }
4830 
4831 
4832 static status_t
4833 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4834 {
4835 	int	status = B_OK;
4836 
4837 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4838 		return EINVAL;
4839 
4840 	mutex_lock(&context->io_mutex);
4841 
4842 	if ((size_t)newSize < context->num_monitors) {
4843 		status = EBUSY;
4844 		goto out;
4845 	}
4846 	context->max_monitors = newSize;
4847 
4848 out:
4849 	mutex_unlock(&context->io_mutex);
4850 	return status;
4851 }
4852 
4853 
4854 int
4855 vfs_getrlimit(int resource, struct rlimit* rlp)
4856 {
4857 	if (!rlp)
4858 		return B_BAD_ADDRESS;
4859 
4860 	switch (resource) {
4861 		case RLIMIT_NOFILE:
4862 		{
4863 			struct io_context* context = get_current_io_context(false);
4864 			MutexLocker _(context->io_mutex);
4865 
4866 			rlp->rlim_cur = context->table_size;
4867 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4868 			return 0;
4869 		}
4870 
4871 		case RLIMIT_NOVMON:
4872 		{
4873 			struct io_context* context = get_current_io_context(false);
4874 			MutexLocker _(context->io_mutex);
4875 
4876 			rlp->rlim_cur = context->max_monitors;
4877 			rlp->rlim_max = MAX_NODE_MONITORS;
4878 			return 0;
4879 		}
4880 
4881 		default:
4882 			return B_BAD_VALUE;
4883 	}
4884 }
4885 
4886 
4887 int
4888 vfs_setrlimit(int resource, const struct rlimit* rlp)
4889 {
4890 	if (!rlp)
4891 		return B_BAD_ADDRESS;
4892 
4893 	switch (resource) {
4894 		case RLIMIT_NOFILE:
4895 			/* TODO: check getuid() */
4896 			if (rlp->rlim_max != RLIM_SAVED_MAX
4897 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4898 				return B_NOT_ALLOWED;
4899 
4900 			return vfs_resize_fd_table(get_current_io_context(false),
4901 				rlp->rlim_cur);
4902 
4903 		case RLIMIT_NOVMON:
4904 			/* TODO: check getuid() */
4905 			if (rlp->rlim_max != RLIM_SAVED_MAX
4906 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4907 				return B_NOT_ALLOWED;
4908 
4909 			return vfs_resize_monitor_table(get_current_io_context(false),
4910 				rlp->rlim_cur);
4911 
4912 		default:
4913 			return B_BAD_VALUE;
4914 	}
4915 }
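
/* A minimal usage sketch (illustrative; standard POSIX usage from userland,
	with 2048 as an example value):

		struct rlimit rl;
		if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
			rl.rlim_cur = 2048;	// must not exceed rl.rlim_max
			setrlimit(RLIMIT_NOFILE, &rl);
		}
*/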
4916 
4917 
4918 status_t
4919 vfs_init(kernel_args* args)
4920 {
4921 	struct vnode dummyVnode;
4922 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
4923 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
4924 	if (sVnodeTable == NULL)
4925 		panic("vfs_init: error creating vnode hash table\n");
4926 
4927 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
4928 
4929 	struct fs_mount dummyMount;
4930 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
4931 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
4932 	if (sMountsTable == NULL)
4933 		panic("vfs_init: error creating mounts hash table\n");
4934 
4935 	node_monitor_init();
4936 
4937 	sRoot = NULL;
4938 
4939 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
4940 
4941 	if (block_cache_init() != B_OK)
4942 		return B_ERROR;
4943 
4944 #ifdef ADD_DEBUGGER_COMMANDS
4945 	// add some debugger commands
4946 	add_debugger_command("vnode", &dump_vnode,
4947 		"info about the specified vnode");
4948 	add_debugger_command("vnodes", &dump_vnodes,
4949 		"list all vnodes (from the specified device)");
4950 	add_debugger_command("vnode_caches", &dump_vnode_caches,
4951 		"list all vnode caches");
4952 	add_debugger_command("mount", &dump_mount,
4953 		"info about the specified fs_mount");
4954 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
4955 	add_debugger_command("io_context", &dump_io_context,
4956 		"info about the I/O context");
4957 	add_debugger_command("vnode_usage", &dump_vnode_usage,
4958 		"info about vnode usage");
4959 #endif
4960 
4961 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
4962 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
4963 
4964 	file_map_init();
4965 
4966 	return file_cache_init();
4967 }
4968 
4969 
4970 //	#pragma mark - fd_ops implementations
4971 
4972 
4973 /*!
4974 	Calls fs_open() on the given vnode and returns a new
4975 	file descriptor for it
4976 */
4977 static int
4978 open_vnode(struct vnode* vnode, int openMode, bool kernel)
4979 {
4980 	void* cookie;
4981 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
4982 	if (status != B_OK)
4983 		return status;
4984 
4985 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4986 	if (fd < 0) {
4987 		FS_CALL(vnode, close, cookie);
4988 		FS_CALL(vnode, free_cookie, cookie);
4989 	}
4990 	return fd;
4991 }
4992 
4993 
4994 /*!
4995 	Creates an entry in the given directory (or opens an existing one,
4996 	unless O_EXCL is given) and returns a new file descriptor for it
4997 */
4998 static int
4999 create_vnode(struct vnode* directory, const char* name, int openMode,
5000 	int perms, bool kernel)
5001 {
5002 	status_t status = B_ERROR;
5003 	struct vnode* vnode;
5004 	void* cookie;
5005 	ino_t newID;
5006 
5007 	// This is somewhat tricky: If the entry already exists, the FS responsible
5008 	// for the directory might not necessarily be the one also responsible for
5009 	// the node the entry refers to. So we can actually never call the create()
5010 	// hook without O_EXCL. Instead we try to look the entry up first. If it
5011 	// already exists, we just open the node (unless O_EXCL), otherwise we call
5012 	// create() with O_EXCL. This introduces a race condition, since someone
5013 	// else might have created the entry in the meantime. We hope the respective
5014 	// FS returns the correct error code, and retry (up to three times).
5015 
5016 	for (int i = 0; i < 3 && status != B_OK; i++) {
5017 		// look the node up
5018 		status = lookup_dir_entry(directory, name, &vnode);
5019 		if (status == B_OK) {
5020 			VNodePutter putter(vnode);
5021 
5022 			if ((openMode & O_EXCL) != 0)
5023 				return B_FILE_EXISTS;
5024 
5025 			// If the node is a symlink, we have to follow it, unless
5026 			// O_NOTRAVERSE is set.
5027 			if (S_ISLNK(vnode->type) && (openMode & O_NOTRAVERSE) == 0) {
5028 				putter.Put();
5029 				char clonedName[B_FILE_NAME_LENGTH + 1];
5030 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5031 						>= B_FILE_NAME_LENGTH) {
5032 					return B_NAME_TOO_LONG;
5033 				}
5034 
5035 				inc_vnode_ref_count(directory);
5036 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5037 					kernel, &vnode, NULL);
5038 				if (status != B_OK)
5039 					return status;
5040 
5041 				putter.SetTo(vnode);
5042 			}
5043 
5044 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5045 			// on success keep the vnode reference for the FD
5046 			if (fd >= 0)
5047 				putter.Detach();
5048 
5049 			return fd;
5050 		}
5051 
5052 		// it doesn't exist yet -- try to create it
5053 
5054 		if (!HAS_FS_CALL(directory, create))
5055 			return EROFS;
5056 
5057 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5058 			&cookie, &newID);
5059 		if (status != B_OK
5060 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5061 			return status;
5062 		}
5063 	}
5064 
5065 	if (status != B_OK)
5066 		return status;
5067 
5068 	// the node has been created successfully
5069 
5070 	mutex_lock(&sVnodeMutex);
5071 	vnode = lookup_vnode(directory->device, newID);
5072 	mutex_unlock(&sVnodeMutex);
5073 
5074 	if (vnode == NULL) {
5075 		panic("vfs: fs_create() returned success but there is no vnode, "
5076 			"mount ID %ld!\n", directory->device);
5077 		return B_BAD_VALUE;
5078 	}
5079 
5080 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5081 	if (fd >= 0)
5082 		return fd;
5083 
5084 	status = fd;
5085 
5086 	// something went wrong, clean up
5087 
5088 	FS_CALL(vnode, close, cookie);
5089 	FS_CALL(vnode, free_cookie, cookie);
5090 	put_vnode(vnode);
5091 
5092 	FS_CALL(directory, unlink, name);
5093 
5094 	return status;
5095 }
5096 
5097 
5098 /*! Calls fs open_dir() on the given vnode and returns a new
5099 	file descriptor for it
5100 */
5101 static int
5102 open_dir_vnode(struct vnode* vnode, bool kernel)
5103 {
5104 	void* cookie;
5105 	int status;
5106 
5107 	status = FS_CALL(vnode, open_dir, &cookie);
5108 	if (status != B_OK)
5109 		return status;
5110 
5111 	// the directory is opened, create an FD
5112 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
5113 	if (status >= 0)
5114 		return status;
5115 
5116 	FS_CALL(vnode, close_dir, cookie);
5117 	FS_CALL(vnode, free_dir_cookie, cookie);
5118 
5119 	return status;
5120 }
5121 
5122 
5123 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5124 	file descriptor for it.
5125 	Used by attr_dir_open() and attr_dir_open_fd().
5126 */
5127 static int
5128 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5129 {
5130 	void* cookie;
5131 	int status;
5132 
5133 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5134 		return EOPNOTSUPP;
5135 
5136 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5137 	if (status < 0)
5138 		return status;
5139 
5140 	// the attribute directory is opened, create an FD
5141 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
5142 	if (status >= 0)
5143 		return status;
5144 
5145 	FS_CALL(vnode, close_attr_dir, cookie);
5146 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5147 
5148 	return status;
5149 }
5150 
5151 
5152 static int
5153 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5154 	int openMode, int perms, bool kernel)
5155 {
5156 	struct vnode* directory;
5157 	int status;
5158 
5159 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5160 		"kernel %d\n", name, openMode, perms, kernel));
5161 
5162 	// get directory to put the new file in
5163 	status = get_vnode(mountID, directoryID, &directory, true, false);
5164 	if (status != B_OK)
5165 		return status;
5166 
5167 	status = create_vnode(directory, name, openMode, perms, kernel);
5168 	put_vnode(directory);
5169 
5170 	return status;
5171 }
5172 
5173 
5174 static int
5175 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5176 {
5177 	char name[B_FILE_NAME_LENGTH];
5178 	struct vnode* directory;
5179 	int status;
5180 
5181 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5182 		openMode, perms, kernel));
5183 
5184 	// get directory to put the new file in
5185 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5186 	if (status < 0)
5187 		return status;
5188 
5189 	status = create_vnode(directory, name, openMode, perms, kernel);
5190 
5191 	put_vnode(directory);
5192 	return status;
5193 }
5194 
5195 
5196 static int
5197 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5198 	int openMode, bool kernel)
5199 {
5200 	if (name == NULL || *name == '\0')
5201 		return B_BAD_VALUE;
5202 
5203 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5204 		mountID, directoryID, name, openMode));
5205 
5206 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
5207 
5208 	// get the vnode matching the entry_ref
5209 	struct vnode* vnode;
5210 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5211 		kernel, &vnode);
5212 	if (status != B_OK)
5213 		return status;
5214 
5215 	int fd = open_vnode(vnode, openMode, kernel);
5216 	if (fd < 0)
5217 		put_vnode(vnode);
5218 
5219 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5220 		vnode->id, name);
5221 	return fd;
5222 }
5223 
5224 
5225 static int
5226 file_open(int fd, char* path, int openMode, bool kernel)
5227 {
5228 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
5229 
5230 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5231 		fd, path, openMode, kernel));
5232 
5233 	// get the vnode matching the vnode + path combination
5234 	struct vnode* vnode;
5235 	ino_t parentID;
5236 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5237 		&parentID, kernel);
5238 	if (status != B_OK)
5239 		return status;
5240 
5241 	// open the vnode
5242 	int newFD = open_vnode(vnode, openMode, kernel);
5243 	// put only on error -- otherwise our reference was transferred to the FD
5244 	if (newFD < 0)
5245 		put_vnode(vnode);
5246 
5247 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5248 		vnode->device, parentID, vnode->id, NULL);
5249 
5250 	return newFD;
5251 }
5252 
5253 
5254 static status_t
5255 file_close(struct file_descriptor* descriptor)
5256 {
5257 	struct vnode* vnode = descriptor->u.vnode;
5258 	status_t status = B_OK;
5259 
5260 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5261 
5262 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5263 		vnode->id);
5264 	if (HAS_FS_CALL(vnode, close)) {
5265 		status = FS_CALL(vnode, close, descriptor->cookie);
5266 	}
5267 
5268 	if (status == B_OK) {
5269 		// remove all outstanding locks for this team
5270 		release_advisory_lock(vnode, NULL);
5271 	}
5272 	return status;
5273 }
5274 
5275 
5276 static void
5277 file_free_fd(struct file_descriptor* descriptor)
5278 {
5279 	struct vnode* vnode = descriptor->u.vnode;
5280 
5281 	if (vnode != NULL) {
5282 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5283 		put_vnode(vnode);
5284 	}
5285 }
5286 
5287 
5288 static status_t
5289 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5290 	size_t* length)
5291 {
5292 	struct vnode* vnode = descriptor->u.vnode;
5293 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5294 		*length));
5295 
5296 	if (S_ISDIR(vnode->type))
5297 		return B_IS_A_DIRECTORY;
5298 
5299 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5300 }
5301 
5302 
5303 static status_t
5304 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5305 	size_t* length)
5306 {
5307 	struct vnode* vnode = descriptor->u.vnode;
5308 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5309 
5310 	if (S_ISDIR(vnode->type))
5311 		return B_IS_A_DIRECTORY;
5312 	if (!HAS_FS_CALL(vnode, write))
5313 		return EROFS;
5314 
5315 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5316 }
5317 
5318 
5319 static off_t
5320 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5321 {
5322 	struct vnode* vnode = descriptor->u.vnode;
5323 	off_t offset;
5324 
5325 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5326 
5327 	// some kinds of files are not seekable
5328 	switch (vnode->type & S_IFMT) {
5329 		case S_IFIFO:
5330 		case S_IFSOCK:
5331 			return ESPIPE;
5332 
5333 		// The Open Group Base Specs single out only pipes, FIFOs, and sockets
5334 		// as not seekable, so we allow seeking all other file types.
5335 		case S_IFREG:
5336 		case S_IFBLK:
5337 		case S_IFDIR:
5338 		case S_IFLNK:
5339 		case S_IFCHR:
5340 			break;
5341 	}
5342 
5343 	switch (seekType) {
5344 		case SEEK_SET:
5345 			offset = 0;
5346 			break;
5347 		case SEEK_CUR:
5348 			offset = descriptor->pos;
5349 			break;
5350 		case SEEK_END:
5351 		{
5352 			// stat() the node
5353 			if (!HAS_FS_CALL(vnode, read_stat))
5354 				return EOPNOTSUPP;
5355 
5356 			struct stat stat;
5357 			status_t status = FS_CALL(vnode, read_stat, &stat);
5358 			if (status != B_OK)
5359 				return status;
5360 
5361 			offset = stat.st_size;
5362 			break;
5363 		}
5364 		default:
5365 			return B_BAD_VALUE;
5366 	}
5367 
5368 	// assumes off_t is 64 bits wide
5369 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5370 		return EOVERFLOW;
5371 
5372 	pos += offset;
5373 	if (pos < 0)
5374 		return B_BAD_VALUE;
5375 
5376 	return descriptor->pos = pos;
5377 }
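
/* Worked example: with SEEK_END and pos == -10 on a 100-byte file, offset
	becomes 100 and the resulting descriptor position is 90; a pos that would
	push the sum past LONGLONG_MAX yields EOVERFLOW instead. */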
5378 
5379 
5380 static status_t
5381 file_select(struct file_descriptor* descriptor, uint8 event,
5382 	struct selectsync* sync)
5383 {
5384 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5385 
5386 	struct vnode* vnode = descriptor->u.vnode;
5387 
5388 	// If the FS has no select() hook, notify select() now.
5389 	if (!HAS_FS_CALL(vnode, select))
5390 		return notify_select_event(sync, event);
5391 
5392 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5393 }
5394 
5395 
5396 static status_t
5397 file_deselect(struct file_descriptor* descriptor, uint8 event,
5398 	struct selectsync* sync)
5399 {
5400 	struct vnode* vnode = descriptor->u.vnode;
5401 
5402 	if (!HAS_FS_CALL(vnode, deselect))
5403 		return B_OK;
5404 
5405 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5406 }
5407 
5408 
5409 static status_t
5410 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5411 	bool kernel)
5412 {
5413 	struct vnode* vnode;
5414 	status_t status;
5415 
5416 	if (name == NULL || *name == '\0')
5417 		return B_BAD_VALUE;
5418 
5419 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5420 		"perms = %d)\n", mountID, parentID, name, perms));
5421 
5422 	status = get_vnode(mountID, parentID, &vnode, true, false);
5423 	if (status != B_OK)
5424 		return status;
5425 
5426 	if (HAS_FS_CALL(vnode, create_dir))
5427 		status = FS_CALL(vnode, create_dir, name, perms);
5428 	else
5429 		status = EROFS;
5430 
5431 	put_vnode(vnode);
5432 	return status;
5433 }
5434 
5435 
5436 static status_t
5437 dir_create(int fd, char* path, int perms, bool kernel)
5438 {
5439 	char filename[B_FILE_NAME_LENGTH];
5440 	struct vnode* vnode;
5441 	status_t status;
5442 
5443 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5444 		kernel));
5445 
5446 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5447 	if (status < 0)
5448 		return status;
5449 
5450 	if (HAS_FS_CALL(vnode, create_dir)) {
5451 		status = FS_CALL(vnode, create_dir, filename, perms);
5452 	} else
5453 		status = EROFS;
5454 
5455 	put_vnode(vnode);
5456 	return status;
5457 }
5458 
5459 
5460 static int
5461 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5462 {
5463 	struct vnode* vnode;
5464 	int status;
5465 
5466 	FUNCTION(("dir_open_entry_ref()\n"));
5467 
5468 	if (name && *name == '\0')
5469 		return B_BAD_VALUE;
5470 
5471 	// get the vnode matching the entry_ref/node_ref
5472 	if (name) {
5473 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5474 			&vnode);
5475 	} else
5476 		status = get_vnode(mountID, parentID, &vnode, true, false);
5477 	if (status != B_OK)
5478 		return status;
5479 
5480 	int fd = open_dir_vnode(vnode, kernel);
5481 	if (fd < 0)
5482 		put_vnode(vnode);
5483 
5484 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5485 		vnode->id, name);
5486 	return fd;
5487 }
5488 
5489 
5490 static int
5491 dir_open(int fd, char* path, bool kernel)
5492 {
5493 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5494 		kernel));
5495 
5496 	// get the vnode matching the vnode + path combination
5497 	struct vnode* vnode = NULL;
5498 	ino_t parentID;
5499 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5500 		kernel);
5501 	if (status != B_OK)
5502 		return status;
5503 
5504 	// open the dir
5505 	int newFD = open_dir_vnode(vnode, kernel);
5506 	if (newFD < 0)
5507 		put_vnode(vnode);
5508 
5509 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID,
5510 		vnode->id, NULL);
5511 	return newFD;
5512 }
5513 
5514 
5515 static status_t
5516 dir_close(struct file_descriptor* descriptor)
5517 {
5518 	struct vnode* vnode = descriptor->u.vnode;
5519 
5520 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5521 
5522 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5523 		vnode->id);
5524 	if (HAS_FS_CALL(vnode, close_dir))
5525 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5526 
5527 	return B_OK;
5528 }
5529 
5530 
5531 static void
5532 dir_free_fd(struct file_descriptor* descriptor)
5533 {
5534 	struct vnode* vnode = descriptor->u.vnode;
5535 
5536 	if (vnode != NULL) {
5537 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5538 		put_vnode(vnode);
5539 	}
5540 }
5541 
5542 
5543 static status_t
5544 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5545 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5546 {
5547 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5548 		bufferSize, _count);
5549 }
5550 
5551 
5552 static status_t
5553 fix_dirent(struct vnode* parent, struct dirent* entry,
5554 	struct io_context* ioContext)
5555 {
5556 	// set d_pdev and d_pino
5557 	entry->d_pdev = parent->device;
5558 	entry->d_pino = parent->id;
5559 
5560 	// If this is the ".." entry and the directory is the root of a FS,
5561 	// we need to replace d_dev and d_ino with the actual values.
5562 	if (strcmp(entry->d_name, "..") == 0
5563 		&& parent->mount->root_vnode == parent
5564 		&& parent->mount->covers_vnode) {
5565 		inc_vnode_ref_count(parent);
5566 			// vnode_path_to_vnode() puts the node
5567 
5568 		// Make sure the IO context root is not bypassed.
5569 		if (parent == ioContext->root) {
5570 			entry->d_dev = parent->device;
5571 			entry->d_ino = parent->id;
5572 		} else {
5573 			// ".." is guaranteed not to be clobbered by this call
5574 			struct vnode* vnode;
5575 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5576 				ioContext, &vnode, NULL);
5577 
5578 			if (status == B_OK) {
5579 				entry->d_dev = vnode->device;
5580 				entry->d_ino = vnode->id;
5581 			}
5582 		}
5583 	} else {
5584 		// resolve mount points
5585 		MutexLocker _(&sVnodeMutex);
5586 
5587 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5588 		if (vnode != NULL) {
5589 			MutexLocker _(&sVnodeCoveredByMutex);
5590 			if (vnode->covered_by != NULL) {
5591 				entry->d_dev = vnode->covered_by->device;
5592 				entry->d_ino = vnode->covered_by->id;
5593 			}
5594 		}
5595 	}
5596 
5597 	return B_OK;
5598 }
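
/* For example (assuming /boot is a mount point): the ".." entry read from
	the root directory of the volume mounted there must carry the
	device/inode of /boot's parent directory on the underlying FS, while an
	ordinary entry that is itself covered by a mount is reported as the root
	node of the volume mounted on top of it. */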
5599 
5600 
5601 static status_t
5602 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5603 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5604 {
5605 	if (!HAS_FS_CALL(vnode, read_dir))
5606 		return EOPNOTSUPP;
5607 
5608 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5609 		_count);
5610 	if (error != B_OK)
5611 		return error;
5612 
5613 	// we need to adjust the read dirents
5614 	uint32 count = *_count;
5615 	for (uint32 i = 0; i < count; i++) {
5616 		error = fix_dirent(vnode, buffer, ioContext);
5617 		if (error != B_OK)
5618 			return error;
5619 
5620 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5621 	}
5622 
5623 	return error;
5624 }
5625 
5626 
5627 static status_t
5628 dir_rewind(struct file_descriptor* descriptor)
5629 {
5630 	struct vnode* vnode = descriptor->u.vnode;
5631 
5632 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5633 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5634 	}
5635 
5636 	return EOPNOTSUPP;
5637 }
5638 
5639 
5640 static status_t
5641 dir_remove(int fd, char* path, bool kernel)
5642 {
5643 	char name[B_FILE_NAME_LENGTH];
5644 	struct vnode* directory;
5645 	status_t status;
5646 
5647 	if (path != NULL) {
5648 		// we need to make sure our path name doesn't stop with "/", ".",
5649 		// or ".."
5650 		char* lastSlash = strrchr(path, '/');
5651 		if (lastSlash != NULL) {
5652 			char* leaf = lastSlash + 1;
5653 			if (!strcmp(leaf, ".."))
5654 				return B_NOT_ALLOWED;
5655 
5656 			// omit multiple slashes
5657 			while (lastSlash > path && lastSlash[-1] == '/') {
5658 				lastSlash--;
5659 			}
5660 
5661 			if (!leaf[0]
5662 				|| !strcmp(leaf, ".")) {
5663 				// "name/" -> "name", or "name/." -> "name"
5664 				lastSlash[0] = '\0';
5665 			}
5666 		}
5667 
5668 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5669 			return B_NOT_ALLOWED;
5670 	}
5671 
5672 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5673 	if (status != B_OK)
5674 		return status;
5675 
5676 	if (HAS_FS_CALL(directory, remove_dir))
5677 		status = FS_CALL(directory, remove_dir, name);
5678 	else
5679 		status = EROFS;
5680 
5681 	put_vnode(directory);
5682 	return status;
5683 }
5684 
5685 
5686 static status_t
5687 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5688 	size_t length)
5689 {
5690 	struct vnode* vnode = descriptor->u.vnode;
5691 
5692 	if (HAS_FS_CALL(vnode, ioctl))
5693 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5694 
5695 	return EOPNOTSUPP;
5696 }
5697 
5698 
5699 static status_t
5700 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5701 {
5702 	struct flock flock;
5703 
5704 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5705 		fd, op, argument, kernel ? "kernel" : "user"));
5706 
5707 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5708 		fd);
5709 	if (descriptor == NULL)
5710 		return B_FILE_ERROR;
5711 
5712 	struct vnode* vnode = fd_vnode(descriptor);
5713 
5714 	status_t status = B_OK;
5715 
5716 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5717 		if (descriptor->type != FDTYPE_FILE)
5718 			status = B_BAD_VALUE;
5719 		else if (user_memcpy(&flock, (struct flock*)argument,
5720 				sizeof(struct flock)) != B_OK)
5721 			status = B_BAD_ADDRESS;
5722 
5723 		if (status != B_OK) {
5724 			put_fd(descriptor);
5725 			return status;
5726 		}
5727 	}
5728 
5729 	switch (op) {
5730 		case F_SETFD:
5731 		{
5732 			// Set file descriptor flags
5733 			struct io_context* context = get_current_io_context(kernel);
5734 
5735 			// FD_CLOEXEC is the only flag available at this time
5736 			mutex_lock(&context->io_mutex);
5737 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5738 			mutex_unlock(&context->io_mutex);
5739 
5740 			status = B_OK;
5741 			break;
5742 		}
5743 
5744 		case F_GETFD:
5745 		{
5746 			struct io_context* context = get_current_io_context(kernel);
5747 
5748 			// Get file descriptor flags
5749 			mutex_lock(&context->io_mutex);
5750 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5751 			mutex_unlock(&context->io_mutex);
5752 			break;
5753 		}
5754 
5755 		case F_SETFL:
5756 			// Set file descriptor open mode
5757 
5758 			// we only accept changes to O_APPEND and O_NONBLOCK
5759 			argument &= O_APPEND | O_NONBLOCK;
5760 			if (descriptor->ops->fd_set_flags != NULL) {
5761 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5762 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5763 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5764 					(int)argument);
5765 			} else
5766 				status = EOPNOTSUPP;
5767 
5768 			if (status == B_OK) {
5769 				// update this descriptor's open_mode field
5770 				descriptor->open_mode = (descriptor->open_mode
5771 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5772 			}
5773 
5774 			break;
5775 
5776 		case F_GETFL:
5777 			// Get file descriptor open mode
5778 			status = descriptor->open_mode;
5779 			break;
5780 
5781 		case F_DUPFD:
5782 		{
5783 			struct io_context* context = get_current_io_context(kernel);
5784 
5785 			status = new_fd_etc(context, descriptor, (int)argument);
5786 			if (status >= 0) {
5787 				mutex_lock(&context->io_mutex);
5788 				fd_set_close_on_exec(context, fd, false);
5789 				mutex_unlock(&context->io_mutex);
5790 
5791 				atomic_add(&descriptor->ref_count, 1);
5792 			}
5793 			break;
5794 		}
5795 
5796 		case F_GETLK:
5797 			if (vnode != NULL) {
5798 				status = get_advisory_lock(vnode, &flock);
5799 				if (status == B_OK) {
5800 					// copy back flock structure
5801 					status = user_memcpy((struct flock*)argument, &flock,
5802 						sizeof(struct flock));
5803 				}
5804 			} else
5805 				status = B_BAD_VALUE;
5806 			break;
5807 
5808 		case F_SETLK:
5809 		case F_SETLKW:
5810 			status = normalize_flock(descriptor, &flock);
5811 			if (status != B_OK)
5812 				break;
5813 
5814 			if (vnode == NULL) {
5815 				status = B_BAD_VALUE;
5816 			} else if (flock.l_type == F_UNLCK) {
5817 				status = release_advisory_lock(vnode, &flock);
5818 			} else {
5819 				// the open mode must match the lock type
5820 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5821 						&& flock.l_type == F_WRLCK)
5822 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5823 						&& flock.l_type == F_RDLCK))
5824 					status = B_FILE_ERROR;
5825 				else {
5826 					status = acquire_advisory_lock(vnode, -1,
5827 						&flock, op == F_SETLKW);
5828 				}
5829 			}
5830 			break;
5831 
5832 		// ToDo: add support for more ops?
5833 
5834 		default:
5835 			status = B_BAD_VALUE;
5836 	}
5837 
5838 	put_fd(descriptor);
5839 	return status;
5840 }
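

// A minimal usage sketch for common_fcntl(), kept out of the build via
// #if 0; the FD values are hypothetical. F_DUPFD returns the new FD (its
// argument is the minimum FD number to use), F_SETFD sets the descriptor
// slot's flags.
#if 0
static status_t
example_dup_with_cloexec(int fd)
{
	// duplicate to the lowest free FD >= 0
	status_t newFD = common_fcntl(fd, F_DUPFD, 0, true);
	if (newFD < 0)
		return newFD;

	// mark the original FD close-on-exec
	return common_fcntl(fd, F_SETFD, FD_CLOEXEC, true);
}
#endif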
5841 
5842 
5843 static status_t
5844 common_sync(int fd, bool kernel)
5845 {
5846 	struct file_descriptor* descriptor;
5847 	struct vnode* vnode;
5848 	status_t status;
5849 
5850 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
5851 
5852 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5853 	if (descriptor == NULL)
5854 		return B_FILE_ERROR;
5855 
5856 	if (HAS_FS_CALL(vnode, fsync))
5857 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5858 	else
5859 		status = EOPNOTSUPP;
5860 
5861 	put_fd(descriptor);
5862 	return status;
5863 }
5864 
5865 
5866 static status_t
5867 common_lock_node(int fd, bool kernel)
5868 {
5869 	struct file_descriptor* descriptor;
5870 	struct vnode* vnode;
5871 
5872 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5873 	if (descriptor == NULL)
5874 		return B_FILE_ERROR;
5875 
5876 	status_t status = B_OK;
5877 
5878 	// We need to set the locking atomically - someone
5879 	// else might set one at the same time
5880 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
5881 			(file_descriptor*)NULL) != NULL)
5882 		status = B_BUSY;
5883 
5884 	put_fd(descriptor);
5885 	return status;
5886 }
5887 
5888 
5889 static status_t
5890 common_unlock_node(int fd, bool kernel)
5891 {
5892 	struct file_descriptor* descriptor;
5893 	struct vnode* vnode;
5894 
5895 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5896 	if (descriptor == NULL)
5897 		return B_FILE_ERROR;
5898 
5899 	status_t status = B_OK;
5900 
5901 	// We need to clear the lock atomically - someone
5902 	// else might set or clear one at the same time
5903 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
5904 			(file_descriptor*)NULL, descriptor) != descriptor)
5905 		status = B_BAD_VALUE;
5906 
5907 	put_fd(descriptor);
5908 	return status;
5909 }
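

// A sketch (kept out of the build) of how the two functions above combine:
// atomic_pointer_test_and_set() lets the first descriptor swap itself into
// mandatory_locked_by, and only that same descriptor can swap NULL back in.
#if 0
static status_t
example_with_node_locked(int fd, bool kernel)
{
	status_t status = common_lock_node(fd, kernel);
	if (status != B_OK)
		return status;
		// B_BUSY if another descriptor already holds the lock

	// ... exclusive access to the node ...

	return common_unlock_node(fd, kernel);
		// B_BAD_VALUE if this descriptor wasn't the lock holder
}
#endif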
5910 
5911 
5912 static status_t
5913 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
5914 	bool kernel)
5915 {
5916 	struct vnode* vnode;
5917 	status_t status;
5918 
5919 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
5920 	if (status != B_OK)
5921 		return status;
5922 
5923 	if (HAS_FS_CALL(vnode, read_symlink)) {
5924 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
5925 	} else
5926 		status = B_BAD_VALUE;
5927 
5928 	put_vnode(vnode);
5929 	return status;
5930 }
5931 
5932 
5933 static status_t
5934 common_create_symlink(int fd, char* path, const char* toPath, int mode,
5935 	bool kernel)
5936 {
5937 	// path validity checks have to be in the calling function!
5938 	char name[B_FILE_NAME_LENGTH];
5939 	struct vnode* vnode;
5940 	status_t status;
5941 
5942 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
5943 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
5944 
5945 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
5946 	if (status != B_OK)
5947 		return status;
5948 
5949 	if (HAS_FS_CALL(vnode, create_symlink))
5950 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
5951 	else {
5952 		status = HAS_FS_CALL(vnode, write)
5953 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
5954 	}
5955 
5956 	put_vnode(vnode);
5957 
5958 	return status;
5959 }
5960 
5961 
5962 static status_t
5963 common_create_link(char* path, char* toPath, bool kernel)
5964 {
5965 	// path validity checks have to be in the calling function!
5966 
5967 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
5968 		toPath, kernel));
5969 
5970 	char name[B_FILE_NAME_LENGTH];
5971 	struct vnode* directory;
5972 	status_t status = path_to_dir_vnode(path, &directory, name, kernel);
5973 	if (status != B_OK)
5974 		return status;
5975 
5976 	struct vnode* vnode;
5977 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
5978 	if (status != B_OK)
5979 		goto err;
5980 
5981 	if (directory->mount != vnode->mount) {
5982 		status = B_CROSS_DEVICE_LINK;
5983 		goto err1;
5984 	}
5985 
5986 	if (HAS_FS_CALL(directory, link))
5987 		status = FS_CALL(directory, link, name, vnode);
5988 	else
5989 		status = EROFS;
5990 
5991 err1:
5992 	put_vnode(vnode);
5993 err:
5994 	put_vnode(directory);
5995 
5996 	return status;
5997 }
5998 
5999 
6000 static status_t
6001 common_unlink(int fd, char* path, bool kernel)
6002 {
6003 	char filename[B_FILE_NAME_LENGTH];
6004 	struct vnode* vnode;
6005 	status_t status;
6006 
6007 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6008 		kernel));
6009 
6010 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6011 	if (status < 0)
6012 		return status;
6013 
6014 	if (HAS_FS_CALL(vnode, unlink))
6015 		status = FS_CALL(vnode, unlink, filename);
6016 	else
6017 		status = EROFS;
6018 
6019 	put_vnode(vnode);
6020 
6021 	return status;
6022 }
6023 
6024 
6025 static status_t
6026 common_access(char* path, int mode, bool kernel)
6027 {
6028 	struct vnode* vnode;
6029 	status_t status;
6030 
6031 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
6032 	if (status != B_OK)
6033 		return status;
6034 
6035 	if (HAS_FS_CALL(vnode, access))
6036 		status = FS_CALL(vnode, access, mode);
6037 	else
6038 		status = B_OK;
6039 
6040 	put_vnode(vnode);
6041 
6042 	return status;
6043 }
6044 
6045 
6046 static status_t
6047 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6048 {
6049 	struct vnode* fromVnode;
6050 	struct vnode* toVnode;
6051 	char fromName[B_FILE_NAME_LENGTH];
6052 	char toName[B_FILE_NAME_LENGTH];
6053 	status_t status;
6054 
6055 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6056 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6057 
6058 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6059 	if (status != B_OK)
6060 		return status;
6061 
6062 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6063 	if (status != B_OK)
6064 		goto err1;
6065 
6066 	if (fromVnode->device != toVnode->device) {
6067 		status = B_CROSS_DEVICE_LINK;
6068 		goto err2;
6069 	}
6070 
6071 	if (HAS_FS_CALL(fromVnode, rename))
6072 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6073 	else
6074 		status = EROFS;
6075 
6076 err2:
6077 	put_vnode(toVnode);
6078 err1:
6079 	put_vnode(fromVnode);
6080 
6081 	return status;
6082 }
6083 
6084 
6085 static status_t
6086 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6087 {
6088 	struct vnode* vnode = descriptor->u.vnode;
6089 
6090 	FUNCTION(("common_read_stat: stat %p\n", stat));
6091 
6092 	// TODO: remove this once all file systems properly set them!
6093 	stat->st_crtim.tv_nsec = 0;
6094 	stat->st_ctim.tv_nsec = 0;
6095 	stat->st_mtim.tv_nsec = 0;
6096 	stat->st_atim.tv_nsec = 0;
6097 
6098 	status_t status = FS_CALL(vnode, read_stat, stat);
6099 
6100 	// fill in the st_dev and st_ino fields
6101 	if (status == B_OK) {
6102 		stat->st_dev = vnode->device;
6103 		stat->st_ino = vnode->id;
6104 		stat->st_rdev = -1;
6105 	}
6106 
6107 	return status;
6108 }
6109 
6110 
6111 static status_t
6112 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6113 	int statMask)
6114 {
6115 	struct vnode* vnode = descriptor->u.vnode;
6116 
6117 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6118 		vnode, stat, statMask));
6119 
6120 	if (!HAS_FS_CALL(vnode, write_stat))
6121 		return EROFS;
6122 
6123 	return FS_CALL(vnode, write_stat, stat, statMask);
6124 }
6125 
6126 
6127 static status_t
6128 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6129 	struct stat* stat, bool kernel)
6130 {
6131 	struct vnode* vnode;
6132 	status_t status;
6133 
6134 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6135 		stat));
6136 
6137 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6138 		kernel);
6139 	if (status < 0)
6140 		return status;
6141 
6142 	status = FS_CALL(vnode, read_stat, stat);
6143 
6144 	// fill in the st_dev and st_ino fields
6145 	if (status == B_OK) {
6146 		stat->st_dev = vnode->device;
6147 		stat->st_ino = vnode->id;
6148 		stat->st_rdev = -1;
6149 	}
6150 
6151 	put_vnode(vnode);
6152 	return status;
6153 }
6154 
6155 
6156 static status_t
6157 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6158 	const struct stat* stat, int statMask, bool kernel)
6159 {
6160 	struct vnode* vnode;
6161 	status_t status;
6162 
6163 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6164 		"kernel %d\n", fd, path, stat, statMask, kernel));
6165 
6166 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6167 		kernel);
6168 	if (status < 0)
6169 		return status;
6170 
6171 	if (HAS_FS_CALL(vnode, write_stat))
6172 		status = FS_CALL(vnode, write_stat, stat, statMask);
6173 	else
6174 		status = EROFS;
6175 
6176 	put_vnode(vnode);
6177 
6178 	return status;
6179 }
6180 
6181 
6182 static int
6183 attr_dir_open(int fd, char* path, bool kernel)
6184 {
6185 	struct vnode* vnode;
6186 	int status;
6187 
6188 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6189 		kernel));
6190 
6191 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6192 	if (status != B_OK)
6193 		return status;
6194 
6195 	status = open_attr_dir_vnode(vnode, kernel);
6196 	if (status < 0)
6197 		put_vnode(vnode);
6198 
6199 	return status;
6200 }
6201 
6202 
6203 static status_t
6204 attr_dir_close(struct file_descriptor* descriptor)
6205 {
6206 	struct vnode* vnode = descriptor->u.vnode;
6207 
6208 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6209 
6210 	if (HAS_FS_CALL(vnode, close_attr_dir))
6211 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6212 
6213 	return B_OK;
6214 }
6215 
6216 
6217 static void
6218 attr_dir_free_fd(struct file_descriptor* descriptor)
6219 {
6220 	struct vnode* vnode = descriptor->u.vnode;
6221 
6222 	if (vnode != NULL) {
6223 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6224 		put_vnode(vnode);
6225 	}
6226 }
6227 
6228 
6229 static status_t
6230 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6231 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6232 {
6233 	struct vnode* vnode = descriptor->u.vnode;
6234 
6235 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6236 
6237 	if (HAS_FS_CALL(vnode, read_attr_dir))
6238 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6239 			bufferSize, _count);
6240 
6241 	return EOPNOTSUPP;
6242 }
6243 
6244 
6245 static status_t
6246 attr_dir_rewind(struct file_descriptor* descriptor)
6247 {
6248 	struct vnode* vnode = descriptor->u.vnode;
6249 
6250 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6251 
6252 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6253 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6254 
6255 	return EOPNOTSUPP;
6256 }
6257 
6258 
6259 static int
6260 attr_create(int fd, char* path, const char* name, uint32 type,
6261 	int openMode, bool kernel)
6262 {
6263 	if (name == NULL || *name == '\0')
6264 		return B_BAD_VALUE;
6265 
6266 	struct vnode* vnode;
6267 	status_t status = fd_and_path_to_vnode(fd, path,
6268 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6269 	if (status != B_OK)
6270 		return status;
6271 
6272 	if (!HAS_FS_CALL(vnode, create_attr)) {
6273 		status = EROFS;
6274 		goto err;
6275 	}
6276 
6277 	void* cookie;
6278 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6279 	if (status != B_OK)
6280 		goto err;
6281 
6282 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6283 	if (fd >= 0)
6284 		return fd;
6285 
6286 	status = fd;
6287 
6288 	FS_CALL(vnode, close_attr, cookie);
6289 	FS_CALL(vnode, free_attr_cookie, cookie);
6290 
6291 	FS_CALL(vnode, remove_attr, name);
6292 
6293 err:
6294 	put_vnode(vnode);
6295 
6296 	return status;
6297 }
6298 
6299 
6300 static int
6301 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6302 {
6303 	if (name == NULL || *name == '\0')
6304 		return B_BAD_VALUE;
6305 
6306 	struct vnode* vnode;
6307 	status_t status = fd_and_path_to_vnode(fd, path,
6308 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6309 	if (status != B_OK)
6310 		return status;
6311 
6312 	if (!HAS_FS_CALL(vnode, open_attr)) {
6313 		status = EOPNOTSUPP;
6314 		goto err;
6315 	}
6316 
6317 	void* cookie;
6318 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6319 	if (status != B_OK)
6320 		goto err;
6321 
6322 	// now we only need a file descriptor for this attribute and we're done
6323 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6324 	if (fd >= 0)
6325 		return fd;
6326 
6327 	status = fd;
6328 
6329 	FS_CALL(vnode, close_attr, cookie);
6330 	FS_CALL(vnode, free_attr_cookie, cookie);
6331 
6332 err:
6333 	put_vnode(vnode);
6334 
6335 	return status;
6336 }
6337 
6338 
6339 static status_t
6340 attr_close(struct file_descriptor* descriptor)
6341 {
6342 	struct vnode* vnode = descriptor->u.vnode;
6343 
6344 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6345 
6346 	if (HAS_FS_CALL(vnode, close_attr))
6347 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6348 
6349 	return B_OK;
6350 }
6351 
6352 
6353 static void
6354 attr_free_fd(struct file_descriptor* descriptor)
6355 {
6356 	struct vnode* vnode = descriptor->u.vnode;
6357 
6358 	if (vnode != NULL) {
6359 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6360 		put_vnode(vnode);
6361 	}
6362 }
6363 
6364 
6365 static status_t
6366 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6367 	size_t* length)
6368 {
6369 	struct vnode* vnode = descriptor->u.vnode;
6370 
6371 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6372 		*length));
6373 
6374 	if (!HAS_FS_CALL(vnode, read_attr))
6375 		return EOPNOTSUPP;
6376 
6377 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6378 }
6379 
6380 
6381 static status_t
6382 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6383 	size_t* length)
6384 {
6385 	struct vnode* vnode = descriptor->u.vnode;
6386 
6387 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6388 	if (!HAS_FS_CALL(vnode, write_attr))
6389 		return EOPNOTSUPP;
6390 
6391 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6392 }
6393 
6394 
6395 static off_t
6396 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6397 {
6398 	off_t offset;
6399 
6400 	switch (seekType) {
6401 		case SEEK_SET:
6402 			offset = 0;
6403 			break;
6404 		case SEEK_CUR:
6405 			offset = descriptor->pos;
6406 			break;
6407 		case SEEK_END:
6408 		{
6409 			struct vnode* vnode = descriptor->u.vnode;
6410 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6411 				return EOPNOTSUPP;
6412 
6413 			struct stat stat;
6414 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6415 				&stat);
6416 			if (status != B_OK)
6417 				return status;
6418 
6419 			offset = stat.st_size;
6420 			break;
6421 		}
6422 		default:
6423 			return B_BAD_VALUE;
6424 	}
6425 
6426 	// assumes off_t is 64 bits wide
6427 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6428 		return EOVERFLOW;
6429 
6430 	pos += offset;
6431 	if (pos < 0)
6432 		return B_BAD_VALUE;
6433 
6434 	return descriptor->pos = pos;
6435 }
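

// A sketch (kept out of the build) of the overflow guard used in attr_seek()
// above: with a 64 bit off_t, "LONGLONG_MAX - offset < pos" detects that
// "pos + offset" would wrap around, before the addition is performed.
#if 0
static off_t
example_checked_seek_position(off_t offset, off_t pos)
{
	if (offset > 0 && LONGLONG_MAX - offset < pos)
		return EOVERFLOW;

	return offset + pos;
}
#endif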
6436 
6437 
6438 static status_t
6439 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6440 {
6441 	struct vnode* vnode = descriptor->u.vnode;
6442 
6443 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6444 
6445 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6446 		return EOPNOTSUPP;
6447 
6448 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6449 }
6450 
6451 
6452 static status_t
6453 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6454 	int statMask)
6455 {
6456 	struct vnode* vnode = descriptor->u.vnode;
6457 
6458 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6459 
6460 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6461 		return EROFS;
6462 
6463 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6464 }
6465 
6466 
6467 static status_t
6468 attr_remove(int fd, const char* name, bool kernel)
6469 {
6470 	struct file_descriptor* descriptor;
6471 	struct vnode* vnode;
6472 	status_t status;
6473 
6474 	if (name == NULL || *name == '\0')
6475 		return B_BAD_VALUE;
6476 
6477 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6478 		kernel));
6479 
6480 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6481 	if (descriptor == NULL)
6482 		return B_FILE_ERROR;
6483 
6484 	if (HAS_FS_CALL(vnode, remove_attr))
6485 		status = FS_CALL(vnode, remove_attr, name);
6486 	else
6487 		status = EROFS;
6488 
6489 	put_fd(descriptor);
6490 
6491 	return status;
6492 }
6493 
6494 
6495 static status_t
6496 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6497 	bool kernel)
6498 {
6499 	struct file_descriptor* fromDescriptor;
6500 	struct file_descriptor* toDescriptor;
6501 	struct vnode* fromVnode;
6502 	struct vnode* toVnode;
6503 	status_t status;
6504 
6505 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6506 		|| *toName == '\0')
6507 		return B_BAD_VALUE;
6508 
6509 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6510 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6511 
6512 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6513 	if (fromDescriptor == NULL)
6514 		return B_FILE_ERROR;
6515 
6516 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6517 	if (toDescriptor == NULL) {
6518 		status = B_FILE_ERROR;
6519 		goto err;
6520 	}
6521 
6522 	// are the files on the same volume?
6523 	if (fromVnode->device != toVnode->device) {
6524 		status = B_CROSS_DEVICE_LINK;
6525 		goto err1;
6526 	}
6527 
6528 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6529 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6530 	} else
6531 		status = EROFS;
6532 
6533 err1:
6534 	put_fd(toDescriptor);
6535 err:
6536 	put_fd(fromDescriptor);
6537 
6538 	return status;
6539 }
6540 
6541 
6542 static int
6543 index_dir_open(dev_t mountID, bool kernel)
6544 {
6545 	struct fs_mount* mount;
6546 	void* cookie;
6547 
6548 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6549 
6550 	status_t status = get_mount(mountID, &mount);
6551 	if (status != B_OK)
6552 		return status;
6553 
6554 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6555 		status = EOPNOTSUPP;
6556 		goto error;
6557 	}
6558 
6559 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6560 	if (status != B_OK)
6561 		goto error;
6562 
6563 	// get fd for the index directory
6564 	int fd;
6565 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
6566 	if (fd >= 0)
6567 		return fd;
6568 
6569 	// something went wrong
6570 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6571 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6572 
6573 	status = fd;
6574 
6575 error:
6576 	put_mount(mount);
6577 	return status;
6578 }
6579 
6580 
6581 static status_t
6582 index_dir_close(struct file_descriptor* descriptor)
6583 {
6584 	struct fs_mount* mount = descriptor->u.mount;
6585 
6586 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6587 
6588 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6589 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6590 
6591 	return B_OK;
6592 }
6593 
6594 
6595 static void
6596 index_dir_free_fd(struct file_descriptor* descriptor)
6597 {
6598 	struct fs_mount* mount = descriptor->u.mount;
6599 
6600 	if (mount != NULL) {
6601 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6602 		put_mount(mount);
6603 	}
6604 }
6605 
6606 
6607 static status_t
6608 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6609 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6610 {
6611 	struct fs_mount* mount = descriptor->u.mount;
6612 
6613 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6614 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6615 			bufferSize, _count);
6616 	}
6617 
6618 	return EOPNOTSUPP;
6619 }
6620 
6621 
6622 static status_t
6623 index_dir_rewind(struct file_descriptor* descriptor)
6624 {
6625 	struct fs_mount* mount = descriptor->u.mount;
6626 
6627 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6628 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6629 
6630 	return EOPNOTSUPP;
6631 }
6632 
6633 
6634 static status_t
6635 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6636 	bool kernel)
6637 {
6638 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6639 		name, kernel));
6640 
6641 	struct fs_mount* mount;
6642 	status_t status = get_mount(mountID, &mount);
6643 	if (status != B_OK)
6644 		return status;
6645 
6646 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6647 		status = EROFS;
6648 		goto out;
6649 	}
6650 
6651 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6652 
6653 out:
6654 	put_mount(mount);
6655 	return status;
6656 }
6657 
6658 
6659 #if 0
6660 static status_t
6661 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6662 {
6663 	struct vnode* vnode = descriptor->u.vnode;
6664 
6665 	// ToDo: currently unused!
6666 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6667 	if (!HAS_FS_CALL(vnode, read_index_stat))
6668 		return EOPNOTSUPP;
6669 
6670 	return EOPNOTSUPP;
6671 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6672 }
6673 
6674 
6675 static void
6676 index_free_fd(struct file_descriptor* descriptor)
6677 {
6678 	struct vnode* vnode = descriptor->u.vnode;
6679 
6680 	if (vnode != NULL) {
6681 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6682 		put_vnode(vnode);
6683 	}
6684 }
6685 #endif
6686 
6687 
6688 static status_t
6689 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6690 	bool kernel)
6691 {
6692 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6693 		mountID, name, kernel));
6694 
6695 	struct fs_mount* mount;
6696 	status_t status = get_mount(mountID, &mount);
6697 	if (status != B_OK)
6698 		return status;
6699 
6700 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6701 		status = EOPNOTSUPP;
6702 		goto out;
6703 	}
6704 
6705 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6706 
6707 out:
6708 	put_mount(mount);
6709 	return status;
6710 }
6711 
6712 
6713 static status_t
6714 index_remove(dev_t mountID, const char* name, bool kernel)
6715 {
6716 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6717 		name, kernel));
6718 
6719 	struct fs_mount* mount;
6720 	status_t status = get_mount(mountID, &mount);
6721 	if (status != B_OK)
6722 		return status;
6723 
6724 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6725 		status = EROFS;
6726 		goto out;
6727 	}
6728 
6729 	status = FS_MOUNT_CALL(mount, remove_index, name);
6730 
6731 out:
6732 	put_mount(mount);
6733 	return status;
6734 }
6735 
6736 
6737 /*!	TODO: the query FS API is still pretty much the same as in R5.
6738 		It would be nice if the file systems got some more kernel support
6739 		for queries.
6740 		For example, query parsing should be moved into the kernel.
6741 */
6742 static int
6743 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6744 	int32 token, bool kernel)
6745 {
6746 	struct fs_mount* mount;
6747 	void* cookie;
6748 
6749 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6750 		query, kernel));
6751 
6752 	status_t status = get_mount(device, &mount);
6753 	if (status != B_OK)
6754 		return status;
6755 
6756 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6757 		status = EOPNOTSUPP;
6758 		goto error;
6759 	}
6760 
6761 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6762 		&cookie);
6763 	if (status != B_OK)
6764 		goto error;
6765 
6766 	// get fd for the query
6767 	int fd;
6768 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
6769 	if (fd >= 0)
6770 		return fd;
6771 
6772 	status = fd;
6773 
6774 	// something went wrong
6775 	FS_MOUNT_CALL(mount, close_query, cookie);
6776 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6777 
6778 error:
6779 	put_mount(mount);
6780 	return status;
6781 }
6782 
6783 
6784 static status_t
6785 query_close(struct file_descriptor* descriptor)
6786 {
6787 	struct fs_mount* mount = descriptor->u.mount;
6788 
6789 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6790 
6791 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6792 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6793 
6794 	return B_OK;
6795 }
6796 
6797 
6798 static void
6799 query_free_fd(struct file_descriptor* descriptor)
6800 {
6801 	struct fs_mount* mount = descriptor->u.mount;
6802 
6803 	if (mount != NULL) {
6804 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6805 		put_mount(mount);
6806 	}
6807 }
6808 
6809 
6810 static status_t
6811 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6812 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6813 {
6814 	struct fs_mount* mount = descriptor->u.mount;
6815 
6816 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6817 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6818 			bufferSize, _count);
6819 	}
6820 
6821 	return EOPNOTSUPP;
6822 }
6823 
6824 
6825 static status_t
6826 query_rewind(struct file_descriptor* descriptor)
6827 {
6828 	struct fs_mount* mount = descriptor->u.mount;
6829 
6830 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6831 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6832 
6833 	return EOPNOTSUPP;
6834 }
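

// A sketch (kept out of the build) of the intended life cycle of a query
// descriptor, combining the helpers above with the generic FD syscalls. The
// device ID and query string are hypothetical; port/token of -1 assume a
// non-live query.
#if 0
static void
example_run_query(bool kernel)
{
	int fd = query_open(1, "name==\"*.cpp\"", 0, -1, -1, kernel);
	if (fd < 0)
		return;

	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
	while (_kern_read_dir(fd, (struct dirent*)buffer, sizeof(buffer), 1) > 0) {
		// each iteration delivers one matching entry via query_read()
	}

	_kern_close(fd);
}
#endif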
6835 
6836 
6837 //	#pragma mark - General File System functions
6838 
6839 
6840 static dev_t
6841 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6842 	const char* args, bool kernel)
6843 {
6844 	struct ::fs_mount* mount;
6845 	status_t status = B_OK;
6846 	fs_volume* volume = NULL;
6847 	int32 layer = 0;
6848 
6849 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6850 
6851 	// The path is always safe, we just have to make sure that fsName is at
6852 	// least minimally valid - we can't make any assumptions about args, though.
6853 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
6854 	// We'll get it from the DDM later.
6855 	if (fsName == NULL) {
6856 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6857 			return B_BAD_VALUE;
6858 	} else if (fsName[0] == '\0')
6859 		return B_BAD_VALUE;
6860 
6861 	RecursiveLocker mountOpLocker(sMountOpLock);
6862 
6863 	// Helper to delete a newly created file device on failure.
6864 	// Not exactly beautiful, but helps to keep the code below cleaner.
6865 	struct FileDeviceDeleter {
6866 		FileDeviceDeleter() : id(-1) {}
6867 		~FileDeviceDeleter()
6868 		{
6869 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
6870 		}
6871 
6872 		partition_id id;
6873 	} fileDeviceDeleter;
6874 
6875 	// If the file system is not a "virtual" one, the device argument should
6876 	// point to a real file/device (if given at all).
6877 	// get the partition
6878 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6879 	KPartition* partition = NULL;
6880 	KPath normalizedDevice;
6881 	bool newlyCreatedFileDevice = false;
6882 
6883 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
6884 		// normalize the device path
6885 		status = normalizedDevice.SetTo(device, true);
6886 		if (status != B_OK)
6887 			return status;
6888 
6889 		// get a corresponding partition from the DDM
6890 		partition = ddm->RegisterPartition(normalizedDevice.Path());
6891 		if (partition == NULL) {
6892 			// Partition not found: this either means the user supplied
6893 			// an invalid path, or the path refers to an image file. We try
6894 			// to let the DDM create a file device for the path.
6895 			partition_id deviceID = ddm->CreateFileDevice(
6896 				normalizedDevice.Path(), &newlyCreatedFileDevice);
6897 			if (deviceID >= 0) {
6898 				partition = ddm->RegisterPartition(deviceID);
6899 				if (newlyCreatedFileDevice)
6900 					fileDeviceDeleter.id = deviceID;
6901 			}
6902 		}
6903 
6904 		if (!partition) {
6905 			TRACE(("fs_mount(): Partition `%s' not found.\n",
6906 				normalizedDevice.Path()));
6907 			return B_ENTRY_NOT_FOUND;
6908 		}
6909 
6910 		device = normalizedDevice.Path();
6911 			// correct path to file device
6912 	}
6913 	PartitionRegistrar partitionRegistrar(partition, true);
6914 
6915 	// Write lock the partition's device. For the time being, we keep the lock
6916 	// until we're done mounting -- not nice, but it ensures that no one
6917 	// interferes.
6918 	// TODO: Just mark the partition busy while mounting!
6919 	KDiskDevice* diskDevice = NULL;
6920 	if (partition) {
6921 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6922 		if (!diskDevice) {
6923 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
6924 			return B_ERROR;
6925 		}
6926 	}
6927 
6928 	DeviceWriteLocker writeLocker(diskDevice, true);
6929 		// this takes over the write lock acquired before
6930 
6931 	if (partition != NULL) {
6932 		// make sure that the partition is not busy
6933 		if (partition->IsBusy()) {
6934 			TRACE(("fs_mount(): Partition is busy.\n"));
6935 			return B_BUSY;
6936 		}
6937 
6938 		// if no FS name had been supplied, we get it from the partition
6939 		if (fsName == NULL) {
6940 			KDiskSystem* diskSystem = partition->DiskSystem();
6941 			if (!diskSystem) {
6942 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
6943 					"recognize it.\n"));
6944 				return B_BAD_VALUE;
6945 			}
6946 
6947 			if (!diskSystem->IsFileSystem()) {
6948 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
6949 					"partitioning system.\n"));
6950 				return B_BAD_VALUE;
6951 			}
6952 
6953 			// The disk system name will not change, and the KDiskSystem
6954 			// object will not go away while the disk device is locked (and
6955 			// the partition has a reference to it), so this is safe.
6956 			fsName = diskSystem->Name();
6957 		}
6958 	}
6959 
6960 	mount = new(std::nothrow) (struct ::fs_mount);
6961 	if (mount == NULL)
6962 		return B_NO_MEMORY;
6963 
6964 	mount->device_name = strdup(device);
6965 		// "device" can be NULL
6966 
6967 	status = mount->entry_cache.Init();
6968 	if (status != B_OK)
6969 		goto err1;
6970 
6971 	// initialize structure
6972 	mount->id = sNextMountID++;
6973 	mount->partition = NULL;
6974 	mount->root_vnode = NULL;
6975 	mount->covers_vnode = NULL;
6976 	mount->unmounting = false;
6977 	mount->owns_file_device = false;
6978 	mount->volume = NULL;
6979 
6980 	// build up the volume(s)
6981 	while (true) {
6982 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
6983 		if (layerFSName == NULL) {
6984 			if (layer == 0) {
6985 				status = B_NO_MEMORY;
6986 				goto err1;
6987 			}
6988 
6989 			break;
6990 		}
6991 
6992 		volume = (fs_volume*)malloc(sizeof(fs_volume));
6993 		if (volume == NULL) {
6994 			status = B_NO_MEMORY;
6995 			free(layerFSName);
6996 			goto err1;
6997 		}
6998 
6999 		volume->id = mount->id;
7000 		volume->partition = partition != NULL ? partition->ID() : -1;
7001 		volume->layer = layer++;
7002 		volume->private_volume = NULL;
7003 		volume->ops = NULL;
7004 		volume->sub_volume = NULL;
7005 		volume->super_volume = NULL;
7006 		volume->file_system = NULL;
7007 		volume->file_system_name = NULL;
7008 
7009 		volume->file_system_name = get_file_system_name(layerFSName);
7010 		if (volume->file_system_name == NULL) {
7011 			status = B_NO_MEMORY;
7012 			free(layerFSName);
7013 			free(volume);
7014 			goto err1;
7015 		}
7016 
7017 		volume->file_system = get_file_system(layerFSName);
7018 		if (volume->file_system == NULL) {
7019 			status = ENODEV;
7020 			free(layerFSName);
7021 			free(volume->file_system_name);
7022 			free(volume);
7023 			goto err1;
7024 		}
7025 
7026 		if (mount->volume == NULL)
7027 			mount->volume = volume;
7028 		else {
7029 			volume->super_volume = mount->volume;
7030 			mount->volume->sub_volume = volume;
7031 			mount->volume = volume;
7032 		}
7033 	}
7034 
7035 	// insert mount struct into list before we call FS's mount() function
7036 	// so that vnodes can be created for this mount
7037 	mutex_lock(&sMountMutex);
7038 	hash_insert(sMountsTable, mount);
7039 	mutex_unlock(&sMountMutex);
7040 
7041 	ino_t rootID;
7042 
7043 	if (!sRoot) {
7044 		// we haven't mounted anything yet
7045 		if (strcmp(path, "/") != 0) {
7046 			status = B_ERROR;
7047 			goto err2;
7048 		}
7049 
7050 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7051 			args, &rootID);
7052 		if (status != 0)
7053 			goto err2;
7054 	} else {
7055 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7056 		if (status != B_OK)
7057 			goto err2;
7058 
7059 		// make sure covers_vnode is a directory
7060 		if (!S_ISDIR(mount->covers_vnode->type)) {
7061 			status = B_NOT_A_DIRECTORY;
7062 			goto err3;
7063 		}
7064 
7065 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7066 			// this is already a mount point
7067 			status = B_BUSY;
7068 			goto err3;
7069 		}
7070 
7071 		// mount it/them
7072 		fs_volume* volume = mount->volume;
7073 		while (volume) {
7074 			status = volume->file_system->mount(volume, device, flags, args,
7075 				&rootID);
7076 			if (status != B_OK) {
7077 				if (volume->sub_volume)
7078 					goto err4;
7079 				goto err3;
7080 			}
7081 
7082 			volume = volume->super_volume;
7083 		}
7084 
7085 		volume = mount->volume;
7086 		while (volume) {
7087 			if (volume->ops->all_layers_mounted != NULL)
7088 				volume->ops->all_layers_mounted(volume);
7089 			volume = volume->super_volume;
7090 		}
7091 	}
7092 
7093 	// the root node is supposed to be owned by the file system - it must
7094 	// exist at this point
7095 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7096 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7097 		panic("fs_mount: file system does not own its root node!\n");
7098 		status = B_ERROR;
7099 		goto err4;
7100 	}
7101 
7102 	// No race here, since fs_mount() is the only function changing
7103 	// covers_vnode (and holds sMountOpLock at that time).
7104 	mutex_lock(&sVnodeCoveredByMutex);
7105 	if (mount->covers_vnode)
7106 		mount->covers_vnode->covered_by = mount->root_vnode;
7107 	mutex_unlock(&sVnodeCoveredByMutex);
7108 
7109 	if (!sRoot) {
7110 		sRoot = mount->root_vnode;
7111 		mutex_lock(&sIOContextRootLock);
7112 		get_current_io_context(true)->root = sRoot;
7113 		mutex_unlock(&sIOContextRootLock);
7114 		inc_vnode_ref_count(sRoot);
7115 	}
7116 
7117 	// supply the partition (if any) with the mount cookie and mark it mounted
7118 	if (partition) {
7119 		partition->SetMountCookie(mount->volume->private_volume);
7120 		partition->SetVolumeID(mount->id);
7121 
7122 		// keep a partition reference as long as the partition is mounted
7123 		partitionRegistrar.Detach();
7124 		mount->partition = partition;
7125 		mount->owns_file_device = newlyCreatedFileDevice;
7126 		fileDeviceDeleter.id = -1;
7127 	}
7128 
7129 	notify_mount(mount->id,
7130 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7131 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7132 
7133 	return mount->id;
7134 
7135 err4:
7136 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7137 err3:
7138 	if (mount->covers_vnode != NULL)
7139 		put_vnode(mount->covers_vnode);
7140 err2:
7141 	mutex_lock(&sMountMutex);
7142 	hash_remove(sMountsTable, mount);
7143 	mutex_unlock(&sMountMutex);
7144 err1:
7145 	delete mount;
7146 
7147 	return status;
7148 }
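

// A sketch (kept out of the build) of the layered volume chain built by
// fs_mount() above: mount->volume refers to the volume created last (the
// highest layer), and following super_volume leads back towards layer 0.
#if 0
static void
example_walk_volume_layers(struct fs_mount* mount)
{
	for (fs_volume* volume = mount->volume; volume != NULL;
			volume = volume->super_volume) {
		dprintf("layer %ld: %s\n", volume->layer, volume->file_system_name);
	}
}
#endif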
7149 
7150 
7151 static status_t
7152 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7153 {
7154 	struct vnode* vnode = NULL;
7155 	struct fs_mount* mount;
7156 	status_t err;
7157 
7158 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d)\n", path, mountID,
7159 		kernel));
7160 
7161 	if (path != NULL) {
7162 		err = path_to_vnode(path, true, &vnode, NULL, kernel);
7163 		if (err != B_OK)
7164 			return B_ENTRY_NOT_FOUND;
7165 	}
7166 
7167 	RecursiveLocker mountOpLocker(sMountOpLock);
7168 
7169 	// this lock is not strictly necessary, but is here in case of KDEBUG
7170 	// to keep the ASSERT in find_mount() working.
7171 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7172 	mount = find_mount(path != NULL ? vnode->device : mountID);
7173 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7174 	if (mount == NULL) {
7175 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7176 			vnode);
7177 	}
7178 
7179 	if (path != NULL) {
7180 		put_vnode(vnode);
7181 
7182 		if (mount->root_vnode != vnode) {
7183 			// not mountpoint
7184 			return B_BAD_VALUE;
7185 		}
7186 	}
7187 
7188 	// if the volume is associated with a partition, lock the device of the
7189 	// partition as long as we are unmounting
7190 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7191 	KPartition* partition = mount->partition;
7192 	KDiskDevice* diskDevice = NULL;
7193 	if (partition != NULL) {
7194 		if (partition->Device() == NULL) {
7195 			dprintf("fs_unmount(): There is no device!\n");
7196 			return B_ERROR;
7197 		}
7198 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7199 		if (!diskDevice) {
7200 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7201 			return B_ERROR;
7202 		}
7203 	}
7204 	DeviceWriteLocker writeLocker(diskDevice, true);
7205 
7206 	// make sure that the partition is not busy
7207 	if (partition != NULL) {
7208 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7209 			TRACE(("fs_unmount(): Partition is busy.\n"));
7210 			return B_BUSY;
7211 		}
7212 	}
7213 
7214 	// grab the vnode master mutex to keep someone from creating
7215 	// a vnode while we're figuring out if we can continue
7216 	mutex_lock(&sVnodeMutex);
7217 
7218 	bool disconnectedDescriptors = false;
7219 
7220 	while (true) {
7221 		bool busy = false;
7222 
7223 		// cycle through the list of vnodes associated with this mount and
7224 		// make sure all of them are not busy or have refs on them
7225 		vnode = NULL;
7226 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7227 		while (iterator.HasNext()) {
7228 			vnode = iterator.Next();
7229 
7230 			// The root vnode ref_count needs to be 1 here (the mount has a
7231 			// reference).
7232 			if (vnode->busy
7233 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7234 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7235 				// there are still vnodes in use on this mount, so we cannot
7236 				// unmount yet
7237 				busy = true;
7238 				break;
7239 			}
7240 		}
7241 
7242 		if (!busy)
7243 			break;
7244 
7245 		if ((flags & B_FORCE_UNMOUNT) == 0) {
7246 			mutex_unlock(&sVnodeMutex);
7247 
7248 			return B_BUSY;
7249 		}
7250 
7251 		if (disconnectedDescriptors) {
7252 			// wait a bit until the last access is finished, and then try again
7253 			mutex_unlock(&sVnodeMutex);
7254 			snooze(100000);
7255 			// TODO: if there is some kind of bug that prevents the ref counts
7256 			// from getting back to zero, this will fall into an endless loop...
7257 			mutex_lock(&sVnodeMutex);
7258 			continue;
7259 		}
7260 
7261 		// the file system is still busy - but we're forced to unmount it,
7262 		// so let's disconnect all open file descriptors
7263 
7264 		mount->unmounting = true;
7265 			// prevent new vnodes from being created
7266 
7267 		mutex_unlock(&sVnodeMutex);
7268 
7269 		disconnect_mount_or_vnode_fds(mount, NULL);
7270 		disconnectedDescriptors = true;
7271 
7272 		mutex_lock(&sVnodeMutex);
7273 	}
7274 
7275 	// we can safely continue, mark all of the vnodes busy and this mount
7276 	// structure in unmounting state
7277 	mount->unmounting = true;
7278 
7279 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7280 	while (iterator.HasNext()) {
7281 		vnode = iterator.Next();
7282 		vnode->busy = true;
7283 
7284 		if (vnode->ref_count == 0) {
7285 			// this vnode has been unused before
7286 			list_remove_item(&sUnusedVnodeList, vnode);
7287 			sUnusedVnodes--;
7288 		}
7289 	}
7290 
7291 	// The ref_count of the root node is 1 at this point, see above why this is
7292 	mount->root_vnode->ref_count--;
7293 
7294 	mutex_unlock(&sVnodeMutex);
7295 
7296 	mutex_lock(&sVnodeCoveredByMutex);
7297 	mount->covers_vnode->covered_by = NULL;
7298 	mutex_unlock(&sVnodeCoveredByMutex);
7299 	put_vnode(mount->covers_vnode);
7300 
7301 	// Free all vnodes associated with this mount.
7302 	// They will be removed from the mount list by free_vnode(), so
7303 	// we don't have to do that ourselves.
7304 	while ((vnode = mount->vnodes.Head()) != NULL) {
7305 		free_vnode(vnode, false);
7306 	}
7307 
7308 	// remove the mount structure from the hash table
7309 	mutex_lock(&sMountMutex);
7310 	hash_remove(sMountsTable, mount);
7311 	mutex_unlock(&sMountMutex);
7312 
7313 	mountOpLocker.Unlock();
7314 
7315 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7316 	notify_unmount(mount->id);
7317 
7318 	// dereference the partition and mark it unmounted
7319 	if (partition) {
7320 		partition->SetVolumeID(-1);
7321 		partition->SetMountCookie(NULL);
7322 
7323 		if (mount->owns_file_device)
7324 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7325 		partition->Unregister();
7326 	}
7327 
7328 	delete mount;
7329 	return B_OK;
7330 }
7331 
7332 
7333 static status_t
7334 fs_sync(dev_t device)
7335 {
7336 	struct fs_mount* mount;
7337 	status_t status = get_mount(device, &mount);
7338 	if (status != B_OK)
7339 		return status;
7340 
7341 	struct vnode marker;
7342 	marker.remove = true;
7343 
7344 	// First, synchronize all file caches
7345 
7346 	while (true) {
7347 		MutexLocker locker(sVnodeMutex);
7348 
7349 		// synchronize access to vnode list
7350 		recursive_lock_lock(&mount->rlock);
7351 
7352 		struct vnode* vnode;
7353 		if (!marker.remove) {
7354 			vnode = mount->vnodes.GetNext(&marker);
7355 			mount->vnodes.Remove(&marker);
7356 			marker.remove = true;
7357 		} else
7358 			vnode = mount->vnodes.First();
7359 
7360 		while (vnode != NULL && (vnode->cache == NULL
7361 			|| vnode->remove || vnode->busy)) {
7362 			// TODO: we could track writes (and writable mapped vnodes)
7363 			//	and have a simple flag that we could test for here
7364 			vnode = mount->vnodes.GetNext(vnode);
7365 		}
7366 
7367 		if (vnode != NULL) {
7368 			// insert marker vnode again
7369 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7370 			marker.remove = false;
7371 		}
7372 
7373 		recursive_lock_unlock(&mount->rlock);
7374 
7375 		if (vnode == NULL)
7376 			break;
7377 
7378 		vnode = lookup_vnode(mount->id, vnode->id);
7379 		if (vnode == NULL || vnode->busy)
7380 			continue;
7381 
7382 		if (vnode->ref_count == 0) {
7383 			// this vnode has been unused before
7384 			list_remove_item(&sUnusedVnodeList, vnode);
7385 			sUnusedVnodes--;
7386 		}
7387 		inc_vnode_ref_count(vnode);
7388 
7389 		locker.Unlock();
7390 
7391 		if (vnode->cache != NULL && !vnode->remove)
7392 			vnode->cache->WriteModified();
7393 
7394 		put_vnode(vnode);
7395 	}
7396 
7397 	// And then, let the file systems do their synchronizing work
7398 
7399 	if (HAS_FS_MOUNT_CALL(mount, sync))
7400 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7401 
7402 	put_mount(mount);
7403 	return status;
7404 }
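

// Note on the loop in fs_sync() above: the marker vnode makes it possible to
// drop both locks while one vnode's cache is being written back. The marker
// stays linked in mount->vnodes (marker.remove == false) exactly while a
// real vnode is in flight, so the walk can resume behind it afterwards.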
7405 
7406 
7407 static status_t
7408 fs_read_info(dev_t device, struct fs_info* info)
7409 {
7410 	struct fs_mount* mount;
7411 	status_t status = get_mount(device, &mount);
7412 	if (status != B_OK)
7413 		return status;
7414 
7415 	memset(info, 0, sizeof(struct fs_info));
7416 
7417 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7418 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7419 
7420 	// fill in info the file system doesn't (have to) know about
7421 	if (status == B_OK) {
7422 		info->dev = mount->id;
7423 		info->root = mount->root_vnode->id;
7424 
7425 		fs_volume* volume = mount->volume;
7426 		while (volume->super_volume != NULL)
7427 			volume = volume->super_volume;
7428 
7429 		strlcpy(info->fsh_name, volume->file_system_name,
7430 			sizeof(info->fsh_name));
7431 		if (mount->device_name != NULL) {
7432 			strlcpy(info->device_name, mount->device_name,
7433 				sizeof(info->device_name));
7434 		}
7435 	}
7436 
7437 	// if the call is not supported by the file system, there are still
7438 	// the parts that we filled out ourselves
7439 
7440 	put_mount(mount);
7441 	return status;
7442 }
7443 
7444 
7445 static status_t
7446 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7447 {
7448 	struct fs_mount* mount;
7449 	status_t status = get_mount(device, &mount);
7450 	if (status != B_OK)
7451 		return status;
7452 
7453 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7454 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7455 	else
7456 		status = EROFS;
7457 
7458 	put_mount(mount);
7459 	return status;
7460 }
7461 
7462 
7463 static dev_t
7464 fs_next_device(int32* _cookie)
7465 {
7466 	struct fs_mount* mount = NULL;
7467 	dev_t device = *_cookie;
7468 
7469 	mutex_lock(&sMountMutex);
7470 
7471 	// Since device IDs are assigned sequentially, this algorithm works well
7472 	// enough. It makes sure that the device list returned is sorted, and
7473 	// that no device is skipped when an already visited device got
7474 	// unmounted.
7475 
7476 	while (device < sNextMountID) {
7477 		mount = find_mount(device++);
7478 		if (mount != NULL && mount->volume->private_volume != NULL)
7479 			break;
7480 	}
7481 
7482 	*_cookie = device;
7483 
7484 	if (mount != NULL)
7485 		device = mount->id;
7486 	else
7487 		device = B_BAD_VALUE;
7488 
7489 	mutex_unlock(&sMountMutex);
7490 
7491 	return device;
7492 }
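

// A minimal usage sketch (kept out of the build) for fs_next_device(): the
// cookie starts at 0 and is advanced by the function; iteration ends once
// B_BAD_VALUE comes back.
#if 0
static void
example_list_mounted_devices()
{
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("mounted volume: %ld\n", device);
}
#endif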
7493 
7494 
7495 static status_t
7496 get_cwd(char* buffer, size_t size, bool kernel)
7497 {
7498 	// Get current working directory from io context
7499 	struct io_context* context = get_current_io_context(kernel);
7500 	status_t status;
7501 
7502 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7503 
7504 	mutex_lock(&context->io_mutex);
7505 
7506 	struct vnode* vnode = context->cwd;
7507 	if (vnode)
7508 		inc_vnode_ref_count(vnode);
7509 
7510 	mutex_unlock(&context->io_mutex);
7511 
7512 	if (vnode) {
7513 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7514 		put_vnode(vnode);
7515 	} else
7516 		status = B_ERROR;
7517 
7518 	return status;
7519 }
7520 
7521 
7522 static status_t
7523 set_cwd(int fd, char* path, bool kernel)
7524 {
7525 	struct io_context* context;
7526 	struct vnode* vnode = NULL;
7527 	struct vnode* oldDirectory;
7528 	status_t status;
7529 
7530 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7531 
7532 	// Get vnode for passed path, and bail if it failed
7533 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7534 	if (status < 0)
7535 		return status;
7536 
7537 	if (!S_ISDIR(vnode->type)) {
7538 		// nope, can't cwd to here
7539 		status = B_NOT_A_DIRECTORY;
7540 		goto err;
7541 	}
7542 
7543 	// Get current io context and lock
7544 	context = get_current_io_context(kernel);
7545 	mutex_lock(&context->io_mutex);
7546 
7547 	// save the old current working directory first
7548 	oldDirectory = context->cwd;
7549 	context->cwd = vnode;
7550 
7551 	mutex_unlock(&context->io_mutex);
7552 
7553 	if (oldDirectory)
7554 		put_vnode(oldDirectory);
7555 
7556 	return B_NO_ERROR;
7557 
7558 err:
7559 	put_vnode(vnode);
7560 	return status;
7561 }
7562 
7563 
7564 //	#pragma mark - kernel mirrored syscalls
7565 
7566 
7567 dev_t
7568 _kern_mount(const char* path, const char* device, const char* fsName,
7569 	uint32 flags, const char* args, size_t argsLength)
7570 {
7571 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7572 	if (pathBuffer.InitCheck() != B_OK)
7573 		return B_NO_MEMORY;
7574 
7575 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7576 }
7577 
7578 
7579 status_t
7580 _kern_unmount(const char* path, uint32 flags)
7581 {
7582 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7583 	if (pathBuffer.InitCheck() != B_OK)
7584 		return B_NO_MEMORY;
7585 
7586 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7587 }
7588 
7589 
7590 status_t
7591 _kern_read_fs_info(dev_t device, struct fs_info* info)
7592 {
7593 	if (info == NULL)
7594 		return B_BAD_VALUE;
7595 
7596 	return fs_read_info(device, info);
7597 }
7598 
7599 
7600 status_t
7601 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7602 {
7603 	if (info == NULL)
7604 		return B_BAD_VALUE;
7605 
7606 	return fs_write_info(device, info, mask);
7607 }
7608 
7609 
7610 status_t
7611 _kern_sync(void)
7612 {
7613 	// Note: _kern_sync() is also called from _user_sync()
7614 	int32 cookie = 0;
7615 	dev_t device;
7616 	while ((device = next_dev(&cookie)) >= 0) {
7617 		status_t status = fs_sync(device);
7618 		if (status != B_OK && status != B_BAD_VALUE) {
7619 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7620 				strerror(status));
7621 		}
7622 	}
7623 
7624 	return B_OK;
7625 }
7626 
7627 
7628 dev_t
7629 _kern_next_device(int32* _cookie)
7630 {
7631 	return fs_next_device(_cookie);
7632 }
7633 
7634 
7635 status_t
7636 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7637 	size_t infoSize)
7638 {
7639 	if (infoSize != sizeof(fd_info))
7640 		return B_BAD_VALUE;
7641 
7642 	struct io_context* context = NULL;
7643 	struct team* team = NULL;
7644 
7645 	cpu_status state = disable_interrupts();
7646 	GRAB_TEAM_LOCK();
7647 
7648 	bool contextLocked = false;
7649 	team = team_get_team_struct_locked(teamID);
7650 	if (team) {
7651 		// We cannot lock the IO context while holding the team lock, nor can
7652 		// we just drop the team lock, since it might be deleted in the
7653 		// meantime. team_remove_team() acquires the thread lock when removing
7654 		// the team from the team hash table, though. Hence we switch to the
7655 		// thread lock and use mutex_lock_threads_locked().
7656 		context = (io_context*)team->io_context;
7657 
7658 		GRAB_THREAD_LOCK();
7659 		RELEASE_TEAM_LOCK();
7660 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7661 		RELEASE_THREAD_LOCK();
7662 	} else
7663 		RELEASE_TEAM_LOCK();
7664 
7665 	restore_interrupts(state);
7666 
7667 	if (!contextLocked) {
7668 		// team doesn't exist or seems to be gone
7669 		return B_BAD_TEAM_ID;
7670 	}
7671 
7672 	// the team cannot be deleted completely while we're owning its
7673 	// io_context mutex, so we can safely play with it now
7674 
7675 	uint32 slot = *_cookie;
7676 
7677 	struct file_descriptor* descriptor;
7678 	while (slot < context->table_size
7679 		&& (descriptor = context->fds[slot]) == NULL) {
7680 		slot++;
7681 	}
7682 
7683 	if (slot >= context->table_size) {
7684 		mutex_unlock(&context->io_mutex);
7685 		return B_ENTRY_NOT_FOUND;
7686 	}
7687 
7688 	info->number = slot;
7689 	info->open_mode = descriptor->open_mode;
7690 
7691 	struct vnode* vnode = fd_vnode(descriptor);
7692 	if (vnode != NULL) {
7693 		info->device = vnode->device;
7694 		info->node = vnode->id;
7695 	} else if (descriptor->u.mount != NULL) {
7696 		info->device = descriptor->u.mount->id;
7697 		info->node = -1;
7698 	}
7699 
7700 	mutex_unlock(&context->io_mutex);
7701 
7702 	*_cookie = slot + 1;
7703 	return B_OK;
7704 }
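

// A minimal usage sketch (kept out of the build) for the FD enumeration
// above; the team ID is hypothetical. The cookie starts at 0, and
// B_ENTRY_NOT_FOUND marks the end of the FD table.
#if 0
static void
example_dump_team_fds(team_id team)
{
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(team, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %ld: device %ld, node %Ld\n", info.number, info.device,
			info.node);
	}
}
#endif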
7705 
7706 
7707 int
7708 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7709 	int perms)
7710 {
7711 	if ((openMode & O_CREAT) != 0) {
7712 		return file_create_entry_ref(device, inode, name, openMode, perms,
7713 			true);
7714 	}
7715 
7716 	return file_open_entry_ref(device, inode, name, openMode, true);
7717 }
7718 
7719 
7720 /*!	\brief Opens a node specified by a FD + path pair.
7721 
7722 	At least one of \a fd and \a path must be specified.
7723 	If only \a fd is given, the function opens the node identified by this
7724 	FD. If only a path is given, this path is opened. If both are given and
7725 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7726 	of the directory (!) identified by \a fd.
7727 
7728 	\param fd The FD. May be < 0.
7729 	\param path The absolute or relative path. May be \c NULL.
7730 	\param openMode The open mode.
7731 	\return A FD referring to the newly opened node, or an error code,
7732 			if an error occurs.
7733 */
7734 int
7735 _kern_open(int fd, const char* path, int openMode, int perms)
7736 {
7737 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7738 	if (pathBuffer.InitCheck() != B_OK)
7739 		return B_NO_MEMORY;
7740 
7741 	if (openMode & O_CREAT)
7742 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7743 
7744 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7745 }
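

// A sketch (kept out of the build) of the FD + path pair convention
// documented above; dirFD is a hypothetical descriptor referring to the
// directory "/boot/home". All three calls would open the same node.
#if 0
static void
example_fd_path_pairs(int dirFD)
{
	// absolute path: the FD is ignored
	int fd1 = _kern_open(-1, "/boot/home/Desktop", O_RDONLY, 0);

	// relative path: reckoned off of the directory dirFD
	int fd2 = _kern_open(dirFD, "Desktop", O_RDONLY, 0);

	// absolute path again: dirFD is ignored
	int fd3 = _kern_open(dirFD, "/boot/home/Desktop", O_RDONLY, 0);
}
#endif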
7746 
7747 
7748 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7749 
7750 	The supplied name may be \c NULL, in which case directory identified
7751 	by \a device and \a inode will be opened. Otherwise \a device and
7752 	\a inode identify the parent directory of the directory to be opened
7753 	and \a name its entry name.
7754 
7755 	\param device If \a name is specified the ID of the device the parent
7756 		   directory of the directory to be opened resides on, otherwise
7757 		   the device of the directory itself.
7758 	\param inode If \a name is specified the node ID of the parent
7759 		   directory of the directory to be opened, otherwise node ID of the
7760 		   directory itself.
7761 	\param name The entry name of the directory to be opened. If \c NULL,
7762 		   the \a device + \a inode pair identify the node to be opened.
7763 	\return The FD of the newly opened directory or an error code, if
7764 			something went wrong.
7765 */
7766 int
7767 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7768 {
7769 	return dir_open_entry_ref(device, inode, name, true);
7770 }
7771 
7772 
7773 /*!	\brief Opens a directory specified by a FD + path pair.
7774 
7775 	At least one of \a fd and \a path must be specified.
7776 	If only \a fd is given, the function opens the directory identified by this
7777 	FD. If only a path is given, this path is opened. If both are given and
7778 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7779 	of the directory (!) identified by \a fd.
7780 
7781 	\param fd The FD. May be < 0.
7782 	\param path The absolute or relative path. May be \c NULL.
7783 	\return A FD referring to the newly opened directory, or an error code,
7784 			if an error occurs.
7785 */
7786 int
7787 _kern_open_dir(int fd, const char* path)
7788 {
7789 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7790 	if (pathBuffer.InitCheck() != B_OK)
7791 		return B_NO_MEMORY;
7792 
7793 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7794 }
7795 
7796 
7797 status_t
7798 _kern_fcntl(int fd, int op, uint32 argument)
7799 {
7800 	return common_fcntl(fd, op, argument, true);
7801 }
7802 
7803 
7804 status_t
7805 _kern_fsync(int fd)
7806 {
7807 	return common_sync(fd, true);
7808 }
7809 
7810 
7811 status_t
7812 _kern_lock_node(int fd)
7813 {
7814 	return common_lock_node(fd, true);
7815 }
7816 
7817 
7818 status_t
7819 _kern_unlock_node(int fd)
7820 {
7821 	return common_unlock_node(fd, true);
7822 }
7823 
7824 
7825 status_t
7826 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7827 	int perms)
7828 {
7829 	return dir_create_entry_ref(device, inode, name, perms, true);
7830 }
7831 
7832 
7833 /*!	\brief Creates a directory specified by a FD + path pair.
7834 
7835 	\a path must always be specified (it contains the name of the new directory
7836 	at least). If only a path is given, this path identifies the location at
7837 	which the directory shall be created. If both \a fd and \a path are given
7838 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
7839 	of the directory (!) identified by \a fd.
7840 
7841 	\param fd The FD. May be < 0.
7842 	\param path The absolute or relative path. Must not be \c NULL.
7843 	\param perms The access permissions the new directory shall have.
7844 	\return \c B_OK, if the directory has been created successfully, another
7845 			error code otherwise.
7846 */
7847 status_t
7848 _kern_create_dir(int fd, const char* path, int perms)
7849 {
7850 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7851 	if (pathBuffer.InitCheck() != B_OK)
7852 		return B_NO_MEMORY;
7853 
7854 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7855 }
7856 
7857 
7858 status_t
7859 _kern_remove_dir(int fd, const char* path)
7860 {
7861 	if (path) {
7862 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7863 		if (pathBuffer.InitCheck() != B_OK)
7864 			return B_NO_MEMORY;
7865 
7866 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7867 	}
7868 
7869 	return dir_remove(fd, NULL, true);
7870 }
7871 
7872 
7873 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7874 
7875 	At least one of \a fd and \a path must be specified.
7876 	If only \a fd is given, the symlink to be read is the node
7877 	identified by this FD. If only a path is given, this path identifies the
7878 	symlink to be read. If both are given and the path is absolute, \a fd is
7879 	ignored; a relative path is reckoned off of the directory (!) identified
7880 	by \a fd.
7881 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7882 	will still be updated to reflect the required buffer size.
7883 
7884 	\param fd The FD. May be < 0.
7885 	\param path The absolute or relative path. May be \c NULL.
7886 	\param buffer The buffer into which the contents of the symlink shall be
7887 		   written.
7888 	\param _bufferSize A pointer to the size of the supplied buffer.
7889 	\return The length of the link on success or an appropriate error code.
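
	A minimal sketch of the size-query pattern (illustrative only; \c fd is
	assumed to refer to a symlink):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(fd, NULL, buffer, &size);
		// on B_BUFFER_OVERFLOW, size has been set to the required length
	\endcode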
7890 */
7891 status_t
7892 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
7893 {
7894 	if (path) {
7895 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7896 		if (pathBuffer.InitCheck() != B_OK)
7897 			return B_NO_MEMORY;
7898 
7899 		return common_read_link(fd, pathBuffer.LockBuffer(),
7900 			buffer, _bufferSize, true);
7901 	}
7902 
7903 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
7904 }
7905 
7906 
7907 /*!	\brief Creates a symlink specified by a FD + path pair.
7908 
7909 	\a path must always be specified (it contains the name of the new symlink
7910 	at least). If only a path is given, this path identifies the location at
7911 	which the symlink shall be created. If both \a fd and \a path are given and
7912 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7913 	of the directory (!) identified by \a fd.
7914 
7915 	\param fd The FD. May be < 0.
7916 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the symlink shall point to.
7917 	\param mode The access permissions the new symlink shall have.
7918 	\return \c B_OK, if the symlink has been created successfully, another
7919 			error code otherwise.
7920 */
7921 status_t
7922 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
7923 {
7924 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7925 	if (pathBuffer.InitCheck() != B_OK)
7926 		return B_NO_MEMORY;
7927 
7928 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
7929 		toPath, mode, true);
7930 }
7931 
7932 
7933 status_t
7934 _kern_create_link(const char* path, const char* toPath)
7935 {
7936 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7937 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
7938 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7939 		return B_NO_MEMORY;
7940 
7941 	return common_create_link(pathBuffer.LockBuffer(),
7942 		toPathBuffer.LockBuffer(), true);
7943 }
7944 
7945 
7946 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
7947 
7948 	\a path must always be specified (it contains at least the name of the entry
7949 	to be deleted). If only a path is given, this path identifies the entry
7950 	directly. If both \a fd and \a path are given and the path is absolute,
7951 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7952 	identified by \a fd.
7953 
7954 	\param fd The FD. May be < 0.
7955 	\param path The absolute or relative path. Must not be \c NULL.
7956 	\return \c B_OK, if the entry has been removed successfully, another
7957 			error code otherwise.
7958 */
7959 status_t
7960 _kern_unlink(int fd, const char* path)
7961 {
7962 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7963 	if (pathBuffer.InitCheck() != B_OK)
7964 		return B_NO_MEMORY;
7965 
7966 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
7967 }
7968 
7969 
7970 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
7971 		   by another FD + path pair.
7972 
7973 	\a oldPath and \a newPath must always be specified (they contain at least
7974 	the name of the entry). If only a path is given, this path identifies the
7975 	entry directly. If both a FD and a path are given and the path is absolute,
7976 	the FD is ignored; a relative path is reckoned off of the directory (!)
7977 	identified by the respective FD.
7978 
7979 	\param oldFD The FD of the old location. May be < 0.
7980 	\param oldPath The absolute or relative path of the old location. Must not
7981 		   be \c NULL.
7982 	\param newFD The FD of the new location. May be < 0.
7983 	\param newPath The absolute or relative path of the new location. Must not
7984 		   be \c NULL.
7985 	\return \c B_OK, if the entry has been moved successfully, another
7986 			error code otherwise.
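
	A minimal sketch (illustrative only; \c oldDirFD and \c newDirFD are
	hypothetical directory FDs):
	\code
	status_t status = _kern_rename(oldDirFD, "draft.txt",
		newDirFD, "final.txt");
	\endcode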
7987 */
7988 status_t
7989 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
7990 {
7991 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
7992 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
7993 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7994 		return B_NO_MEMORY;
7995 
7996 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
7997 		newFD, newPathBuffer.LockBuffer(), true);
7998 }
7999 
8000 
8001 status_t
8002 _kern_access(const char* path, int mode)
8003 {
8004 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8005 	if (pathBuffer.InitCheck() != B_OK)
8006 		return B_NO_MEMORY;
8007 
8008 	return common_access(pathBuffer.LockBuffer(), mode, true);
8009 }
8010 
8011 
8012 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8013 
8014 	If only \a fd is given, the stat operation associated with the type
8015 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8016 	given, this path identifies the entry for whose node to retrieve the
8017 	stat data. If both \a fd and \a path are given and the path is absolute,
8018 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8019 	identified by \a fd and specifies the entry whose stat data shall be
8020 	retrieved.
8021 
8022 	\param fd The FD. May be < 0.
8023 	\param path The absolute or relative path. Must not be \c NULL.
8024 	\param traverseLeafLink If \a path is given, \c true specifies that the
8025 		   function shall not stick to symlinks, but traverse them.
8026 	\param stat The buffer the stat data shall be written into.
8027 	\param statSize The size of the supplied stat buffer.
8028 	\return \c B_OK, if the stat data have been read successfully, another
8029 			error code otherwise.
8030 */
8031 status_t
8032 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8033 	struct stat* stat, size_t statSize)
8034 {
8035 	struct stat completeStat;
8036 	struct stat* originalStat = NULL;
8037 	status_t status;
8038 
8039 	if (statSize > sizeof(struct stat))
8040 		return B_BAD_VALUE;
8041 
8042 	// this supports different stat extensions
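	// (the data is read into a complete struct stat and only the first
	// statSize bytes are copied back to the caller's buffer below)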
8043 	if (statSize < sizeof(struct stat)) {
8044 		originalStat = stat;
8045 		stat = &completeStat;
8046 	}
8047 
8048 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8049 
8050 	if (status == B_OK && originalStat != NULL)
8051 		memcpy(originalStat, stat, statSize);
8052 
8053 	return status;
8054 }
8055 
8056 
8057 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8058 
8059 	If only \a fd is given, the stat operation associated with the type
8060 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8061 	given, this path identifies the entry for whose node to write the
8062 	stat data. If both \a fd and \a path are given and the path is absolute,
8063 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8064 	identified by \a fd and specifies the entry whose stat data shall be
8065 	written.
8066 
8067 	\param fd The FD. May be < 0.
8068 	\param path The absolute or relative path. Must not be \c NULL.
8069 	\param traverseLeafLink If \a path is given, \c true specifies that the
8070 		   function shall not stick to symlinks, but traverse them.
8071 	\param stat The buffer containing the stat data to be written.
8072 	\param statSize The size of the supplied stat buffer.
8073 	\param statMask A mask specifying which parts of the stat data shall be
8074 		   written.
8075 	\return \c B_OK, if the stat data have been written successfully,
8076 			another error code otherwise.
8077 */
8078 status_t
8079 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8080 	const struct stat* stat, size_t statSize, int statMask)
8081 {
8082 	struct stat completeStat;
8083 
8084 	if (statSize > sizeof(struct stat))
8085 		return B_BAD_VALUE;
8086 
8087 	// this supports different stat extensions
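	// (a smaller stat is copied into a zero-filled full-size struct stat,
	// so fields unknown to the caller are passed on as 0)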
8088 	if (statSize < sizeof(struct stat)) {
8089 		memset((uint8*)&completeStat + statSize, 0,
8090 			sizeof(struct stat) - statSize);
8091 		memcpy(&completeStat, stat, statSize);
8092 		stat = &completeStat;
8093 	}
8094 
8095 	status_t status;
8096 
8097 	if (path) {
8098 		// path given: write the stat of the node referred to by (fd, path)
8099 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8100 		if (pathBuffer.InitCheck() != B_OK)
8101 			return B_NO_MEMORY;
8102 
8103 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8104 			traverseLeafLink, stat, statMask, true);
8105 	} else {
8106 		// no path given: get the FD and use the FD operation
8107 		struct file_descriptor* descriptor
8108 			= get_fd(get_current_io_context(true), fd);
8109 		if (descriptor == NULL)
8110 			return B_FILE_ERROR;
8111 
8112 		if (descriptor->ops->fd_write_stat)
8113 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8114 		else
8115 			status = EOPNOTSUPP;
8116 
8117 		put_fd(descriptor);
8118 	}
8119 
8120 	return status;
8121 }
8122 
8123 
8124 int
8125 _kern_open_attr_dir(int fd, const char* path)
8126 {
8127 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8128 	if (pathBuffer.InitCheck() != B_OK)
8129 		return B_NO_MEMORY;
8130 
8131 	if (path != NULL)
8132 		pathBuffer.SetTo(path);
8133 
8134 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8135 }
8136 
8137 
8138 int
8139 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8140 	int openMode)
8141 {
8142 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8143 	if (pathBuffer.InitCheck() != B_OK)
8144 		return B_NO_MEMORY;
8145 
8146 	if ((openMode & O_CREAT) != 0) {
8147 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8148 			true);
8149 	}
8150 
8151 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8152 }
8153 
8154 
8155 status_t
8156 _kern_remove_attr(int fd, const char* name)
8157 {
8158 	return attr_remove(fd, name, true);
8159 }
8160 
8161 
8162 status_t
8163 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8164 	const char* toName)
8165 {
8166 	return attr_rename(fromFile, fromName, toFile, toName, true);
8167 }
8168 
8169 
8170 int
8171 _kern_open_index_dir(dev_t device)
8172 {
8173 	return index_dir_open(device, true);
8174 }
8175 
8176 
8177 status_t
8178 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8179 {
8180 	return index_create(device, name, type, flags, true);
8181 }
8182 
8183 
8184 status_t
8185 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8186 {
8187 	return index_name_read_stat(device, name, stat, true);
8188 }
8189 
8190 
8191 status_t
8192 _kern_remove_index(dev_t device, const char* name)
8193 {
8194 	return index_remove(device, name, true);
8195 }
8196 
8197 
8198 status_t
8199 _kern_getcwd(char* buffer, size_t size)
8200 {
8201 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8202 
8203 	// Call vfs to get current working directory
8204 	return get_cwd(buffer, size, true);
8205 }
8206 
8207 
8208 status_t
8209 _kern_setcwd(int fd, const char* path)
8210 {
8211 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8212 	if (pathBuffer.InitCheck() != B_OK)
8213 		return B_NO_MEMORY;
8214 
8215 	if (path != NULL)
8216 		pathBuffer.SetTo(path);
8217 
8218 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8219 }
8220 
8221 
8222 //	#pragma mark - userland syscalls
8223 
8224 
8225 dev_t
8226 _user_mount(const char* userPath, const char* userDevice,
8227 	const char* userFileSystem, uint32 flags, const char* userArgs,
8228 	size_t argsLength)
8229 {
8230 	char fileSystem[B_FILE_NAME_LENGTH];
8231 	KPath path, device;
8232 	char* args = NULL;
8233 	status_t status;
8234 
8235 	if (!IS_USER_ADDRESS(userPath)
8236 		|| !IS_USER_ADDRESS(userFileSystem)
8237 		|| !IS_USER_ADDRESS(userDevice))
8238 		return B_BAD_ADDRESS;
8239 
8240 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8241 		return B_NO_MEMORY;
8242 
8243 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8244 		return B_BAD_ADDRESS;
8245 
8246 	if (userFileSystem != NULL
8247 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8248 		return B_BAD_ADDRESS;
8249 
8250 	if (userDevice != NULL
8251 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8252 			< B_OK)
8253 		return B_BAD_ADDRESS;
8254 
8255 	if (userArgs != NULL && argsLength > 0) {
8256 		// this is a safety restriction
8257 		if (argsLength >= 65536)
8258 			return B_NAME_TOO_LONG;
8259 
8260 		args = (char*)malloc(argsLength + 1);
8261 		if (args == NULL)
8262 			return B_NO_MEMORY;
8263 
8264 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8265 			free(args);
8266 			return B_BAD_ADDRESS;
8267 		}
8268 	}
8269 	path.UnlockBuffer();
8270 	device.UnlockBuffer();
8271 
8272 	status = fs_mount(path.LockBuffer(),
8273 		userDevice != NULL ? device.Path() : NULL,
8274 		userFileSystem ? fileSystem : NULL, flags, args, false);
8275 
8276 	free(args);
8277 	return status;
8278 }
8279 
8280 
8281 status_t
8282 _user_unmount(const char* userPath, uint32 flags)
8283 {
8284 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8285 	if (pathBuffer.InitCheck() != B_OK)
8286 		return B_NO_MEMORY;
8287 
8288 	char* path = pathBuffer.LockBuffer();
8289 
8290 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8291 		return B_BAD_ADDRESS;
8292 
8293 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8294 }
8295 
8296 
8297 status_t
8298 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8299 {
8300 	struct fs_info info;
8301 	status_t status;
8302 
8303 	if (userInfo == NULL)
8304 		return B_BAD_VALUE;
8305 
8306 	if (!IS_USER_ADDRESS(userInfo))
8307 		return B_BAD_ADDRESS;
8308 
8309 	status = fs_read_info(device, &info);
8310 	if (status != B_OK)
8311 		return status;
8312 
8313 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8314 		return B_BAD_ADDRESS;
8315 
8316 	return B_OK;
8317 }
8318 
8319 
8320 status_t
8321 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8322 {
8323 	struct fs_info info;
8324 
8325 	if (userInfo == NULL)
8326 		return B_BAD_VALUE;
8327 
8328 	if (!IS_USER_ADDRESS(userInfo)
8329 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8330 		return B_BAD_ADDRESS;
8331 
8332 	return fs_write_info(device, &info, mask);
8333 }
8334 
8335 
8336 dev_t
8337 _user_next_device(int32* _userCookie)
8338 {
8339 	int32 cookie;
8340 	dev_t device;
8341 
8342 	if (!IS_USER_ADDRESS(_userCookie)
8343 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8344 		return B_BAD_ADDRESS;
8345 
8346 	device = fs_next_device(&cookie);
8347 
8348 	if (device >= B_OK) {
8349 		// update user cookie
8350 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8351 			return B_BAD_ADDRESS;
8352 	}
8353 
8354 	return device;
8355 }
8356 
8357 
8358 status_t
8359 _user_sync(void)
8360 {
8361 	return _kern_sync();
8362 }
8363 
8364 
8365 status_t
8366 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8367 	size_t infoSize)
8368 {
8369 	struct fd_info info;
8370 	uint32 cookie;
8371 
8372 	// only root can do this (or should root's group be enough?)
8373 	if (geteuid() != 0)
8374 		return B_NOT_ALLOWED;
8375 
8376 	if (infoSize != sizeof(fd_info))
8377 		return B_BAD_VALUE;
8378 
8379 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8380 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8381 		return B_BAD_ADDRESS;
8382 
8383 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8384 	if (status != B_OK)
8385 		return status;
8386 
8387 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8388 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8389 		return B_BAD_ADDRESS;
8390 
8391 	return status;
8392 }
8393 
8394 
8395 status_t
8396 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8397 	char* userPath, size_t pathLength)
8398 {
8399 	if (!IS_USER_ADDRESS(userPath))
8400 		return B_BAD_ADDRESS;
8401 
8402 	KPath path(B_PATH_NAME_LENGTH + 1);
8403 	if (path.InitCheck() != B_OK)
8404 		return B_NO_MEMORY;
8405 
8406 	// copy the leaf name onto the stack
8407 	char stackLeaf[B_FILE_NAME_LENGTH];
8408 	if (leaf) {
8409 		if (!IS_USER_ADDRESS(leaf))
8410 			return B_BAD_ADDRESS;
8411 
8412 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8413 		if (length < 0)
8414 			return length;
8415 		if (length >= B_FILE_NAME_LENGTH)
8416 			return B_NAME_TOO_LONG;
8417 
8418 		leaf = stackLeaf;
8419 	}
8420 
8421 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8422 		path.LockBuffer(), path.BufferSize());
8423 	if (status != B_OK)
8424 		return status;
8425 
8426 	path.UnlockBuffer();
8427 
8428 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8429 	if (length < 0)
8430 		return length;
8431 	if (length >= (int)pathLength)
8432 		return B_BUFFER_OVERFLOW;
8433 
8434 	return B_OK;
8435 }
8436 
8437 
8438 status_t
8439 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8440 {
8441 	if (userPath == NULL || buffer == NULL)
8442 		return B_BAD_VALUE;
8443 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8444 		return B_BAD_ADDRESS;
8445 
8446 	// copy path from userland
8447 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8448 	if (pathBuffer.InitCheck() != B_OK)
8449 		return B_NO_MEMORY;
8450 	char* path = pathBuffer.LockBuffer();
8451 
8452 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8453 		return B_BAD_ADDRESS;
8454 
8455 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8456 		false);
8457 	if (error != B_OK)
8458 		return error;
8459 
8460 	// copy back to userland
8461 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8462 	if (len < 0)
8463 		return len;
8464 	if (len >= B_PATH_NAME_LENGTH)
8465 		return B_BUFFER_OVERFLOW;
8466 
8467 	return B_OK;
8468 }
8469 
8470 
8471 int
8472 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8473 	int openMode, int perms)
8474 {
8475 	char name[B_FILE_NAME_LENGTH];
8476 
8477 	if (userName == NULL || device < 0 || inode < 0)
8478 		return B_BAD_VALUE;
8479 	if (!IS_USER_ADDRESS(userName)
8480 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8481 		return B_BAD_ADDRESS;
8482 
8483 	if ((openMode & O_CREAT) != 0) {
8484 		return file_create_entry_ref(device, inode, name, openMode, perms,
8485 			false);
8486 	}
8487 
8488 	return file_open_entry_ref(device, inode, name, openMode, false);
8489 }
8490 
8491 
8492 int
8493 _user_open(int fd, const char* userPath, int openMode, int perms)
8494 {
8495 	KPath path(B_PATH_NAME_LENGTH + 1);
8496 	if (path.InitCheck() != B_OK)
8497 		return B_NO_MEMORY;
8498 
8499 	char* buffer = path.LockBuffer();
8500 
8501 	if (!IS_USER_ADDRESS(userPath)
8502 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8503 		return B_BAD_ADDRESS;
8504 
8505 	if ((openMode & O_CREAT) != 0)
8506 		return file_create(fd, buffer, openMode, perms, false);
8507 
8508 	return file_open(fd, buffer, openMode, false);
8509 }
8510 
8511 
8512 int
8513 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8514 {
8515 	if (userName != NULL) {
8516 		char name[B_FILE_NAME_LENGTH];
8517 
8518 		if (!IS_USER_ADDRESS(userName)
8519 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8520 			return B_BAD_ADDRESS;
8521 
8522 		return dir_open_entry_ref(device, inode, name, false);
8523 	}
8524 	return dir_open_entry_ref(device, inode, NULL, false);
8525 }
8526 
8527 
8528 int
8529 _user_open_dir(int fd, const char* userPath)
8530 {
8531 	KPath path(B_PATH_NAME_LENGTH + 1);
8532 	if (path.InitCheck() != B_OK)
8533 		return B_NO_MEMORY;
8534 
8535 	char* buffer = path.LockBuffer();
8536 
8537 	if (!IS_USER_ADDRESS(userPath)
8538 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8539 		return B_BAD_ADDRESS;
8540 
8541 	return dir_open(fd, buffer, false);
8542 }
8543 
8544 
8545 /*!	\brief Opens a directory's parent directory and returns the entry name
8546 		   of the former.
8547 
8548 	Apart from returning the directory's entry name, this method is
8549 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8550 	equivalent if \a userName is \c NULL.
8551 
8552 	If a name buffer is supplied and the name does not fit the buffer, the
8553 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8554 
8555 	\param fd A FD referring to a directory.
8556 	\param userName Buffer the directory's entry name shall be written into.
8557 		   May be \c NULL.
8558 	\param nameLength Size of the name buffer.
8559 	\return The file descriptor of the opened parent directory, if everything
8560 			went fine, an error code otherwise.
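
	A minimal sketch, as called from userland (illustrative only; \c dirFD
	is any FD referring to a directory, error handling omitted):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
		// on success, name holds the entry name of dirFD within its parent
	\endcode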
8561 */
8562 int
8563 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8564 {
8565 	bool kernel = false;
8566 
8567 	if (userName && !IS_USER_ADDRESS(userName))
8568 		return B_BAD_ADDRESS;
8569 
8570 	// open the parent dir
8571 	int parentFD = dir_open(fd, (char*)"..", kernel);
8572 	if (parentFD < 0)
8573 		return parentFD;
8574 	FDCloser fdCloser(parentFD, kernel);
8575 
8576 	if (userName) {
8577 		// get the vnodes
8578 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8579 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8580 		VNodePutter parentVNodePutter(parentVNode);
8581 		VNodePutter dirVNodePutter(dirVNode);
8582 		if (!parentVNode || !dirVNode)
8583 			return B_FILE_ERROR;
8584 
8585 		// get the vnode name
8586 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8587 		struct dirent* buffer = (struct dirent*)_buffer;
8588 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8589 			sizeof(_buffer), get_current_io_context(false));
8590 		if (status != B_OK)
8591 			return status;
8592 
8593 		// copy the name to the userland buffer
8594 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8595 		if (len < 0)
8596 			return len;
8597 		if (len >= (int)nameLength)
8598 			return B_BUFFER_OVERFLOW;
8599 	}
8600 
8601 	return fdCloser.Detach();
8602 }
8603 
8604 
8605 status_t
8606 _user_fcntl(int fd, int op, uint32 argument)
8607 {
8608 	status_t status = common_fcntl(fd, op, argument, false);
8609 	if (op == F_SETLKW)
8610 		syscall_restart_handle_post(status);
8611 
8612 	return status;
8613 }
8614 
8615 
8616 status_t
8617 _user_fsync(int fd)
8618 {
8619 	return common_sync(fd, false);
8620 }
8621 
8622 
8623 status_t
8624 _user_flock(int fd, int operation)
8625 {
8626 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
8627 
8628 	// Check if the operation is valid
8629 	switch (operation & ~LOCK_NB) {
8630 		case LOCK_UN:
8631 		case LOCK_SH:
8632 		case LOCK_EX:
8633 			break;
8634 
8635 		default:
8636 			return B_BAD_VALUE;
8637 	}
8638 
8639 	struct file_descriptor* descriptor;
8640 	struct vnode* vnode;
8641 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8642 	if (descriptor == NULL)
8643 		return B_FILE_ERROR;
8644 
8645 	if (descriptor->type != FDTYPE_FILE) {
8646 		put_fd(descriptor);
8647 		return B_BAD_VALUE;
8648 	}
8649 
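	// flock() locks always affect the whole file, so translate the request
	// into an advisory lock covering the entire range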
8650 	struct flock flock;
8651 	flock.l_start = 0;
8652 	flock.l_len = OFF_MAX;
8653 	flock.l_whence = 0;
8654 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8655 
8656 	status_t status;
8657 	if ((operation & LOCK_UN) != 0)
8658 		status = release_advisory_lock(vnode, &flock);
8659 	else {
8660 		status = acquire_advisory_lock(vnode,
8661 			thread_get_current_thread()->team->session_id, &flock,
8662 			(operation & LOCK_NB) == 0);
8663 	}
8664 
8665 	syscall_restart_handle_post(status);
8666 
8667 	put_fd(descriptor);
8668 	return status;
8669 }
8670 
8671 
8672 status_t
8673 _user_lock_node(int fd)
8674 {
8675 	return common_lock_node(fd, false);
8676 }
8677 
8678 
8679 status_t
8680 _user_unlock_node(int fd)
8681 {
8682 	return common_unlock_node(fd, false);
8683 }
8684 
8685 
8686 status_t
8687 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8688 	int perms)
8689 {
8690 	char name[B_FILE_NAME_LENGTH];
8691 	status_t status;
8692 
8693 	if (!IS_USER_ADDRESS(userName))
8694 		return B_BAD_ADDRESS;
8695 
8696 	status = user_strlcpy(name, userName, sizeof(name));
8697 	if (status < 0)
8698 		return status;
8699 
8700 	return dir_create_entry_ref(device, inode, name, perms, false);
8701 }
8702 
8703 
8704 status_t
8705 _user_create_dir(int fd, const char* userPath, int perms)
8706 {
8707 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8708 	if (pathBuffer.InitCheck() != B_OK)
8709 		return B_NO_MEMORY;
8710 
8711 	char* path = pathBuffer.LockBuffer();
8712 
8713 	if (!IS_USER_ADDRESS(userPath)
8714 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8715 		return B_BAD_ADDRESS;
8716 
8717 	return dir_create(fd, path, perms, false);
8718 }
8719 
8720 
8721 status_t
8722 _user_remove_dir(int fd, const char* userPath)
8723 {
8724 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8725 	if (pathBuffer.InitCheck() != B_OK)
8726 		return B_NO_MEMORY;
8727 
8728 	char* path = pathBuffer.LockBuffer();
8729 
8730 	if (userPath != NULL) {
8731 		if (!IS_USER_ADDRESS(userPath)
8732 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8733 			return B_BAD_ADDRESS;
8734 	}
8735 
8736 	return dir_remove(fd, userPath ? path : NULL, false);
8737 }
8738 
8739 
8740 status_t
8741 _user_read_link(int fd, const char* userPath, char* userBuffer,
8742 	size_t* userBufferSize)
8743 {
8744 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8745 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8746 		return B_NO_MEMORY;
8747 
8748 	size_t bufferSize;
8749 
8750 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8751 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8752 		return B_BAD_ADDRESS;
8753 
8754 	char* path = pathBuffer.LockBuffer();
8755 	char* buffer = linkBuffer.LockBuffer();
8756 
8757 	if (userPath) {
8758 		if (!IS_USER_ADDRESS(userPath)
8759 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8760 			return B_BAD_ADDRESS;
8761 
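		// the kernel-side link buffer is only B_PATH_NAME_LENGTH bytes
		// large, so clamp the requested size accordingly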
8762 		if (bufferSize > B_PATH_NAME_LENGTH)
8763 			bufferSize = B_PATH_NAME_LENGTH;
8764 	}
8765 
8766 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8767 		&bufferSize, false);
8768 
8769 	// we also update the bufferSize in case of errors
8770 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8771 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8772 		return B_BAD_ADDRESS;
8773 
8774 	if (status != B_OK)
8775 		return status;
8776 
8777 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8778 		return B_BAD_ADDRESS;
8779 
8780 	return B_OK;
8781 }
8782 
8783 
8784 status_t
8785 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8786 	int mode)
8787 {
8788 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8789 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8790 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8791 		return B_NO_MEMORY;
8792 
8793 	char* path = pathBuffer.LockBuffer();
8794 	char* toPath = toPathBuffer.LockBuffer();
8795 
8796 	if (!IS_USER_ADDRESS(userPath)
8797 		|| !IS_USER_ADDRESS(userToPath)
8798 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8799 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8800 		return B_BAD_ADDRESS;
8801 
8802 	return common_create_symlink(fd, path, toPath, mode, false);
8803 }
8804 
8805 
8806 status_t
8807 _user_create_link(const char* userPath, const char* userToPath)
8808 {
8809 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8810 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8811 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8812 		return B_NO_MEMORY;
8813 
8814 	char* path = pathBuffer.LockBuffer();
8815 	char* toPath = toPathBuffer.LockBuffer();
8816 
8817 	if (!IS_USER_ADDRESS(userPath)
8818 		|| !IS_USER_ADDRESS(userToPath)
8819 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8820 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8821 		return B_BAD_ADDRESS;
8822 
8823 	status_t status = check_path(toPath);
8824 	if (status != B_OK)
8825 		return status;
8826 
8827 	return common_create_link(path, toPath, false);
8828 }
8829 
8830 
8831 status_t
8832 _user_unlink(int fd, const char* userPath)
8833 {
8834 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8835 	if (pathBuffer.InitCheck() != B_OK)
8836 		return B_NO_MEMORY;
8837 
8838 	char* path = pathBuffer.LockBuffer();
8839 
8840 	if (!IS_USER_ADDRESS(userPath)
8841 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8842 		return B_BAD_ADDRESS;
8843 
8844 	return common_unlink(fd, path, false);
8845 }
8846 
8847 
8848 status_t
8849 _user_rename(int oldFD, const char* userOldPath, int newFD,
8850 	const char* userNewPath)
8851 {
8852 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8853 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8854 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8855 		return B_NO_MEMORY;
8856 
8857 	char* oldPath = oldPathBuffer.LockBuffer();
8858 	char* newPath = newPathBuffer.LockBuffer();
8859 
8860 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8861 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8862 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8863 		return B_BAD_ADDRESS;
8864 
8865 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8866 }
8867 
8868 
8869 status_t
8870 _user_create_fifo(const char* userPath, mode_t perms)
8871 {
8872 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8873 	if (pathBuffer.InitCheck() != B_OK)
8874 		return B_NO_MEMORY;
8875 
8876 	char* path = pathBuffer.LockBuffer();
8877 
8878 	if (!IS_USER_ADDRESS(userPath)
8879 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
8880 		return B_BAD_ADDRESS;
8881 	}
8882 
8883 	// split into directory vnode and filename path
8884 	char filename[B_FILE_NAME_LENGTH];
8885 	struct vnode* dir;
8886 	status_t status = path_to_dir_vnode(path, &dir, filename, false);
8887 	if (status != B_OK)
8888 		return status;
8889 
8890 	VNodePutter _(dir);
8891 
8892 	// the underlying FS needs to support creating FIFOs
8893 	if (!HAS_FS_CALL(dir, create_special_node))
8894 		return B_UNSUPPORTED;
8895 
8896 	// create the entry	-- the FIFO sub node is set up automatically
8897 	fs_vnode superVnode;
8898 	ino_t nodeID;
8899 	status = FS_CALL(dir, create_special_node, filename, NULL,
8900 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
8901 
8902 	// create_special_node() acquired a reference for us that we don't need.
8903 	if (status == B_OK)
8904 		put_vnode(dir->mount->volume, nodeID);
8905 
8906 	return status;
8907 }
8908 
8909 
8910 status_t
8911 _user_create_pipe(int* userFDs)
8912 {
8913 	// rootfs should support creating FIFOs, but let's be sure
8914 	if (!HAS_FS_CALL(sRoot, create_special_node))
8915 		return B_UNSUPPORTED;
8916 
8917 	// create the node	-- the FIFO sub node is set up automatically
8918 	fs_vnode superVnode;
8919 	ino_t nodeID;
8920 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
8921 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
8922 	if (status != B_OK)
8923 		return status;
8924 
8925 	// We've got one reference to the node and need another one.
8926 	struct vnode* vnode;
8927 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
8928 	if (status != B_OK) {
8929 		// that should not happen
8930 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
8931 			sRoot->mount->id, nodeID);
8932 		return status;
8933 	}
8934 
8935 	// Everything looks good so far. Open two FDs, one for reading and one
8936 	// for writing.
8937 	int fds[2];
8938 	fds[0] = open_vnode(vnode, O_RDONLY, false);
8939 	fds[1] = open_vnode(vnode, O_WRONLY, false);
8940 
8941 	FDCloser closer0(fds[0], false);
8942 	FDCloser closer1(fds[1], false);
8943 
8944 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
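	// if either open failed, status now holds that error; the FDClosers
	// close whichever FD was opened successfully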
8945 
8946 	// copy FDs to userland
8947 	if (status == B_OK) {
8948 		if (!IS_USER_ADDRESS(userFDs)
8949 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
8950 			status = B_BAD_ADDRESS;
8951 		}
8952 	}
8953 
8954 	// keep FDs, if everything went fine
8955 	if (status == B_OK) {
8956 		closer0.Detach();
8957 		closer1.Detach();
8958 	}
8959 
8960 	return status;
8961 }
8962 
8963 
8964 status_t
8965 _user_access(const char* userPath, int mode)
8966 {
8967 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8968 	if (pathBuffer.InitCheck() != B_OK)
8969 		return B_NO_MEMORY;
8970 
8971 	char* path = pathBuffer.LockBuffer();
8972 
8973 	if (!IS_USER_ADDRESS(userPath)
8974 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8975 		return B_BAD_ADDRESS;
8976 
8977 	return common_access(path, mode, false);
8978 }
8979 
8980 
8981 status_t
8982 _user_read_stat(int fd, const char* userPath, bool traverseLink,
8983 	struct stat* userStat, size_t statSize)
8984 {
8985 	struct stat stat;
8986 	status_t status;
8987 
8988 	if (statSize > sizeof(struct stat))
8989 		return B_BAD_VALUE;
8990 
8991 	if (!IS_USER_ADDRESS(userStat))
8992 		return B_BAD_ADDRESS;
8993 
8994 	if (userPath) {
8995 		// path given: get the stat of the node referred to by (fd, path)
8996 		if (!IS_USER_ADDRESS(userPath))
8997 			return B_BAD_ADDRESS;
8998 
8999 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9000 		if (pathBuffer.InitCheck() != B_OK)
9001 			return B_NO_MEMORY;
9002 
9003 		char* path = pathBuffer.LockBuffer();
9004 
9005 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9006 		if (length < B_OK)
9007 			return length;
9008 		if (length >= B_PATH_NAME_LENGTH)
9009 			return B_NAME_TOO_LONG;
9010 
9011 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9012 	} else {
9013 		// no path given: get the FD and use the FD operation
9014 		struct file_descriptor* descriptor
9015 			= get_fd(get_current_io_context(false), fd);
9016 		if (descriptor == NULL)
9017 			return B_FILE_ERROR;
9018 
9019 		if (descriptor->ops->fd_read_stat)
9020 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9021 		else
9022 			status = EOPNOTSUPP;
9023 
9024 		put_fd(descriptor);
9025 	}
9026 
9027 	if (status != B_OK)
9028 		return status;
9029 
9030 	return user_memcpy(userStat, &stat, statSize);
9031 }
9032 
9033 
9034 status_t
9035 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9036 	const struct stat* userStat, size_t statSize, int statMask)
9037 {
9038 	if (statSize > sizeof(struct stat))
9039 		return B_BAD_VALUE;
9040 
9041 	struct stat stat;
9042 
9043 	if (!IS_USER_ADDRESS(userStat)
9044 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9045 		return B_BAD_ADDRESS;
9046 
9047 	// clear additional stat fields
9048 	if (statSize < sizeof(struct stat))
9049 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9050 
9051 	status_t status;
9052 
9053 	if (userPath) {
9054 		// path given: write the stat of the node referred to by (fd, path)
9055 		if (!IS_USER_ADDRESS(userPath))
9056 			return B_BAD_ADDRESS;
9057 
9058 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9059 		if (pathBuffer.InitCheck() != B_OK)
9060 			return B_NO_MEMORY;
9061 
9062 		char* path = pathBuffer.LockBuffer();
9063 
9064 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9065 		if (length < B_OK)
9066 			return length;
9067 		if (length >= B_PATH_NAME_LENGTH)
9068 			return B_NAME_TOO_LONG;
9069 
9070 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9071 			statMask, false);
9072 	} else {
9073 		// no path given: get the FD and use the FD operation
9074 		struct file_descriptor* descriptor
9075 			= get_fd(get_current_io_context(false), fd);
9076 		if (descriptor == NULL)
9077 			return B_FILE_ERROR;
9078 
9079 		if (descriptor->ops->fd_write_stat) {
9080 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9081 				statMask);
9082 		} else
9083 			status = EOPNOTSUPP;
9084 
9085 		put_fd(descriptor);
9086 	}
9087 
9088 	return status;
9089 }
9090 
9091 
9092 int
9093 _user_open_attr_dir(int fd, const char* userPath)
9094 {
9095 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9096 	if (pathBuffer.InitCheck() != B_OK)
9097 		return B_NO_MEMORY;
9098 
9099 	char* path = pathBuffer.LockBuffer();
9100 
9101 	if (userPath != NULL) {
9102 		if (!IS_USER_ADDRESS(userPath)
9103 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9104 			return B_BAD_ADDRESS;
9105 	}
9106 
9107 	return attr_dir_open(fd, userPath ? path : NULL, false);
9108 }
9109 
9110 
9111 ssize_t
9112 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9113 	size_t readBytes)
9114 {
9115 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9116 	if (attr < 0)
9117 		return attr;
9118 
9119 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9120 	_user_close(attr);
9121 
9122 	return bytes;
9123 }
9124 
9125 
9126 ssize_t
9127 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9128 	const void* buffer, size_t writeBytes)
9129 {
9130 	// Try to support the BeOS-typical truncation as well as the position
9131 	// argument
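	// (a write at position 0 replaces the attribute contents entirely,
	// while a write at a non-zero position keeps the existing data)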
9132 	int attr = attr_create(fd, NULL, attribute, type,
9133 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9134 	if (attr < 0)
9135 		return attr;
9136 
9137 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9138 	_user_close(attr);
9139 
9140 	return bytes;
9141 }
9142 
9143 
9144 status_t
9145 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9146 {
9147 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9148 	if (attr < 0)
9149 		return attr;
9150 
9151 	struct file_descriptor* descriptor
9152 		= get_fd(get_current_io_context(false), attr);
9153 	if (descriptor == NULL) {
9154 		_user_close(attr);
9155 		return B_FILE_ERROR;
9156 	}
9157 
9158 	struct stat stat;
9159 	status_t status;
9160 	if (descriptor->ops->fd_read_stat)
9161 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9162 	else
9163 		status = EOPNOTSUPP;
9164 
9165 	put_fd(descriptor);
9166 	_user_close(attr);
9167 
9168 	if (status == B_OK) {
9169 		attr_info info;
9170 		info.type = stat.st_type;
9171 		info.size = stat.st_size;
9172 
9173 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9174 			return B_BAD_ADDRESS;
9175 	}
9176 
9177 	return status;
9178 }
9179 
9180 
9181 int
9182 _user_open_attr(int fd, const char* userPath, const char* userName,
9183 	uint32 type, int openMode)
9184 {
9185 	char name[B_FILE_NAME_LENGTH];
9186 
9187 	if (!IS_USER_ADDRESS(userName)
9188 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9189 		return B_BAD_ADDRESS;
9190 
9191 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9192 	if (pathBuffer.InitCheck() != B_OK)
9193 		return B_NO_MEMORY;
9194 
9195 	char* path = pathBuffer.LockBuffer();
9196 
9197 	if (userPath != NULL) {
9198 		if (!IS_USER_ADDRESS(userPath)
9199 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9200 			return B_BAD_ADDRESS;
9201 	}
9202 
9203 	if ((openMode & O_CREAT) != 0) {
9204 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9205 			false);
9206 	}
9207 
9208 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9209 }
9210 
9211 
9212 status_t
9213 _user_remove_attr(int fd, const char* userName)
9214 {
9215 	char name[B_FILE_NAME_LENGTH];
9216 
9217 	if (!IS_USER_ADDRESS(userName)
9218 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9219 		return B_BAD_ADDRESS;
9220 
9221 	return attr_remove(fd, name, false);
9222 }
9223 
9224 
9225 status_t
9226 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9227 	const char* userToName)
9228 {
9229 	if (!IS_USER_ADDRESS(userFromName)
9230 		|| !IS_USER_ADDRESS(userToName))
9231 		return B_BAD_ADDRESS;
9232 
9233 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9234 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9235 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9236 		return B_NO_MEMORY;
9237 
9238 	char* fromName = fromNameBuffer.LockBuffer();
9239 	char* toName = toNameBuffer.LockBuffer();
9240 
9241 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9242 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9243 		return B_BAD_ADDRESS;
9244 
9245 	return attr_rename(fromFile, fromName, toFile, toName, false);
9246 }
9247 
9248 
9249 int
9250 _user_open_index_dir(dev_t device)
9251 {
9252 	return index_dir_open(device, false);
9253 }
9254 
9255 
9256 status_t
9257 _user_create_index(dev_t device, const char* userName, uint32 type,
9258 	uint32 flags)
9259 {
9260 	char name[B_FILE_NAME_LENGTH];
9261 
9262 	if (!IS_USER_ADDRESS(userName)
9263 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9264 		return B_BAD_ADDRESS;
9265 
9266 	return index_create(device, name, type, flags, false);
9267 }
9268 
9269 
9270 status_t
9271 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9272 {
9273 	char name[B_FILE_NAME_LENGTH];
9274 	struct stat stat;
9275 	status_t status;
9276 
9277 	if (!IS_USER_ADDRESS(userName)
9278 		|| !IS_USER_ADDRESS(userStat)
9279 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9280 		return B_BAD_ADDRESS;
9281 
9282 	status = index_name_read_stat(device, name, &stat, false);
9283 	if (status == B_OK) {
9284 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9285 			return B_BAD_ADDRESS;
9286 	}
9287 
9288 	return status;
9289 }
9290 
9291 
9292 status_t
9293 _user_remove_index(dev_t device, const char* userName)
9294 {
9295 	char name[B_FILE_NAME_LENGTH];
9296 
9297 	if (!IS_USER_ADDRESS(userName)
9298 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9299 		return B_BAD_ADDRESS;
9300 
9301 	return index_remove(device, name, false);
9302 }
9303 
9304 
9305 status_t
9306 _user_getcwd(char* userBuffer, size_t size)
9307 {
9308 	if (!IS_USER_ADDRESS(userBuffer))
9309 		return B_BAD_ADDRESS;
9310 
9311 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9312 	if (pathBuffer.InitCheck() != B_OK)
9313 		return B_NO_MEMORY;
9314 
9315 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9316 
9317 	if (size > B_PATH_NAME_LENGTH)
9318 		size = B_PATH_NAME_LENGTH;
9319 
9320 	char* path = pathBuffer.LockBuffer();
9321 
9322 	status_t status = get_cwd(path, size, false);
9323 	if (status != B_OK)
9324 		return status;
9325 
9326 	// Copy back the result
9327 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9328 		return B_BAD_ADDRESS;
9329 
9330 	return status;
9331 }
9332 
9333 
9334 status_t
9335 _user_setcwd(int fd, const char* userPath)
9336 {
9337 	TRACE(("user_setcwd: path = %p\n", userPath));
9338 
9339 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9340 	if (pathBuffer.InitCheck() != B_OK)
9341 		return B_NO_MEMORY;
9342 
9343 	char* path = pathBuffer.LockBuffer();
9344 
9345 	if (userPath != NULL) {
9346 		if (!IS_USER_ADDRESS(userPath)
9347 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9348 			return B_BAD_ADDRESS;
9349 	}
9350 
9351 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9352 }
9353 
9354 
9355 status_t
9356 _user_change_root(const char* userPath)
9357 {
9358 	// only root is allowed to chroot()
9359 	if (geteuid() != 0)
9360 		return EPERM;
9361 
9362 	// alloc path buffer
9363 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9364 	if (pathBuffer.InitCheck() != B_OK)
9365 		return B_NO_MEMORY;
9366 
9367 	// copy userland path to kernel
9368 	char* path = pathBuffer.LockBuffer();
9369 	if (userPath != NULL) {
9370 		if (!IS_USER_ADDRESS(userPath)
9371 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9372 			return B_BAD_ADDRESS;
9373 	}
9374 
9375 	// get the vnode
9376 	struct vnode* vnode;
9377 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9378 	if (status != B_OK)
9379 		return status;
9380 
9381 	// set the new root
9382 	struct io_context* context = get_current_io_context(false);
9383 	mutex_lock(&sIOContextRootLock);
9384 	struct vnode* oldRoot = context->root;
9385 	context->root = vnode;
9386 	mutex_unlock(&sIOContextRootLock);
9387 
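	// the io_context held a reference to its previous root; release it now
	// that the new root is in place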
9388 	put_vnode(oldRoot);
9389 
9390 	return B_OK;
9391 }
9392 
9393 
9394 int
9395 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9396 	uint32 flags, port_id port, int32 token)
9397 {
9398 	char* query;
9399 
9400 	if (device < 0 || userQuery == NULL || queryLength == 0)
9401 		return B_BAD_VALUE;
9402 
9403 	// this is a safety restriction
9404 	if (queryLength >= 65536)
9405 		return B_NAME_TOO_LONG;
9406 
9407 	query = (char*)malloc(queryLength + 1);
9408 	if (query == NULL)
9409 		return B_NO_MEMORY;
9410 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9411 		free(query);
9412 		return B_BAD_ADDRESS;
9413 	}
9414 
9415 	int fd = query_open(device, query, flags, port, token, false);
9416 
9417 	free(query);
9418 	return fd;
9419 }
9420 
9421 
9422 #include "vfs_request_io.cpp"
9423