xref: /haiku/src/system/kernel/fs/vfs.cpp (revision bea66afaeb8d038d8918106a430a56b6e9fb3109)
1 /*
2  * Copyright 2005-2009, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2009, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <disk_device_manager/KDiskDevice.h>
36 #include <disk_device_manager/KDiskDeviceManager.h>
37 #include <disk_device_manager/KDiskDeviceUtils.h>
38 #include <disk_device_manager/KDiskSystem.h>
39 #include <fd.h>
40 #include <file_cache.h>
41 #include <fs/node_monitor.h>
42 #include <khash.h>
43 #include <KPath.h>
44 #include <lock.h>
45 #include <low_resource_manager.h>
46 #include <syscalls.h>
47 #include <syscall_restart.h>
48 #include <tracing.h>
49 #include <util/atomic.h>
50 #include <util/AutoLock.h>
51 #include <util/DoublyLinkedList.h>
52 #include <util/OpenHashTable.h>
53 #include <vfs.h>
54 #include <vm.h>
55 #include <vm_cache.h>
56 
57 #include "fifo.h"
58 #include "IORequest.h"
59 
60 
61 //#define TRACE_VFS
62 #ifdef TRACE_VFS
63 #	define TRACE(x) dprintf x
64 #	define FUNCTION(x) dprintf x
65 #else
66 #	define TRACE(x) ;
67 #	define FUNCTION(x) ;
68 #endif
69 
70 #define ADD_DEBUGGER_COMMANDS
71 
72 
73 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
74 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
75 
76 #if KDEBUG
77 #	define FS_CALL(vnode, op, params...) \
78 		( HAS_FS_CALL(vnode, op) ? \
79 			vnode->ops->op(vnode->mount->volume, vnode, params) \
80 			: (panic("FS_CALL op " #op " is NULL"), 0))
81 #	define FS_CALL_NO_PARAMS(vnode, op) \
82 		( HAS_FS_CALL(vnode, op) ? \
83 			vnode->ops->op(vnode->mount->volume, vnode) \
84 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
85 #	define FS_MOUNT_CALL(mount, op, params...) \
86 		( HAS_FS_MOUNT_CALL(mount, op) ? \
87 			mount->volume->ops->op(mount->volume, params) \
88 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
89 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
90 		( HAS_FS_MOUNT_CALL(mount, op) ? \
91 			mount->volume->ops->op(mount->volume) \
92 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
93 #else
94 #	define FS_CALL(vnode, op, params...) \
95 			vnode->ops->op(vnode->mount->volume, vnode, params)
96 #	define FS_CALL_NO_PARAMS(vnode, op) \
97 			vnode->ops->op(vnode->mount->volume, vnode)
98 #	define FS_MOUNT_CALL(mount, op, params...) \
99 			mount->volume->ops->op(mount->volume, params)
100 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
101 			mount->volume->ops->op(mount->volume)
102 #endif
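

// Illustrative only: how the FS_CALL machinery is meant to be used. Callers
// check HAS_FS_CALL() first, since not every file system implements every
// hook, and then dispatch through FS_CALL(); this sketch mirrors the real
// use in normalize_flock() below, but the function name is hypothetical.
#if 0
static status_t
example_read_stat(struct vnode* vnode, struct stat* stat)
{
	if (!HAS_FS_CALL(vnode, read_stat))
		return EOPNOTSUPP;

	return FS_CALL(vnode, read_stat, stat);
}
#endif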
103 
104 
105 const static uint32 kMaxUnusedVnodes = 8192;
106 	// This is the maximum number of unused vnodes that the system
107 	// will keep around (weak limit, if there is enough memory left,
108 	// they won't get flushed even when hitting that limit).
	// It may be chosen with respect to the available memory or enhanced
	// by some timestamp/frequency heuristic.
111 
112 const static uint32 kMaxEntryCacheEntryCount = 8192;
113 	// Maximum number of entries per entry cache. It's a hard limit ATM.
114 
115 const static size_t kMaxPathLength = 65536;
116 	// The absolute maximum path length (for getcwd() - this is not depending
117 	// on PATH_MAX
118 
119 struct EntryCacheKey {
120 	EntryCacheKey(ino_t dirID, const char* name)
121 		:
122 		dir_id(dirID),
123 		name(name)
124 	{
125 	}
126 
127 	ino_t		dir_id;
128 	const char*	name;
129 };
130 
131 
132 struct EntryCacheEntry : DoublyLinkedListLinkImpl<EntryCacheEntry> {
133 	EntryCacheEntry*	hash_link;
134 	ino_t				node_id;
135 	ino_t				dir_id;
136 	char				name[1];
137 };
138 
139 
140 struct EntryCacheHashDefinition {
141 	typedef EntryCacheKey	KeyType;
142 	typedef EntryCacheEntry	ValueType;
143 
144 	uint32 HashKey(const EntryCacheKey& key) const
145 	{
146 		return (uint32)key.dir_id ^ (uint32)(key.dir_id >> 32)
147 			^ hash_hash_string(key.name);
148 	}
149 
150 	size_t Hash(const EntryCacheEntry* value) const
151 	{
152 		return (uint32)value->dir_id ^ (uint32)(value->dir_id >> 32)
153 			^ hash_hash_string(value->name);
154 	}
155 
156 	bool Compare(const EntryCacheKey& key, const EntryCacheEntry* value) const
157 	{
158 		return value->dir_id == key.dir_id
159 			&& strcmp(value->name, key.name) == 0;
160 	}
161 
162 	EntryCacheEntry*& GetLink(EntryCacheEntry* value) const
163 	{
164 		return value->hash_link;
165 	}
166 };
167 
168 
169 class EntryCache {
170 public:
171 	EntryCache()
172 	{
173 		mutex_init(&fLock, "entry cache");
174 
175 		new(&fEntries) EntryTable;
176 		new(&fUsedEntries) EntryList;
177 		fEntryCount = 0;
178 	}
179 
180 	~EntryCache()
181 	{
182 		while (EntryCacheEntry* entry = fUsedEntries.Head())
183 			_Remove(entry);
184 
185 		mutex_destroy(&fLock);
186 	}
187 
188 	status_t Init()
189 	{
190 		return fEntries.Init();
191 	}
192 
193 	status_t Add(ino_t dirID, const char* name, ino_t nodeID)
194 	{
195 		MutexLocker _(fLock);
196 
197 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
198 		if (entry != NULL) {
199 			entry->node_id = nodeID;
200 			return B_OK;
201 		}
202 
203 		if (fEntryCount >= kMaxEntryCacheEntryCount)
204 			_Remove(fUsedEntries.Head());
205 
206 		entry = (EntryCacheEntry*)malloc(sizeof(EntryCacheEntry)
207 			+ strlen(name));
208 		if (entry == NULL)
209 			return B_NO_MEMORY;
210 
211 		entry->node_id = nodeID;
212 		entry->dir_id = dirID;
213 		strcpy(entry->name, name);
214 
215 		fEntries.Insert(entry);
216 		fUsedEntries.Add(entry);
217 		fEntryCount++;
218 
219 		return B_OK;
220 	}
221 
222 	status_t Remove(ino_t dirID, const char* name)
223 	{
224 		MutexLocker _(fLock);
225 
226 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
227 		if (entry == NULL)
228 			return B_ENTRY_NOT_FOUND;
229 
230 		_Remove(entry);
231 
232 		return B_OK;
233 	}
234 
235 	bool Lookup(ino_t dirID, const char* name, ino_t& nodeID)
236 	{
237 		MutexLocker _(fLock);
238 
239 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
240 		if (entry == NULL)
241 			return false;
242 
243 		// requeue at the end
244 		fUsedEntries.Remove(entry);
245 		fUsedEntries.Add(entry);
246 
247 		nodeID = entry->node_id;
248 		return true;
249 	}
250 
251 	void _Remove(EntryCacheEntry* entry)
252 	{
253 		fEntries.Remove(entry);
254 		fUsedEntries.Remove(entry);
255 		free(entry);
256 		fEntryCount--;
257 	}
258 
259 private:
260 	typedef BOpenHashTable<EntryCacheHashDefinition> EntryTable;
261 	typedef DoublyLinkedList<EntryCacheEntry> EntryList;
262 
263 	mutex		fLock;
264 	EntryTable	fEntries;
265 	EntryList	fUsedEntries;	// LRU queue (LRU entry at the head)
266 	uint32		fEntryCount;
267 };
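

// A minimal sketch of the intended EntryCache usage during a name lookup
// (the function and the resolution step are hypothetical): a Lookup() hit
// requeues the entry at the MRU end of the LRU queue, and Add() evicts the
// least recently used entry once kMaxEntryCacheEntryCount is reached.
#if 0
static status_t
example_cached_lookup(EntryCache& cache, ino_t dirID, const char* name,
	ino_t& nodeID)
{
	if (cache.Lookup(dirID, name, nodeID))
		return B_OK;

	// ... resolve "name" in the directory dirID via the file system,
	// yielding nodeID ...

	return cache.Add(dirID, name, nodeID);
}
#endif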
268 
269 
270 struct vnode : fs_vnode, DoublyLinkedListLinkImpl<vnode> {
271 	struct vnode*	next;
272 	vm_cache*		cache;
273 	dev_t			device;
274 	list_link		unused_link;
275 	ino_t			id;
276 	struct fs_mount* mount;
277 	struct vnode*	covered_by;
278 	int32			ref_count;
279 	uint32			type : 29;
280 						// TODO: S_INDEX_DIR actually needs another bit.
281 						// Better combine this field with the following ones.
282 	uint32			remove : 1;
283 	uint32			busy : 1;
284 	uint32			unpublished : 1;
285 	struct advisory_locking* advisory_locking;
286 	struct file_descriptor* mandatory_locked_by;
287 };
288 
289 struct vnode_hash_key {
290 	dev_t	device;
291 	ino_t	vnode;
292 };
293 
294 typedef DoublyLinkedList<vnode> VnodeList;
295 
296 /*!	\brief Structure to manage a mounted file system
297 
	Note: The root_vnode and covers_vnode fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the covers_vnode, thus
	making the access path vnode->mount->covers_vnode->mount->... safe if a
	reference to vnode is held (note that for the root mount covers_vnode
	is NULL, though).
307 */
308 struct fs_mount {
309 	fs_mount()
310 		:
311 		volume(NULL),
312 		device_name(NULL)
313 	{
314 		recursive_lock_init(&rlock, "mount rlock");
315 	}
316 
317 	~fs_mount()
318 	{
319 		recursive_lock_destroy(&rlock);
320 		free(device_name);
321 
322 		while (volume) {
323 			fs_volume* superVolume = volume->super_volume;
324 
325 			if (volume->file_system != NULL)
326 				put_module(volume->file_system->info.name);
327 
328 			free(volume->file_system_name);
329 			free(volume);
330 			volume = superVolume;
331 		}
332 	}
333 
334 	struct fs_mount* next;
335 	dev_t			id;
336 	fs_volume*		volume;
337 	char*			device_name;
338 	recursive_lock	rlock;	// guards the vnodes list
339 	struct vnode*	root_vnode;
340 	struct vnode*	covers_vnode;
341 	KPartition*		partition;
342 	VnodeList		vnodes;
343 	EntryCache		entry_cache;
344 	bool			unmounting;
345 	bool			owns_file_device;
346 };
347 
348 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
349 	list_link		link;
350 	team_id			team;
351 	pid_t			session;
352 	off_t			start;
353 	off_t			end;
354 	bool			shared;
355 };
356 
357 typedef DoublyLinkedList<advisory_lock> LockList;
358 
359 struct advisory_locking {
360 	sem_id			lock;
361 	sem_id			wait_sem;
362 	LockList		locks;
363 
364 	advisory_locking()
365 		:
366 		lock(-1),
367 		wait_sem(-1)
368 	{
369 	}
370 
371 	~advisory_locking()
372 	{
373 		if (lock >= 0)
374 			delete_sem(lock);
375 		if (wait_sem >= 0)
376 			delete_sem(wait_sem);
377 	}
378 };
379 
380 /*!	\brief Guards sMountsTable.
381 
	The holder is allowed read/write access to sMountsTable.
383 	Manipulation of the fs_mount structures themselves
384 	(and their destruction) requires different locks though.
385 */
386 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
387 
388 /*!	\brief Guards mount/unmount operations.
389 
	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking the lock ensures that no FS is mounted/unmounted. In
392 	particular this means that
393 	- sMountsTable will not be modified,
394 	- the fields immutable after initialization of the fs_mount structures in
395 	  sMountsTable will not be modified,
396 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
397 
398 	The thread trying to lock the lock must not hold sVnodeMutex or
399 	sMountMutex.
400 */
401 static recursive_lock sMountOpLock;
402 
403 /*!	\brief Guards the vnode::covered_by field of any vnode
404 
405 	The holder is allowed to read access the vnode::covered_by field of any
406 	vnode. Additionally holding sMountOpLock allows for write access.
407 
	The thread trying to lock the mutex must not hold sVnodeMutex.
409 */
410 static mutex sVnodeCoveredByMutex
411 	= MUTEX_INITIALIZER("vfs_vnode_covered_by_lock");
412 
413 /*!	\brief Guards sVnodeTable.
414 
415 	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed,
	and for the field covered_by, which is
419 	guarded by sMountOpLock and sVnodeCoveredByMutex.
420 
421 	The thread trying to lock the mutex must not hold sMountMutex.
422 	You must not have this mutex held when calling create_sem(), as this
423 	might call vfs_free_unused_vnodes().
424 */
425 static mutex sVnodeMutex = MUTEX_INITIALIZER("vfs_vnode_lock");
426 
427 /*!	\brief Guards io_context::root.
428 
429 	Must be held when setting or getting the io_context::root field.
430 	The only operation allowed while holding this lock besides getting or
431 	setting the field is inc_vnode_ref_count() on io_context::root.
432 */
433 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
434 
435 #define VNODE_HASH_TABLE_SIZE 1024
436 static hash_table* sVnodeTable;
437 static list sUnusedVnodeList;
438 static uint32 sUnusedVnodes = 0;
439 static struct vnode* sRoot;
440 
441 #define MOUNTS_HASH_TABLE_SIZE 16
442 static hash_table* sMountsTable;
443 static dev_t sNextMountID = 1;
444 
445 #define MAX_TEMP_IO_VECS 8
446 
447 mode_t __gUmask = 022;
448 
449 /* function declarations */
450 
451 // file descriptor operation prototypes
452 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
453 	void* buffer, size_t* _bytes);
454 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
455 	const void* buffer, size_t* _bytes);
456 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
457 	int seekType);
458 static void file_free_fd(struct file_descriptor* descriptor);
459 static status_t file_close(struct file_descriptor* descriptor);
460 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
461 	struct selectsync* sync);
462 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
463 	struct selectsync* sync);
464 static status_t dir_read(struct io_context* context,
465 	struct file_descriptor* descriptor, struct dirent* buffer,
466 	size_t bufferSize, uint32* _count);
467 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
468 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
469 static status_t dir_rewind(struct file_descriptor* descriptor);
470 static void dir_free_fd(struct file_descriptor* descriptor);
471 static status_t dir_close(struct file_descriptor* descriptor);
472 static status_t attr_dir_read(struct io_context* context,
473 	struct file_descriptor* descriptor, struct dirent* buffer,
474 	size_t bufferSize, uint32* _count);
475 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
476 static void attr_dir_free_fd(struct file_descriptor* descriptor);
477 static status_t attr_dir_close(struct file_descriptor* descriptor);
478 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
479 	void* buffer, size_t* _bytes);
480 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
481 	const void* buffer, size_t* _bytes);
482 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
483 	int seekType);
484 static void attr_free_fd(struct file_descriptor* descriptor);
485 static status_t attr_close(struct file_descriptor* descriptor);
486 static status_t attr_read_stat(struct file_descriptor* descriptor,
487 	struct stat* statData);
488 static status_t attr_write_stat(struct file_descriptor* descriptor,
489 	const struct stat* stat, int statMask);
490 static status_t index_dir_read(struct io_context* context,
491 	struct file_descriptor* descriptor, struct dirent* buffer,
492 	size_t bufferSize, uint32* _count);
493 static status_t index_dir_rewind(struct file_descriptor* descriptor);
494 static void index_dir_free_fd(struct file_descriptor* descriptor);
495 static status_t index_dir_close(struct file_descriptor* descriptor);
496 static status_t query_read(struct io_context* context,
497 	struct file_descriptor* descriptor, struct dirent* buffer,
498 	size_t bufferSize, uint32* _count);
499 static status_t query_rewind(struct file_descriptor* descriptor);
500 static void query_free_fd(struct file_descriptor* descriptor);
501 static status_t query_close(struct file_descriptor* descriptor);
502 
503 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
504 	void* buffer, size_t length);
505 static status_t common_read_stat(struct file_descriptor* descriptor,
506 	struct stat* statData);
507 static status_t common_write_stat(struct file_descriptor* descriptor,
508 	const struct stat* statData, int statMask);
509 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
510 	struct stat* stat, bool kernel);
511 
512 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
513 	bool traverseLeafLink, int count, bool kernel,
514 	struct vnode** _vnode, ino_t* _parentID);
515 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
516 	size_t bufferSize, bool kernel);
517 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
518 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
519 static void inc_vnode_ref_count(struct vnode* vnode);
520 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
521 	bool reenter);
522 static inline void put_vnode(struct vnode* vnode);
523 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
524 	bool kernel);
525 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
526 
527 
528 static struct fd_ops sFileOps = {
529 	file_read,
530 	file_write,
531 	file_seek,
532 	common_ioctl,
533 	NULL,		// set_flags
534 	file_select,
535 	file_deselect,
536 	NULL,		// read_dir()
537 	NULL,		// rewind_dir()
538 	common_read_stat,
539 	common_write_stat,
540 	file_close,
541 	file_free_fd
542 };
543 
544 static struct fd_ops sDirectoryOps = {
545 	NULL,		// read()
546 	NULL,		// write()
547 	NULL,		// seek()
548 	common_ioctl,
549 	NULL,		// set_flags
550 	NULL,		// select()
551 	NULL,		// deselect()
552 	dir_read,
553 	dir_rewind,
554 	common_read_stat,
555 	common_write_stat,
556 	dir_close,
557 	dir_free_fd
558 };
559 
560 static struct fd_ops sAttributeDirectoryOps = {
561 	NULL,		// read()
562 	NULL,		// write()
563 	NULL,		// seek()
564 	common_ioctl,
565 	NULL,		// set_flags
566 	NULL,		// select()
567 	NULL,		// deselect()
568 	attr_dir_read,
569 	attr_dir_rewind,
570 	common_read_stat,
571 	common_write_stat,
572 	attr_dir_close,
573 	attr_dir_free_fd
574 };
575 
576 static struct fd_ops sAttributeOps = {
577 	attr_read,
578 	attr_write,
579 	attr_seek,
580 	common_ioctl,
581 	NULL,		// set_flags
582 	NULL,		// select()
583 	NULL,		// deselect()
584 	NULL,		// read_dir()
585 	NULL,		// rewind_dir()
586 	attr_read_stat,
587 	attr_write_stat,
588 	attr_close,
589 	attr_free_fd
590 };
591 
592 static struct fd_ops sIndexDirectoryOps = {
593 	NULL,		// read()
594 	NULL,		// write()
595 	NULL,		// seek()
596 	NULL,		// ioctl()
597 	NULL,		// set_flags
598 	NULL,		// select()
599 	NULL,		// deselect()
600 	index_dir_read,
601 	index_dir_rewind,
602 	NULL,		// read_stat()
603 	NULL,		// write_stat()
604 	index_dir_close,
605 	index_dir_free_fd
606 };
607 
608 #if 0
609 static struct fd_ops sIndexOps = {
610 	NULL,		// read()
611 	NULL,		// write()
612 	NULL,		// seek()
613 	NULL,		// ioctl()
614 	NULL,		// set_flags
615 	NULL,		// select()
616 	NULL,		// deselect()
617 	NULL,		// dir_read()
618 	NULL,		// dir_rewind()
619 	index_read_stat,	// read_stat()
620 	NULL,		// write_stat()
621 	NULL,		// dir_close()
622 	NULL		// free_fd()
623 };
624 #endif
625 
626 static struct fd_ops sQueryOps = {
627 	NULL,		// read()
628 	NULL,		// write()
629 	NULL,		// seek()
630 	NULL,		// ioctl()
631 	NULL,		// set_flags
632 	NULL,		// select()
633 	NULL,		// deselect()
634 	query_read,
635 	query_rewind,
636 	NULL,		// read_stat()
637 	NULL,		// write_stat()
638 	query_close,
639 	query_free_fd
640 };
641 
642 
643 // VNodePutter
644 class VNodePutter {
645 public:
646 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
647 
648 	~VNodePutter()
649 	{
650 		Put();
651 	}
652 
653 	void SetTo(struct vnode* vnode)
654 	{
655 		Put();
656 		fVNode = vnode;
657 	}
658 
659 	void Put()
660 	{
661 		if (fVNode) {
662 			put_vnode(fVNode);
663 			fVNode = NULL;
664 		}
665 	}
666 
667 	struct vnode* Detach()
668 	{
669 		struct vnode* vnode = fVNode;
670 		fVNode = NULL;
671 		return vnode;
672 	}
673 
674 private:
675 	struct vnode* fVNode;
676 };
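

// Typical RAII usage of VNodePutter (a sketch; mountID/vnodeID are
// hypothetical): the reference is put automatically when the putter goes
// out of scope, unless Detach() transfers ownership back to the caller.
#if 0
{
	struct vnode* vnode;
	if (get_vnode(mountID, vnodeID, &vnode, true, false) == B_OK) {
		VNodePutter vnodePutter(vnode);
		// ... use vnode; put_vnode() runs when vnodePutter goes out
		// of scope ...
	}
}
#endif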
677 
678 
679 class FDCloser {
680 public:
681 	FDCloser() : fFD(-1), fKernel(true) {}
682 
683 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
684 
685 	~FDCloser()
686 	{
687 		Close();
688 	}
689 
690 	void SetTo(int fd, bool kernel)
691 	{
692 		Close();
693 		fFD = fd;
694 		fKernel = kernel;
695 	}
696 
697 	void Close()
698 	{
699 		if (fFD >= 0) {
700 			if (fKernel)
701 				_kern_close(fFD);
702 			else
703 				_user_close(fFD);
704 			fFD = -1;
705 		}
706 	}
707 
708 	int Detach()
709 	{
710 		int fd = fFD;
711 		fFD = -1;
712 		return fd;
713 	}
714 
715 private:
716 	int		fFD;
717 	bool	fKernel;
718 };
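

// FDCloser works analogously for file descriptors (a sketch, "fd" and
// "kernel" being hypothetical): the descriptor is closed on every early
// error return, and Detach() hands it out on success.
#if 0
{
	FDCloser fdCloser(fd, kernel);
	// ... any error return closes fd automatically ...
	return fdCloser.Detach();
}
#endif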
719 
720 
721 #if VFS_PAGES_IO_TRACING
722 
723 namespace VFSPagesIOTracing {
724 
725 class PagesIOTraceEntry : public AbstractTraceEntry {
726 protected:
727 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
728 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
729 		status_t status, size_t bytesTransferred)
730 		:
731 		fVnode(vnode),
732 		fMountID(vnode->mount->id),
733 		fNodeID(vnode->id),
734 		fCookie(cookie),
735 		fPos(pos),
736 		fCount(count),
737 		fFlags(flags),
738 		fBytesRequested(bytesRequested),
739 		fStatus(status),
740 		fBytesTransferred(bytesTransferred)
741 	{
742 		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
743 			false);
744 	}
745 
746 	void AddDump(TraceOutput& out, const char* mode)
747 	{
748 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
749 			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
750 			fCookie, fPos, fBytesRequested);
751 
752 		if (fVecs != NULL) {
753 			for (uint32 i = 0; i < fCount; i++) {
754 				if (i > 0)
755 					out.Print(", ");
756 				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
757 			}
758 		}
759 
760 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
761 			fFlags, fStatus, fBytesTransferred);
762 	}
763 
764 protected:
765 	struct vnode*	fVnode;
766 	dev_t			fMountID;
767 	ino_t			fNodeID;
768 	void*			fCookie;
769 	off_t			fPos;
770 	iovec*			fVecs;
771 	uint32			fCount;
772 	uint32			fFlags;
773 	size_t			fBytesRequested;
774 	status_t		fStatus;
775 	size_t			fBytesTransferred;
776 };
777 
778 
779 class ReadPages : public PagesIOTraceEntry {
780 public:
781 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
782 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
783 		status_t status, size_t bytesTransferred)
784 		:
785 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
786 			bytesRequested, status, bytesTransferred)
787 	{
788 		Initialized();
789 	}
790 
791 	virtual void AddDump(TraceOutput& out)
792 	{
793 		PagesIOTraceEntry::AddDump(out, "read");
794 	}
795 };
796 
797 
798 class WritePages : public PagesIOTraceEntry {
799 public:
800 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
801 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
802 		status_t status, size_t bytesTransferred)
803 		:
804 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
805 			bytesRequested, status, bytesTransferred)
806 	{
807 		Initialized();
808 	}
809 
810 	virtual void AddDump(TraceOutput& out)
811 	{
812 		PagesIOTraceEntry::AddDump(out, "write");
813 	}
814 };
815 
816 }	// namespace VFSPagesIOTracing
817 
818 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
819 #else
820 #	define TPIO(x) ;
821 #endif	// VFS_PAGES_IO_TRACING
822 
823 
824 static int
825 mount_compare(void* _m, const void* _key)
826 {
827 	struct fs_mount* mount = (fs_mount*)_m;
828 	const dev_t* id = (dev_t*)_key;
829 
830 	if (mount->id == *id)
831 		return 0;
832 
833 	return -1;
834 }
835 
836 
837 static uint32
838 mount_hash(void* _m, const void* _key, uint32 range)
839 {
840 	struct fs_mount* mount = (fs_mount*)_m;
841 	const dev_t* id = (dev_t*)_key;
842 
843 	if (mount)
844 		return mount->id % range;
845 
846 	return (uint32)*id % range;
847 }
848 
849 
850 /*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
852 */
853 static struct fs_mount*
854 find_mount(dev_t id)
855 {
856 	ASSERT_LOCKED_MUTEX(&sMountMutex);
857 
858 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
859 }
860 
861 
862 static status_t
863 get_mount(dev_t id, struct fs_mount** _mount)
864 {
865 	struct fs_mount* mount;
866 
867 	MutexLocker nodeLocker(sVnodeMutex);
868 	MutexLocker mountLocker(sMountMutex);
869 
870 	mount = find_mount(id);
871 	if (mount == NULL)
872 		return B_BAD_VALUE;
873 
874 	struct vnode* rootNode = mount->root_vnode;
875 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
876 		// might have been called during a mount/unmount operation
877 		return B_BUSY;
878 	}
879 
880 	inc_vnode_ref_count(mount->root_vnode);
881 	*_mount = mount;
882 	return B_OK;
883 }
884 
885 
886 static void
887 put_mount(struct fs_mount* mount)
888 {
889 	if (mount)
890 		put_vnode(mount->root_vnode);
891 }
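

// Sketch of the get_mount()/put_mount() pairing (the caller and "id" are
// hypothetical): the root vnode reference obtained by get_mount() keeps
// the mount busy until put_mount() releases it again.
#if 0
{
	struct fs_mount* mount;
	if (get_mount(id, &mount) == B_OK) {
		// ... use mount safely; it cannot be unmounted in the meantime ...
		put_mount(mount);
	}
}
#endif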
892 
893 
894 /*!	Tries to open the specified file system module.
895 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if the
	module could not be opened.
898 */
899 static file_system_module_info*
900 get_file_system(const char* fsName)
901 {
902 	char name[B_FILE_NAME_LENGTH];
903 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
904 		// construct module name if we didn't get one
905 		// (we currently support only one API)
906 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
907 		fsName = NULL;
908 	}
909 
910 	file_system_module_info* info;
911 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
912 		return NULL;
913 
914 	return info;
915 }
916 
917 
918 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
919 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
920 	The name is allocated for you, and you have to free() it when you're
921 	done with it.
922 	Returns NULL if the required memory is not available.
923 */
924 static char*
925 get_file_system_name(const char* fsName)
926 {
927 	const size_t length = strlen("file_systems/");
928 
929 	if (strncmp(fsName, "file_systems/", length)) {
930 		// the name already seems to be the module's file name
931 		return strdup(fsName);
932 	}
933 
934 	fsName += length;
935 	const char* end = strchr(fsName, '/');
936 	if (end == NULL) {
937 		// this doesn't seem to be a valid name, but well...
938 		return strdup(fsName);
939 	}
940 
941 	// cut off the trailing /v1
942 
943 	char* name = (char*)malloc(end + 1 - fsName);
944 	if (name == NULL)
945 		return NULL;
946 
947 	strlcpy(name, fsName, end + 1 - fsName);
948 	return name;
949 }
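

// For example, both accepted spellings resolve to the same short name:
//   get_file_system_name("bfs")                 -> "bfs"
//   get_file_system_name("file_systems/bfs/v1") -> "bfs"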
950 
951 
/*!	Accepts a list of file system names separated by colons, one for each
	layer, and returns the file system name for the specified layer.
954 	The name is allocated for you, and you have to free() it when you're
955 	done with it.
956 	Returns NULL if the required memory is not available or if there is no
957 	name for the specified layer.
958 */
959 static char*
960 get_file_system_name_for_layer(const char* fsNames, int32 layer)
961 {
962 	while (layer >= 0) {
963 		const char* end = strchr(fsNames, ':');
964 		if (end == NULL) {
965 			if (layer == 0)
966 				return strdup(fsNames);
967 			return NULL;
968 		}
969 
970 		if (layer == 0) {
971 			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
974 			return result;
975 		}
976 
977 		fsNames = end + 1;
978 		layer--;
979 	}
980 
981 	return NULL;
982 }
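

// For example, with a (hypothetical) layered name "bfs:write_overlay":
//   get_file_system_name_for_layer("bfs:write_overlay", 0) -> "bfs"
//   get_file_system_name_for_layer("bfs:write_overlay", 1) -> "write_overlay"
//   get_file_system_name_for_layer("bfs:write_overlay", 2) -> NULL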
983 
984 
985 static int
986 vnode_compare(void* _vnode, const void* _key)
987 {
988 	struct vnode* vnode = (struct vnode*)_vnode;
989 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
990 
991 	if (vnode->device == key->device && vnode->id == key->vnode)
992 		return 0;
993 
994 	return -1;
995 }
996 
997 
998 static uint32
999 vnode_hash(void* _vnode, const void* _key, uint32 range)
1000 {
1001 	struct vnode* vnode = (struct vnode*)_vnode;
1002 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
1003 
1004 #define VHASH(mountid, vnodeid) \
1005 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
1006 
1007 	if (vnode != NULL)
1008 		return VHASH(vnode->device, vnode->id) % range;
1009 
1010 	return VHASH(key->device, key->vnode) % range;
1011 
1012 #undef VHASH
1013 }
1014 
1015 
1016 static void
1017 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
1018 {
1019 	RecursiveLocker _(mount->rlock);
1020 	mount->vnodes.Add(vnode);
1021 }
1022 
1023 
1024 static void
1025 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
1026 {
1027 	RecursiveLocker _(mount->rlock);
1028 	mount->vnodes.Remove(vnode);
1029 }
1030 
1031 
1032 static status_t
1033 create_new_vnode(struct vnode** _vnode, dev_t mountID, ino_t vnodeID)
1034 {
1035 	FUNCTION(("create_new_vnode()\n"));
1036 
1037 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
1038 	if (vnode == NULL)
1039 		return B_NO_MEMORY;
1040 
1041 	// initialize basic values
1042 	memset(vnode, 0, sizeof(struct vnode));
1043 	vnode->device = mountID;
1044 	vnode->id = vnodeID;
1045 
1046 	// add the vnode to the mount structure
1047 	mutex_lock(&sMountMutex);
1048 	vnode->mount = find_mount(mountID);
1049 	if (!vnode->mount || vnode->mount->unmounting) {
1050 		mutex_unlock(&sMountMutex);
1051 		free(vnode);
1052 		return B_ENTRY_NOT_FOUND;
1053 	}
1054 
1055 	hash_insert(sVnodeTable, vnode);
1056 	add_vnode_to_mount_list(vnode, vnode->mount);
1057 
1058 	mutex_unlock(&sMountMutex);
1059 
1060 	vnode->ref_count = 1;
1061 	*_vnode = vnode;
1062 
1063 	return B_OK;
1064 }
1065 
1066 
1067 /*!	Frees the vnode and all resources it has acquired, and removes
1068 	it from the vnode hash as well as from its mount structure.
1069 	Will also make sure that any cache modifications are written back.
1070 */
1071 static void
1072 free_vnode(struct vnode* vnode, bool reenter)
1073 {
1074 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);
1075 
1076 	// write back any changes in this vnode's cache -- but only
1077 	// if the vnode won't be deleted, in which case the changes
1078 	// will be discarded
1079 
1080 	if (!vnode->remove && HAS_FS_CALL(vnode, fsync))
1081 		FS_CALL_NO_PARAMS(vnode, fsync);
1082 
1083 	// Note: If this vnode has a cache attached, there will still be two
1084 	// references to that cache at this point. The last one belongs to the vnode
1085 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1086 	// cache. Each but the last reference to a cache also includes a reference
1087 	// to the vnode. The file cache, however, released its reference (cf.
1088 	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next to last
1090 	// cache reference to be released, which will also release a (no longer
1091 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1092 	// count, so that it will neither become negative nor 0.
1093 	vnode->ref_count = 2;
1094 
1095 	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
1096 	// cache either (i.e. no one can get a cache reference while we're deleting
	// the vnode). This is, however, not the case for the page daemon. It gets
1098 	// its cache references via the pages it scans, so it can in fact get a
1099 	// vnode reference while we're deleting the vnode.
1100 
1101 	if (!vnode->unpublished) {
1102 		if (vnode->remove)
1103 			FS_CALL(vnode, remove_vnode, reenter);
1104 		else
1105 			FS_CALL(vnode, put_vnode, reenter);
1106 	}
1107 
1108 	// The file system has removed the resources of the vnode now, so we can
1109 	// make it available again (and remove the busy vnode from the hash)
1110 	mutex_lock(&sVnodeMutex);
1111 	hash_remove(sVnodeTable, vnode);
1112 	mutex_unlock(&sVnodeMutex);
1113 
1114 	// if we have a vm_cache attached, remove it
1115 	if (vnode->cache)
1116 		vnode->cache->ReleaseRef();
1117 
1118 	vnode->cache = NULL;
1119 
1120 	remove_vnode_from_mount_list(vnode, vnode->mount);
1121 
1122 	free(vnode);
1123 }
1124 
1125 
1126 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1127 	if the counter dropped to 0.
1128 
1129 	The caller must, of course, own a reference to the vnode to call this
1130 	function.
1131 	The caller must not hold the sVnodeMutex or the sMountMutex.
1132 
1133 	\param vnode the vnode.
1134 	\param alwaysFree don't move this vnode into the unused list, but really
1135 		   delete it if possible.
1136 	\param reenter \c true, if this function is called (indirectly) from within
1137 		   a file system. This will be passed to file system hooks only.
1138 	\return \c B_OK, if everything went fine, an error code otherwise.
1139 */
1140 static status_t
1141 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1142 {
1143 	MutexLocker locker(sVnodeMutex);
1144 
1145 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1146 
1147 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1148 
1149 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1150 		vnode->ref_count));
1151 
1152 	if (oldRefCount != 1)
1153 		return B_OK;
1154 
1155 	if (vnode->busy)
1156 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1157 
1158 	bool freeNode = false;
1159 
1160 	// Just insert the vnode into an unused list if we don't need
1161 	// to delete it
1162 	if (vnode->remove || alwaysFree) {
1163 		vnode->busy = true;
1164 		freeNode = true;
1165 	} else {
1166 		list_add_item(&sUnusedVnodeList, vnode);
1167 		if (++sUnusedVnodes > kMaxUnusedVnodes
1168 			&& low_resource_state(
1169 				B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)
1170 					!= B_NO_LOW_RESOURCE) {
1171 			// there are too many unused vnodes so we free the oldest one
1172 			// TODO: evaluate this mechanism
1173 			vnode = (struct vnode*)list_remove_head_item(&sUnusedVnodeList);
1174 			vnode->busy = true;
1175 			freeNode = true;
1176 			sUnusedVnodes--;
1177 		}
1178 	}
1179 
1180 	locker.Unlock();
1181 
1182 	if (freeNode)
1183 		free_vnode(vnode, reenter);
1184 
1185 	return B_OK;
1186 }
1187 
1188 
1189 /*!	\brief Increments the reference counter of the given vnode.
1190 
1191 	The caller must either already have a reference to the vnode or hold
1192 	the sVnodeMutex.
1193 
1194 	\param vnode the vnode.
1195 */
1196 static void
1197 inc_vnode_ref_count(struct vnode* vnode)
1198 {
1199 	atomic_add(&vnode->ref_count, 1);
1200 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1201 		vnode->ref_count));
1202 }
1203 
1204 
1205 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
1206 
1207 	The caller must hold the sVnodeMutex.
1208 
1209 	\param mountID the mount ID.
1210 	\param vnodeID the node ID.
1211 
1212 	\return The vnode structure, if it was found in the hash table, \c NULL
1213 			otherwise.
1214 */
1215 static struct vnode*
1216 lookup_vnode(dev_t mountID, ino_t vnodeID)
1217 {
1218 	struct vnode_hash_key key;
1219 
1220 	key.device = mountID;
1221 	key.vnode = vnodeID;
1222 
1223 	return (vnode*)hash_lookup(sVnodeTable, &key);
1224 }
1225 
1226 
1227 static bool
1228 is_special_node_type(int type)
1229 {
1230 	// at the moment only FIFOs are supported
1231 	return S_ISFIFO(type);
1232 }
1233 
1234 
1235 static status_t
1236 create_special_sub_node(struct vnode* vnode, uint32 flags)
1237 {
1238 	if (S_ISFIFO(vnode->type))
1239 		return create_fifo_vnode(vnode->mount->volume, vnode);
1240 
1241 	return B_BAD_VALUE;
1242 }
1243 
1244 
1245 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1246 
1247 	If the node is not yet in memory, it will be loaded.
1248 
1249 	The caller must not hold the sVnodeMutex or the sMountMutex.
1250 
1251 	\param mountID the mount ID.
1252 	\param vnodeID the node ID.
1253 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1254 		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function shall wait (up to ten seconds)
		   for a vnode that is currently marked busy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
1258 */
1259 static status_t
1260 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1261 	int reenter)
1262 {
1263 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1264 		_vnode));
1265 
1266 	mutex_lock(&sVnodeMutex);
1267 
1268 	int32 tries = 1000;
1269 		// try for 10 secs
1270 restart:
1271 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1272 	if (vnode && vnode->busy) {
1273 		mutex_unlock(&sVnodeMutex);
1274 		if (!canWait || --tries < 0) {
1275 			// vnode doesn't seem to become unbusy
1276 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1277 				vnodeID);
1278 			return B_BUSY;
1279 		}
1280 		snooze(10000); // 10 ms
1281 		mutex_lock(&sVnodeMutex);
1282 		goto restart;
1283 	}
1284 
1285 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1286 
1287 	status_t status;
1288 
1289 	if (vnode) {
1290 		if (vnode->ref_count == 0) {
1291 			// this vnode has been unused before
1292 			list_remove_item(&sUnusedVnodeList, vnode);
1293 			sUnusedVnodes--;
1294 		}
1295 		inc_vnode_ref_count(vnode);
1296 	} else {
1297 		// we need to create a new vnode and read it in
1298 		status = create_new_vnode(&vnode, mountID, vnodeID);
1299 		if (status != B_OK)
1300 			goto err;
1301 
1302 		vnode->busy = true;
1303 		mutex_unlock(&sVnodeMutex);
1304 
1305 		int type;
1306 		uint32 flags;
1307 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1308 			&flags, reenter);
1309 		if (status == B_OK && vnode->private_node == NULL)
1310 			status = B_BAD_VALUE;
1311 
1312 		bool gotNode = status == B_OK;
1313 		bool publishSpecialSubNode = false;
1314 		if (gotNode) {
1315 			vnode->type = type;
1316 			publishSpecialSubNode = is_special_node_type(type)
1317 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1318 		}
1319 
1320 		if (gotNode && publishSpecialSubNode)
1321 			status = create_special_sub_node(vnode, flags);
1322 
1323 		mutex_lock(&sVnodeMutex);
1324 
1325 		if (status != B_OK) {
1326 			if (gotNode)
1327 				FS_CALL(vnode, put_vnode, reenter);
1328 
1329 			goto err1;
1330 		}
1331 
1332 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
1333 		vnode->busy = false;
1334 	}
1335 
1336 	mutex_unlock(&sVnodeMutex);
1337 
1338 	TRACE(("get_vnode: returning %p\n", vnode));
1339 
1340 	*_vnode = vnode;
1341 	return B_OK;
1342 
1343 err1:
1344 	hash_remove(sVnodeTable, vnode);
1345 	remove_vnode_from_mount_list(vnode, vnode->mount);
1346 err:
1347 	mutex_unlock(&sVnodeMutex);
1348 	if (vnode)
1349 		free(vnode);
1350 
1351 	return status;
1352 }
1353 
1354 
1355 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1356 	if the counter dropped to 0.
1357 
1358 	The caller must, of course, own a reference to the vnode to call this
1359 	function.
1360 	The caller must not hold the sVnodeMutex or the sMountMutex.
1361 
1362 	\param vnode the vnode.
1363 */
1364 static inline void
1365 put_vnode(struct vnode* vnode)
1366 {
1367 	dec_vnode_ref_count(vnode, false, false);
1368 }
1369 
1370 
1371 static void
1372 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1373 {
1374 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1375 
1376 	uint32 count = 1;
1377 	switch (level) {
1378 		case B_NO_LOW_RESOURCE:
1379 			return;
1380 		case B_LOW_RESOURCE_NOTE:
1381 			count = sUnusedVnodes / 100;
1382 			break;
1383 		case B_LOW_RESOURCE_WARNING:
1384 			count = sUnusedVnodes / 10;
1385 			break;
1386 		case B_LOW_RESOURCE_CRITICAL:
1387 			count = sUnusedVnodes;
1388 			break;
1389 	}
1390 
1391 	if (count > sUnusedVnodes)
1392 		count = sUnusedVnodes;
1393 
1394 	// Write back the modified pages of some unused vnodes and free them
1395 
1396 	for (uint32 i = 0; i < count; i++) {
1397 		mutex_lock(&sVnodeMutex);
1398 		struct vnode* vnode = (struct vnode*)list_remove_head_item(
1399 			&sUnusedVnodeList);
1400 		if (vnode == NULL) {
1401 			mutex_unlock(&sVnodeMutex);
1402 			break;
1403 		}
1404 
1405 		inc_vnode_ref_count(vnode);
1406 		sUnusedVnodes--;
1407 
1408 		mutex_unlock(&sVnodeMutex);
1409 
1410 		if (vnode->cache != NULL)
1411 			vnode->cache->WriteModified();
1412 
1413 		dec_vnode_ref_count(vnode, true, false);
1414 			// this should free the vnode when it's still unused
1415 	}
1416 }
1417 
1418 
1419 static inline void
1420 put_advisory_locking(struct advisory_locking* locking)
1421 {
1422 	release_sem(locking->lock);
1423 }
1424 
1425 
1426 /*!	Returns the advisory_locking object of the \a vnode in case it
1427 	has one, and locks it.
1428 	You have to call put_advisory_locking() when you're done with
1429 	it.
1430 	Note, you must not have the vnode mutex locked when calling
1431 	this function.
1432 */
1433 static struct advisory_locking*
1434 get_advisory_locking(struct vnode* vnode)
1435 {
1436 	mutex_lock(&sVnodeMutex);
1437 
1438 	struct advisory_locking* locking = vnode->advisory_locking;
1439 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1440 
1441 	mutex_unlock(&sVnodeMutex);
1442 
1443 	if (lock >= 0)
1444 		lock = acquire_sem(lock);
1445 	if (lock < 0) {
1446 		// This means the locking has been deleted in the mean time
1447 		// or had never existed in the first place - otherwise, we
1448 		// would get the lock at some point.
1449 		return NULL;
1450 	}
1451 
1452 	return locking;
1453 }
1454 
1455 
1456 /*!	Creates a locked advisory_locking object, and attaches it to the
1457 	given \a vnode.
1458 	Returns B_OK in case of success - also if the vnode got such an
1459 	object from someone else in the mean time, you'll still get this
1460 	one locked then.
1461 */
1462 static status_t
1463 create_advisory_locking(struct vnode* vnode)
1464 {
1465 	if (vnode == NULL)
1466 		return B_FILE_ERROR;
1467 
1468 	ObjectDeleter<advisory_locking> lockingDeleter;
1469 	struct advisory_locking* locking = NULL;
1470 
1471 	while (get_advisory_locking(vnode) == NULL) {
1472 		// no locking object set on the vnode yet, create one
1473 		if (locking == NULL) {
1474 			locking = new(std::nothrow) advisory_locking;
1475 			if (locking == NULL)
1476 				return B_NO_MEMORY;
1477 			lockingDeleter.SetTo(locking);
1478 
1479 			locking->wait_sem = create_sem(0, "advisory lock");
1480 			if (locking->wait_sem < 0)
1481 				return locking->wait_sem;
1482 
1483 			locking->lock = create_sem(0, "advisory locking");
1484 			if (locking->lock < 0)
1485 				return locking->lock;
1486 		}
1487 
1488 		// set our newly created locking object
1489 		MutexLocker _(sVnodeMutex);
1490 		if (vnode->advisory_locking == NULL) {
1491 			vnode->advisory_locking = locking;
1492 			lockingDeleter.Detach();
1493 			return B_OK;
1494 		}
1495 	}
1496 
1497 	// The vnode already had a locking object. That's just as well.
1498 
1499 	return B_OK;
1500 }
1501 
1502 
1503 /*!	Retrieves the first lock that has been set by the current team.
1504 */
1505 static status_t
1506 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1507 {
1508 	struct advisory_locking* locking = get_advisory_locking(vnode);
1509 	if (locking == NULL)
1510 		return B_BAD_VALUE;
1511 
1512 	// TODO: this should probably get the flock by its file descriptor!
1513 	team_id team = team_get_current_team_id();
1514 	status_t status = B_BAD_VALUE;
1515 
1516 	LockList::Iterator iterator = locking->locks.GetIterator();
1517 	while (iterator.HasNext()) {
1518 		struct advisory_lock* lock = iterator.Next();
1519 
1520 		if (lock->team == team) {
1521 			flock->l_start = lock->start;
1522 			flock->l_len = lock->end - lock->start + 1;
1523 			status = B_OK;
1524 			break;
1525 		}
1526 	}
1527 
1528 	put_advisory_locking(locking);
1529 	return status;
1530 }
1531 
1532 
/*! Returns \c true when either \a flock is \c NULL or \a flock intersects
1534 	with the advisory_lock \a lock.
1535 */
1536 static bool
1537 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1538 {
1539 	if (flock == NULL)
1540 		return true;
1541 
1542 	return lock->start <= flock->l_start - 1 + flock->l_len
1543 		&& lock->end >= flock->l_start;
1544 }
1545 
1546 
1547 /*!	Removes the specified lock, or all locks of the calling team
1548 	if \a flock is NULL.
1549 */
1550 static status_t
1551 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1552 {
1553 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1554 
1555 	struct advisory_locking* locking = get_advisory_locking(vnode);
1556 	if (locking == NULL)
1557 		return B_OK;
1558 
1559 	// TODO: use the thread ID instead??
1560 	team_id team = team_get_current_team_id();
1561 	pid_t session = thread_get_current_thread()->team->session_id;
1562 
1563 	// find matching lock entries
1564 
1565 	LockList::Iterator iterator = locking->locks.GetIterator();
1566 	while (iterator.HasNext()) {
1567 		struct advisory_lock* lock = iterator.Next();
1568 		bool removeLock = false;
1569 
1570 		if (lock->session == session)
1571 			removeLock = true;
1572 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1573 			bool endsBeyond = false;
1574 			bool startsBefore = false;
1575 			if (flock != NULL) {
1576 				startsBefore = lock->start < flock->l_start;
1577 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1578 			}
1579 
1580 			if (!startsBefore && !endsBeyond) {
1581 				// lock is completely contained in flock
1582 				removeLock = true;
1583 			} else if (startsBefore && !endsBeyond) {
1584 				// cut the end of the lock
1585 				lock->end = flock->l_start - 1;
1586 			} else if (!startsBefore && endsBeyond) {
1587 				// cut the start of the lock
1588 				lock->start = flock->l_start + flock->l_len;
1589 			} else {
1590 				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// entries are allocated with malloc() and released with
					// free() below, cf. acquire_advisory_lock()
1592 				if (secondLock == NULL) {
1593 					// TODO: we should probably revert the locks we already
1594 					// changed... (ie. allocate upfront)
1595 					put_advisory_locking(locking);
1596 					return B_NO_MEMORY;
1597 				}
1598 
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// take over the original end before it is truncated
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;
1607 
1608 				locking->locks.Add(secondLock);
1609 			}
1610 		}
1611 
1612 		if (removeLock) {
1613 			// this lock is no longer used
1614 			iterator.Remove();
1615 			free(lock);
1616 		}
1617 	}
1618 
1619 	bool removeLocking = locking->locks.IsEmpty();
1620 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1621 
1622 	put_advisory_locking(locking);
1623 
1624 	if (removeLocking) {
1625 		// We can remove the whole advisory locking structure; it's no
1626 		// longer used
1627 		locking = get_advisory_locking(vnode);
1628 		if (locking != NULL) {
1629 			MutexLocker locker(sVnodeMutex);
1630 
1631 			// the locking could have been changed in the mean time
1632 			if (locking->locks.IsEmpty()) {
1633 				vnode->advisory_locking = NULL;
1634 				locker.Unlock();
1635 
1636 				// we've detached the locking from the vnode, so we can
1637 				// safely delete it
1638 				delete_sem(locking->lock);
1639 				delete_sem(locking->wait_sem);
1640 				delete locking;
1641 			} else {
1642 				// the locking is in use again
1643 				locker.Unlock();
1644 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1645 			}
1646 		}
1647 	}
1648 
1649 	return B_OK;
1650 }
1651 
1652 
/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available if there are any collisions
	(if \a wait is \c false, it returns B_PERMISSION_DENIED in this case).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though that
	seems to be in line with what the BSDs are doing).
1661 */
1662 static status_t
1663 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1664 	bool wait)
1665 {
1666 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1667 		vnode, flock, wait ? "yes" : "no"));
1668 
1669 	bool shared = flock->l_type == F_RDLCK;
1670 	status_t status = B_OK;
1671 
1672 	// TODO: do deadlock detection!
1673 
1674 	struct advisory_locking* locking;
1675 	sem_id waitForLock;
1676 
1677 	while (true) {
1678 		// if this vnode has an advisory_locking structure attached,
1679 		// lock that one and search for any colliding file lock
1680 		status = create_advisory_locking(vnode);
1681 		if (status != B_OK)
1682 			return status;
1683 
1684 		locking = vnode->advisory_locking;
1685 		team_id team = team_get_current_team_id();
1686 		waitForLock = -1;
1687 
1688 		// test for collisions
1689 		LockList::Iterator iterator = locking->locks.GetIterator();
1690 		while (iterator.HasNext()) {
1691 			struct advisory_lock* lock = iterator.Next();
1692 
1693 			// TODO: locks from the same team might be joinable!
1694 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1695 				// locks do overlap
1696 				if (!shared || !lock->shared) {
1697 					// we need to wait
1698 					waitForLock = locking->wait_sem;
1699 					break;
1700 				}
1701 			}
1702 		}
1703 
1704 		if (waitForLock < 0)
1705 			break;
1706 
1707 		// We need to wait. Do that or fail now, if we've been asked not to.
1708 
1709 		if (!wait) {
1710 			put_advisory_locking(locking);
1711 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1712 		}
1713 
1714 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1715 			B_CAN_INTERRUPT, 0);
1716 		if (status != B_OK && status != B_BAD_SEM_ID)
1717 			return status;
1718 
1719 		// We have been notified, but we need to re-lock the locking object. So
1720 		// go another round...
1721 	}
1722 
1723 	// install new lock
1724 
1725 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1726 		sizeof(struct advisory_lock));
1727 	if (lock == NULL) {
1728 		if (waitForLock >= B_OK)
1729 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1730 		release_sem(locking->lock);
1731 		return B_NO_MEMORY;
1732 	}
1733 
1734 	lock->team = team_get_current_team_id();
1735 	lock->session = session;
1736 	// values must already be normalized when getting here
1737 	lock->start = flock->l_start;
1738 	lock->end = flock->l_start - 1 + flock->l_len;
1739 	lock->shared = shared;
1740 
1741 	locking->locks.Add(lock);
1742 	put_advisory_locking(locking);
1743 
1744 	return status;
1745 }
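

// Collision semantics in a nutshell: two shared (F_RDLCK) locks on
// overlapping ranges coexist, while an exclusive lock conflicts with any
// overlapping lock of another team. With \a wait the caller blocks on the
// wait_sem until release_advisory_lock() broadcasts it, and then retests.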
1746 
1747 
1748 /*!	Normalizes the \a flock structure to make it easier to compare the
1749 	structure with others. The l_start and l_len fields are set to absolute
1750 	values according to the l_whence field.
1751 */
1752 static status_t
1753 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1754 {
1755 	switch (flock->l_whence) {
1756 		case SEEK_SET:
1757 			break;
1758 		case SEEK_CUR:
1759 			flock->l_start += descriptor->pos;
1760 			break;
1761 		case SEEK_END:
1762 		{
1763 			struct vnode* vnode = descriptor->u.vnode;
1764 			struct stat stat;
1765 			status_t status;
1766 
1767 			if (!HAS_FS_CALL(vnode, read_stat))
1768 				return EOPNOTSUPP;
1769 
1770 			status = FS_CALL(vnode, read_stat, &stat);
1771 			if (status != B_OK)
1772 				return status;
1773 
1774 			flock->l_start += stat.st_size;
1775 			break;
1776 		}
1777 		default:
1778 			return B_BAD_VALUE;
1779 	}
1780 
1781 	if (flock->l_start < 0)
1782 		flock->l_start = 0;
1783 	if (flock->l_len == 0)
1784 		flock->l_len = OFF_MAX;
1785 
1786 	// don't let the offset and length overflow
1787 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1788 		flock->l_len = OFF_MAX - flock->l_start;
1789 
1790 	if (flock->l_len < 0) {
1791 		// a negative length reverses the region
1792 		flock->l_start += flock->l_len;
1793 		flock->l_len = -flock->l_len;
1794 	}
1795 
1796 	return B_OK;
1797 }
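

// Worked example: with descriptor->pos == 1000, a request of
//   { l_whence = SEEK_CUR, l_start = -200, l_len = 100 }
// normalizes to the absolute region l_start = 800, l_len = 100. A request
// with l_len == 0 is extended to OFF_MAX ("up to the end of the file"),
// and a negative length reverses the region, e.g. l_start = 500,
// l_len = -100 becomes l_start = 400, l_len = 100.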
1798 
1799 
1800 static void
1801 replace_vnode_if_disconnected(struct fs_mount* mount,
1802 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1803 	struct vnode* fallBack, bool lockRootLock)
1804 {
1805 	if (lockRootLock)
1806 		mutex_lock(&sIOContextRootLock);
1807 
1808 	struct vnode* obsoleteVnode = NULL;
1809 
1810 	if (vnode != NULL && vnode->mount == mount
1811 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1812 		obsoleteVnode = vnode;
1813 
1814 		if (vnode == mount->root_vnode) {
1815 			// redirect the vnode to the covered vnode
1816 			vnode = mount->covers_vnode;
1817 		} else
1818 			vnode = fallBack;
1819 
1820 		if (vnode != NULL)
1821 			inc_vnode_ref_count(vnode);
1822 	}
1823 
1824 	if (lockRootLock)
1825 		mutex_unlock(&sIOContextRootLock);
1826 
1827 	if (obsoleteVnode != NULL)
1828 		put_vnode(obsoleteVnode);
1829 }
1830 
1831 
1832 /*!	Disconnects all file descriptors that are associated with the
1833 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1834 	\a mount object.
1835 
1836 	Note, after you've called this function, there might still be ongoing
	accesses - those that were already in progress won't be interrupted.
1838 	However, any subsequent access will fail.
1839 
1840 	This is not a cheap function and should be used with care and rarely.
1841 	TODO: there is currently no means to stop a blocking read/write!
1842 */
1843 void
1844 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1845 	struct vnode* vnodeToDisconnect)
1846 {
1847 	// iterate over all teams and peek into their file descriptors
1848 	int32 nextTeamID = 0;
1849 
1850 	while (true) {
1851 		struct io_context* context = NULL;
1852 		bool contextLocked = false;
1853 		struct team* team = NULL;
1854 		team_id lastTeamID;
1855 
1856 		cpu_status state = disable_interrupts();
1857 		SpinLocker teamsLock(gTeamSpinlock);
1858 
1859 		lastTeamID = peek_next_thread_id();
1860 		if (nextTeamID < lastTeamID) {
1861 			// get next valid team
1862 			while (nextTeamID < lastTeamID
1863 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1864 				nextTeamID++;
1865 			}
1866 
1867 			if (team) {
1868 				context = (io_context*)team->io_context;
1869 
1870 				// Some acrobatics to lock the context in a safe way
1871 				// (cf. _kern_get_next_fd_info() for details).
1872 				GRAB_THREAD_LOCK();
1873 				teamsLock.Unlock();
1874 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1875 					== B_OK;
1876 				RELEASE_THREAD_LOCK();
1877 
1878 				nextTeamID++;
1879 			}
1880 		}
1881 
1882 		teamsLock.Unlock();
1883 		restore_interrupts(state);
1884 
1885 		if (context == NULL)
1886 			break;
1887 
1888 		// we now have a context - since we couldn't lock it while having
1889 		// safe access to the team structure, we now need to lock the mutex
1890 		// manually
1891 
1892 		if (!contextLocked) {
1893 			// team seems to be gone, go over to the next team
1894 			continue;
1895 		}
1896 
1897 		// the team cannot be deleted completely while we're owning its
1898 		// io_context mutex, so we can safely play with it now
1899 
1900 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1901 			sRoot, true);
1902 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1903 			sRoot, false);
1904 
1905 		for (uint32 i = 0; i < context->table_size; i++) {
1906 			if (struct file_descriptor* descriptor = context->fds[i]) {
1907 				inc_fd_ref_count(descriptor);
1908 
1909 				// if this descriptor points at this mount, we
1910 				// need to disconnect it to be able to unmount
1911 				struct vnode* vnode = fd_vnode(descriptor);
1912 				if (vnodeToDisconnect != NULL) {
1913 					if (vnode == vnodeToDisconnect)
1914 						disconnect_fd(descriptor);
1915 				} else if ((vnode != NULL && vnode->mount == mount)
1916 					|| (vnode == NULL && descriptor->u.mount == mount))
1917 					disconnect_fd(descriptor);
1918 
1919 				put_fd(descriptor);
1920 			}
1921 		}
1922 
1923 		mutex_unlock(&context->io_mutex);
1924 	}
1925 }
1926 
1927 
1928 /*!	\brief Gets the root node of the current IO context.
1929 	If \a kernel is \c true, the kernel IO context will be used.
1930 	The caller obtains a reference to the returned node.
1931 */
1932 struct vnode*
1933 get_root_vnode(bool kernel)
1934 {
1935 	if (!kernel) {
		// Get the root directory from the IO context
1937 		struct io_context* context = get_current_io_context(kernel);
1938 
1939 		mutex_lock(&sIOContextRootLock);
1940 
1941 		struct vnode* root = context->root;
1942 		if (root != NULL)
1943 			inc_vnode_ref_count(root);
1944 
1945 		mutex_unlock(&sIOContextRootLock);
1946 
1947 		if (root != NULL)
1948 			return root;
1949 
1950 		// That should never happen.
1951 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1952 			"root\n", team_get_current_team_id());
1953 	}
1954 
1955 	inc_vnode_ref_count(sRoot);
1956 	return sRoot;
1957 }
1958 
1959 
1960 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1961 		   by.
1962 
	Given an arbitrary vnode, the function checks whether the node is covered
	by the root of a volume. If it is, the function obtains a reference to the
	volume root node and returns it.

	\param vnode The vnode in question.
	\return The root vnode of the volume covering \a vnode, if it is indeed
			a mount point, or \c NULL otherwise.
1970 */
1971 static struct vnode*
1972 resolve_mount_point_to_volume_root(struct vnode* vnode)
1973 {
1974 	if (!vnode)
1975 		return NULL;
1976 
1977 	struct vnode* volumeRoot = NULL;
1978 
1979 	mutex_lock(&sVnodeCoveredByMutex);
1980 	if (vnode->covered_by) {
1981 		volumeRoot = vnode->covered_by;
1982 		inc_vnode_ref_count(volumeRoot);
1983 	}
1984 	mutex_unlock(&sVnodeCoveredByMutex);
1985 
1986 	return volumeRoot;
1987 }
1988 
1989 
1990 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1991 		   by.
1992 
	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the node is covered by the root of a volume. If it is, the
	function returns the mount and node ID of the volume root node. Otherwise
	it simply returns the supplied mount and node ID.
1997 
1998 	In case of error (e.g. the supplied node could not be found) the variables
1999 	for storing the resolved mount and node ID remain untouched and an error
2000 	code is returned.
2001 
2002 	\param mountID The mount ID of the vnode in question.
2003 	\param nodeID The node ID of the vnode in question.
2004 	\param resolvedMountID Pointer to storage for the resolved mount ID.
2005 	\param resolvedNodeID Pointer to storage for the resolved node ID.
2006 	\return
2007 	- \c B_OK, if everything went fine,
2008 	- another error code, if something went wrong.
2009 */
2010 status_t
2011 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
2012 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
2013 {
2014 	// get the node
2015 	struct vnode* node;
2016 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
2017 	if (error != B_OK)
2018 		return error;
2019 
2020 	// resolve the node
2021 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
2022 	if (resolvedNode) {
2023 		put_vnode(node);
2024 		node = resolvedNode;
2025 	}
2026 
2027 	// set the return values
2028 	*resolvedMountID = node->device;
2029 	*resolvedNodeID = node->id;
2030 
2031 	put_vnode(node);
2032 
2033 	return B_OK;
2034 }
2035 
2036 
2037 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
2038 
	Given an arbitrary vnode, the function checks whether the node is the
2040 	root of a volume. If it is (and if it is not "/"), the function obtains
2041 	a reference to the underlying mount point node and returns it.
2042 
2043 	\param vnode The vnode in question (caller must have a reference).
2044 	\return The mount point vnode the vnode covers, if it is indeed a volume
2045 			root and not "/", or \c NULL otherwise.
2046 */
2047 static struct vnode*
2048 resolve_volume_root_to_mount_point(struct vnode* vnode)
2049 {
2050 	if (!vnode)
2051 		return NULL;
2052 
2053 	struct vnode* mountPoint = NULL;
2054 
2055 	struct fs_mount* mount = vnode->mount;
2056 	if (vnode == mount->root_vnode && mount->covers_vnode) {
2057 		mountPoint = mount->covers_vnode;
2058 		inc_vnode_ref_count(mountPoint);
2059 	}
2060 
2061 	return mountPoint;
2062 }
2063 
2064 
2065 /*!	\brief Gets the directory path and leaf name for a given path.
2066 
2067 	The supplied \a path is transformed to refer to the directory part of
2068 	the entry identified by the original path, and into the buffer \a filename
2069 	the leaf name of the original entry is written.
2070 	Neither the returned path nor the leaf name can be expected to be
2071 	canonical.
2072 
2073 	\param path The path to be analyzed. Must be able to store at least one
2074 		   additional character.
2075 	\param filename The buffer into which the leaf name will be written.
2076 		   Must be of size B_FILE_NAME_LENGTH at least.
2077 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2078 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2079 		   if the given path name is empty.
2080 */
2081 static status_t
2082 get_dir_path_and_leaf(char* path, char* filename)
2083 {
2084 	if (*path == '\0')
2085 		return B_ENTRY_NOT_FOUND;
2086 
2087 	char* last = strrchr(path, '/');
2088 		// '/' are not allowed in file names!
2089 
2090 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2091 
2092 	if (last == NULL) {
2093 		// this path is single segment with no '/' in it
2094 		// ex. "foo"
2095 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2096 			return B_NAME_TOO_LONG;
2097 
2098 		strcpy(path, ".");
2099 	} else {
2100 		last++;
2101 		if (last[0] == '\0') {
2102 			// special case: the path ends in one or more '/' - remove them
2103 			while (*--last == '/' && last != path);
2104 			last[1] = '\0';
2105 
2106 			if (last == path && last[0] == '/') {
2107 				// This path points to the root of the file system
2108 				strcpy(filename, ".");
2109 				return B_OK;
2110 			}
2111 			for (; last != path && *(last - 1) != '/'; last--);
2112 				// rewind to the start of the leaf before the '/'
2113 		}
2114 
2115 		// normal leaf: replace the leaf portion of the path with a '.'
2116 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2117 			return B_NAME_TOO_LONG;
2118 
2119 		last[0] = '.';
2120 		last[1] = '\0';
2121 	}
2122 	return B_OK;
2123 }
2124 
2125 
2126 static status_t
2127 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2128 	bool traverse, bool kernel, struct vnode** _vnode)
2129 {
2130 	char clonedName[B_FILE_NAME_LENGTH + 1];
2131 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2132 		return B_NAME_TOO_LONG;
2133 
2134 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2135 	struct vnode* directory;
2136 
2137 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2138 	if (status < 0)
2139 		return status;
2140 
2141 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2142 		_vnode, NULL);
2143 }
2144 
2145 
2146 static status_t
2147 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2148 {
2149 	ino_t id;
2150 
2151 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2152 		return get_vnode(dir->device, id, _vnode, true, false);
2153 
2154 	status_t status = FS_CALL(dir, lookup, name, &id);
2155 	if (status != B_OK)
2156 		return status;
2157 
2158 	mutex_lock(&sVnodeMutex);
2159 	*_vnode = lookup_vnode(dir->device, id);
2160 	mutex_unlock(&sVnodeMutex);
2161 
2162 	if (*_vnode == NULL) {
2163 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2164 			"0x%Lx)\n", dir->device, id);
2165 		return B_ENTRY_NOT_FOUND;
2166 	}
2167 
2168 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2169 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2170 //		(*_vnode)->mount->id, (*_vnode)->id);
2171 
2172 	return B_OK;
2173 }
2174 
2175 
2176 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2177 	\a path must not be NULL.
2178 	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this function decrements the ref count of the starting
	\a vnode, whether it succeeds or not!
2183 */
2184 static status_t
2185 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2186 	int count, struct io_context* ioContext, struct vnode** _vnode,
2187 	ino_t* _parentID)
2188 {
2189 	status_t status = B_OK;
2190 	ino_t lastParentID = vnode->id;
2191 
2192 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2193 
2194 	if (path == NULL) {
2195 		put_vnode(vnode);
2196 		return B_BAD_VALUE;
2197 	}
2198 
2199 	if (*path == '\0') {
2200 		put_vnode(vnode);
2201 		return B_ENTRY_NOT_FOUND;
2202 	}
2203 
2204 	while (true) {
2205 		struct vnode* nextVnode;
2206 		char* nextPath;
2207 
2208 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2209 			path));
2210 
2211 		// done?
2212 		if (path[0] == '\0')
2213 			break;
2214 
2215 		// walk to find the next path component ("path" will point to a single
2216 		// path component), and filter out multiple slashes
2217 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2218 				nextPath++);
2219 
2220 		if (*nextPath == '/') {
2221 			*nextPath = '\0';
2222 			do
2223 				nextPath++;
2224 			while (*nextPath == '/');
2225 		}
2226 
2227 		// See if the '..' is at the root of a mount and move to the covered
2228 		// vnode so we pass the '..' path to the underlying filesystem.
2229 		// Also prevent breaking the root of the IO context.
2230 		if (strcmp("..", path) == 0) {
2231 			if (vnode == ioContext->root) {
2232 				// Attempted prison break! Keep it contained.
2233 				path = nextPath;
2234 				continue;
2235 			} else if (vnode->mount->root_vnode == vnode
2236 				&& vnode->mount->covers_vnode) {
2237 				nextVnode = vnode->mount->covers_vnode;
2238 				inc_vnode_ref_count(nextVnode);
2239 				put_vnode(vnode);
2240 				vnode = nextVnode;
2241 			}
2242 		}
2243 
2244 		// check if vnode is really a directory
2245 		if (status == B_OK && !S_ISDIR(vnode->type))
2246 			status = B_NOT_A_DIRECTORY;
2247 
2248 		// Check if we have the right to search the current directory vnode.
2249 		// If a file system doesn't have the access() function, we assume that
2250 		// searching a directory is always allowed
2251 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2252 			status = FS_CALL(vnode, access, X_OK);
2253 
2254 		// Tell the filesystem to get the vnode of this path component (if we
2255 		// got the permission from the call above)
2256 		if (status == B_OK)
2257 			status = lookup_dir_entry(vnode, path, &nextVnode);
2258 
2259 		if (status != B_OK) {
2260 			put_vnode(vnode);
2261 			return status;
2262 		}
2263 
2264 		// If the new node is a symbolic link, resolve it (if we've been told
2265 		// to do it)
2266 		if (S_ISLNK(nextVnode->type)
2267 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2268 			size_t bufferSize;
2269 			char* buffer;
2270 
2271 			TRACE(("traverse link\n"));
2272 
2273 			// it's not exactly nice style using goto in this way, but hey,
2274 			// it works :-/
2275 			if (count + 1 > B_MAX_SYMLINKS) {
2276 				status = B_LINK_LIMIT;
2277 				goto resolve_link_error;
2278 			}
2279 
2280 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2281 			if (buffer == NULL) {
2282 				status = B_NO_MEMORY;
2283 				goto resolve_link_error;
2284 			}
2285 
2286 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2287 				bufferSize--;
2288 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2289 				// null-terminate
2290 				if (status >= 0)
2291 					buffer[bufferSize] = '\0';
2292 			} else
2293 				status = B_BAD_VALUE;
2294 
2295 			if (status != B_OK) {
2296 				free(buffer);
2297 
2298 		resolve_link_error:
2299 				put_vnode(vnode);
2300 				put_vnode(nextVnode);
2301 
2302 				return status;
2303 			}
2304 			put_vnode(nextVnode);
2305 
2306 			// Check if we start from the root directory or the current
2307 			// directory ("vnode" still points to that one).
2308 			// Cut off all leading slashes if it's the root directory
2309 			path = buffer;
2310 			bool absoluteSymlink = false;
2311 			if (path[0] == '/') {
2312 				// we don't need the old directory anymore
2313 				put_vnode(vnode);
2314 
2315 				while (*++path == '/')
2316 					;
2317 
2318 				mutex_lock(&sIOContextRootLock);
2319 				vnode = ioContext->root;
2320 				inc_vnode_ref_count(vnode);
2321 				mutex_unlock(&sIOContextRootLock);
2322 
2323 				absoluteSymlink = true;
2324 			}
2325 
2326 			inc_vnode_ref_count(vnode);
2327 				// balance the next recursion - we will decrement the
2328 				// ref_count of the vnode, no matter if we succeeded or not
2329 
2330 			if (absoluteSymlink && *path == '\0') {
2331 				// symlink was just "/"
2332 				nextVnode = vnode;
2333 			} else {
2334 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2335 					ioContext, &nextVnode, &lastParentID);
2336 			}
2337 
2338 			free(buffer);
2339 
2340 			if (status != B_OK) {
2341 				put_vnode(vnode);
2342 				return status;
2343 			}
2344 		} else
2345 			lastParentID = vnode->id;
2346 
2347 		// decrease the ref count on the old dir we just looked up into
2348 		put_vnode(vnode);
2349 
2350 		path = nextPath;
2351 		vnode = nextVnode;
2352 
2353 		// see if we hit a mount point
2354 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2355 		if (mountPoint) {
2356 			put_vnode(vnode);
2357 			vnode = mountPoint;
2358 		}
2359 	}
2360 
2361 	*_vnode = vnode;
2362 	if (_parentID)
2363 		*_parentID = lastParentID;
2364 
2365 	return B_OK;
2366 }
2367 
2368 
2369 static status_t
2370 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2371 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2372 {
2373 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2374 		get_current_io_context(kernel), _vnode, _parentID);
2375 }
2376 
2377 
2378 static status_t
2379 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2380 	ino_t* _parentID, bool kernel)
2381 {
2382 	struct vnode* start = NULL;
2383 
2384 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2385 
2386 	if (!path)
2387 		return B_BAD_VALUE;
2388 
2389 	if (*path == '\0')
2390 		return B_ENTRY_NOT_FOUND;
2391 
2392 	// figure out if we need to start at root or at cwd
2393 	if (*path == '/') {
2394 		if (sRoot == NULL) {
2395 			// we're a bit early, aren't we?
2396 			return B_ERROR;
2397 		}
2398 
2399 		while (*++path == '/')
2400 			;
2401 		start = get_root_vnode(kernel);
2402 
2403 		if (*path == '\0') {
2404 			*_vnode = start;
2405 			return B_OK;
2406 		}
2407 
2408 	} else {
2409 		struct io_context* context = get_current_io_context(kernel);
2410 
2411 		mutex_lock(&context->io_mutex);
2412 		start = context->cwd;
2413 		if (start != NULL)
2414 			inc_vnode_ref_count(start);
2415 		mutex_unlock(&context->io_mutex);
2416 
2417 		if (start == NULL)
2418 			return B_ERROR;
2419 	}
2420 
2421 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2422 		_parentID);
2423 }
2424 
2425 
/*! Returns the vnode of the next to last segment of the path, and returns
	the name of the last path component in \a filename.
2428 	The path buffer must be able to store at least one additional character.
2429 */
2430 static status_t
2431 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2432 	bool kernel)
2433 {
2434 	status_t status = get_dir_path_and_leaf(path, filename);
2435 	if (status != B_OK)
2436 		return status;
2437 
2438 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2439 }
2440 
2441 
2442 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2443 		   to by a FD + path pair.
2444 
	\a path must be given in either case. \a fd might be omitted, in which
	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2450 
2451 	The caller has the responsibility to call put_vnode() on the returned
2452 	directory vnode.
2453 
2454 	\param fd The FD. May be < 0.
2455 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2456 	       is modified by this function. It must have at least room for a
2457 	       string one character longer than the path it contains.
2458 	\param _vnode A pointer to a variable the directory vnode shall be written
2459 		   into.
2460 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2461 		   the leaf name of the specified entry will be written.
2462 	\param kernel \c true, if invoked from inside the kernel, \c false if
2463 		   invoked from userland.
2464 	\return \c B_OK, if everything went fine, another error code otherwise.
2465 */
2466 static status_t
2467 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2468 	char* filename, bool kernel)
2469 {
2470 	if (!path)
2471 		return B_BAD_VALUE;
2472 	if (*path == '\0')
2473 		return B_ENTRY_NOT_FOUND;
2474 	if (fd < 0)
2475 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2476 
2477 	status_t status = get_dir_path_and_leaf(path, filename);
2478 	if (status != B_OK)
2479 		return status;
2480 
2481 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2482 }
2483 
2484 
2485 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2486 		   to by a vnode + path pair.
2487 
	\a path must be given in either case. \a vnode might be omitted, in which
	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2493 
2494 	The caller has the responsibility to call put_vnode() on the returned
2495 	directory vnode.
2496 
2497 	\param vnode The vnode. May be \c NULL.
2498 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2499 	       is modified by this function. It must have at least room for a
2500 	       string one character longer than the path it contains.
2501 	\param _vnode A pointer to a variable the directory vnode shall be written
2502 		   into.
2503 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2504 		   the leaf name of the specified entry will be written.
2505 	\param kernel \c true, if invoked from inside the kernel, \c false if
2506 		   invoked from userland.
2507 	\return \c B_OK, if everything went fine, another error code otherwise.
2508 */
2509 static status_t
2510 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2511 	struct vnode** _vnode, char* filename, bool kernel)
2512 {
2513 	if (!path)
2514 		return B_BAD_VALUE;
2515 	if (*path == '\0')
2516 		return B_ENTRY_NOT_FOUND;
2517 	if (vnode == NULL || path[0] == '/')
2518 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2519 
2520 	status_t status = get_dir_path_and_leaf(path, filename);
2521 	if (status != B_OK)
2522 		return status;
2523 
2524 	inc_vnode_ref_count(vnode);
2525 		// vnode_path_to_vnode() always decrements the ref count
2526 
2527 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2528 }
2529 
2530 
2531 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2532 */
2533 static status_t
2534 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2535 	size_t bufferSize, struct io_context* ioContext)
2536 {
2537 	if (bufferSize < sizeof(struct dirent))
2538 		return B_BAD_VALUE;
2539 
2540 	// See if vnode is the root of a mount and move to the covered
2541 	// vnode so we get the underlying file system
2542 	VNodePutter vnodePutter;
2543 	if (vnode->mount->root_vnode == vnode
2544 		&& vnode->mount->covers_vnode != NULL) {
2545 		vnode = vnode->mount->covers_vnode;
2546 		inc_vnode_ref_count(vnode);
2547 		vnodePutter.SetTo(vnode);
2548 	}
2549 
2550 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2551 		// The FS supports getting the name of a vnode.
2552 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2553 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2554 			return B_OK;
2555 	}
2556 
	// The FS doesn't support getting the name of a vnode. So we search the
	// parent directory for the vnode, if the caller provided one.
2559 
2560 	if (parent == NULL)
2561 		return EOPNOTSUPP;
2562 
2563 	void* cookie;
2564 
2565 	status_t status = FS_CALL(parent, open_dir, &cookie);
2566 	if (status >= B_OK) {
2567 		while (true) {
2568 			uint32 num = 1;
2569 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2570 				&num);
2571 			if (status != B_OK)
2572 				break;
2573 			if (num == 0) {
2574 				status = B_ENTRY_NOT_FOUND;
2575 				break;
2576 			}
2577 
2578 			if (vnode->id == buffer->d_ino) {
2579 				// found correct entry!
2580 				break;
2581 			}
2582 		}
2583 
		// the cookie was created by open_dir on the parent, so it has to be
		// closed and freed there as well
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2586 	}
2587 	return status;
2588 }
2589 
2590 
2591 static status_t
2592 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2593 	size_t nameSize, bool kernel)
2594 {
2595 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2596 	struct dirent* dirent = (struct dirent*)buffer;
2597 
2598 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2599 		get_current_io_context(kernel));
2600 	if (status != B_OK)
2601 		return status;
2602 
2603 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2604 		return B_BUFFER_OVERFLOW;
2605 
2606 	return B_OK;
2607 }
2608 
2609 
2610 /*!	Gets the full path to a given directory vnode.
2611 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2612 	file system doesn't support this call, it will fall back to iterating
2613 	through the parent directory to get the name of the child.
2614 
2615 	To protect against circular loops, it supports a maximum tree depth
2616 	of 256 levels.
2617 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to ensure that the returned path
	stays valid, as paths aren't stable anyway: the path to a file can
	change at any time.

	It might be a good idea, though, to check if the returned path exists
	in the calling function (it's not done here for efficiency reasons).
2624 */
2625 static status_t
2626 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2627 	bool kernel)
2628 {
2629 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2630 
2631 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2632 		return B_BAD_VALUE;
2633 
2634 	if (!S_ISDIR(vnode->type))
2635 		return B_NOT_A_DIRECTORY;
2636 
2637 	char* path = buffer;
2638 	int32 insert = bufferSize;
2639 	int32 maxLevel = 256;
2640 	int32 length;
2641 	status_t status;
2642 	struct io_context* ioContext = get_current_io_context(kernel);
2643 
2644 	// we don't use get_vnode() here because this call is more
2645 	// efficient and does all we need from get_vnode()
2646 	inc_vnode_ref_count(vnode);
2647 
2648 	if (vnode != ioContext->root) {
		// we haven't hit the IO context root
2650 		// resolve a volume root to its mount point
2651 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2652 		if (mountPoint) {
2653 			put_vnode(vnode);
2654 			vnode = mountPoint;
2655 		}
2656 	}
2657 
2658 	path[--insert] = '\0';
2659 		// the path is filled right to left
2660 
2661 	while (true) {
2662 		// the name buffer is also used for fs_read_dir()
2663 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2664 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2665 		struct vnode* parentVnode;
2666 		ino_t parentID;
2667 
2668 		// lookup the parent vnode
2669 		if (vnode == ioContext->root) {
2670 			// we hit the IO context root
2671 			parentVnode = vnode;
2672 			inc_vnode_ref_count(vnode);
2673 		} else {
2674 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2675 			if (status != B_OK)
2676 				goto out;
2677 		}
2678 
2679 		// get the node's name
2680 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2681 			sizeof(nameBuffer), ioContext);
2682 
2683 		if (vnode != ioContext->root) {
			// we haven't hit the IO context root
2685 			// resolve a volume root to its mount point
2686 			struct vnode* mountPoint
2687 				= resolve_volume_root_to_mount_point(parentVnode);
2688 			if (mountPoint) {
2689 				put_vnode(parentVnode);
2690 				parentVnode = mountPoint;
2691 				parentID = parentVnode->id;
2692 			}
2693 		}
2694 
2695 		bool hitRoot = (parentVnode == vnode);
2696 
2697 		// release the current vnode, we only need its parent from now on
2698 		put_vnode(vnode);
2699 		vnode = parentVnode;
2700 
2701 		if (status != B_OK)
2702 			goto out;
2703 
2704 		if (hitRoot) {
2705 			// we have reached "/", which means we have constructed the full
2706 			// path
2707 			break;
2708 		}
2709 
2710 		// TODO: add an explicit check for loops in about 10 levels to do
2711 		// real loop detection
2712 
		// don't go deeper than 'maxLevel' to prevent circular loops
2714 		if (maxLevel-- < 0) {
2715 			status = B_LINK_LIMIT;
2716 			goto out;
2717 		}
2718 
2719 		// add the name in front of the current path
2720 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2721 		length = strlen(name);
2722 		insert -= length;
2723 		if (insert <= 0) {
2724 			status = B_RESULT_NOT_REPRESENTABLE;
2725 			goto out;
2726 		}
2727 		memcpy(path + insert, name, length);
2728 		path[--insert] = '/';
2729 	}
2730 
2731 	// the root dir will result in an empty path: fix it
2732 	if (path[insert] == '\0')
2733 		path[--insert] = '/';
2734 
2735 	TRACE(("  path is: %s\n", path + insert));
2736 
2737 	// move the path to the start of the buffer
2738 	length = bufferSize - insert;
2739 	memmove(buffer, path + insert, length);
2740 
2741 out:
2742 	put_vnode(vnode);
2743 	return status;
2744 }
2745 
2746 
2747 /*!	Checks the length of every path component, and adds a '.'
2748 	if the path ends in a slash.
2749 	The given path buffer must be able to store at least one
2750 	additional character.
2751 */
2752 static status_t
2753 check_path(char* to)
2754 {
2755 	int32 length = 0;
2756 
2757 	// check length of every path component
2758 
2759 	while (*to) {
2760 		char* begin;
2761 		if (*to == '/')
2762 			to++, length++;
2763 
2764 		begin = to;
2765 		while (*to != '/' && *to)
2766 			to++, length++;
2767 
2768 		if (to - begin > B_FILE_NAME_LENGTH)
2769 			return B_NAME_TOO_LONG;
2770 	}
2771 
2772 	if (length == 0)
2773 		return B_ENTRY_NOT_FOUND;
2774 
2775 	// complete path if there is a slash at the end
2776 
2777 	if (*(to - 1) == '/') {
2778 		if (length > B_PATH_NAME_LENGTH - 2)
2779 			return B_NAME_TOO_LONG;
2780 
2781 		to[0] = '.';
2782 		to[1] = '\0';
2783 	}
2784 
2785 	return B_OK;
2786 }
2787 
2788 
2789 static struct file_descriptor*
2790 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2791 {
2792 	struct file_descriptor* descriptor
2793 		= get_fd(get_current_io_context(kernel), fd);
2794 	if (descriptor == NULL)
2795 		return NULL;
2796 
2797 	struct vnode* vnode = fd_vnode(descriptor);
2798 	if (vnode == NULL) {
2799 		put_fd(descriptor);
2800 		return NULL;
2801 	}
2802 
2803 	// ToDo: when we can close a file descriptor at any point, investigate
2804 	//	if this is still valid to do (accessing the vnode without ref_count
2805 	//	or locking)
2806 	*_vnode = vnode;
2807 	return descriptor;
2808 }
2809 
2810 
2811 static struct vnode*
2812 get_vnode_from_fd(int fd, bool kernel)
2813 {
2814 	struct file_descriptor* descriptor;
2815 	struct vnode* vnode;
2816 
2817 	descriptor = get_fd(get_current_io_context(kernel), fd);
2818 	if (descriptor == NULL)
2819 		return NULL;
2820 
2821 	vnode = fd_vnode(descriptor);
2822 	if (vnode != NULL)
2823 		inc_vnode_ref_count(vnode);
2824 
2825 	put_fd(descriptor);
2826 	return vnode;
2827 }
2828 
2829 
2830 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2831 	only the path will be considered. In this case, the \a path must not be
2832 	NULL.
2833 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2834 	and should be NULL for files.
2835 */
2836 static status_t
2837 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2838 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2839 {
2840 	if (fd < 0 && !path)
2841 		return B_BAD_VALUE;
2842 
2843 	if (path != NULL && *path == '\0')
2844 		return B_ENTRY_NOT_FOUND;
2845 
2846 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2847 		// no FD or absolute path
2848 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2849 	}
2850 
2851 	// FD only, or FD + relative path
2852 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2853 	if (!vnode)
2854 		return B_FILE_ERROR;
2855 
2856 	if (path != NULL) {
2857 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2858 			_vnode, _parentID);
2859 	}
2860 
2861 	// there is no relative path to take into account
2862 
2863 	*_vnode = vnode;
2864 	if (_parentID)
2865 		*_parentID = -1;
2866 
2867 	return B_OK;
2868 }
2869 
2870 
2871 static int
2872 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2873 	void* cookie, int openMode, bool kernel)
2874 {
2875 	struct file_descriptor* descriptor;
2876 	int fd;
2877 
2878 	// If the vnode is locked, we don't allow creating a new file/directory
2879 	// file_descriptor for it
2880 	if (vnode && vnode->mandatory_locked_by != NULL
2881 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2882 		return B_BUSY;
2883 
2884 	descriptor = alloc_fd();
2885 	if (!descriptor)
2886 		return B_NO_MEMORY;
2887 
2888 	if (vnode)
2889 		descriptor->u.vnode = vnode;
2890 	else
2891 		descriptor->u.mount = mount;
2892 	descriptor->cookie = cookie;
2893 
2894 	switch (type) {
2895 		// vnode types
2896 		case FDTYPE_FILE:
2897 			descriptor->ops = &sFileOps;
2898 			break;
2899 		case FDTYPE_DIR:
2900 			descriptor->ops = &sDirectoryOps;
2901 			break;
2902 		case FDTYPE_ATTR:
2903 			descriptor->ops = &sAttributeOps;
2904 			break;
2905 		case FDTYPE_ATTR_DIR:
2906 			descriptor->ops = &sAttributeDirectoryOps;
2907 			break;
2908 
2909 		// mount types
2910 		case FDTYPE_INDEX_DIR:
2911 			descriptor->ops = &sIndexDirectoryOps;
2912 			break;
2913 		case FDTYPE_QUERY:
2914 			descriptor->ops = &sQueryOps;
2915 			break;
2916 
2917 		default:
2918 			panic("get_new_fd() called with unknown type %d\n", type);
2919 			break;
2920 	}
2921 	descriptor->type = type;
2922 	descriptor->open_mode = openMode;
2923 
2924 	fd = new_fd(get_current_io_context(kernel), descriptor);
2925 	if (fd < 0) {
2926 		free(descriptor);
2927 		return B_NO_MORE_FDS;
2928 	}
2929 
2930 	return fd;
2931 }
2932 
2933 
2934 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2935 	vfs_normalize_path(). See there for more documentation.
2936 */
2937 static status_t
2938 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2939 {
2940 	VNodePutter dirPutter;
2941 	struct vnode* dir = NULL;
2942 	status_t error;
2943 
2944 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2945 		// get dir vnode + leaf name
2946 		struct vnode* nextDir;
2947 		char leaf[B_FILE_NAME_LENGTH];
2948 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2949 		if (error != B_OK)
2950 			return error;
2951 
2952 		dir = nextDir;
2953 		strcpy(path, leaf);
2954 		dirPutter.SetTo(dir);
2955 
2956 		// get file vnode, if we shall resolve links
2957 		bool fileExists = false;
2958 		struct vnode* fileVnode;
2959 		VNodePutter fileVnodePutter;
2960 		if (traverseLink) {
2961 			inc_vnode_ref_count(dir);
2962 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2963 					NULL) == B_OK) {
2964 				fileVnodePutter.SetTo(fileVnode);
2965 				fileExists = true;
2966 			}
2967 		}
2968 
2969 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->type)) {
2970 			// we're done -- construct the path
2971 			bool hasLeaf = true;
2972 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2973 				// special cases "." and ".." -- get the dir, forget the leaf
2974 				inc_vnode_ref_count(dir);
2975 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2976 					&nextDir, NULL);
2977 				if (error != B_OK)
2978 					return error;
2979 				dir = nextDir;
2980 				dirPutter.SetTo(dir);
2981 				hasLeaf = false;
2982 			}
2983 
2984 			// get the directory path
2985 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2986 			if (error != B_OK)
2987 				return error;
2988 
2989 			// append the leaf name
2990 			if (hasLeaf) {
2991 				// insert a directory separator if this is not the file system
2992 				// root
2993 				if ((strcmp(path, "/") != 0
2994 					&& strlcat(path, "/", pathSize) >= pathSize)
2995 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2996 					return B_NAME_TOO_LONG;
2997 				}
2998 			}
2999 
3000 			return B_OK;
3001 		}
3002 
3003 		// read link
3004 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
3005 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
3006 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
3007 			if (error != B_OK)
3008 				return error;
3009 			path[bufferSize] = '\0';
3010 		} else
3011 			return B_BAD_VALUE;
3012 	}
3013 
3014 	return B_LINK_LIMIT;
3015 }
3016 
3017 
3018 #ifdef ADD_DEBUGGER_COMMANDS
3019 
3020 
3021 static void
3022 _dump_advisory_locking(advisory_locking* locking)
3023 {
3024 	if (locking == NULL)
3025 		return;
3026 
3027 	kprintf("   lock:        %ld", locking->lock);
3028 	kprintf("   wait_sem:    %ld", locking->wait_sem);
3029 
3030 	int32 index = 0;
3031 	LockList::Iterator iterator = locking->locks.GetIterator();
3032 	while (iterator.HasNext()) {
3033 		struct advisory_lock* lock = iterator.Next();
3034 
3035 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
3036 		kprintf("        start:  %Ld\n", lock->start);
3037 		kprintf("        end:    %Ld\n", lock->end);
3038 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3039 	}
3040 }
3041 
3042 
3043 static void
3044 _dump_mount(struct fs_mount* mount)
3045 {
3046 	kprintf("MOUNT: %p\n", mount);
3047 	kprintf(" id:            %ld\n", mount->id);
3048 	kprintf(" device_name:   %s\n", mount->device_name);
3049 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3050 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
3051 	kprintf(" partition:     %p\n", mount->partition);
3052 	kprintf(" lock:          %p\n", &mount->rlock);
3053 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3054 		mount->owns_file_device ? " owns_file_device" : "");
3055 
3056 	fs_volume* volume = mount->volume;
3057 	while (volume != NULL) {
3058 		kprintf(" volume %p:\n", volume);
3059 		kprintf("  layer:            %ld\n", volume->layer);
3060 		kprintf("  private_volume:   %p\n", volume->private_volume);
3061 		kprintf("  ops:              %p\n", volume->ops);
3062 		kprintf("  file_system:      %p\n", volume->file_system);
3063 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3064 		volume = volume->super_volume;
3065 	}
3066 
3067 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3068 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3069 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
3070 	set_debug_variable("_partition", (addr_t)mount->partition);
3071 }
3072 
3073 
3074 static void
3075 _dump_vnode(struct vnode* vnode)
3076 {
3077 	kprintf("VNODE: %p\n", vnode);
3078 	kprintf(" device:        %ld\n", vnode->device);
3079 	kprintf(" id:            %Ld\n", vnode->id);
3080 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3081 	kprintf(" private_node:  %p\n", vnode->private_node);
3082 	kprintf(" mount:         %p\n", vnode->mount);
3083 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3084 	kprintf(" cache:         %p\n", vnode->cache);
3085 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
3086 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3087 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3088 
3089 	_dump_advisory_locking(vnode->advisory_locking);
3090 
3091 	set_debug_variable("_node", (addr_t)vnode->private_node);
3092 	set_debug_variable("_mount", (addr_t)vnode->mount);
3093 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3094 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3095 }
3096 
3097 
3098 static int
3099 dump_mount(int argc, char** argv)
3100 {
3101 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3102 		kprintf("usage: %s [id|address]\n", argv[0]);
3103 		return 0;
3104 	}
3105 
3106 	uint32 id = parse_expression(argv[1]);
3107 	struct fs_mount* mount = NULL;
3108 
3109 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3110 	if (mount == NULL) {
3111 		if (IS_USER_ADDRESS(id)) {
3112 			kprintf("fs_mount not found\n");
3113 			return 0;
3114 		}
3115 		mount = (fs_mount*)id;
3116 	}
3117 
3118 	_dump_mount(mount);
3119 	return 0;
3120 }
3121 
3122 
3123 static int
3124 dump_mounts(int argc, char** argv)
3125 {
3126 	if (argc != 1) {
3127 		kprintf("usage: %s\n", argv[0]);
3128 		return 0;
3129 	}
3130 
3131 	kprintf("address     id root       covers     cookie     fs_name\n");
3132 
3133 	struct hash_iterator iterator;
3134 	struct fs_mount* mount;
3135 
3136 	hash_open(sMountsTable, &iterator);
3137 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3138 			!= NULL) {
3139 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3140 			mount->covers_vnode, mount->volume->private_volume,
3141 			mount->volume->file_system_name);
3142 
3143 		fs_volume* volume = mount->volume;
3144 		while (volume->super_volume != NULL) {
3145 			volume = volume->super_volume;
3146 			kprintf("                                     %p %s\n",
3147 				volume->private_volume, volume->file_system_name);
3148 		}
3149 	}
3150 
3151 	hash_close(sMountsTable, &iterator, false);
3152 	return 0;
3153 }
3154 
3155 
3156 static int
3157 dump_vnode(int argc, char** argv)
3158 {
3159 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
3160 		kprintf("usage: %s <device> <id>\n"
3161 			"   or: %s <address>\n", argv[0], argv[0]);
3162 		return 0;
3163 	}
3164 
3165 	struct vnode* vnode = NULL;
3166 
3167 	if (argc == 2) {
3168 		vnode = (struct vnode*)parse_expression(argv[1]);
3169 		if (IS_USER_ADDRESS(vnode)) {
3170 			kprintf("invalid vnode address\n");
3171 			return 0;
3172 		}
3173 		_dump_vnode(vnode);
3174 		return 0;
3175 	}
3176 
3177 	struct hash_iterator iterator;
3178 	dev_t device = parse_expression(argv[1]);
3179 	ino_t id = parse_expression(argv[2]);
3180 
3181 	hash_open(sVnodeTable, &iterator);
3182 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3183 		if (vnode->id != id || vnode->device != device)
3184 			continue;
3185 
3186 		_dump_vnode(vnode);
3187 	}
3188 
3189 	hash_close(sVnodeTable, &iterator, false);
3190 	return 0;
3191 }
3192 
3193 
3194 static int
3195 dump_vnodes(int argc, char** argv)
3196 {
3197 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3198 		kprintf("usage: %s [device]\n", argv[0]);
3199 		return 0;
3200 	}
3201 
3202 	// restrict dumped nodes to a certain device if requested
3203 	dev_t device = parse_expression(argv[1]);
3204 
3205 	struct hash_iterator iterator;
3206 	struct vnode* vnode;
3207 
3208 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3209 		"flags\n");
3210 
3211 	hash_open(sVnodeTable, &iterator);
3212 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3213 		if (vnode->device != device)
3214 			continue;
3215 
3216 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3217 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3218 			vnode->advisory_locking, vnode->remove ? "r" : "-",
3219 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3220 	}
3221 
3222 	hash_close(sVnodeTable, &iterator, false);
3223 	return 0;
3224 }
3225 
3226 
3227 static int
3228 dump_vnode_caches(int argc, char** argv)
3229 {
3230 	struct hash_iterator iterator;
3231 	struct vnode* vnode;
3232 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3234 		kprintf("usage: %s [device]\n", argv[0]);
3235 		return 0;
3236 	}
3237 
3238 	// restrict dumped nodes to a certain device if requested
3239 	dev_t device = -1;
3240 	if (argc > 1)
3241 		device = parse_expression(argv[1]);
3242 
3243 	kprintf("address    dev     inode cache          size   pages\n");
3244 
3245 	hash_open(sVnodeTable, &iterator);
3246 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3247 		if (vnode->cache == NULL)
3248 			continue;
3249 		if (device != -1 && vnode->device != device)
3250 			continue;
3251 
3252 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3253 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3254 				/ B_PAGE_SIZE, vnode->cache->page_count);
3255 	}
3256 
3257 	hash_close(sVnodeTable, &iterator, false);
3258 	return 0;
3259 }
3260 
3261 
3262 int
3263 dump_io_context(int argc, char** argv)
3264 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3266 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3267 		return 0;
3268 	}
3269 
3270 	struct io_context* context = NULL;
3271 
3272 	if (argc > 1) {
3273 		uint32 num = parse_expression(argv[1]);
3274 		if (IS_KERNEL_ADDRESS(num))
3275 			context = (struct io_context*)num;
3276 		else {
3277 			struct team* team = team_get_team_struct_locked(num);
3278 			if (team == NULL) {
3279 				kprintf("could not find team with ID %ld\n", num);
3280 				return 0;
3281 			}
3282 			context = (struct io_context*)team->io_context;
3283 		}
3284 	} else
3285 		context = get_current_io_context(true);
3286 
3287 	kprintf("I/O CONTEXT: %p\n", context);
3288 	kprintf(" root vnode:\t%p\n", context->root);
3289 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3290 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3291 	kprintf(" max fds:\t%lu\n", context->table_size);
3292 
3293 	if (context->num_used_fds)
3294 		kprintf("   no. type     ops ref open mode        pos cookie\n");
3295 
3296 	for (uint32 i = 0; i < context->table_size; i++) {
3297 		struct file_descriptor* fd = context->fds[i];
3298 		if (fd == NULL)
3299 			continue;
3300 
3301 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type,
3302 			fd->ops, fd->ref_count, fd->open_count, fd->open_mode, fd->pos,
3303 			fd->cookie, fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3304 				? "mount" : "vnode",
3305 			fd->u.vnode);
3306 	}
3307 
3308 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3309 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3310 
3311 	set_debug_variable("_cwd", (addr_t)context->cwd);
3312 
3313 	return 0;
3314 }
3315 
3316 
3317 int
3318 dump_vnode_usage(int argc, char** argv)
3319 {
3320 	if (argc != 1) {
3321 		kprintf("usage: %s\n", argv[0]);
3322 		return 0;
3323 	}
3324 
3325 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3326 		kMaxUnusedVnodes);
3327 
3328 	struct hash_iterator iterator;
3329 	hash_open(sVnodeTable, &iterator);
3330 
3331 	uint32 count = 0;
3332 	struct vnode* vnode;
3333 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3334 		count++;
3335 	}
3336 
3337 	hash_close(sVnodeTable, &iterator, false);
3338 
3339 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3340 	return 0;
3341 }
3342 
3343 #endif	// ADD_DEBUGGER_COMMANDS
3344 
/*!	Zeroes the physical memory described by an iovec array.
	Returns in \a _bytes the number of bytes successfully cleared.
3347 */
3348 static status_t
3349 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3350 {
3351 	size_t bytes = *_bytes;
3352 	size_t index = 0;
3353 
3354 	while (bytes > 0) {
3355 		size_t length = min_c(vecs[index].iov_len, bytes);
3356 
3357 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3358 			length);
3359 		if (status != B_OK) {
3360 			*_bytes -= bytes;
3361 			return status;
3362 		}
3363 
		bytes -= length;

		if (++index >= vecCount && bytes > 0) {
			// the remaining bytes are not covered by the supplied vecs
			*_bytes -= bytes;
			return B_BAD_VALUE;
		}
	}
3366 
3367 	return B_OK;
3368 }
3369 
3370 
3371 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3372 	and calls the file system hooks to read/write the request to disk.
3373 */
3374 static status_t
3375 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3376 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3377 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3378 	bool doWrite)
3379 {
3380 	if (fileVecCount == 0) {
3381 		// There are no file vecs at this offset, so we're obviously trying
3382 		// to access the file outside of its bounds
3383 		return B_BAD_VALUE;
3384 	}
3385 
3386 	size_t numBytes = *_numBytes;
3387 	uint32 fileVecIndex;
3388 	size_t vecOffset = *_vecOffset;
3389 	uint32 vecIndex = *_vecIndex;
3390 	status_t status;
3391 	size_t size;
3392 
3393 	if (!doWrite && vecOffset == 0) {
3394 		// now directly read the data from the device
3395 		// the first file_io_vec can be read directly
3396 
3397 		if (fileVecs[0].length < numBytes)
3398 			size = fileVecs[0].length;
3399 		else
3400 			size = numBytes;
3401 
3402 		if (fileVecs[0].offset >= 0) {
3403 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3404 				&vecs[vecIndex], vecCount - vecIndex, &size);
3405 		} else {
3406 			// sparse read
3407 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3408 		}
3409 		if (status != B_OK)
3410 			return status;
3411 
3412 		// TODO: this is a work-around for buggy device drivers!
3413 		//	When our own drivers honour the length, we can:
3414 		//	a) also use this direct I/O for writes (otherwise, it would
3415 		//	   overwrite precious data)
3416 		//	b) panic if the term below is true (at least for writes)
3417 		if (size > fileVecs[0].length) {
3418 			//dprintf("warning: device driver %p doesn't respect total length "
3419 			//	"in read_pages() call!\n", ref->device);
3420 			size = fileVecs[0].length;
3421 		}
3422 
3423 		ASSERT(size <= fileVecs[0].length);
3424 
3425 		// If the file portion was contiguous, we're already done now
3426 		if (size == numBytes)
3427 			return B_OK;
3428 
3429 		// if we reached the end of the file, we can return as well
3430 		if (size != fileVecs[0].length) {
3431 			*_numBytes = size;
3432 			return B_OK;
3433 		}
3434 
3435 		fileVecIndex = 1;
3436 
3437 		// first, find out where we have to continue in our iovecs
3438 		for (; vecIndex < vecCount; vecIndex++) {
3439 			if (size < vecs[vecIndex].iov_len)
3440 				break;
3441 
3442 			size -= vecs[vecIndex].iov_len;
3443 		}
3444 
3445 		vecOffset = size;
3446 	} else {
3447 		fileVecIndex = 0;
3448 		size = 0;
3449 	}
3450 
3451 	// Too bad, let's process the rest of the file_io_vecs
3452 
3453 	size_t totalSize = size;
3454 	size_t bytesLeft = numBytes - size;
3455 
3456 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3457 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3458 		off_t fileOffset = fileVec.offset;
3459 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3460 
3461 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3462 
3463 		// process the complete fileVec
3464 		while (fileLeft > 0) {
3465 			iovec tempVecs[MAX_TEMP_IO_VECS];
3466 			uint32 tempCount = 0;
3467 
3468 			// size tracks how much of what is left of the current fileVec
3469 			// (fileLeft) has been assigned to tempVecs
3470 			size = 0;
3471 
3472 			// assign what is left of the current fileVec to the tempVecs
3473 			for (size = 0; size < fileLeft && vecIndex < vecCount
3474 					&& tempCount < MAX_TEMP_IO_VECS;) {
3475 				// try to satisfy one iovec per iteration (or as much as
3476 				// possible)
3477 
3478 				// bytes left of the current iovec
3479 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3480 				if (vecLeft == 0) {
3481 					vecOffset = 0;
3482 					vecIndex++;
3483 					continue;
3484 				}
3485 
3486 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3487 					vecIndex, vecOffset, size));
3488 
3489 				// actually available bytes
3490 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3491 
3492 				tempVecs[tempCount].iov_base
3493 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3494 				tempVecs[tempCount].iov_len = tempVecSize;
3495 				tempCount++;
3496 
3497 				size += tempVecSize;
3498 				vecOffset += tempVecSize;
3499 			}
3500 
3501 			size_t bytes = size;
3502 
3503 			if (fileOffset == -1) {
3504 				if (doWrite) {
3505 					panic("sparse write attempt: vnode %p", vnode);
3506 					status = B_IO_ERROR;
3507 				} else {
3508 					// sparse read
3509 					status = zero_pages(tempVecs, tempCount, &bytes);
3510 				}
3511 			} else if (doWrite) {
3512 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3513 					tempVecs, tempCount, &bytes);
3514 			} else {
3515 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3516 					tempVecs, tempCount, &bytes);
3517 			}
3518 			if (status != B_OK)
3519 				return status;
3520 
3521 			totalSize += bytes;
3522 			bytesLeft -= size;
3523 			if (fileOffset >= 0)
3524 				fileOffset += size;
3525 			fileLeft -= size;
3526 			//dprintf("-> file left = %Lu\n", fileLeft);
3527 
3528 			if (size != bytes || vecIndex >= vecCount) {
3529 				// there are no more bytes or iovecs, let's bail out
3530 				*_numBytes = totalSize;
3531 				return B_OK;
3532 			}
3533 		}
3534 	}
3535 
3536 	*_vecIndex = vecIndex;
3537 	*_vecOffset = vecOffset;
3538 	*_numBytes = totalSize;
3539 	return B_OK;
3540 }
3541 
3542 
3543 //	#pragma mark - public API for file systems
3544 
3545 
3546 extern "C" status_t
3547 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3548 	fs_vnode_ops* ops)
3549 {
3550 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3551 		volume, volume->id, vnodeID, privateNode));
3552 
3553 	if (privateNode == NULL)
3554 		return B_BAD_VALUE;
3555 
3556 	mutex_lock(&sVnodeMutex);
3557 
3558 	// file system integrity check:
3559 	// test if the vnode already exists and bail out if this is the case!
3560 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3561 	if (vnode != NULL) {
3562 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3563 			volume->id, vnodeID, privateNode, vnode->private_node);
3564 	}
3565 
3566 	status_t status = create_new_vnode(&vnode, volume->id, vnodeID);
3567 	if (status == B_OK) {
3568 		vnode->private_node = privateNode;
3569 		vnode->ops = ops;
3570 		vnode->busy = true;
3571 		vnode->unpublished = true;
3572 	}
3573 
3574 	TRACE(("returns: %s\n", strerror(status)));
3575 
3576 	mutex_unlock(&sVnodeMutex);
3577 	return status;
3578 }
3579 
3580 
3581 extern "C" status_t
3582 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3583 	fs_vnode_ops* ops, int type, uint32 flags)
3584 {
3585 	FUNCTION(("publish_vnode()\n"));
3586 
3587 	MutexLocker locker(sVnodeMutex);
3588 
3589 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3590 	status_t status = B_OK;
3591 
3592 	if (vnode != NULL && vnode->busy && vnode->unpublished
3593 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3594 		// already known, but not published
3595 	} else if (vnode == NULL && privateNode != NULL) {
3596 		status = create_new_vnode(&vnode, volume->id, vnodeID);
3597 		if (status == B_OK) {
3598 			vnode->private_node = privateNode;
3599 			vnode->ops = ops;
3600 			vnode->busy = true;
3601 			vnode->unpublished = true;
3602 		}
3603 	} else
3604 		status = B_BAD_VALUE;
3605 
3606 	bool publishSpecialSubNode = false;
3607 
3608 	if (status == B_OK) {
3609 		vnode->type = type;
3610 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
3611 		publishSpecialSubNode = is_special_node_type(type)
3612 			&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3613 	}
3616 	// create sub vnodes, if necessary
3617 	if (status == B_OK
3618 			&& (volume->sub_volume != NULL || publishSpecialSubNode)) {
3619 		locker.Unlock();
3620 
3621 		fs_volume* subVolume = volume;
3622 		if (volume->sub_volume != NULL) {
3623 			while (status == B_OK && subVolume->sub_volume != NULL) {
3624 				subVolume = subVolume->sub_volume;
3625 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3626 					vnode);
3627 			}
3628 		}
3629 
3630 		if (status == B_OK && publishSpecialSubNode)
3631 			status = create_special_sub_node(vnode, flags);
3632 
3633 		if (status != B_OK) {
3634 			// error -- clean up the created sub vnodes
3635 			while (subVolume->super_volume != volume) {
3636 				subVolume = subVolume->super_volume;
3637 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3638 			}
3639 		}
3640 
3641 		locker.Lock();
3642 
3643 		if (status != B_OK) {
3644 			hash_remove(sVnodeTable, vnode);
3645 			remove_vnode_from_mount_list(vnode, vnode->mount);
3646 			free(vnode);
3647 		}
3648 	}
3649 
3650 	if (status == B_OK) {
3651 		vnode->busy = false;
3652 		vnode->unpublished = false;
3653 	}
3654 
3655 	TRACE(("returns: %s\n", strerror(status)));
3656 
3657 	return status;
3658 }
3659 
3660 
3661 extern "C" status_t
3662 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3663 {
3664 	struct vnode* vnode;
3665 
3666 	if (volume == NULL)
3667 		return B_BAD_VALUE;
3668 
3669 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3670 	if (status != B_OK)
3671 		return status;
3672 
3673 	// If this is a layered FS, we need to get the node cookie for the requested
3674 	// layer.
3675 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3676 		fs_vnode resolvedNode;
3677 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3678 			&resolvedNode);
3679 		if (status != B_OK) {
3680 			panic("get_vnode(): Failed to get super node for vnode %p, "
3681 				"volume: %p", vnode, volume);
3682 			put_vnode(vnode);
3683 			return status;
3684 		}
3685 
3686 		if (_privateNode != NULL)
3687 			*_privateNode = resolvedNode.private_node;
3688 	} else if (_privateNode != NULL)
3689 		*_privateNode = vnode->private_node;
3690 
3691 	return B_OK;
3692 }
3693 
3694 
3695 extern "C" status_t
3696 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3697 {
3698 	struct vnode* vnode;
3699 
3700 	mutex_lock(&sVnodeMutex);
3701 	vnode = lookup_vnode(volume->id, vnodeID);
3702 	mutex_unlock(&sVnodeMutex);
3703 
3704 	if (vnode == NULL)
3705 		return B_BAD_VALUE;
3706 
3707 	inc_vnode_ref_count(vnode);
3708 	return B_OK;
3709 }
3710 
3711 
3712 extern "C" status_t
3713 put_vnode(fs_volume* volume, ino_t vnodeID)
3714 {
3715 	struct vnode* vnode;
3716 
3717 	mutex_lock(&sVnodeMutex);
3718 	vnode = lookup_vnode(volume->id, vnodeID);
3719 	mutex_unlock(&sVnodeMutex);
3720 
3721 	if (vnode == NULL)
3722 		return B_BAD_VALUE;
3723 
3724 	dec_vnode_ref_count(vnode, false, true);
3725 	return B_OK;
3726 }
3727 
3728 
3729 extern "C" status_t
3730 remove_vnode(fs_volume* volume, ino_t vnodeID)
3731 {
3732 	MutexLocker locker(sVnodeMutex);
3733 
3734 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3735 	if (vnode == NULL)
3736 		return B_ENTRY_NOT_FOUND;
3737 
3738 	if (vnode->covered_by != NULL) {
3739 		// this vnode is in use
3740 		return B_BUSY;
3741 	}
3742 
3743 	vnode->remove = true;
3744 	bool removeUnpublished = false;
3745 
3746 	if (vnode->unpublished) {
3747 		// prepare the vnode for deletion
3748 		removeUnpublished = true;
3749 		vnode->busy = true;
3750 	}
3751 
3752 	locker.Unlock();
3753 
3754 	if (removeUnpublished) {
3755 		// If the vnode hasn't been published yet, we delete it here
3756 		atomic_add(&vnode->ref_count, -1);
3757 		free_vnode(vnode, true);
3758 	}
3759 
3760 	return B_OK;
3761 }
3762 
3763 
3764 extern "C" status_t
3765 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3766 {
3767 	struct vnode* vnode;
3768 
3769 	mutex_lock(&sVnodeMutex);
3770 
3771 	vnode = lookup_vnode(volume->id, vnodeID);
3772 	if (vnode)
3773 		vnode->remove = false;
3774 
3775 	mutex_unlock(&sVnodeMutex);
3776 	return B_OK;
3777 }
3778 
3779 
3780 extern "C" status_t
3781 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3782 {
3783 	MutexLocker _(sVnodeMutex);
3784 
3785 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3786 		if (_removed != NULL)
3787 			*_removed = vnode->remove;
3788 		return B_OK;
3789 	}
3790 
3791 	return B_BAD_VALUE;
3792 }
3793 
3794 
3795 extern "C" fs_volume*
3796 volume_for_vnode(fs_vnode* _vnode)
3797 {
3798 	if (_vnode == NULL)
3799 		return NULL;
3800 
3801 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3802 	return vnode->mount->volume;
3803 }
3804 
3805 
3806 extern "C" status_t
3807 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3808 	size_t* _numBytes)
3809 {
3810 	struct file_descriptor* descriptor;
3811 	struct vnode* vnode;
3812 
3813 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3814 	if (descriptor == NULL)
3815 		return B_FILE_ERROR;
3816 
3817 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3818 		count, 0, _numBytes);
3819 
3820 	put_fd(descriptor);
3821 	return status;
3822 }
3823 
3824 
3825 extern "C" status_t
3826 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3827 	size_t* _numBytes)
3828 {
3829 	struct file_descriptor* descriptor;
3830 	struct vnode* vnode;
3831 
3832 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3833 	if (descriptor == NULL)
3834 		return B_FILE_ERROR;
3835 
3836 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3837 		count, 0, _numBytes);
3838 
3839 	put_fd(descriptor);
3840 	return status;
3841 }
3842 
3843 
3844 extern "C" status_t
3845 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3846 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3847 	size_t* _bytes)
3848 {
3849 	struct file_descriptor* descriptor;
3850 	struct vnode* vnode;
3851 
3852 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3853 	if (descriptor == NULL)
3854 		return B_FILE_ERROR;
3855 
3856 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3857 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3858 		false);
3859 
3860 	put_fd(descriptor);
3861 	return status;
3862 }
3863 
3864 
3865 extern "C" status_t
3866 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3867 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3868 	size_t* _bytes)
3869 {
3870 	struct file_descriptor* descriptor;
3871 	struct vnode* vnode;
3872 
3873 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3874 	if (descriptor == NULL)
3875 		return B_FILE_ERROR;
3876 
3877 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3878 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3879 		true);
3880 
3881 	put_fd(descriptor);
3882 	return status;
3883 }
3884 
3885 
3886 extern "C" status_t
3887 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3888 {
3889 	// lookup mount -- the caller is required to make sure that the mount
3890 	// won't go away
3891 	MutexLocker locker(sMountMutex);
3892 	struct fs_mount* mount = find_mount(mountID);
3893 	if (mount == NULL)
3894 		return B_BAD_VALUE;
3895 	locker.Unlock();
3896 
3897 	return mount->entry_cache.Add(dirID, name, nodeID);
3898 }
3899 
3900 
3901 extern "C" status_t
3902 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3903 {
3904 	// lookup mount -- the caller is required to make sure that the mount
3905 	// won't go away
3906 	MutexLocker locker(sMountMutex);
3907 	struct fs_mount* mount = find_mount(mountID);
3908 	if (mount == NULL)
3909 		return B_BAD_VALUE;
3910 	locker.Unlock();
3911 
3912 	return mount->entry_cache.Remove(dirID, name);
3913 }
3914 
3915 
3916 //	#pragma mark - private VFS API
3917 //	Functions the VFS exports for other parts of the kernel
3918 
3919 
3920 /*! Acquires another reference to the vnode that has to be released
3921 	by calling vfs_put_vnode().
3922 */
3923 void
3924 vfs_acquire_vnode(struct vnode* vnode)
3925 {
3926 	inc_vnode_ref_count(vnode);
3927 }
3928 
3929 
3930 /*! This is currently called from file_cache_create() only.
3931 	It's probably a temporary solution as long as devfs requires that
3932 	fs_read_pages()/fs_write_pages() are called with the standard
3933 	open cookie and not with a device cookie.
3934 	If that's done differently, remove this call; it has no other
3935 	purpose.
3936 */
3937 extern "C" status_t
3938 vfs_get_cookie_from_fd(int fd, void** _cookie)
3939 {
3940 	struct file_descriptor* descriptor;
3941 
3942 	descriptor = get_fd(get_current_io_context(true), fd);
3943 	if (descriptor == NULL)
3944 		return B_FILE_ERROR;
3945 
3946 	*_cookie = descriptor->cookie;
3947 	return B_OK;
3948 }
3949 
3950 
3951 extern "C" status_t
3952 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
3953 {
3954 	*vnode = get_vnode_from_fd(fd, kernel);
3955 
3956 	if (*vnode == NULL)
3957 		return B_FILE_ERROR;
3958 
3959 	return B_NO_ERROR;
3960 }
3961 
3962 
3963 extern "C" status_t
3964 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
3965 {
3966 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3967 		path, kernel));
3968 
3969 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3970 	if (pathBuffer.InitCheck() != B_OK)
3971 		return B_NO_MEMORY;
3972 
3973 	char* buffer = pathBuffer.LockBuffer();
3974 	strlcpy(buffer, path, pathBuffer.BufferSize());
3975 
3976 	struct vnode* vnode;
3977 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3978 	if (status != B_OK)
3979 		return status;
3980 
3981 	*_vnode = vnode;
3982 	return B_OK;
3983 }
3984 
3985 
3986 extern "C" status_t
3987 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
3988 {
3989 	struct vnode* vnode;
3990 
3991 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3992 	if (status != B_OK)
3993 		return status;
3994 
3995 	*_vnode = vnode;
3996 	return B_OK;
3997 }
3998 
3999 
4000 extern "C" status_t
4001 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4002 	const char* name, struct vnode** _vnode)
4003 {
4004 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4005 }
4006 
4007 
4008 extern "C" void
4009 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4010 {
4011 	*_mountID = vnode->device;
4012 	*_vnodeID = vnode->id;
4013 }
4014 
4015 
4016 /*!
4017 	Calls fs_open() on the given vnode and returns a new
4018 	file descriptor for it
4019 */
4020 int
4021 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4022 {
4023 	return open_vnode(vnode, openMode, kernel);
4024 }
4025 
4026 
4027 /*!	Looks up a vnode with the given mount and vnode ID.
4028 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4029 	to the node.
4030 	It's currently only used by file_cache_create().
4031 */
4032 extern "C" status_t
4033 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4034 {
4035 	mutex_lock(&sVnodeMutex);
4036 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4037 	mutex_unlock(&sVnodeMutex);
4038 
4039 	if (vnode == NULL)
4040 		return B_ERROR;
4041 
4042 	*_vnode = vnode;
4043 	return B_OK;
4044 }
4045 
4046 
4047 extern "C" status_t
4048 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4049 	bool traverseLeafLink, bool kernel, void** _node)
4050 {
4051 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4052 		volume, path, kernel));
4053 
4054 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4055 	if (pathBuffer.InitCheck() != B_OK)
4056 		return B_NO_MEMORY;
4057 
4058 	fs_mount* mount;
4059 	status_t status = get_mount(volume->id, &mount);
4060 	if (status != B_OK)
4061 		return status;
4062 
4063 	char* buffer = pathBuffer.LockBuffer();
4064 	strlcpy(buffer, path, pathBuffer.BufferSize());
4065 
4066 	struct vnode* vnode = mount->root_vnode;
4067 
4068 	if (buffer[0] == '/')
4069 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4070 	else {
4071 		inc_vnode_ref_count(vnode);
4072 			// vnode_path_to_vnode() releases a reference to the starting vnode
4073 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4074 			kernel, &vnode, NULL);
4075 	}
4076 
4077 	put_mount(mount);
4078 
4079 	if (status != B_OK)
4080 		return status;
4081 
4082 	if (vnode->device != volume->id) {
4083 		// wrong mount ID - must not gain access to foreign file system nodes
4084 		put_vnode(vnode);
4085 		return B_BAD_VALUE;
4086 	}
4087 
4088 	// Use get_vnode() to resolve the cookie for the right layer.
4089 	status = get_vnode(volume, vnode->id, _node);
4090 	put_vnode(vnode);
4091 
4092 	return status;
4093 }
4094 
4095 
4096 status_t
4097 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4098 	struct stat* stat, bool kernel)
4099 {
4100 	status_t status;
4101 
4102 	if (path) {
4103 		// path given: get the stat of the node referred to by (fd, path)
4104 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4105 		if (pathBuffer.InitCheck() != B_OK)
4106 			return B_NO_MEMORY;
4107 
4108 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4109 			traverseLeafLink, stat, kernel);
4110 	} else {
4111 		// no path given: get the FD and use the FD operation
4112 		struct file_descriptor* descriptor
4113 			= get_fd(get_current_io_context(kernel), fd);
4114 		if (descriptor == NULL)
4115 			return B_FILE_ERROR;
4116 
4117 		if (descriptor->ops->fd_read_stat)
4118 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4119 		else
4120 			status = EOPNOTSUPP;
4121 
4122 		put_fd(descriptor);
4123 	}
4124 
4125 	return status;
4126 }
4127 
4128 
4129 /*!	Finds the full path to the file that contains the module \a moduleName,
4130 	puts it into \a pathBuffer, and returns B_OK for success.
4131 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4132 	\c B_ENTRY_NOT_FOUND if no file could be found.
4133 	\a pathBuffer is clobbered in any case and must not be relied on if this
4134 	function returns unsuccessfully.
4135 	\a basePath and \a pathBuffer must not point to the same space.
4136 */
4137 status_t
4138 vfs_get_module_path(const char* basePath, const char* moduleName,
4139 	char* pathBuffer, size_t bufferSize)
4140 {
4141 	struct vnode* dir;
4142 	struct vnode* file;
4143 	status_t status;
4144 	size_t length;
4145 	char* path;
4146 
4147 	if (bufferSize == 0
4148 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4149 		return B_BUFFER_OVERFLOW;
4150 
4151 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4152 	if (status != B_OK)
4153 		return status;
4154 
4155 	// the path buffer has been clobbered by the above call
4156 	length = strlcpy(pathBuffer, basePath, bufferSize);
4157 	if (pathBuffer[length - 1] != '/')
4158 		pathBuffer[length++] = '/';
4159 
4160 	path = pathBuffer + length;
4161 	bufferSize -= length;
4162 
4163 	while (moduleName) {
4164 		char* nextPath = strchr(moduleName, '/');
4165 		if (nextPath == NULL)
4166 			length = strlen(moduleName);
4167 		else {
4168 			length = nextPath - moduleName;
4169 			nextPath++;
4170 		}
4171 
4172 		if (length + 1 >= bufferSize) {
4173 			status = B_BUFFER_OVERFLOW;
4174 			goto err;
4175 		}
4176 
4177 		memcpy(path, moduleName, length);
4178 		path[length] = '\0';
4179 		moduleName = nextPath;
4180 
4181 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4182 		if (status != B_OK) {
4183 			// vnode_path_to_vnode() has already released the reference to dir
4184 			return status;
4185 		}
4186 
4187 		if (S_ISDIR(file->type)) {
4188 			// go to the next directory
4189 			path[length] = '/';
4190 			path[length + 1] = '\0';
4191 			path += length + 1;
4192 			bufferSize -= length + 1;
4193 
4194 			dir = file;
4195 		} else if (S_ISREG(file->type)) {
4196 			// it's a file so it should be what we've searched for
4197 			put_vnode(file);
4198 
4199 			return B_OK;
4200 		} else {
4201 			TRACE(("vfs_get_module_path(): something is strange here: "
4202 				"0x%08lx...\n", file->type));
4203 			status = B_ERROR;
4204 			dir = file;
4205 			goto err;
4206 		}
4207 	}
4208 
4209 	// if we got here, the moduleName just pointed to a directory, not to
4210 	// a real module - what should we do in this case?
4211 	status = B_ENTRY_NOT_FOUND;
4212 
4213 err:
4214 	put_vnode(dir);
4215 	return status;
4216 }
4217 
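/*	Worked example of the lookup above (illustrative only; the paths are
	hypothetical): for basePath "/boot/system/add-ons/kernel" and moduleName
	"bus_managers/pci/v1", the loop appends and resolves the components
	"bus_managers", "pci", and "v1" one by one. As soon as a component
	resolves to a regular file -- say "pci" -- the function returns B_OK
	with

		/boot/system/add-ons/kernel/bus_managers/pci

	in \a pathBuffer; that file is expected to contain the module.
*/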
4218 
4219 /*!	\brief Normalizes a given path.
4220 
4221 	The path must refer to an existing or non-existing entry in an existing
4222 	directory, that is chopping off the leaf component the remaining path must
4223 	refer to an existing directory.
4224 
4225 	The returned path will be canonical in that it will be absolute, will
4226 	not contain any "." or ".." components or duplicate occurrences of
4227 	'/'s, and none of the directory components will be symbolic links.
4228 
4229 	Any two paths referring to the same entry will result in the same
4230 	normalized path (well, that is pretty much the definition of
4231 	`normalized', isn't it :-).
4232 
4233 	\param path The path to be normalized.
4234 	\param buffer The buffer into which the normalized path will be written.
4235 		   May be the same one as \a path.
4236 	\param bufferSize The size of \a buffer.
4237 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4238 	\param kernel \c true, if the IO context of the kernel shall be used,
4239 		   otherwise that of the team this thread belongs to. Only relevant,
4240 		   if the path is relative (to get the CWD).
4241 	\return \c B_OK if everything went fine, another error code otherwise.
4242 */
4243 status_t
4244 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4245 	bool traverseLink, bool kernel)
4246 {
4247 	if (!path || !buffer || bufferSize < 1)
4248 		return B_BAD_VALUE;
4249 
4250 	if (path != buffer) {
4251 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4252 			return B_BUFFER_OVERFLOW;
4253 	}
4254 
4255 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4256 }
4257 
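/*	Usage sketch (illustrative only; assumes a kernel-side caller and an
	existing "/boot/home"):

		char buffer[B_PATH_NAME_LENGTH];
		status_t error = vfs_normalize_path("/boot/home/./mail/../config",
			buffer, sizeof(buffer), true, true);
		if (error == B_OK) {
			// buffer now contains the canonical "/boot/home/config"
		}
*/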
4258 
4259 /*!	\brief Creates a special node in the file system.
4260 
4261 	The caller gets a reference to the newly created node (which is passed
4262 	back through \a _createdVnode) and is responsible for releasing it.
4263 
4264 	\param path The path where to create the entry for the node. Can be \c NULL,
4265 		in which case the node is created without an entry in the root FS -- it
4266 		will automatically be deleted when the last reference has been released.
4267 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4268 		the target file system will just create the node with its standard
4269 		operations. Depending on the type of the node a subnode might be created
4270 		automatically, though.
4271 	\param mode The type and permissions for the node to be created.
4272 	\param flags Flags to be passed to the creating FS.
4273 	\param kernel \c true, if called in the kernel context (relevant only if
4274 		\a path is not \c NULL and not absolute).
4275 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4276 		file system creating the node, with the private data pointer and
4277 		operations for the super node. Can be \c NULL.
4278 	\param _createdVnode Pointer to pre-allocated storage where to store the
4279 		pointer to the newly created node.
4280 	\return \c B_OK, if everything went fine, another error code otherwise.
4281 */
4282 status_t
4283 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4284 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4285 	struct vnode** _createdVnode)
4286 {
4287 	struct vnode* dirNode;
4288 	char _leaf[B_FILE_NAME_LENGTH];
4289 	char* leaf = NULL;
4290 
4291 	if (path) {
4292 		// We've got a path. Get the dir vnode and the leaf name.
4293 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4294 		if (tmpPathBuffer.InitCheck() != B_OK)
4295 			return B_NO_MEMORY;
4296 
4297 		char* tmpPath = tmpPathBuffer.LockBuffer();
4298 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4299 			return B_NAME_TOO_LONG;
4300 
4301 		// get the dir vnode and the leaf name
4302 		leaf = _leaf;
4303 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4304 		if (error != B_OK)
4305 			return error;
4306 	} else {
4307 		// No path. Create the node in the root FS.
4308 		dirNode = sRoot;
4309 		inc_vnode_ref_count(dirNode);
4310 	}
4311 
4312 	VNodePutter _(dirNode);
4313 
4314 	// check support for creating special nodes
4315 	if (!HAS_FS_CALL(dirNode, create_special_node))
4316 		return B_UNSUPPORTED;
4317 
4318 	// create the node
4319 	fs_vnode superVnode;
4320 	ino_t nodeID;
4321 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4322 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4323 	if (status != B_OK)
4324 		return status;
4325 
4326 	// lookup the node
4327 	mutex_lock(&sVnodeMutex);
4328 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4329 	mutex_unlock(&sVnodeMutex);
4330 
4331 	if (*_createdVnode == NULL) {
4332 		panic("vfs_create_special_node(): lookup of node failed");
4333 		return B_ERROR;
4334 	}
4335 
4336 	return B_OK;
4337 }
4338 
4339 
4340 extern "C" void
4341 vfs_put_vnode(struct vnode* vnode)
4342 {
4343 	put_vnode(vnode);
4344 }
4345 
4346 
4347 extern "C" status_t
4348 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4349 {
4350 	// Get current working directory from io context
4351 	struct io_context* context = get_current_io_context(false);
4352 	status_t status = B_OK;
4353 
4354 	mutex_lock(&context->io_mutex);
4355 
4356 	if (context->cwd != NULL) {
4357 		*_mountID = context->cwd->device;
4358 		*_vnodeID = context->cwd->id;
4359 	} else
4360 		status = B_ERROR;
4361 
4362 	mutex_unlock(&context->io_mutex);
4363 	return status;
4364 }
4365 
4366 
4367 status_t
4368 vfs_unmount(dev_t mountID, uint32 flags)
4369 {
4370 	return fs_unmount(NULL, mountID, flags, true);
4371 }
4372 
4373 
4374 extern "C" status_t
4375 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4376 {
4377 	struct vnode* vnode;
4378 
4379 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4380 	if (status != B_OK)
4381 		return status;
4382 
4383 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4384 	put_vnode(vnode);
4385 	return B_OK;
4386 }
4387 
4388 
4389 extern "C" void
4390 vfs_free_unused_vnodes(int32 level)
4391 {
4392 	vnode_low_resource_handler(NULL,
4393 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, level);
4394 }
4395 
4396 
4397 extern "C" bool
4398 vfs_can_page(struct vnode* vnode, void* cookie)
4399 {
4400 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4401 
4402 	if (HAS_FS_CALL(vnode, can_page))
4403 		return FS_CALL(vnode, can_page, cookie);
4404 	return false;
4405 }
4406 
4407 
4408 extern "C" status_t
4409 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4410 	size_t count, uint32 flags, size_t* _numBytes)
4411 {
4412 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4413 		pos));
4414 
4415 #if VFS_PAGES_IO_TRACING
4416 	size_t bytesRequested = *_numBytes;
4417 #endif
4418 
4419 	IORequest request;
4420 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4421 	if (status == B_OK) {
4422 		status = vfs_vnode_io(vnode, cookie, &request);
4423 		if (status == B_OK)
4424 			status = request.Wait();
4425 		*_numBytes = request.TransferredBytes();
4426 	}
4427 
4428 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4429 		status, *_numBytes));
4430 
4431 	return status;
4432 }
4433 
4434 
4435 extern "C" status_t
4436 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos, const iovec* vecs,
4437 	size_t count, uint32 flags, size_t* _numBytes)
4438 {
4439 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4440 		pos));
4441 
4442 #if VFS_PAGES_IO_TRACING
4443 	size_t bytesRequested = *_numBytes;
4444 #endif
4445 
4446 	IORequest request;
4447 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4448 	if (status == B_OK) {
4449 		status = vfs_vnode_io(vnode, cookie, &request);
4450 		if (status == B_OK)
4451 			status = request.Wait();
4452 		*_numBytes = request.TransferredBytes();
4453 	}
4454 
4455 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4456 		status, *_numBytes));
4457 
4458 	return status;
4459 }
4460 
4461 
4462 /*!	Gets the vnode's vm_cache object. If the vnode doesn't have one yet,
4463 	it will be created if \a allocate is \c true.
4464 	On success, a reference to the returned cache is acquired on behalf
4465 	of the caller.
4466 */
4467 extern "C" status_t
4468 vfs_get_vnode_cache(struct vnode* vnode, vm_cache** _cache, bool allocate)
4469 {
4470 	if (vnode->cache != NULL) {
4471 		vnode->cache->AcquireRef();
4472 		*_cache = vnode->cache;
4473 		return B_OK;
4474 	}
4475 
4476 	mutex_lock(&sVnodeMutex);
4477 
4478 	status_t status = B_OK;
4479 
4480 	// The cache could have been created in the meantime
4481 	if (vnode->cache == NULL) {
4482 		if (allocate) {
4483 			// TODO: actually the vnode needs to be busy already here, or
4484 			//	else this won't work...
4485 			bool wasBusy = vnode->busy;
4486 			vnode->busy = true;
4487 			mutex_unlock(&sVnodeMutex);
4488 
4489 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4490 
4491 			mutex_lock(&sVnodeMutex);
4492 			vnode->busy = wasBusy;
4493 		} else
4494 			status = B_BAD_VALUE;
4495 	}
4496 
4497 	mutex_unlock(&sVnodeMutex);
4498 
4499 	if (status == B_OK) {
4500 		vnode->cache->AcquireRef();
4501 		*_cache = vnode->cache;
4502 	}
4503 
4504 	return status;
4505 }
4506 
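/*	The function above follows a double-checked creation pattern: the
	unlocked fast path returns an already existing cache, and the check is
	repeated under sVnodeMutex before a cache is created. The generic shape
	(illustrative sketch):

		if (object != NULL)
			return object;		// fast path, no lock taken
		mutex_lock(&lock);
		if (object == NULL)		// re-check: it may exist by now
			status = create(&object);
		mutex_unlock(&lock);
*/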
4507 
4508 status_t
4509 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4510 	file_io_vec* vecs, size_t* _count)
4511 {
4512 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4513 		vnode, vecs, offset, size));
4514 
4515 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4516 }
4517 
4518 
4519 status_t
4520 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4521 {
4522 	status_t status = FS_CALL(vnode, read_stat, stat);
4523 
4524 	// fill in the st_dev and st_ino fields
4525 	if (status == B_OK) {
4526 		stat->st_dev = vnode->device;
4527 		stat->st_ino = vnode->id;
4528 		stat->st_rdev = -1;
4529 	}
4530 
4531 	return status;
4532 }
4533 
4534 
4535 status_t
4536 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4537 {
4538 	struct vnode* vnode;
4539 	status_t status = get_vnode(device, inode, &vnode, true, false);
4540 	if (status != B_OK)
4541 		return status;
4542 
4543 	status = FS_CALL(vnode, read_stat, stat);
4544 
4545 	// fill in the st_dev and st_ino fields
4546 	if (status == B_OK) {
4547 		stat->st_dev = vnode->device;
4548 		stat->st_ino = vnode->id;
4549 		stat->st_rdev = -1;
4550 	}
4551 
4552 	put_vnode(vnode);
4553 	return status;
4554 }
4555 
4556 
4557 status_t
4558 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4559 {
4560 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4561 }
4562 
4563 
4564 status_t
4565 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4566 	char* path, size_t pathLength)
4567 {
4568 	struct vnode* vnode;
4569 	status_t status;
4570 
4571 	// filter invalid leaf names
4572 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4573 		return B_BAD_VALUE;
4574 
4575 	// get the vnode matching the dir's node_ref
4576 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4577 		// special cases "." and "..": we can directly get the vnode of the
4578 		// referenced directory
4579 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4580 		leaf = NULL;
4581 	} else
4582 		status = get_vnode(device, inode, &vnode, true, false);
4583 	if (status != B_OK)
4584 		return status;
4585 
4586 	// get the directory path
4587 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4588 	put_vnode(vnode);
4589 		// we don't need the vnode anymore
4590 	if (status != B_OK)
4591 		return status;
4592 
4593 	// append the leaf name
4594 	if (leaf) {
4595 		// insert a directory separator if this is not the file system root
4596 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4597 				>= pathLength)
4598 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4599 			return B_NAME_TOO_LONG;
4600 		}
4601 	}
4602 
4603 	return B_OK;
4604 }
4605 
4606 
4607 /*!	If the given descriptor locked its vnode, that lock will be released. */
4608 void
4609 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4610 {
4611 	struct vnode* vnode = fd_vnode(descriptor);
4612 
4613 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4614 		vnode->mandatory_locked_by = NULL;
4615 }
4616 
4617 
4618 /*!	Closes all file descriptors of the specified I/O context that
4619 	have the O_CLOEXEC flag set.
4620 	have the close-on-exec (FD_CLOEXEC) flag set.
4621 void
4622 vfs_exec_io_context(io_context* context)
4623 {
4624 	uint32 i;
4625 
4626 	for (i = 0; i < context->table_size; i++) {
4627 		mutex_lock(&context->io_mutex);
4628 
4629 		struct file_descriptor* descriptor = context->fds[i];
4630 		bool remove = false;
4631 
4632 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4633 			context->fds[i] = NULL;
4634 			context->num_used_fds--;
4635 
4636 			remove = true;
4637 		}
4638 
4639 		mutex_unlock(&context->io_mutex);
4640 
4641 		if (remove) {
4642 			close_fd(descriptor);
4643 			put_fd(descriptor);
4644 		}
4645 	}
4646 }
4647 
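/*	Userland counterpart of the purge above (illustrative sketch; the path
	is hypothetical): a descriptor marked close-on-exec survives fork(),
	but not an exec*():

		int fd = open("/var/log/syslog", O_RDONLY | O_CLOEXEC);
			// equivalently: fcntl(fd, F_SETFD, FD_CLOEXEC);
		execl("/bin/sh", "sh", (char*)NULL);
			// in the new image, fd has been closed by vfs_exec_io_context()
*/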
4648 
4649 /*! Sets up a new io_context structure, and inherits the properties
4650 	of the parent io_context if one is given.
4651 */
4652 io_context*
4653 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4654 {
4655 	size_t tableSize;
4656 	struct io_context* context;
4657 
4658 	context = (io_context*)malloc(sizeof(struct io_context));
4659 	if (context == NULL)
4660 		return NULL;
4661 
4662 	memset(context, 0, sizeof(struct io_context));
4663 	context->ref_count = 1;
4664 
4665 	MutexLocker parentLocker;
4666 	if (parentContext) {
4667 		parentLocker.SetTo(parentContext->io_mutex, false);
4668 		tableSize = parentContext->table_size;
4669 	} else
4670 		tableSize = DEFAULT_FD_TABLE_SIZE;
4671 
4672 	// allocate space for FDs, their select infos, and their close-on-exec flags
4673 	context->fds = (file_descriptor**)malloc(
4674 		sizeof(struct file_descriptor*) * tableSize
4675 		+ sizeof(struct select_sync*) * tableSize
4676 		+ (tableSize + 7) / 8);
4677 	if (context->fds == NULL) {
4678 		free(context);
4679 		return NULL;
4680 	}
4681 
4682 	context->select_infos = (select_info**)(context->fds + tableSize);
4683 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4684 
4685 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4686 		+ sizeof(struct select_sync*) * tableSize
4687 		+ (tableSize + 7) / 8);
4688 
4689 	mutex_init(&context->io_mutex, "I/O context");
4690 
4691 	// Copy all parent file descriptors
4692 
4693 	if (parentContext) {
4694 		size_t i;
4695 
4696 		mutex_lock(&sIOContextRootLock);
4697 		context->root = parentContext->root;
4698 		if (context->root)
4699 			inc_vnode_ref_count(context->root);
4700 		mutex_unlock(&sIOContextRootLock);
4701 
4702 		context->cwd = parentContext->cwd;
4703 		if (context->cwd)
4704 			inc_vnode_ref_count(context->cwd);
4705 
4706 		for (i = 0; i < tableSize; i++) {
4707 			struct file_descriptor* descriptor = parentContext->fds[i];
4708 
4709 			if (descriptor != NULL) {
4710 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4711 				if (closeOnExec && purgeCloseOnExec)
4712 					continue;
4713 
4714 				context->fds[i] = descriptor;
4715 				context->num_used_fds++;
4716 				atomic_add(&descriptor->ref_count, 1);
4717 				atomic_add(&descriptor->open_count, 1);
4718 
4719 				if (closeOnExec)
4720 					fd_set_close_on_exec(context, i, true);
4721 			}
4722 		}
4723 
4724 		parentLocker.Unlock();
4725 	} else {
4726 		context->root = sRoot;
4727 		context->cwd = sRoot;
4728 
4729 		if (context->root)
4730 			inc_vnode_ref_count(context->root);
4731 
4732 		if (context->cwd)
4733 			inc_vnode_ref_count(context->cwd);
4734 	}
4735 
4736 	context->table_size = tableSize;
4737 
4738 	list_init(&context->node_monitors);
4739 	context->max_monitors = DEFAULT_NODE_MONITORS;
4740 
4741 	return context;
4742 }
4743 
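/*	Layout sketch of the single allocation made above (illustrative only):
	the FD table, the select_info table, and the close-on-exec bitmap share
	one malloc() block, with one bitmap byte covering 8 descriptors.

		size_t tableSize = 128;		// hypothetical
		size_t bytes = sizeof(struct file_descriptor*) * tableSize	// fds
			+ sizeof(struct select_sync*) * tableSize	// select_infos
			+ (tableSize + 7) / 8;						// close-on-exec bits
		// for tableSize == 128 on a 32-bit system:
		// 512 + 512 + 16 bytes in one contiguous block
*/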
4744 
4745 static status_t
4746 vfs_free_io_context(io_context* context)
4747 {
4748 	uint32 i;
4749 
4750 	if (context->root)
4751 		put_vnode(context->root);
4752 
4753 	if (context->cwd)
4754 		put_vnode(context->cwd);
4755 
4756 	mutex_lock(&context->io_mutex);
4757 
4758 	for (i = 0; i < context->table_size; i++) {
4759 		if (struct file_descriptor* descriptor = context->fds[i]) {
4760 			close_fd(descriptor);
4761 			put_fd(descriptor);
4762 		}
4763 	}
4764 
4765 	mutex_destroy(&context->io_mutex);
4766 
4767 	remove_node_monitors(context);
4768 	free(context->fds);
4769 	free(context);
4770 
4771 	return B_OK;
4772 }
4773 
4774 
4775 void
4776 vfs_get_io_context(io_context* context)
4777 {
4778 	atomic_add(&context->ref_count, 1);
4779 }
4780 
4781 
4782 void
4783 vfs_put_io_context(io_context* context)
4784 {
4785 	if (atomic_add(&context->ref_count, -1) == 1)
4786 		vfs_free_io_context(context);
4787 }
4788 
4789 
4790 static status_t
4791 vfs_resize_fd_table(struct io_context* context, const int newSize)
4792 {
4793 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4794 		return EINVAL;
4795 
4796 	MutexLocker _(context->io_mutex);
4797 
4798 	int oldSize = context->table_size;
4799 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4800 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4801 
4802 	// If the tables shrink, make sure none of the fds being dropped are in use.
4803 	if (newSize < oldSize) {
4804 		for (int i = oldSize; i-- > newSize;) {
4805 			if (context->fds[i])
4806 				return EBUSY;
4807 		}
4808 	}
4809 
4810 	// store pointers to the old tables
4811 	file_descriptor** oldFDs = context->fds;
4812 	select_info** oldSelectInfos = context->select_infos;
4813 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4814 
4815 	// allocate new tables
4816 	file_descriptor** newFDs = (file_descriptor**)malloc(
4817 		sizeof(struct file_descriptor*) * newSize
4818 		+ sizeof(struct select_sync*) * newSize
4819 		+ newCloseOnExitBitmapSize);
4820 	if (newFDs == NULL)
4821 		return ENOMEM;
4822 
4823 	context->fds = newFDs;
4824 	context->select_infos = (select_info**)(context->fds + newSize);
4825 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4826 	context->table_size = newSize;
4827 
4828 	// copy entries from old tables
4829 	int toCopy = min_c(oldSize, newSize);
4830 
4831 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4832 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4833 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4834 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4835 
4836 	// clear additional entries, if the tables grow
4837 	if (newSize > oldSize) {
4838 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4839 		memset(context->select_infos + oldSize, 0,
4840 			sizeof(void*) * (newSize - oldSize));
4841 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4842 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4843 	}
4844 
4845 	free(oldFDs);
4846 
4847 	return B_OK;
4848 }
4849 
4850 
4851 static status_t
4852 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4853 {
4854 	int	status = B_OK;
4855 
4856 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4857 		return EINVAL;
4858 
4859 	mutex_lock(&context->io_mutex);
4860 
4861 	if ((size_t)newSize < context->num_monitors) {
4862 		status = EBUSY;
4863 		goto out;
4864 	}
4865 	context->max_monitors = newSize;
4866 
4867 out:
4868 	mutex_unlock(&context->io_mutex);
4869 	return status;
4870 }
4871 
4872 
4873 int
4874 vfs_getrlimit(int resource, struct rlimit* rlp)
4875 {
4876 	if (!rlp)
4877 		return B_BAD_ADDRESS;
4878 
4879 	switch (resource) {
4880 		case RLIMIT_NOFILE:
4881 		{
4882 			struct io_context* context = get_current_io_context(false);
4883 			MutexLocker _(context->io_mutex);
4884 
4885 			rlp->rlim_cur = context->table_size;
4886 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4887 			return 0;
4888 		}
4889 
4890 		case RLIMIT_NOVMON:
4891 		{
4892 			struct io_context* context = get_current_io_context(false);
4893 			MutexLocker _(context->io_mutex);
4894 
4895 			rlp->rlim_cur = context->max_monitors;
4896 			rlp->rlim_max = MAX_NODE_MONITORS;
4897 			return 0;
4898 		}
4899 
4900 		default:
4901 			return B_BAD_VALUE;
4902 	}
4903 }
4904 
4905 
4906 int
4907 vfs_setrlimit(int resource, const struct rlimit* rlp)
4908 {
4909 	if (!rlp)
4910 		return B_BAD_ADDRESS;
4911 
4912 	switch (resource) {
4913 		case RLIMIT_NOFILE:
4914 			/* TODO: check getuid() */
4915 			if (rlp->rlim_max != RLIM_SAVED_MAX
4916 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4917 				return B_NOT_ALLOWED;
4918 
4919 			return vfs_resize_fd_table(get_current_io_context(false),
4920 				rlp->rlim_cur);
4921 
4922 		case RLIMIT_NOVMON:
4923 			/* TODO: check getuid() */
4924 			if (rlp->rlim_max != RLIM_SAVED_MAX
4925 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4926 				return B_NOT_ALLOWED;
4927 
4928 			return vfs_resize_monitor_table(get_current_io_context(false),
4929 				rlp->rlim_cur);
4930 
4931 		default:
4932 			return B_BAD_VALUE;
4933 	}
4934 }
4935 
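/*	Userland view of the two limits handled above (illustrative sketch;
	RLIMIT_NOVMON is Haiku-specific):

		struct rlimit rl;
		if (getrlimit(RLIMIT_NOFILE, &rl) == 0) {
			rl.rlim_cur = 512;	// ask for a larger FD table
			setrlimit(RLIMIT_NOFILE, &rl);
				// shrinking instead fails with EBUSY if it would
				// drop FDs that are still open
		}
*/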
4936 
4937 status_t
4938 vfs_init(kernel_args* args)
4939 {
4940 	struct vnode dummyVnode;
4941 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
4942 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
4943 	if (sVnodeTable == NULL)
4944 		panic("vfs_init: error creating vnode hash table\n");
4945 
4946 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
4947 
4948 	struct fs_mount dummyMount;
4949 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
4950 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
4951 	if (sMountsTable == NULL)
4952 		panic("vfs_init: error creating mounts hash table\n");
4953 
4954 	node_monitor_init();
4955 
4956 	sRoot = NULL;
4957 
4958 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
4959 
4960 	if (block_cache_init() != B_OK)
4961 		return B_ERROR;
4962 
4963 #ifdef ADD_DEBUGGER_COMMANDS
4964 	// add some debugger commands
4965 	add_debugger_command("vnode", &dump_vnode,
4966 		"info about the specified vnode");
4967 	add_debugger_command("vnodes", &dump_vnodes,
4968 		"list all vnodes (from the specified device)");
4969 	add_debugger_command("vnode_caches", &dump_vnode_caches,
4970 		"list all vnode caches");
4971 	add_debugger_command("mount", &dump_mount,
4972 		"info about the specified fs_mount");
4973 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
4974 	add_debugger_command("io_context", &dump_io_context,
4975 		"info about the I/O context");
4976 	add_debugger_command("vnode_usage", &dump_vnode_usage,
4977 		"info about vnode usage");
4978 #endif
4979 
4980 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
4981 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
4982 
4983 	file_map_init();
4984 
4985 	return file_cache_init();
4986 }
4987 
4988 
4989 //	#pragma mark - fd_ops implementations
4990 
4991 
4992 /*!
4993 	Calls fs_open() on the given vnode and returns a new
4994 	file descriptor for it
4995 */
4996 static int
4997 open_vnode(struct vnode* vnode, int openMode, bool kernel)
4998 {
4999 	void* cookie;
5000 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5001 	if (status != B_OK)
5002 		return status;
5003 
5004 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5005 	if (fd < 0) {
5006 		FS_CALL(vnode, close, cookie);
5007 		FS_CALL(vnode, free_cookie, cookie);
5008 	}
5009 	return fd;
5010 }
5011 
5012 
5013 /*!	Creates the entry \a name in the given directory and returns a new
5014 	file descriptor for it. Unless O_EXCL is set, an existing entry is
5015 	opened instead of being created.
5016 */
5017 static int
5018 create_vnode(struct vnode* directory, const char* name, int openMode,
5019 	int perms, bool kernel)
5020 {
5021 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5022 	status_t status = B_ERROR;
5023 	struct vnode* vnode;
5024 	void* cookie;
5025 	ino_t newID;
5026 
5027 	// This is somewhat tricky: If the entry already exists, the FS
5028 	// responsible for the directory might not necessarily be the one also
5029 	// responsible for the node the entry refers to. So we can never call
5030 	// the create() hook without O_EXCL. Instead we try to look the entry up
5031 	// first. If it already exists, we just open the node (unless O_EXCL);
5032 	// otherwise we call create() with O_EXCL. This introduces a race, since
5033 	// someone else might have created the entry in the meantime. We hope the
5034 	// respective FS returns the correct error code then, and retry (up to 3 times).
5035 
5036 	for (int i = 0; i < 3 && status != B_OK; i++) {
5037 		// look the node up
5038 		status = lookup_dir_entry(directory, name, &vnode);
5039 		if (status == B_OK) {
5040 			VNodePutter putter(vnode);
5041 
5042 			if ((openMode & O_EXCL) != 0)
5043 				return B_FILE_EXISTS;
5044 
5045 			// If the node is a symlink, we have to follow it, unless
5046 			// O_NOTRAVERSE is set.
5047 			if (S_ISLNK(vnode->type) && traverse) {
5048 				putter.Put();
5049 				char clonedName[B_FILE_NAME_LENGTH + 1];
5050 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5051 						>= B_FILE_NAME_LENGTH) {
5052 					return B_NAME_TOO_LONG;
5053 				}
5054 
5055 				inc_vnode_ref_count(directory);
5056 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5057 					kernel, &vnode, NULL);
5058 				if (status != B_OK)
5059 					return status;
5060 
5061 				putter.SetTo(vnode);
5062 			}
5063 
5064 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->type)) {
5065 				put_vnode(vnode);
5066 				return B_LINK_LIMIT;
5067 			}
5068 
5069 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5070 			// on success keep the vnode reference for the FD
5071 			if (fd >= 0)
5072 				putter.Detach();
5073 
5074 			return fd;
5075 		}
5076 
5077 		// it doesn't exist yet -- try to create it
5078 
5079 		if (!HAS_FS_CALL(directory, create))
5080 			return EROFS;
5081 
5082 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5083 			&cookie, &newID);
5084 		if (status != B_OK
5085 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5086 			return status;
5087 		}
5088 	}
5089 
5090 	if (status != B_OK)
5091 		return status;
5092 
5093 	// the node has been created successfully
5094 
5095 	mutex_lock(&sVnodeMutex);
5096 	vnode = lookup_vnode(directory->device, newID);
5097 	mutex_unlock(&sVnodeMutex);
5098 
5099 	if (vnode == NULL) {
5100 		panic("vfs: fs_create() returned success but there is no vnode, "
5101 			"mount ID %ld!\n", directory->device);
5102 		return B_BAD_VALUE;
5103 	}
5104 
5105 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5106 	if (fd >= 0)
5107 		return fd;
5108 
5109 	status = fd;
5110 
5111 	// something went wrong, clean up
5112 
5113 	FS_CALL(vnode, close, cookie);
5114 	FS_CALL(vnode, free_cookie, cookie);
5115 	put_vnode(vnode);
5116 
5117 	FS_CALL(directory, unlink, name);
5118 
5119 	return status;
5120 }
5121 
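/*	The O_EXCL handling above is what makes the classic exclusive-create
	idiom reliable from userland (illustrative sketch; the path is
	hypothetical):

		int fd = open("/tmp/lockfile", O_CREAT | O_EXCL | O_RDWR, 0644);
		if (fd < 0) {
			// EEXIST: somebody else created the entry first
		}
*/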
5122 
5123 /*! Calls fs open_dir() on the given vnode and returns a new
5124 	file descriptor for it
5125 */
5126 static int
5127 open_dir_vnode(struct vnode* vnode, bool kernel)
5128 {
5129 	void* cookie;
5130 	int status;
5131 
5132 	status = FS_CALL(vnode, open_dir, &cookie);
5133 	if (status != B_OK)
5134 		return status;
5135 
5136 	// directory is opened, create a fd
5137 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5138 	if (status >= 0)
5139 		return status;
5140 
5141 	FS_CALL(vnode, close_dir, cookie);
5142 	FS_CALL(vnode, free_dir_cookie, cookie);
5143 
5144 	return status;
5145 }
5146 
5147 
5148 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5149 	file descriptor for it.
5150 	Used by attr_dir_open() and attr_dir_open_fd().
5151 */
5152 static int
5153 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5154 {
5155 	void* cookie;
5156 	int status;
5157 
5158 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5159 		return EOPNOTSUPP;
5160 
5161 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5162 	if (status != B_OK)
5163 		return status;
5164 
5165 	// directory is opened, create a fd
5166 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5167 	if (status >= 0)
5168 		return status;
5169 
5170 	FS_CALL(vnode, close_attr_dir, cookie);
5171 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5172 
5173 	return status;
5174 }
5175 
5176 
5177 static int
5178 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5179 	int openMode, int perms, bool kernel)
5180 {
5181 	struct vnode* directory;
5182 	int status;
5183 
5184 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5185 		"kernel %d\n", name, openMode, perms, kernel));
5186 
5187 	// get directory to put the new file in
5188 	status = get_vnode(mountID, directoryID, &directory, true, false);
5189 	if (status != B_OK)
5190 		return status;
5191 
5192 	status = create_vnode(directory, name, openMode, perms, kernel);
5193 	put_vnode(directory);
5194 
5195 	return status;
5196 }
5197 
5198 
5199 static int
5200 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5201 {
5202 	char name[B_FILE_NAME_LENGTH];
5203 	struct vnode* directory;
5204 	int status;
5205 
5206 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5207 		openMode, perms, kernel));
5208 
5209 	// get directory to put the new file in
5210 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5211 	if (status < 0)
5212 		return status;
5213 
5214 	status = create_vnode(directory, name, openMode, perms, kernel);
5215 
5216 	put_vnode(directory);
5217 	return status;
5218 }
5219 
5220 
5221 static int
5222 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5223 	int openMode, bool kernel)
5224 {
5225 	if (name == NULL || *name == '\0')
5226 		return B_BAD_VALUE;
5227 
5228 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5229 		mountID, directoryID, name, openMode));
5230 
5231 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5232 
5233 	// get the vnode matching the entry_ref
5234 	struct vnode* vnode;
5235 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5236 		kernel, &vnode);
5237 	if (status != B_OK)
5238 		return status;
5239 
5240 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->type)) {
5241 		put_vnode(vnode);
5242 		return B_LINK_LIMIT;
5243 	}
5244 
5245 	int fd = open_vnode(vnode, openMode, kernel);
5246 	if (fd < 0)
5247 		put_vnode(vnode);
5248 
5249 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5250 		vnode->id, name);
5251 	return fd;
5252 }
5253 
5254 
5255 static int
5256 file_open(int fd, char* path, int openMode, bool kernel)
5257 {
5258 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5259 
5260 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5261 		fd, path, openMode, kernel));
5262 
5263 	// get the vnode matching the vnode + path combination
5264 	struct vnode* vnode;
5265 	ino_t parentID;
5266 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5267 		&parentID, kernel);
5268 	if (status != B_OK)
5269 		return status;
5270 
5271 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->type)) {
5272 		put_vnode(vnode);
5273 		return B_LINK_LIMIT;
5274 	}
5275 
5276 	// open the vnode
5277 	int newFD = open_vnode(vnode, openMode, kernel);
5278 	// put only on error -- otherwise our reference was transferred to the FD
5279 	if (newFD < 0)
5280 		put_vnode(vnode);
5281 
5282 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5283 		vnode->device, parentID, vnode->id, NULL);
5284 
5285 	return newFD;
5286 }
5287 
5288 
5289 static status_t
5290 file_close(struct file_descriptor* descriptor)
5291 {
5292 	struct vnode* vnode = descriptor->u.vnode;
5293 	status_t status = B_OK;
5294 
5295 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5296 
5297 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5298 		vnode->id);
5299 	if (HAS_FS_CALL(vnode, close)) {
5300 		status = FS_CALL(vnode, close, descriptor->cookie);
5301 	}
5302 
5303 	if (status == B_OK) {
5304 		// remove all outstanding locks for this team
5305 		release_advisory_lock(vnode, NULL);
5306 	}
5307 	return status;
5308 }
5309 
5310 
5311 static void
5312 file_free_fd(struct file_descriptor* descriptor)
5313 {
5314 	struct vnode* vnode = descriptor->u.vnode;
5315 
5316 	if (vnode != NULL) {
5317 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5318 		put_vnode(vnode);
5319 	}
5320 }
5321 
5322 
5323 static status_t
5324 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5325 	size_t* length)
5326 {
5327 	struct vnode* vnode = descriptor->u.vnode;
5328 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5329 		*length));
5330 
5331 	if (S_ISDIR(vnode->type))
5332 		return B_IS_A_DIRECTORY;
5333 
5334 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5335 }
5336 
5337 
5338 static status_t
5339 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5340 	size_t* length)
5341 {
5342 	struct vnode* vnode = descriptor->u.vnode;
5343 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5344 
5345 	if (S_ISDIR(vnode->type))
5346 		return B_IS_A_DIRECTORY;
5347 	if (!HAS_FS_CALL(vnode, write))
5348 		return EROFS;
5349 
5350 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5351 }
5352 
5353 
5354 static off_t
5355 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5356 {
5357 	struct vnode* vnode = descriptor->u.vnode;
5358 	off_t offset;
5359 
5360 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5361 
5362 	// some kinds of files are not seekable
5363 	switch (vnode->type & S_IFMT) {
5364 		case S_IFIFO:
5365 		case S_IFSOCK:
5366 			return ESPIPE;
5367 
5368 		// The Open Group Base Specs don't single out any file types besides
5369 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5370 		case S_IFREG:
5371 		case S_IFBLK:
5372 		case S_IFDIR:
5373 		case S_IFLNK:
5374 		case S_IFCHR:
5375 			break;
5376 	}
5377 
5378 	switch (seekType) {
5379 		case SEEK_SET:
5380 			offset = 0;
5381 			break;
5382 		case SEEK_CUR:
5383 			offset = descriptor->pos;
5384 			break;
5385 		case SEEK_END:
5386 		{
5387 			// stat() the node
5388 			if (!HAS_FS_CALL(vnode, read_stat))
5389 				return EOPNOTSUPP;
5390 
5391 			struct stat stat;
5392 			status_t status = FS_CALL(vnode, read_stat, &stat);
5393 			if (status != B_OK)
5394 				return status;
5395 
5396 			offset = stat.st_size;
5397 			break;
5398 		}
5399 		default:
5400 			return B_BAD_VALUE;
5401 	}
5402 
5403 	// assumes off_t is 64 bits wide
5404 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5405 		return EOVERFLOW;
5406 
5407 	pos += offset;
5408 	if (pos < 0)
5409 		return B_BAD_VALUE;
5410 
5411 	return descriptor->pos = pos;
5412 }
5413 
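/*	The SEEK_END branch above is what the common "query the file size"
	idiom relies on (illustrative sketch):

		off_t size = lseek(fd, 0, SEEK_END);
			// stats the node and yields st_size + 0
		if (size < 0)
			;	// fails with ESPIPE for pipes and sockets
*/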
5414 
5415 static status_t
5416 file_select(struct file_descriptor* descriptor, uint8 event,
5417 	struct selectsync* sync)
5418 {
5419 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5420 
5421 	struct vnode* vnode = descriptor->u.vnode;
5422 
5423 	// If the FS has no select() hook, notify select() now.
5424 	if (!HAS_FS_CALL(vnode, select))
5425 		return notify_select_event(sync, event);
5426 
5427 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5428 }
5429 
5430 
5431 static status_t
5432 file_deselect(struct file_descriptor* descriptor, uint8 event,
5433 	struct selectsync* sync)
5434 {
5435 	struct vnode* vnode = descriptor->u.vnode;
5436 
5437 	if (!HAS_FS_CALL(vnode, deselect))
5438 		return B_OK;
5439 
5440 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5441 }
5442 
5443 
5444 static status_t
5445 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5446 	bool kernel)
5447 {
5448 	struct vnode* vnode;
5449 	status_t status;
5450 
5451 	if (name == NULL || *name == '\0')
5452 		return B_BAD_VALUE;
5453 
5454 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5455 		"perms = %d)\n", mountID, parentID, name, perms));
5456 
5457 	status = get_vnode(mountID, parentID, &vnode, true, false);
5458 	if (status != B_OK)
5459 		return status;
5460 
5461 	if (HAS_FS_CALL(vnode, create_dir))
5462 		status = FS_CALL(vnode, create_dir, name, perms);
5463 	else
5464 		status = EROFS;
5465 
5466 	put_vnode(vnode);
5467 	return status;
5468 }
5469 
5470 
5471 static status_t
5472 dir_create(int fd, char* path, int perms, bool kernel)
5473 {
5474 	char filename[B_FILE_NAME_LENGTH];
5475 	struct vnode* vnode;
5476 	status_t status;
5477 
5478 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5479 		kernel));
5480 
5481 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5482 	if (status < 0)
5483 		return status;
5484 
5485 	if (HAS_FS_CALL(vnode, create_dir)) {
5486 		status = FS_CALL(vnode, create_dir, filename, perms);
5487 	} else
5488 		status = EROFS;
5489 
5490 	put_vnode(vnode);
5491 	return status;
5492 }
5493 
5494 
5495 static int
5496 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5497 {
5498 	struct vnode* vnode;
5499 	int status;
5500 
5501 	FUNCTION(("dir_open_entry_ref()\n"));
5502 
5503 	if (name && *name == '\0')
5504 		return B_BAD_VALUE;
5505 
5506 	// get the vnode matching the entry_ref/node_ref
5507 	if (name) {
5508 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5509 			&vnode);
5510 	} else
5511 		status = get_vnode(mountID, parentID, &vnode, true, false);
5512 	if (status != B_OK)
5513 		return status;
5514 
5515 	int fd = open_dir_vnode(vnode, kernel);
5516 	if (fd < 0)
5517 		put_vnode(vnode);
5518 
5519 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5520 		vnode->id, name);
5521 	return fd;
5522 }
5523 
5524 
5525 static int
5526 dir_open(int fd, char* path, bool kernel)
5527 {
5528 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5529 		kernel));
5530 
5531 	// get the vnode matching the vnode + path combination
5532 	struct vnode* vnode = NULL;
5533 	ino_t parentID;
5534 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5535 		kernel);
5536 	if (status != B_OK)
5537 		return status;
5538 
5539 	// open the dir
5540 	int newFD = open_dir_vnode(vnode, kernel);
5541 	if (newFD < 0)
5542 		put_vnode(vnode);
5543 
5544 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID,
5545 		vnode->id, NULL);
5546 	return newFD;
5547 }
5548 
5549 
5550 static status_t
5551 dir_close(struct file_descriptor* descriptor)
5552 {
5553 	struct vnode* vnode = descriptor->u.vnode;
5554 
5555 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5556 
5557 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5558 		vnode->id);
5559 	if (HAS_FS_CALL(vnode, close_dir))
5560 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5561 
5562 	return B_OK;
5563 }
5564 
5565 
5566 static void
5567 dir_free_fd(struct file_descriptor* descriptor)
5568 {
5569 	struct vnode* vnode = descriptor->u.vnode;
5570 
5571 	if (vnode != NULL) {
5572 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5573 		put_vnode(vnode);
5574 	}
5575 }
5576 
5577 
5578 static status_t
5579 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5580 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5581 {
5582 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5583 		bufferSize, _count);
5584 }
5585 
5586 
5587 static status_t
5588 fix_dirent(struct vnode* parent, struct dirent* entry,
5589 	struct io_context* ioContext)
5590 {
5591 	// set d_pdev and d_pino
5592 	entry->d_pdev = parent->device;
5593 	entry->d_pino = parent->id;
5594 
5595 	// If this is the ".." entry and the directory is the root of a FS,
5596 	// we need to replace d_dev and d_ino with the actual values.
5597 	if (strcmp(entry->d_name, "..") == 0
5598 		&& parent->mount->root_vnode == parent
5599 		&& parent->mount->covers_vnode) {
5600 		inc_vnode_ref_count(parent);
5601 			// vnode_path_to_vnode() puts the node
5602 
5603 		// Make sure the IO context root is not bypassed.
5604 		if (parent == ioContext->root) {
5605 			entry->d_dev = parent->device;
5606 			entry->d_ino = parent->id;
5607 		} else {
5608 			// ".." is guaranteed not to be clobbered by this call
5609 			struct vnode* vnode;
5610 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5611 				ioContext, &vnode, NULL);
5612 
5613 			if (status == B_OK) {
5614 				entry->d_dev = vnode->device;
5615 				entry->d_ino = vnode->id;
5616 			}
5617 		}
5618 	} else {
5619 		// resolve mount points
5620 		MutexLocker _(&sVnodeMutex);
5621 
5622 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5623 		if (vnode != NULL) {
5624 			MutexLocker _(&sVnodeCoveredByMutex);
5625 			if (vnode->covered_by != NULL) {
5626 				entry->d_dev = vnode->covered_by->device;
5627 				entry->d_ino = vnode->covered_by->id;
5628 			}
5629 		}
5630 	}
5631 
5632 	return B_OK;
5633 }
5634 
5635 
5636 static status_t
5637 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5638 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5639 {
5640 	if (!HAS_FS_CALL(vnode, read_dir))
5641 		return EOPNOTSUPP;
5642 
5643 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5644 		_count);
5645 	if (error != B_OK)
5646 		return error;
5647 
5648 	// we need to adjust the read dirents
5649 	uint32 count = *_count;
5650 	for (uint32 i = 0; i < count; i++) {
5651 		error = fix_dirent(vnode, buffer, ioContext);
5652 		if (error != B_OK)
5653 			return error;
5654 
5655 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5656 	}
5657 
5658 	return error;
5659 }
5660 
5661 
5662 static status_t
5663 dir_rewind(struct file_descriptor* descriptor)
5664 {
5665 	struct vnode* vnode = descriptor->u.vnode;
5666 
5667 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5668 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5669 	}
5670 
5671 	return EOPNOTSUPP;
5672 }
5673 
5674 
5675 static status_t
5676 dir_remove(int fd, char* path, bool kernel)
5677 {
5678 	char name[B_FILE_NAME_LENGTH];
5679 	struct vnode* directory;
5680 	status_t status;
5681 
5682 	if (path != NULL) {
5683 		// we need to make sure our path name doesn't end in "/", ".",
5684 		// or ".."
5685 		char* lastSlash = strrchr(path, '/');
5686 		if (lastSlash != NULL) {
5687 			char* leaf = lastSlash + 1;
5688 			if (!strcmp(leaf, ".."))
5689 				return B_NOT_ALLOWED;
5690 
5691 			// omit multiple slashes
5692 			while (lastSlash > path && lastSlash[-1] == '/') {
5693 				lastSlash--;
5694 			}
5695 
5696 			if (!leaf[0]
5697 				|| !strcmp(leaf, ".")) {
5698 				// "name/" -> "name", or "name/." -> "name"
5699 				lastSlash[0] = '\0';
5700 			}
5701 		}
5702 
5703 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5704 			return B_NOT_ALLOWED;
5705 	}
5706 
5707 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5708 	if (status != B_OK)
5709 		return status;
5710 
5711 	if (HAS_FS_CALL(directory, remove_dir))
5712 		status = FS_CALL(directory, remove_dir, name);
5713 	else
5714 		status = EROFS;
5715 
5716 	put_vnode(directory);
5717 	return status;
5718 }
5719 
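/*	Worked examples of the sanitizing above (illustrative): "foo/" and
	"foo/." are reduced to "foo" before the lookup, while "foo/..", ".",
	and ".." are rejected with B_NOT_ALLOWED.
*/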
5720 
5721 static status_t
5722 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
5723 	size_t length)
5724 {
5725 	struct vnode* vnode = descriptor->u.vnode;
5726 
5727 	if (HAS_FS_CALL(vnode, ioctl))
5728 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5729 
5730 	return EOPNOTSUPP;
5731 }
5732 
5733 
5734 static status_t
5735 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5736 {
5737 	struct flock flock;
5738 
5739 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5740 		fd, op, argument, kernel ? "kernel" : "user"));
5741 
5742 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5743 		fd);
5744 	if (descriptor == NULL)
5745 		return B_FILE_ERROR;
5746 
5747 	struct vnode* vnode = fd_vnode(descriptor);
5748 
5749 	status_t status = B_OK;
5750 
5751 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5752 		if (descriptor->type != FDTYPE_FILE)
5753 			status = B_BAD_VALUE;
5754 		else if (user_memcpy(&flock, (struct flock*)argument,
5755 				sizeof(struct flock)) != B_OK)
5756 			status = B_BAD_ADDRESS;
5757 
5758 		if (status != B_OK) {
5759 			put_fd(descriptor);
5760 			return status;
5761 		}
5762 	}
5763 
5764 	switch (op) {
5765 		case F_SETFD:
5766 		{
5767 			struct io_context* context = get_current_io_context(kernel);
5768 			// Set file descriptor flags
5769 
5770 			// O_CLOEXEC is the only flag available at this time
5771 			mutex_lock(&context->io_mutex);
5772 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5773 			mutex_unlock(&context->io_mutex);
5774 
5775 			status = B_OK;
5776 			break;
5777 		}
5778 
5779 		case F_GETFD:
5780 		{
5781 			struct io_context* context = get_current_io_context(kernel);
5782 
5783 			// Get file descriptor flags
5784 			mutex_lock(&context->io_mutex);
5785 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5786 			mutex_unlock(&context->io_mutex);
5787 			break;
5788 		}
5789 
5790 		case F_SETFL:
5791 			// Set file descriptor open mode
5792 
5793 			// we only accept changes to O_APPEND and O_NONBLOCK
5794 			argument &= O_APPEND | O_NONBLOCK;
5795 			if (descriptor->ops->fd_set_flags != NULL) {
5796 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5797 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5798 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5799 					(int)argument);
5800 			} else
5801 				status = EOPNOTSUPP;
5802 
5803 			if (status == B_OK) {
5804 				// update this descriptor's open_mode field
5805 				descriptor->open_mode = (descriptor->open_mode
5806 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5807 			}
5808 
5809 			break;
5810 
5811 		case F_GETFL:
5812 			// Get file descriptor open mode
5813 			status = descriptor->open_mode;
5814 			break;
5815 
5816 		case F_DUPFD:
5817 		{
5818 			struct io_context* context = get_current_io_context(kernel);
5819 
5820 			status = new_fd_etc(context, descriptor, (int)argument);
5821 			if (status >= 0) {
5822 				mutex_lock(&context->io_mutex);
5823 				fd_set_close_on_exec(context, fd, false);
5824 				mutex_unlock(&context->io_mutex);
5825 
5826 				atomic_add(&descriptor->ref_count, 1);
5827 			}
5828 			break;
5829 		}
5830 
5831 		case F_GETLK:
5832 			if (vnode != NULL) {
5833 				status = get_advisory_lock(vnode, &flock);
5834 				if (status == B_OK) {
5835 					// copy back flock structure
5836 					status = user_memcpy((struct flock*)argument, &flock,
5837 						sizeof(struct flock));
5838 				}
5839 			} else
5840 				status = B_BAD_VALUE;
5841 			break;
5842 
5843 		case F_SETLK:
5844 		case F_SETLKW:
5845 			status = normalize_flock(descriptor, &flock);
5846 			if (status != B_OK)
5847 				break;
5848 
5849 			if (vnode == NULL) {
5850 				status = B_BAD_VALUE;
5851 			} else if (flock.l_type == F_UNLCK) {
5852 				status = release_advisory_lock(vnode, &flock);
5853 			} else {
5854 				// the open mode must match the lock type
5855 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5856 						&& flock.l_type == F_WRLCK)
5857 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5858 						&& flock.l_type == F_RDLCK))
5859 					status = B_FILE_ERROR;
5860 				else {
5861 					status = acquire_advisory_lock(vnode, -1,
5862 						&flock, op == F_SETLKW);
5863 				}
5864 			}
5865 			break;
5866 
5867 		// ToDo: add support for more ops?
5868 
5869 		default:
5870 			status = B_BAD_VALUE;
5871 	}
5872 
5873 	put_fd(descriptor);
5874 	return status;
5875 }
5876 
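// Userland sketch of the F_SETFL path above (fd is hypothetical): only
// O_APPEND and O_NONBLOCK survive the mask, so any other requested flag is
// silently dropped rather than rejected:
//
//	int flags = fcntl(fd, F_GETFL);
//	if (flags >= 0)
//		fcntl(fd, F_SETFL, flags | O_NONBLOCK);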
5877 
5878 static status_t
5879 common_sync(int fd, bool kernel)
5880 {
5881 	struct file_descriptor* descriptor;
5882 	struct vnode* vnode;
5883 	status_t status;
5884 
5885 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
5886 
5887 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5888 	if (descriptor == NULL)
5889 		return B_FILE_ERROR;
5890 
5891 	if (HAS_FS_CALL(vnode, fsync))
5892 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5893 	else
5894 		status = EOPNOTSUPP;
5895 
5896 	put_fd(descriptor);
5897 	return status;
5898 }
5899 
5900 
5901 static status_t
5902 common_lock_node(int fd, bool kernel)
5903 {
5904 	struct file_descriptor* descriptor;
5905 	struct vnode* vnode;
5906 
5907 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5908 	if (descriptor == NULL)
5909 		return B_FILE_ERROR;
5910 
5911 	status_t status = B_OK;
5912 
5913 	// We need to set the locking atomically - someone
5914 	// else might set one at the same time
5915 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
5916 			(file_descriptor*)NULL) != NULL)
5917 		status = B_BUSY;
5918 
5919 	put_fd(descriptor);
5920 	return status;
5921 }
5922 
5923 
5924 static status_t
5925 common_unlock_node(int fd, bool kernel)
5926 {
5927 	struct file_descriptor* descriptor;
5928 	struct vnode* vnode;
5929 
5930 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5931 	if (descriptor == NULL)
5932 		return B_FILE_ERROR;
5933 
5934 	status_t status = B_OK;
5935 
5936 	// We need to set the locking atomically - someone
5937 	// else might set one at the same time
5938 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
5939 			(file_descriptor*)NULL, descriptor) != descriptor)
5940 		status = B_BAD_VALUE;
5941 
5942 	put_fd(descriptor);
5943 	return status;
5944 }
5945 
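// Both functions above hinge on atomic_pointer_test_and_set(), which stores
// the new value only if the current value equals the test value and always
// returns the previous value. A non-atomic sketch of those semantics
// (illustrative only, not the real primitive):
//
//	file_descriptor* sketch_test_and_set(file_descriptor** value,
//		file_descriptor* newValue, file_descriptor* testAgainst)
//	{
//		file_descriptor* oldValue = *value;
//		if (oldValue == testAgainst)
//			*value = newValue;
//		return oldValue;
//	}
//
// Locking thus succeeds only on a NULL -> descriptor transition, and
// unlocking only on the matching descriptor -> NULL transition.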
5946 
5947 static status_t
5948 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
5949 	bool kernel)
5950 {
5951 	struct vnode* vnode;
5952 	status_t status;
5953 
5954 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
5955 	if (status != B_OK)
5956 		return status;
5957 
5958 	if (HAS_FS_CALL(vnode, read_symlink)) {
5959 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
5960 	} else
5961 		status = B_BAD_VALUE;
5962 
5963 	put_vnode(vnode);
5964 	return status;
5965 }
5966 
5967 
5968 static status_t
5969 common_create_symlink(int fd, char* path, const char* toPath, int mode,
5970 	bool kernel)
5971 {
5972 	// path validity checks have to be in the calling function!
5973 	char name[B_FILE_NAME_LENGTH];
5974 	struct vnode* vnode;
5975 	status_t status;
5976 
5977 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
5978 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
5979 
5980 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
5981 	if (status != B_OK)
5982 		return status;
5983 
5984 	if (HAS_FS_CALL(vnode, create_symlink))
5985 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
5986 	else {
5987 		status = HAS_FS_CALL(vnode, write)
5988 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
5989 	}
5990 
5991 	put_vnode(vnode);
5992 
5993 	return status;
5994 }
5995 
5996 
5997 static status_t
5998 common_create_link(int pathFD, char* path, int toFD, char* toPath,
5999 	bool traverseLeafLink, bool kernel)
6000 {
6001 	// path validity checks have to be in the calling function!
6002 
6003 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6004 		toPath, kernel));
6005 
6006 	char name[B_FILE_NAME_LENGTH];
6007 	struct vnode* directory;
6008 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6009 		kernel);
6010 	if (status != B_OK)
6011 		return status;
6012 
6013 	struct vnode* vnode;
6014 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6015 		kernel);
6016 	if (status != B_OK)
6017 		goto err;
6018 
6019 	if (directory->mount != vnode->mount) {
6020 		status = B_CROSS_DEVICE_LINK;
6021 		goto err1;
6022 	}
6023 
6024 	if (HAS_FS_CALL(directory, link))
6025 		status = FS_CALL(directory, link, name, vnode);
6026 	else
6027 		status = EROFS;
6028 
6029 err1:
6030 	put_vnode(vnode);
6031 err:
6032 	put_vnode(directory);
6033 
6034 	return status;
6035 }
6036 
6037 
6038 static status_t
6039 common_unlink(int fd, char* path, bool kernel)
6040 {
6041 	char filename[B_FILE_NAME_LENGTH];
6042 	struct vnode* vnode;
6043 	status_t status;
6044 
6045 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6046 		kernel));
6047 
6048 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6049 	if (status < 0)
6050 		return status;
6051 
6052 	if (HAS_FS_CALL(vnode, unlink))
6053 		status = FS_CALL(vnode, unlink, filename);
6054 	else
6055 		status = EROFS;
6056 
6057 	put_vnode(vnode);
6058 
6059 	return status;
6060 }
6061 
6062 
6063 static status_t
6064 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6065 {
6066 	struct vnode* vnode;
6067 	status_t status;
6068 
6069 	// TODO: honor effectiveUserGroup argument
6070 
6071 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6072 	if (status != B_OK)
6073 		return status;
6074 
6075 	if (HAS_FS_CALL(vnode, access))
6076 		status = FS_CALL(vnode, access, mode);
6077 	else
6078 		status = B_OK;
6079 
6080 	put_vnode(vnode);
6081 
6082 	return status;
6083 }
6084 
6085 
6086 static status_t
6087 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6088 {
6089 	struct vnode* fromVnode;
6090 	struct vnode* toVnode;
6091 	char fromName[B_FILE_NAME_LENGTH];
6092 	char toName[B_FILE_NAME_LENGTH];
6093 	status_t status;
6094 
6095 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6096 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6097 
6098 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6099 	if (status != B_OK)
6100 		return status;
6101 
6102 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6103 	if (status != B_OK)
6104 		goto err1;
6105 
6106 	if (fromVnode->device != toVnode->device) {
6107 		status = B_CROSS_DEVICE_LINK;
6108 		goto err2;
6109 	}
6110 
6111 	if (fromName[0] == '\0' || toName[0] == '\0'
6112 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6113 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6114 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6115 		status = B_BAD_VALUE;
6116 		goto err2;
6117 	}
6118 
6119 	if (HAS_FS_CALL(fromVnode, rename))
6120 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6121 	else
6122 		status = EROFS;
6123 
6124 err2:
6125 	put_vnode(toVnode);
6126 err1:
6127 	put_vnode(fromVnode);
6128 
6129 	return status;
6130 }
6131 
6132 
6133 static status_t
6134 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6135 {
6136 	struct vnode* vnode = descriptor->u.vnode;
6137 
6138 	FUNCTION(("common_read_stat: stat %p\n", stat));
6139 
6140 	// TODO: remove this once all file systems properly set them!
6141 	stat->st_crtim.tv_nsec = 0;
6142 	stat->st_ctim.tv_nsec = 0;
6143 	stat->st_mtim.tv_nsec = 0;
6144 	stat->st_atim.tv_nsec = 0;
6145 
6146 	status_t status = FS_CALL(vnode, read_stat, stat);
6147 
6148 	// fill in the st_dev and st_ino fields
6149 	if (status == B_OK) {
6150 		stat->st_dev = vnode->device;
6151 		stat->st_ino = vnode->id;
6152 		stat->st_rdev = -1;
6153 	}
6154 
6155 	return status;
6156 }
6157 
6158 
6159 static status_t
6160 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6161 	int statMask)
6162 {
6163 	struct vnode* vnode = descriptor->u.vnode;
6164 
6165 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6166 		vnode, stat, statMask));
6167 
6168 	if (!HAS_FS_CALL(vnode, write_stat))
6169 		return EROFS;
6170 
6171 	return FS_CALL(vnode, write_stat, stat, statMask);
6172 }
6173 
6174 
6175 static status_t
6176 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6177 	struct stat* stat, bool kernel)
6178 {
6179 	struct vnode* vnode;
6180 	status_t status;
6181 
6182 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6183 		stat));
6184 
6185 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6186 		kernel);
6187 	if (status < 0)
6188 		return status;
6189 
6190 	status = FS_CALL(vnode, read_stat, stat);
6191 
6192 	// fill in the st_dev and st_ino fields
6193 	if (status == B_OK) {
6194 		stat->st_dev = vnode->device;
6195 		stat->st_ino = vnode->id;
6196 		stat->st_rdev = -1;
6197 	}
6198 
6199 	put_vnode(vnode);
6200 	return status;
6201 }
6202 
6203 
6204 static status_t
6205 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6206 	const struct stat* stat, int statMask, bool kernel)
6207 {
6208 	struct vnode* vnode;
6209 	status_t status;
6210 
6211 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6212 		"kernel %d\n", fd, path, stat, statMask, kernel));
6213 
6214 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6215 		kernel);
6216 	if (status < 0)
6217 		return status;
6218 
6219 	if (HAS_FS_CALL(vnode, write_stat))
6220 		status = FS_CALL(vnode, write_stat, stat, statMask);
6221 	else
6222 		status = EROFS;
6223 
6224 	put_vnode(vnode);
6225 
6226 	return status;
6227 }
6228 
6229 
6230 static int
6231 attr_dir_open(int fd, char* path, bool kernel)
6232 {
6233 	struct vnode* vnode;
6234 	int status;
6235 
6236 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6237 		kernel));
6238 
6239 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6240 	if (status != B_OK)
6241 		return status;
6242 
6243 	status = open_attr_dir_vnode(vnode, kernel);
6244 	if (status < 0)
6245 		put_vnode(vnode);
6246 
6247 	return status;
6248 }
6249 
6250 
6251 static status_t
6252 attr_dir_close(struct file_descriptor* descriptor)
6253 {
6254 	struct vnode* vnode = descriptor->u.vnode;
6255 
6256 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6257 
6258 	if (HAS_FS_CALL(vnode, close_attr_dir))
6259 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6260 
6261 	return B_OK;
6262 }
6263 
6264 
6265 static void
6266 attr_dir_free_fd(struct file_descriptor* descriptor)
6267 {
6268 	struct vnode* vnode = descriptor->u.vnode;
6269 
6270 	if (vnode != NULL) {
6271 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6272 		put_vnode(vnode);
6273 	}
6274 }
6275 
6276 
6277 static status_t
6278 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6279 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6280 {
6281 	struct vnode* vnode = descriptor->u.vnode;
6282 
6283 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6284 
6285 	if (HAS_FS_CALL(vnode, read_attr_dir))
6286 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6287 			bufferSize, _count);
6288 
6289 	return EOPNOTSUPP;
6290 }
6291 
6292 
6293 static status_t
6294 attr_dir_rewind(struct file_descriptor* descriptor)
6295 {
6296 	struct vnode* vnode = descriptor->u.vnode;
6297 
6298 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6299 
6300 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6301 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6302 
6303 	return EOPNOTSUPP;
6304 }
6305 
6306 
6307 static int
6308 attr_create(int fd, char* path, const char* name, uint32 type,
6309 	int openMode, bool kernel)
6310 {
6311 	if (name == NULL || *name == '\0')
6312 		return B_BAD_VALUE;
6313 
6314 	struct vnode* vnode;
6315 	status_t status = fd_and_path_to_vnode(fd, path,
6316 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6317 	if (status != B_OK)
6318 		return status;
6319 
6320 	if (!HAS_FS_CALL(vnode, create_attr)) {
6321 		status = EROFS;
6322 		goto err;
6323 	}
6324 
6325 	void* cookie;
6326 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6327 	if (status != B_OK)
6328 		goto err;
6329 
6330 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6331 	if (fd >= 0)
6332 		return fd;
6333 
6334 	status = fd;
6335 
6336 	FS_CALL(vnode, close_attr, cookie);
6337 	FS_CALL(vnode, free_attr_cookie, cookie);
6338 
6339 	FS_CALL(vnode, remove_attr, name);
6340 
6341 err:
6342 	put_vnode(vnode);
6343 
6344 	return status;
6345 }
6346 
6347 
6348 static int
6349 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6350 {
6351 	if (name == NULL || *name == '\0')
6352 		return B_BAD_VALUE;
6353 
6354 	struct vnode* vnode;
6355 	status_t status = fd_and_path_to_vnode(fd, path,
6356 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6357 	if (status != B_OK)
6358 		return status;
6359 
6360 	if (!HAS_FS_CALL(vnode, open_attr)) {
6361 		status = EOPNOTSUPP;
6362 		goto err;
6363 	}
6364 
6365 	void* cookie;
6366 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6367 	if (status != B_OK)
6368 		goto err;
6369 
6370 	// now we only need a file descriptor for this attribute and we're done
6371 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6372 	if (fd >= 0)
6373 		return fd;
6374 
6375 	status = fd;
6376 
6377 	FS_CALL(vnode, close_attr, cookie);
6378 	FS_CALL(vnode, free_attr_cookie, cookie);
6379 
6380 err:
6381 	put_vnode(vnode);
6382 
6383 	return status;
6384 }
6385 
6386 
6387 static status_t
6388 attr_close(struct file_descriptor* descriptor)
6389 {
6390 	struct vnode* vnode = descriptor->u.vnode;
6391 
6392 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6393 
6394 	if (HAS_FS_CALL(vnode, close_attr))
6395 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6396 
6397 	return B_OK;
6398 }
6399 
6400 
6401 static void
6402 attr_free_fd(struct file_descriptor* descriptor)
6403 {
6404 	struct vnode* vnode = descriptor->u.vnode;
6405 
6406 	if (vnode != NULL) {
6407 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6408 		put_vnode(vnode);
6409 	}
6410 }
6411 
6412 
6413 static status_t
6414 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6415 	size_t* length)
6416 {
6417 	struct vnode* vnode = descriptor->u.vnode;
6418 
6419 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6420 		*length));
6421 
6422 	if (!HAS_FS_CALL(vnode, read_attr))
6423 		return EOPNOTSUPP;
6424 
6425 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6426 }
6427 
6428 
6429 static status_t
6430 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6431 	size_t* length)
6432 {
6433 	struct vnode* vnode = descriptor->u.vnode;
6434 
6435 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6436 	if (!HAS_FS_CALL(vnode, write_attr))
6437 		return EOPNOTSUPP;
6438 
6439 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6440 }
6441 
6442 
6443 static off_t
6444 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6445 {
6446 	off_t offset;
6447 
6448 	switch (seekType) {
6449 		case SEEK_SET:
6450 			offset = 0;
6451 			break;
6452 		case SEEK_CUR:
6453 			offset = descriptor->pos;
6454 			break;
6455 		case SEEK_END:
6456 		{
6457 			struct vnode* vnode = descriptor->u.vnode;
6458 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6459 				return EOPNOTSUPP;
6460 
6461 			struct stat stat;
6462 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6463 				&stat);
6464 			if (status != B_OK)
6465 				return status;
6466 
6467 			offset = stat.st_size;
6468 			break;
6469 		}
6470 		default:
6471 			return B_BAD_VALUE;
6472 	}
6473 
6474 	// assumes off_t is 64 bits wide
6475 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6476 		return EOVERFLOW;
6477 
6478 	pos += offset;
6479 	if (pos < 0)
6480 		return B_BAD_VALUE;
6481 
6482 	return descriptor->pos = pos;
6483 }
6484 
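// The guard above avoids signed overflow in "pos + offset": for a positive
// offset it is equivalent to testing pos + offset > LONGLONG_MAX without
// computing the (undefined) overflowing sum. Worked example with hypothetical
// values:
//
//	offset = LONGLONG_MAX - 10, pos = 20
//	LONGLONG_MAX - offset = 10 < 20  =>  EOVERFLOW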
6485 
6486 static status_t
6487 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6488 {
6489 	struct vnode* vnode = descriptor->u.vnode;
6490 
6491 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6492 
6493 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6494 		return EOPNOTSUPP;
6495 
6496 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6497 }
6498 
6499 
6500 static status_t
6501 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6502 	int statMask)
6503 {
6504 	struct vnode* vnode = descriptor->u.vnode;
6505 
6506 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6507 
6508 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6509 		return EROFS;
6510 
6511 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6512 }
6513 
6514 
6515 static status_t
6516 attr_remove(int fd, const char* name, bool kernel)
6517 {
6518 	struct file_descriptor* descriptor;
6519 	struct vnode* vnode;
6520 	status_t status;
6521 
6522 	if (name == NULL || *name == '\0')
6523 		return B_BAD_VALUE;
6524 
6525 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6526 		kernel));
6527 
6528 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6529 	if (descriptor == NULL)
6530 		return B_FILE_ERROR;
6531 
6532 	if (HAS_FS_CALL(vnode, remove_attr))
6533 		status = FS_CALL(vnode, remove_attr, name);
6534 	else
6535 		status = EROFS;
6536 
6537 	put_fd(descriptor);
6538 
6539 	return status;
6540 }
6541 
6542 
6543 static status_t
6544 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6545 	bool kernel)
6546 {
6547 	struct file_descriptor* fromDescriptor;
6548 	struct file_descriptor* toDescriptor;
6549 	struct vnode* fromVnode;
6550 	struct vnode* toVnode;
6551 	status_t status;
6552 
6553 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6554 		|| *toName == '\0')
6555 		return B_BAD_VALUE;
6556 
6557 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6558 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6559 
6560 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6561 	if (fromDescriptor == NULL)
6562 		return B_FILE_ERROR;
6563 
6564 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6565 	if (toDescriptor == NULL) {
6566 		status = B_FILE_ERROR;
6567 		goto err;
6568 	}
6569 
6570 	// are the files on the same volume?
6571 	if (fromVnode->device != toVnode->device) {
6572 		status = B_CROSS_DEVICE_LINK;
6573 		goto err1;
6574 	}
6575 
6576 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6577 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6578 	} else
6579 		status = EROFS;
6580 
6581 err1:
6582 	put_fd(toDescriptor);
6583 err:
6584 	put_fd(fromDescriptor);
6585 
6586 	return status;
6587 }
6588 
6589 
6590 static int
6591 index_dir_open(dev_t mountID, bool kernel)
6592 {
6593 	struct fs_mount* mount;
6594 	void* cookie;
6595 
6596 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6597 
6598 	status_t status = get_mount(mountID, &mount);
6599 	if (status != B_OK)
6600 		return status;
6601 
6602 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6603 		status = EOPNOTSUPP;
6604 		goto error;
6605 	}
6606 
6607 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6608 	if (status != B_OK)
6609 		goto error;
6610 
6611 	// get fd for the index directory
6612 	int fd;
6613 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6614 	if (fd >= 0)
6615 		return fd;
6616 
6617 	// something went wrong
6618 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6619 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6620 
6621 	status = fd;
6622 
6623 error:
6624 	put_mount(mount);
6625 	return status;
6626 }
6627 
6628 
6629 static status_t
6630 index_dir_close(struct file_descriptor* descriptor)
6631 {
6632 	struct fs_mount* mount = descriptor->u.mount;
6633 
6634 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6635 
6636 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6637 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6638 
6639 	return B_OK;
6640 }
6641 
6642 
6643 static void
6644 index_dir_free_fd(struct file_descriptor* descriptor)
6645 {
6646 	struct fs_mount* mount = descriptor->u.mount;
6647 
6648 	if (mount != NULL) {
6649 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6650 		put_mount(mount);
6651 	}
6652 }
6653 
6654 
6655 static status_t
6656 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6657 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6658 {
6659 	struct fs_mount* mount = descriptor->u.mount;
6660 
6661 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6662 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6663 			bufferSize, _count);
6664 	}
6665 
6666 	return EOPNOTSUPP;
6667 }
6668 
6669 
6670 static status_t
6671 index_dir_rewind(struct file_descriptor* descriptor)
6672 {
6673 	struct fs_mount* mount = descriptor->u.mount;
6674 
6675 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6676 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6677 
6678 	return EOPNOTSUPP;
6679 }
6680 
6681 
6682 static status_t
6683 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6684 	bool kernel)
6685 {
6686 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6687 		name, kernel));
6688 
6689 	struct fs_mount* mount;
6690 	status_t status = get_mount(mountID, &mount);
6691 	if (status != B_OK)
6692 		return status;
6693 
6694 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6695 		status = EROFS;
6696 		goto out;
6697 	}
6698 
6699 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6700 
6701 out:
6702 	put_mount(mount);
6703 	return status;
6704 }
6705 
6706 
6707 #if 0
6708 static status_t
6709 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6710 {
6711 	struct vnode* vnode = descriptor->u.vnode;
6712 
6713 	// ToDo: currently unused!
6714 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6715 	if (!HAS_FS_CALL(vnode, read_index_stat))
6716 		return EOPNOTSUPP;
6717 
6718 	return EOPNOTSUPP;
6719 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6720 }
6721 
6722 
6723 static void
6724 index_free_fd(struct file_descriptor* descriptor)
6725 {
6726 	struct vnode* vnode = descriptor->u.vnode;
6727 
6728 	if (vnode != NULL) {
6729 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6730 		put_vnode(vnode);
6731 	}
6732 }
6733 #endif
6734 
6735 
6736 static status_t
6737 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6738 	bool kernel)
6739 {
6740 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6741 		name, kernel));
6742 
6743 	struct fs_mount* mount;
6744 	status_t status = get_mount(mountID, &mount);
6745 	if (status != B_OK)
6746 		return status;
6747 
6748 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6749 		status = EOPNOTSUPP;
6750 		goto out;
6751 	}
6752 
6753 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6754 
6755 out:
6756 	put_mount(mount);
6757 	return status;
6758 }
6759 
6760 
6761 static status_t
6762 index_remove(dev_t mountID, const char* name, bool kernel)
6763 {
6764 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6765 		name, kernel));
6766 
6767 	struct fs_mount* mount;
6768 	status_t status = get_mount(mountID, &mount);
6769 	if (status != B_OK)
6770 		return status;
6771 
6772 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6773 		status = EROFS;
6774 		goto out;
6775 	}
6776 
6777 	status = FS_MOUNT_CALL(mount, remove_index, name);
6778 
6779 out:
6780 	put_mount(mount);
6781 	return status;
6782 }
6783 
6784 
6785 /*!	TODO: the query FS API is still the pretty much the same as in R5.
6786 		It would be nice if the FS would find some more kernel support
6787 		for them.
6788 		For example, query parsing should be moved into the kernel.
6789 */
6790 static int
6791 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6792 	int32 token, bool kernel)
6793 {
6794 	struct fs_mount* mount;
6795 	void* cookie;
6796 
6797 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6798 		query, kernel));
6799 
6800 	status_t status = get_mount(device, &mount);
6801 	if (status != B_OK)
6802 		return status;
6803 
6804 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6805 		status = EOPNOTSUPP;
6806 		goto error;
6807 	}
6808 
6809 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6810 		&cookie);
6811 	if (status != B_OK)
6812 		goto error;
6813 
6814 	// get fd for the query
6815 	int fd;
6816 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6817 	if (fd >= 0)
6818 		return fd;
6819 
6820 	status = fd;
6821 
6822 	// something went wrong
6823 	FS_MOUNT_CALL(mount, close_query, cookie);
6824 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6825 
6826 error:
6827 	put_mount(mount);
6828 	return status;
6829 }
6830 
6831 
6832 static status_t
6833 query_close(struct file_descriptor* descriptor)
6834 {
6835 	struct fs_mount* mount = descriptor->u.mount;
6836 
6837 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6838 
6839 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6840 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6841 
6842 	return B_OK;
6843 }
6844 
6845 
6846 static void
6847 query_free_fd(struct file_descriptor* descriptor)
6848 {
6849 	struct fs_mount* mount = descriptor->u.mount;
6850 
6851 	if (mount != NULL) {
6852 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6853 		put_mount(mount);
6854 	}
6855 }
6856 
6857 
6858 static status_t
6859 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6860 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6861 {
6862 	struct fs_mount* mount = descriptor->u.mount;
6863 
6864 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6865 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6866 			bufferSize, _count);
6867 	}
6868 
6869 	return EOPNOTSUPP;
6870 }
6871 
6872 
6873 static status_t
6874 query_rewind(struct file_descriptor* descriptor)
6875 {
6876 	struct fs_mount* mount = descriptor->u.mount;
6877 
6878 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6879 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6880 
6881 	return EOPNOTSUPP;
6882 }
6883 
6884 
6885 //	#pragma mark - General File System functions
6886 
6887 
6888 static dev_t
6889 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6890 	const char* args, bool kernel)
6891 {
6892 	struct ::fs_mount* mount;
6893 	status_t status = B_OK;
6894 	fs_volume* volume = NULL;
6895 	int32 layer = 0;
6896 
6897 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6898 
6899 	// The path is always safe, we just have to make sure that fsName is at
6900 	// least superficially valid - we can't make any assumptions about args.
6901 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
6902 	// We'll get it from the DDM later.
6903 	if (fsName == NULL) {
6904 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6905 			return B_BAD_VALUE;
6906 	} else if (fsName[0] == '\0')
6907 		return B_BAD_VALUE;
6908 
6909 	RecursiveLocker mountOpLocker(sMountOpLock);
6910 
6911 	// Helper to delete a newly created file device on failure.
6912 	// Not exactly beautiful, but helps to keep the code below cleaner.
6913 	struct FileDeviceDeleter {
6914 		FileDeviceDeleter() : id(-1) {}
6915 		~FileDeviceDeleter()
6916 		{
6917 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
6918 		}
6919 
6920 		partition_id id;
6921 	} fileDeviceDeleter;
6922 
6923 	// If the file system is not a "virtual" one, the device argument should
6924 	// point to a real file/device (if given at all).
6925 	// get the partition
6926 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6927 	KPartition* partition = NULL;
6928 	KPath normalizedDevice;
6929 	bool newlyCreatedFileDevice = false;
6930 
6931 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
6932 		// normalize the device path
6933 		status = normalizedDevice.SetTo(device, true);
6934 		if (status != B_OK)
6935 			return status;
6936 
6937 		// get a corresponding partition from the DDM
6938 		partition = ddm->RegisterPartition(normalizedDevice.Path());
6939 		if (partition == NULL) {
6940 			// Partition not found: this either means that the user supplied
6941 			// an invalid path, or the path refers to an image file. We try
6942 			// to let the DDM create a file device for the path.
6943 			partition_id deviceID = ddm->CreateFileDevice(
6944 				normalizedDevice.Path(), &newlyCreatedFileDevice);
6945 			if (deviceID >= 0) {
6946 				partition = ddm->RegisterPartition(deviceID);
6947 				if (newlyCreatedFileDevice)
6948 					fileDeviceDeleter.id = deviceID;
6949 			}
6950 		}
6951 
6952 		if (!partition) {
6953 			TRACE(("fs_mount(): Partition `%s' not found.\n",
6954 				normalizedDevice.Path()));
6955 			return B_ENTRY_NOT_FOUND;
6956 		}
6957 
6958 		device = normalizedDevice.Path();
6959 			// correct path to file device
6960 	}
6961 	PartitionRegistrar partitionRegistrar(partition, true);
6962 
6963 	// Write lock the partition's device. For the time being, we keep the lock
6964 	// until we're done mounting -- not nice, but it ensures that no one is
6965 	// interfering.
6966 	// TODO: Just mark the partition busy while mounting!
6967 	KDiskDevice* diskDevice = NULL;
6968 	if (partition) {
6969 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6970 		if (!diskDevice) {
6971 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
6972 			return B_ERROR;
6973 		}
6974 	}
6975 
6976 	DeviceWriteLocker writeLocker(diskDevice, true);
6977 		// this takes over the write lock acquired before
6978 
6979 	if (partition != NULL) {
6980 		// make sure that the partition is not busy
6981 		if (partition->IsBusy()) {
6982 			TRACE(("fs_mount(): Partition is busy.\n"));
6983 			return B_BUSY;
6984 		}
6985 
6986 		// if no FS name had been supplied, we get it from the partition
6987 		if (fsName == NULL) {
6988 			KDiskSystem* diskSystem = partition->DiskSystem();
6989 			if (!diskSystem) {
6990 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
6991 					"recognize it.\n"));
6992 				return B_BAD_VALUE;
6993 			}
6994 
6995 			if (!diskSystem->IsFileSystem()) {
6996 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
6997 					"partitioning system.\n"));
6998 				return B_BAD_VALUE;
6999 			}
7000 
7001 			// The disk system name will not change, and the KDiskSystem
7002 			// object will not go away while the disk device is locked (and
7003 			// the partition has a reference to it), so this is safe.
7004 			fsName = diskSystem->Name();
7005 		}
7006 	}
7007 
7008 	mount = new(std::nothrow) (struct ::fs_mount);
7009 	if (mount == NULL)
7010 		return B_NO_MEMORY;
7011 
7012 	mount->device_name = strdup(device);
7013 		// "device" can be NULL
7014 
7015 	status = mount->entry_cache.Init();
7016 	if (status != B_OK)
7017 		goto err1;
7018 
7019 	// initialize structure
7020 	mount->id = sNextMountID++;
7021 	mount->partition = NULL;
7022 	mount->root_vnode = NULL;
7023 	mount->covers_vnode = NULL;
7024 	mount->unmounting = false;
7025 	mount->owns_file_device = false;
7026 	mount->volume = NULL;
7027 
7028 	// build up the volume(s)
7029 	while (true) {
7030 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7031 		if (layerFSName == NULL) {
7032 			if (layer == 0) {
7033 				status = B_NO_MEMORY;
7034 				goto err1;
7035 			}
7036 
7037 			break;
7038 		}
7039 
7040 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7041 		if (volume == NULL) {
7042 			status = B_NO_MEMORY;
7043 			free(layerFSName);
7044 			goto err1;
7045 		}
7046 
7047 		volume->id = mount->id;
7048 		volume->partition = partition != NULL ? partition->ID() : -1;
7049 		volume->layer = layer++;
7050 		volume->private_volume = NULL;
7051 		volume->ops = NULL;
7052 		volume->sub_volume = NULL;
7053 		volume->super_volume = NULL;
7054 		volume->file_system = NULL;
7055 		volume->file_system_name = NULL;
7056 
7057 		volume->file_system_name = get_file_system_name(layerFSName);
7058 		if (volume->file_system_name == NULL) {
7059 			status = B_NO_MEMORY;
7060 			free(layerFSName);
7061 			free(volume);
7062 			goto err1;
7063 		}
7064 
7065 		volume->file_system = get_file_system(layerFSName);
7066 		if (volume->file_system == NULL) {
7067 			status = ENODEV;
7068 			free(layerFSName);
7069 			free(volume->file_system_name);
7070 			free(volume);
7071 			goto err1;
7072 		}
7073 
7074 		if (mount->volume == NULL)
7075 			mount->volume = volume;
7076 		else {
7077 			volume->super_volume = mount->volume;
7078 			mount->volume->sub_volume = volume;
7079 			mount->volume = volume;
7080 		}
7081 	}
7082 
7083 	// insert mount struct into list before we call FS's mount() function
7084 	// so that vnodes can be created for this mount
7085 	mutex_lock(&sMountMutex);
7086 	hash_insert(sMountsTable, mount);
7087 	mutex_unlock(&sMountMutex);
7088 
7089 	ino_t rootID;
7090 
7091 	if (!sRoot) {
7092 		// we haven't mounted anything yet
7093 		if (strcmp(path, "/") != 0) {
7094 			status = B_ERROR;
7095 			goto err2;
7096 		}
7097 
7098 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7099 			args, &rootID);
7100 		if (status != 0)
7101 			goto err2;
7102 	} else {
7103 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7104 		if (status != B_OK)
7105 			goto err2;
7106 
7107 		// make sure covers_vnode is a directory
7108 		if (!S_ISDIR(mount->covers_vnode->type)) {
7109 			status = B_NOT_A_DIRECTORY;
7110 			goto err3;
7111 		}
7112 
7113 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7114 			// this is already a mount point
7115 			status = B_BUSY;
7116 			goto err3;
7117 		}
7118 
7119 		// mount it/them
7120 		fs_volume* volume = mount->volume;
7121 		while (volume) {
7122 			status = volume->file_system->mount(volume, device, flags, args,
7123 				&rootID);
7124 			if (status != B_OK) {
7125 				if (volume->sub_volume)
7126 					goto err4;
7127 				goto err3;
7128 			}
7129 
7130 			volume = volume->super_volume;
7131 		}
7132 
7133 		volume = mount->volume;
7134 		while (volume) {
7135 			if (volume->ops->all_layers_mounted != NULL)
7136 				volume->ops->all_layers_mounted(volume);
7137 			volume = volume->super_volume;
7138 		}
7139 	}
7140 
7141 	// the root node is supposed to be owned by the file system - it must
7142 	// exist at this point
7143 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7144 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7145 		panic("fs_mount: file system does not own its root node!\n");
7146 		status = B_ERROR;
7147 		goto err4;
7148 	}
7149 
7150 	// No race here, since fs_mount() is the only function changing
7151 	// covers_vnode (and holds sMountOpLock at that time).
7152 	mutex_lock(&sVnodeCoveredByMutex);
7153 	if (mount->covers_vnode)
7154 		mount->covers_vnode->covered_by = mount->root_vnode;
7155 	mutex_unlock(&sVnodeCoveredByMutex);
7156 
7157 	if (!sRoot) {
7158 		sRoot = mount->root_vnode;
7159 		mutex_lock(&sIOContextRootLock);
7160 		get_current_io_context(true)->root = sRoot;
7161 		mutex_unlock(&sIOContextRootLock);
7162 		inc_vnode_ref_count(sRoot);
7163 	}
7164 
7165 	// supply the partition (if any) with the mount cookie and mark it mounted
7166 	if (partition) {
7167 		partition->SetMountCookie(mount->volume->private_volume);
7168 		partition->SetVolumeID(mount->id);
7169 
7170 		// keep a partition reference as long as the partition is mounted
7171 		partitionRegistrar.Detach();
7172 		mount->partition = partition;
7173 		mount->owns_file_device = newlyCreatedFileDevice;
7174 		fileDeviceDeleter.id = -1;
7175 	}
7176 
7177 	notify_mount(mount->id,
7178 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7179 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7180 
7181 	return mount->id;
7182 
7183 err4:
7184 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7185 err3:
7186 	if (mount->covers_vnode != NULL)
7187 		put_vnode(mount->covers_vnode);
7188 err2:
7189 	mutex_lock(&sMountMutex);
7190 	hash_remove(sMountsTable, mount);
7191 	mutex_unlock(&sMountMutex);
7192 err1:
7193 	delete mount;
7194 
7195 	return status;
7196 }
7197 
7198 
7199 static status_t
7200 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7201 {
7202 	struct vnode* vnode = NULL;
7203 	struct fs_mount* mount;
7204 	status_t err;
7205 
7206 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
7207 		kernel));
7208 
7209 	if (path != NULL) {
7210 		err = path_to_vnode(path, true, &vnode, NULL, kernel);
7211 		if (err != B_OK)
7212 			return B_ENTRY_NOT_FOUND;
7213 	}
7214 
7215 	RecursiveLocker mountOpLocker(sMountOpLock);
7216 
7217 	// this lock is not strictly necessary, but is here in the KDEBUG case
7218 	// to keep the ASSERT in find_mount() working.
7219 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7220 	mount = find_mount(path != NULL ? vnode->device : mountID);
7221 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7222 	if (mount == NULL) {
7223 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7224 			vnode);
7225 	}
7226 
7227 	if (path != NULL) {
7228 		put_vnode(vnode);
7229 
7230 		if (mount->root_vnode != vnode) {
7231 			// not a mount point
7232 			return B_BAD_VALUE;
7233 		}
7234 	}
7235 
7236 	// if the volume is associated with a partition, lock the device of the
7237 	// partition as long as we are unmounting
7238 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7239 	KPartition* partition = mount->partition;
7240 	KDiskDevice* diskDevice = NULL;
7241 	if (partition != NULL) {
7242 		if (partition->Device() == NULL) {
7243 			dprintf("fs_unmount(): There is no device!\n");
7244 			return B_ERROR;
7245 		}
7246 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7247 		if (!diskDevice) {
7248 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7249 			return B_ERROR;
7250 		}
7251 	}
7252 	DeviceWriteLocker writeLocker(diskDevice, true);
7253 
7254 	// make sure that the partition is not busy
7255 	if (partition != NULL) {
7256 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7257 			TRACE(("fs_unmount(): Partition is busy.\n"));
7258 			return B_BUSY;
7259 		}
7260 	}
7261 
7262 	// grab the vnode master mutex to keep someone from creating
7263 	// a vnode while we're figuring out if we can continue
7264 	mutex_lock(&sVnodeMutex);
7265 
7266 	bool disconnectedDescriptors = false;
7267 
7268 	while (true) {
7269 		bool busy = false;
7270 
7271 		// cycle through the list of vnodes associated with this mount and
7272 		// make sure all of them are not busy or have refs on them
7273 		vnode = NULL;
7274 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7275 		while (iterator.HasNext()) {
7276 			vnode = iterator.Next();
7277 
7278 			// The root vnode ref_count needs to be 1 here (the mount has a
7279 			// reference).
7280 			if (vnode->busy
7281 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7282 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7283 				// there are still vnodes in use on this mount, so we cannot
7284 				// unmount yet
7285 				busy = true;
7286 				break;
7287 			}
7288 		}
7289 
7290 		if (!busy)
7291 			break;
7292 
7293 		if ((flags & B_FORCE_UNMOUNT) == 0) {
7294 			mutex_unlock(&sVnodeMutex);
7295 
7296 			return B_BUSY;
7297 		}
7298 
7299 		if (disconnectedDescriptors) {
7300 			// wait a bit until the last access is finished, and then try again
7301 			mutex_unlock(&sVnodeMutex);
7302 			snooze(100000);
7303 			// TODO: if there is some kind of bug that prevents the ref counts
7304 			// from getting back to zero, this will fall into an endless loop...
7305 			mutex_lock(&sVnodeMutex);
7306 			continue;
7307 		}
7308 
7309 		// the file system is still busy - but we're forced to unmount it,
7310 		// so let's disconnect all open file descriptors
7311 
7312 		mount->unmounting = true;
7313 			// prevent new vnodes from being created
7314 
7315 		mutex_unlock(&sVnodeMutex);
7316 
7317 		disconnect_mount_or_vnode_fds(mount, NULL);
7318 		disconnectedDescriptors = true;
7319 
7320 		mutex_lock(&sVnodeMutex);
7321 	}
7322 
7323 	// we can safely continue, mark all of the vnodes busy and this mount
7324 	// structure in unmounting state
7325 	mount->unmounting = true;
7326 
7327 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7328 	while (iterator.HasNext()) {
7329 		vnode = iterator.Next();
7330 		vnode->busy = true;
7331 
7332 		if (vnode->ref_count == 0) {
7333 			// this vnode has been unused before
7334 			list_remove_item(&sUnusedVnodeList, vnode);
7335 			sUnusedVnodes--;
7336 		}
7337 	}
7338 
7339 	// The ref_count of the root node is 1 at this point, see above why this is
7340 	mount->root_vnode->ref_count--;
7341 
7342 	mutex_unlock(&sVnodeMutex);
7343 
7344 	mutex_lock(&sVnodeCoveredByMutex);
7345 	mount->covers_vnode->covered_by = NULL;
7346 	mutex_unlock(&sVnodeCoveredByMutex);
7347 	put_vnode(mount->covers_vnode);
7348 
7349 	// Free all vnodes associated with this mount.
7350 	// They will be removed from the mount list by free_vnode(), so
7351 	// we don't have to do this.
7352 	while ((vnode = mount->vnodes.Head()) != NULL) {
7353 		free_vnode(vnode, false);
7354 	}
7355 
7356 	// remove the mount structure from the hash table
7357 	mutex_lock(&sMountMutex);
7358 	hash_remove(sMountsTable, mount);
7359 	mutex_unlock(&sMountMutex);
7360 
7361 	mountOpLocker.Unlock();
7362 
7363 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7364 	notify_unmount(mount->id);
7365 
7366 	// dereference the partition and mark it unmounted
7367 	if (partition) {
7368 		partition->SetVolumeID(-1);
7369 		partition->SetMountCookie(NULL);
7370 
7371 		if (mount->owns_file_device)
7372 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7373 		partition->Unregister();
7374 	}
7375 
7376 	delete mount;
7377 	return B_OK;
7378 }
7379 
7380 
7381 static status_t
7382 fs_sync(dev_t device)
7383 {
7384 	struct fs_mount* mount;
7385 	status_t status = get_mount(device, &mount);
7386 	if (status != B_OK)
7387 		return status;
7388 
7389 	struct vnode marker;
7390 	marker.remove = true;
7391 
7392 	// First, synchronize all file caches
7393 
7394 	while (true) {
7395 		MutexLocker locker(sVnodeMutex);
7396 
7397 		// synchronize access to vnode list
7398 		recursive_lock_lock(&mount->rlock);
7399 
7400 		struct vnode* vnode;
7401 		if (!marker.remove) {
7402 			vnode = mount->vnodes.GetNext(&marker);
7403 			mount->vnodes.Remove(&marker);
7404 			marker.remove = true;
7405 		} else
7406 			vnode = mount->vnodes.First();
7407 
7408 		while (vnode != NULL && (vnode->cache == NULL
7409 			|| vnode->remove || vnode->busy)) {
7410 			// TODO: we could track writes (and writable mapped vnodes)
7411 			//	and have a simple flag that we could test for here
7412 			vnode = mount->vnodes.GetNext(vnode);
7413 		}
7414 
7415 		if (vnode != NULL) {
7416 			// insert marker vnode again
7417 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7418 			marker.remove = false;
7419 		}
7420 
7421 		recursive_lock_unlock(&mount->rlock);
7422 
7423 		if (vnode == NULL)
7424 			break;
7425 
7426 		vnode = lookup_vnode(mount->id, vnode->id);
7427 		if (vnode == NULL || vnode->busy)
7428 			continue;
7429 
7430 		if (vnode->ref_count == 0) {
7431 			// this vnode has been unused before
7432 			list_remove_item(&sUnusedVnodeList, vnode);
7433 			sUnusedVnodes--;
7434 		}
7435 		inc_vnode_ref_count(vnode);
7436 
7437 		locker.Unlock();
7438 
7439 		if (vnode->cache != NULL && !vnode->remove)
7440 			vnode->cache->WriteModified();
7441 
7442 		put_vnode(vnode);
7443 	}
7444 
7445 	// And then, let the file systems do their synchronizing work
7446 
7447 	if (HAS_FS_MOUNT_CALL(mount, sync))
7448 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7449 
7450 	put_mount(mount);
7451 	return status;
7452 }
7453 
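// Note on the marker technique in fs_sync() above: the marker vnode stays
// linked in the mount's vnode list while sVnodeMutex is dropped for
// WriteModified(), so the iteration can resume at GetNext(&marker) even if
// neighboring vnodes were freed in the meantime. The marker's "remove" field
// doubles as a "marker is currently not in the list" flag.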
7454 
7455 static status_t
7456 fs_read_info(dev_t device, struct fs_info* info)
7457 {
7458 	struct fs_mount* mount;
7459 	status_t status = get_mount(device, &mount);
7460 	if (status != B_OK)
7461 		return status;
7462 
7463 	memset(info, 0, sizeof(struct fs_info));
7464 
7465 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7466 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7467 
7468 	// fill in info the file system doesn't (have to) know about
7469 	if (status == B_OK) {
7470 		info->dev = mount->id;
7471 		info->root = mount->root_vnode->id;
7472 
7473 		fs_volume* volume = mount->volume;
7474 		while (volume->super_volume != NULL)
7475 			volume = volume->super_volume;
7476 
7477 		strlcpy(info->fsh_name, volume->file_system_name,
7478 			sizeof(info->fsh_name));
7479 		if (mount->device_name != NULL) {
7480 			strlcpy(info->device_name, mount->device_name,
7481 				sizeof(info->device_name));
7482 		}
7483 	}
7484 
7485 	// if the call is not supported by the file system, there are still
7486 	// the parts that we filled out ourselves
7487 
7488 	put_mount(mount);
7489 	return status;
7490 }
7491 
7492 
7493 static status_t
7494 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7495 {
7496 	struct fs_mount* mount;
7497 	status_t status = get_mount(device, &mount);
7498 	if (status != B_OK)
7499 		return status;
7500 
7501 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7502 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7503 	else
7504 		status = EROFS;
7505 
7506 	put_mount(mount);
7507 	return status;
7508 }
7509 
7510 
7511 static dev_t
7512 fs_next_device(int32* _cookie)
7513 {
7514 	struct fs_mount* mount = NULL;
7515 	dev_t device = *_cookie;
7516 
7517 	mutex_lock(&sMountMutex);
7518 
7519 	// Since device IDs are assigned sequentially, this algorithm
7520 	// works well enough. It makes sure that the device list
7521 	// returned is sorted, and that no device is skipped when an
7522 	// already visited device got unmounted.
7523 
7524 	while (device < sNextMountID) {
7525 		mount = find_mount(device++);
7526 		if (mount != NULL && mount->volume->private_volume != NULL)
7527 			break;
7528 	}
7529 
7530 	*_cookie = device;
7531 
7532 	if (mount != NULL)
7533 		device = mount->id;
7534 	else
7535 		device = B_BAD_VALUE;
7536 
7537 	mutex_unlock(&sMountMutex);
7538 
7539 	return device;
7540 }
7541 
7542 
7543 ssize_t
7544 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7545 	void *buffer, size_t readBytes)
7546 {
7547 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7548 	if (attrFD < 0)
7549 		return attrFD;
7550 
7551 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7552 
7553 	_kern_close(attrFD);
7554 
7555 	return bytesRead;
7556 }
7557 
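// Usage sketch (kernel-side; the attribute name and buffer size are
// hypothetical, and note that this implementation ignores the type
// argument):
//
//	char mimeType[256];
//	ssize_t length = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0,
//		mimeType, sizeof(mimeType));
//	if (length < 0)
//		;	// attribute missing or not readable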
7558 
7559 static status_t
7560 get_cwd(char* buffer, size_t size, bool kernel)
7561 {
7562 	// Get current working directory from io context
7563 	struct io_context* context = get_current_io_context(kernel);
7564 	status_t status;
7565 
7566 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7567 
7568 	mutex_lock(&context->io_mutex);
7569 
7570 	struct vnode* vnode = context->cwd;
7571 	if (vnode)
7572 		inc_vnode_ref_count(vnode);
7573 
7574 	mutex_unlock(&context->io_mutex);
7575 
7576 	if (vnode) {
7577 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7578 		put_vnode(vnode);
7579 	} else
7580 		status = B_ERROR;
7581 
7582 	return status;
7583 }
7584 
7585 
7586 static status_t
7587 set_cwd(int fd, char* path, bool kernel)
7588 {
7589 	struct io_context* context;
7590 	struct vnode* vnode = NULL;
7591 	struct vnode* oldDirectory;
7592 	status_t status;
7593 
7594 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7595 
7596 	// Get vnode for passed path, and bail if it failed
7597 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7598 	if (status < 0)
7599 		return status;
7600 
7601 	if (!S_ISDIR(vnode->type)) {
7602 		// nope, can't cwd to here
7603 		status = B_NOT_A_DIRECTORY;
7604 		goto err;
7605 	}
7606 
7607 	// Get current io context and lock
7608 	context = get_current_io_context(kernel);
7609 	mutex_lock(&context->io_mutex);
7610 
7611 	// save the old current working directory first
7612 	oldDirectory = context->cwd;
7613 	context->cwd = vnode;
7614 
7615 	mutex_unlock(&context->io_mutex);
7616 
7617 	if (oldDirectory)
7618 		put_vnode(oldDirectory);
7619 
7620 	return B_NO_ERROR;
7621 
7622 err:
7623 	put_vnode(vnode);
7624 	return status;
7625 }
7626 
7627 
7628 //	#pragma mark - kernel mirrored syscalls
7629 
7630 
7631 dev_t
7632 _kern_mount(const char* path, const char* device, const char* fsName,
7633 	uint32 flags, const char* args, size_t argsLength)
7634 {
7635 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7636 	if (pathBuffer.InitCheck() != B_OK)
7637 		return B_NO_MEMORY;
7638 
7639 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7640 }
7641 
7642 
7643 status_t
7644 _kern_unmount(const char* path, uint32 flags)
7645 {
7646 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7647 	if (pathBuffer.InitCheck() != B_OK)
7648 		return B_NO_MEMORY;
7649 
7650 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7651 }
7652 
7653 
7654 status_t
7655 _kern_read_fs_info(dev_t device, struct fs_info* info)
7656 {
7657 	if (info == NULL)
7658 		return B_BAD_VALUE;
7659 
7660 	return fs_read_info(device, info);
7661 }
7662 
7663 
7664 status_t
7665 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7666 {
7667 	if (info == NULL)
7668 		return B_BAD_VALUE;
7669 
7670 	return fs_write_info(device, info, mask);
7671 }
7672 
7673 
7674 status_t
7675 _kern_sync(void)
7676 {
7677 	// Note: _kern_sync() is also called from _user_sync()
7678 	int32 cookie = 0;
7679 	dev_t device;
7680 	while ((device = next_dev(&cookie)) >= 0) {
7681 		status_t status = fs_sync(device);
7682 		if (status != B_OK && status != B_BAD_VALUE) {
7683 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7684 				strerror(status));
7685 		}
7686 	}
7687 
7688 	return B_OK;
7689 }
7690 
7691 
7692 dev_t
7693 _kern_next_device(int32* _cookie)
7694 {
7695 	return fs_next_device(_cookie);
7696 }
7697 
7698 
7699 status_t
7700 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7701 	size_t infoSize)
7702 {
7703 	if (infoSize != sizeof(fd_info))
7704 		return B_BAD_VALUE;
7705 
7706 	struct io_context* context = NULL;
7707 	struct team* team = NULL;
7708 
7709 	cpu_status state = disable_interrupts();
7710 	GRAB_TEAM_LOCK();
7711 
7712 	bool contextLocked = false;
7713 	team = team_get_team_struct_locked(teamID);
7714 	if (team) {
7715 		// We cannot lock the IO context while holding the team lock, nor can
7716 		// we just drop the team lock, since it might be deleted in the
7717 		// meantime. team_remove_team() acquires the thread lock when removing
7718 		// the team from the team hash table, though. Hence we switch to the
7719 		// thread lock and use mutex_lock_threads_locked().
7720 		context = (io_context*)team->io_context;
7721 
7722 		GRAB_THREAD_LOCK();
7723 		RELEASE_TEAM_LOCK();
7724 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7725 		RELEASE_THREAD_LOCK();
7726 	} else
7727 		RELEASE_TEAM_LOCK();
7728 
7729 	restore_interrupts(state);
7730 
7731 	if (!contextLocked) {
7732 		// team doesn't exist or seems to be gone
7733 		return B_BAD_TEAM_ID;
7734 	}
7735 
7736 	// the team cannot be deleted completely while we're owning its
7737 	// io_context mutex, so we can safely play with it now
7738 
7739 	uint32 slot = *_cookie;
7740 
7741 	struct file_descriptor* descriptor;
7742 	while (slot < context->table_size
7743 		&& (descriptor = context->fds[slot]) == NULL) {
7744 		slot++;
7745 	}
7746 
7747 	if (slot >= context->table_size) {
7748 		mutex_unlock(&context->io_mutex);
7749 		return B_ENTRY_NOT_FOUND;
7750 	}
7751 
7752 	info->number = slot;
7753 	info->open_mode = descriptor->open_mode;
7754 
7755 	struct vnode* vnode = fd_vnode(descriptor);
7756 	if (vnode != NULL) {
7757 		info->device = vnode->device;
7758 		info->node = vnode->id;
7759 	} else if (descriptor->u.mount != NULL) {
7760 		info->device = descriptor->u.mount->id;
7761 		info->node = -1;
7762 	}
7763 
7764 	mutex_unlock(&context->io_mutex);
7765 
7766 	*_cookie = slot + 1;
7767 	return B_OK;
7768 }
7769 
7770 
7771 int
7772 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7773 	int perms)
7774 {
7775 	if ((openMode & O_CREAT) != 0) {
7776 		return file_create_entry_ref(device, inode, name, openMode, perms,
7777 			true);
7778 	}
7779 
7780 	return file_open_entry_ref(device, inode, name, openMode, true);
7781 }
7782 
7783 
7784 /*!	\brief Opens a node specified by a FD + path pair.
7785 
7786 	At least one of \a fd and \a path must be specified.
7787 	If only \a fd is given, the function opens the node identified by this
7788 	FD. If only a path is given, this path is opened. If both are given and
7789 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7790 	of the directory (!) identified by \a fd.
7791 
7792 	\param fd The FD. May be < 0.
7793 	\param path The absolute or relative path. May be \c NULL.
7794 	\param openMode The open mode.
	\param perms The access permissions for the node, only used if \c O_CREAT
		   is set in \a openMode.
7795 	\return A FD referring to the newly opened node, or an error code,
7796 			if an error occurs.
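
	An illustrative call (a sketch, not from the original sources), opening
	a node relative to a directory FD:
	\code
	int dirFD = _kern_open_dir(-1, "/boot/home");
	if (dirFD >= 0) {
		int fd = _kern_open(dirFD, "config/settings", O_RDONLY, 0);
		// ... use fd, then close both descriptors again
	}
	\endcode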
7797 */
7798 int
7799 _kern_open(int fd, const char* path, int openMode, int perms)
7800 {
7801 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7802 	if (pathBuffer.InitCheck() != B_OK)
7803 		return B_NO_MEMORY;
7804 
7805 	if (openMode & O_CREAT)
7806 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7807 
7808 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7809 }
7810 
7811 
7812 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7813 
7814 	The supplied name may be \c NULL, in which case directory identified
7815 	by \a device and \a inode will be opened. Otherwise \a device and
7816 	\a inode identify the parent directory of the directory to be opened
7817 	and \a name its entry name.
7818 
7819 	\param device If \a name is specified the ID of the device the parent
7820 		   directory of the directory to be opened resides on, otherwise
7821 		   the device of the directory itself.
7822 	\param inode If \a name is specified the node ID of the parent
7823 		   directory of the directory to be opened, otherwise node ID of the
7824 		   directory itself.
7825 	\param name The entry name of the directory to be opened. If \c NULL,
7826 		   the \a device + \a inode pair identify the node to be opened.
7827 	\return The FD of the newly opened directory or an error code, if
7828 			something went wrong.
7829 */
7830 int
7831 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7832 {
7833 	return dir_open_entry_ref(device, inode, name, true);
7834 }
7835 
7836 
7837 /*!	\brief Opens a directory specified by a FD + path pair.
7838 
7839 	At least one of \a fd and \a path must be specified.
7840 	If only \a fd is given, the function opens the directory identified by this
7841 	FD. If only a path is given, this path is opened. If both are given and
7842 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7843 	of the directory (!) identified by \a fd.
7844 
7845 	\param fd The FD. May be < 0.
7846 	\param path The absolute or relative path. May be \c NULL.
7847 	\return A FD referring to the newly opened directory, or an error code,
7848 			if an error occurs.
7849 */
7850 int
7851 _kern_open_dir(int fd, const char* path)
7852 {
7853 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7854 	if (pathBuffer.InitCheck() != B_OK)
7855 		return B_NO_MEMORY;
7856 
7857 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7858 }
7859 
7860 
7861 status_t
7862 _kern_fcntl(int fd, int op, uint32 argument)
7863 {
7864 	return common_fcntl(fd, op, argument, true);
7865 }
7866 
7867 
7868 status_t
7869 _kern_fsync(int fd)
7870 {
7871 	return common_sync(fd, true);
7872 }
7873 
7874 
7875 status_t
7876 _kern_lock_node(int fd)
7877 {
7878 	return common_lock_node(fd, true);
7879 }
7880 
7881 
7882 status_t
7883 _kern_unlock_node(int fd)
7884 {
7885 	return common_unlock_node(fd, true);
7886 }
7887 
7888 
7889 status_t
7890 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7891 	int perms)
7892 {
7893 	return dir_create_entry_ref(device, inode, name, perms, true);
7894 }
7895 
7896 
7897 /*!	\brief Creates a directory specified by a FD + path pair.
7898 
7899 	\a path must always be specified (it contains the name of the new directory
7900 	at least). If only a path is given, this path identifies the location at
7901 	which the directory shall be created. If both \a fd and \a path are given
7902 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
7903 	of the directory (!) identified by \a fd.
7904 
7905 	\param fd The FD. May be < 0.
7906 	\param path The absolute or relative path. Must not be \c NULL.
7907 	\param perms The access permissions the new directory shall have.
7908 	\return \c B_OK, if the directory has been created successfully, another
7909 			error code otherwise.
7910 */
7911 status_t
7912 _kern_create_dir(int fd, const char* path, int perms)
7913 {
7914 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7915 	if (pathBuffer.InitCheck() != B_OK)
7916 		return B_NO_MEMORY;
7917 
7918 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7919 }
7920 
7921 
7922 status_t
7923 _kern_remove_dir(int fd, const char* path)
7924 {
7925 	if (path) {
7926 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7927 		if (pathBuffer.InitCheck() != B_OK)
7928 			return B_NO_MEMORY;
7929 
7930 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7931 	}
7932 
7933 	return dir_remove(fd, NULL, true);
7934 }
7935 
7936 
7937 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7938 
7939 	At least one of \a fd and \a path must be specified.
7940 	If only \a fd is given, the symlink to be read is the node
7941 	identified by this FD. If only a path is given, this path identifies the
7942 	symlink to be read. If both are given and the path is absolute, \a fd is
7943 	ignored; a relative path is reckoned off of the directory (!) identified
7944 	by \a fd.
7945 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7946 	will still be updated to reflect the required buffer size.
7947 
7948 	\param fd The FD. May be < 0.
7949 	\param path The absolute or relative path. May be \c NULL.
7950 	\param buffer The buffer into which the contents of the symlink shall be
7951 		   written.
7952 	\param _bufferSize A pointer to the size of the supplied buffer.
7953 	\return The length of the link on success or an appropriate error code.
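
	Illustrative usage (a sketch, not from the original sources):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/boot/home/link", buffer, &size);
	// on B_BUFFER_OVERFLOW, size now holds the required buffer size
	\endcode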
7954 */
7955 status_t
7956 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
7957 {
7958 	if (path) {
7959 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7960 		if (pathBuffer.InitCheck() != B_OK)
7961 			return B_NO_MEMORY;
7962 
7963 		return common_read_link(fd, pathBuffer.LockBuffer(),
7964 			buffer, _bufferSize, true);
7965 	}
7966 
7967 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
7968 }
7969 
7970 
7971 /*!	\brief Creates a symlink specified by a FD + path pair.
7972 
7973 	\a path must always be specified (it contains the name of the new symlink
7974 	at least). If only a path is given, this path identifies the location at
7975 	which the symlink shall be created. If both \a fd and \a path are given and
7976 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7977 	of the directory (!) identified by \a fd.
7978 
7979 	\param fd The FD. May be < 0.
7980 	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the symlink shall point to.
7981 	\param mode The access permissions the new symlink shall have.
7982 	\return \c B_OK, if the symlink has been created successfully, another
7983 			error code otherwise.
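
	An illustrative call (a sketch, not from the original sources):
	\code
	status_t status = _kern_create_symlink(-1, "/boot/home/link",
		"/boot/home/target", S_IRWXU);
	\endcode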
7984 */
7985 status_t
7986 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
7987 {
7988 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7989 	if (pathBuffer.InitCheck() != B_OK)
7990 		return B_NO_MEMORY;
7991 
7992 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
7993 		toPath, mode, true);
7994 }
7995 
7996 
7997 status_t
7998 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
7999 	bool traverseLeafLink)
8000 {
8001 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8002 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8003 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8004 		return B_NO_MEMORY;
8005 
8006 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8007 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8008 }
8009 
8010 
8011 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8012 
8013 	\a path must always be specified (it contains at least the name of the entry
8014 	to be deleted). If only a path is given, this path identifies the entry
8015 	directly. If both \a fd and \a path are given and the path is absolute,
8016 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8017 	identified by \a fd.
8018 
8019 	\param fd The FD. May be < 0.
8020 	\param path The absolute or relative path. Must not be \c NULL.
8021 	\return \c B_OK, if the entry has been removed successfully, another
8022 			error code otherwise.
8023 */
8024 status_t
8025 _kern_unlink(int fd, const char* path)
8026 {
8027 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8028 	if (pathBuffer.InitCheck() != B_OK)
8029 		return B_NO_MEMORY;
8030 
8031 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8032 }
8033 
8034 
8035 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8036 		   by another FD + path pair.
8037 
8038 	\a oldPath and \a newPath must always be specified (they contain at least
8039 	the name of the entry). If only a path is given, this path identifies the
8040 	entry directly. If both a FD and a path are given and the path is absolute,
8041 	the FD is ignored; a relative path is reckoned off of the directory (!)
8042 	identified by the respective FD.
8043 
8044 	\param oldFD The FD of the old location. May be < 0.
8045 	\param oldPath The absolute or relative path of the old location. Must not
8046 		   be \c NULL.
8047 	\param newFD The FD of the new location. May be < 0.
8048 	\param newPath The absolute or relative path of the new location. Must not
8049 		   be \c NULL.
8050 	\return \c B_OK, if the entry has been moved successfully, another
8051 			error code otherwise.
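
	An illustrative call (a sketch, not from the original sources):
	\code
	status_t status = _kern_rename(-1, "/boot/home/old-name", -1,
		"/boot/home/new-name");
	\endcode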
8052 */
8053 status_t
8054 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8055 {
8056 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8057 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8058 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8059 		return B_NO_MEMORY;
8060 
8061 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8062 		newFD, newPathBuffer.LockBuffer(), true);
8063 }
8064 
8065 
8066 status_t
8067 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8068 {
8069 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8070 	if (pathBuffer.InitCheck() != B_OK)
8071 		return B_NO_MEMORY;
8072 
8073 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8074 		true);
8075 }
8076 
8077 
8078 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8079 
8080 	If only \a fd is given, the stat operation associated with the type
8081 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8082 	given, this path identifies the entry for whose node to retrieve the
8083 	stat data. If both \a fd and \a path are given and the path is absolute,
8084 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8085 	identified by \a fd and specifies the entry whose stat data shall be
8086 	retrieved.
8087 
8088 	\param fd The FD. May be < 0.
8089 	\param path The absolute or relative path. Must not be \c NULL.
8090 	\param traverseLeafLink If \a path is given, \c true specifies that the
8091 		   function shall not stick to symlinks, but traverse them.
8092 	\param stat The buffer the stat data shall be written into.
8093 	\param statSize The size of the supplied stat buffer.
8094 	\return \c B_OK, if the stat data have been read successfully, another
8095 			error code otherwise.
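
	Callers compiled against an older, smaller \c struct \c stat may pass
	their structure's size. An illustrative call (a sketch, not from the
	original sources):
	\code
	struct stat st;
	status_t status = _kern_read_stat(-1, "/boot/home", true, &st,
		sizeof(st));
	\endcode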
8096 */
8097 status_t
8098 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8099 	struct stat* stat, size_t statSize)
8100 {
8101 	struct stat completeStat;
8102 	struct stat* originalStat = NULL;
8103 	status_t status;
8104 
8105 	if (statSize > sizeof(struct stat))
8106 		return B_BAD_VALUE;
8107 
8108 	// this supports different (smaller) stat extensions: read into a
	// complete structure, then copy back only statSize bytes
8109 	if (statSize < sizeof(struct stat)) {
8110 		originalStat = stat;
8111 		stat = &completeStat;
8112 	}
8113 
8114 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8115 
8116 	if (status == B_OK && originalStat != NULL)
8117 		memcpy(originalStat, stat, statSize);
8118 
8119 	return status;
8120 }
8121 
8122 
8123 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8124 
8125 	If only \a fd is given, the stat operation associated with the type
8126 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8127 	given, this path identifies the entry for whose node to write the
8128 	stat data. If both \a fd and \a path are given and the path is absolute,
8129 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8130 	identified by \a fd and specifies the entry whose stat data shall be
8131 	written.
8132 
8133 	\param fd The FD. May be < 0.
8134 	\param path The absolute or relative path. Must not be \c NULL.
8135 	\param traverseLeafLink If \a path is given, \c true specifies that the
8136 		   function shall not stick to symlinks, but traverse them.
8137 	\param stat The buffer containing the stat data to be written.
8138 	\param statSize The size of the supplied stat buffer.
8139 	\param statMask A mask specifying which parts of the stat data shall be
8140 		   written.
8141 	\return \c B_OK, if the stat data have been written successfully,
8142 			another error code otherwise.
8143 */
8144 status_t
8145 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8146 	const struct stat* stat, size_t statSize, int statMask)
8147 {
8148 	struct stat completeStat;
8149 
8150 	if (statSize > sizeof(struct stat))
8151 		return B_BAD_VALUE;
8152 
8153 	// this supports different (smaller) stat extensions: zero-fill the
	// complete structure and copy in only the bytes the caller supplied
8154 	if (statSize < sizeof(struct stat)) {
8155 		memset((uint8*)&completeStat + statSize, 0,
8156 			sizeof(struct stat) - statSize);
8157 		memcpy(&completeStat, stat, statSize);
8158 		stat = &completeStat;
8159 	}
8160 
8161 	status_t status;
8162 
8163 	if (path) {
8164 		// path given: write the stat of the node referred to by (fd, path)
8165 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8166 		if (pathBuffer.InitCheck() != B_OK)
8167 			return B_NO_MEMORY;
8168 
8169 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8170 			traverseLeafLink, stat, statMask, true);
8171 	} else {
8172 		// no path given: get the FD and use the FD operation
8173 		struct file_descriptor* descriptor
8174 			= get_fd(get_current_io_context(true), fd);
8175 		if (descriptor == NULL)
8176 			return B_FILE_ERROR;
8177 
8178 		if (descriptor->ops->fd_write_stat)
8179 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8180 		else
8181 			status = EOPNOTSUPP;
8182 
8183 		put_fd(descriptor);
8184 	}
8185 
8186 	return status;
8187 }
8188 
8189 
8190 int
8191 _kern_open_attr_dir(int fd, const char* path)
8192 {
8193 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8194 	if (pathBuffer.InitCheck() != B_OK)
8195 		return B_NO_MEMORY;
8196 
8197 	if (path != NULL)
8198 		pathBuffer.SetTo(path);
8199 
8200 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8201 }
8202 
8203 
8204 int
8205 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8206 	int openMode)
8207 {
8208 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8209 	if (pathBuffer.InitCheck() != B_OK)
8210 		return B_NO_MEMORY;
8211 
8212 	if ((openMode & O_CREAT) != 0) {
8213 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8214 			true);
8215 	}
8216 
8217 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8218 }
8219 
8220 
8221 status_t
8222 _kern_remove_attr(int fd, const char* name)
8223 {
8224 	return attr_remove(fd, name, true);
8225 }
8226 
8227 
8228 status_t
8229 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8230 	const char* toName)
8231 {
8232 	return attr_rename(fromFile, fromName, toFile, toName, true);
8233 }
8234 
8235 
8236 int
8237 _kern_open_index_dir(dev_t device)
8238 {
8239 	return index_dir_open(device, true);
8240 }
8241 
8242 
8243 status_t
8244 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8245 {
8246 	return index_create(device, name, type, flags, true);
8247 }
8248 
8249 
8250 status_t
8251 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8252 {
8253 	return index_name_read_stat(device, name, stat, true);
8254 }
8255 
8256 
8257 status_t
8258 _kern_remove_index(dev_t device, const char* name)
8259 {
8260 	return index_remove(device, name, true);
8261 }
8262 
8263 
8264 status_t
8265 _kern_getcwd(char* buffer, size_t size)
8266 {
8267 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8268 
8269 	// Call vfs to get current working directory
8270 	return get_cwd(buffer, size, true);
8271 }
8272 
8273 
8274 status_t
8275 _kern_setcwd(int fd, const char* path)
8276 {
8277 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8278 	if (pathBuffer.InitCheck() != B_OK)
8279 		return B_NO_MEMORY;
8280 
8281 	if (path != NULL)
8282 		pathBuffer.SetTo(path);
8283 
8284 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8285 }
8286 
8287 
8288 //	#pragma mark - userland syscalls
8289 
8290 
8291 dev_t
8292 _user_mount(const char* userPath, const char* userDevice,
8293 	const char* userFileSystem, uint32 flags, const char* userArgs,
8294 	size_t argsLength)
8295 {
8296 	char fileSystem[B_FILE_NAME_LENGTH];
8297 	KPath path, device;
8298 	char* args = NULL;
8299 	status_t status;
8300 
8301 	if (!IS_USER_ADDRESS(userPath)
8302 		|| !IS_USER_ADDRESS(userFileSystem)
8303 		|| !IS_USER_ADDRESS(userDevice))
8304 		return B_BAD_ADDRESS;
8305 
8306 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8307 		return B_NO_MEMORY;
8308 
8309 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8310 		return B_BAD_ADDRESS;
8311 
8312 	if (userFileSystem != NULL
8313 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8314 		return B_BAD_ADDRESS;
8315 
8316 	if (userDevice != NULL
8317 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8318 			< B_OK)
8319 		return B_BAD_ADDRESS;
8320 
8321 	if (userArgs != NULL && argsLength > 0) {
8322 		// this is a safety restriction
8323 		if (argsLength >= 65536)
8324 			return B_NAME_TOO_LONG;
8325 
8326 		args = (char*)malloc(argsLength + 1);
8327 		if (args == NULL)
8328 			return B_NO_MEMORY;
8329 
8330 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8331 			free(args);
8332 			return B_BAD_ADDRESS;
8333 		}
8334 	}
8335 	path.UnlockBuffer();
8336 	device.UnlockBuffer();
8337 
8338 	status = fs_mount(path.LockBuffer(),
8339 		userDevice != NULL ? device.Path() : NULL,
8340 		userFileSystem ? fileSystem : NULL, flags, args, false);
8341 
8342 	free(args);
8343 	return status;
8344 }
8345 
8346 
8347 status_t
8348 _user_unmount(const char* userPath, uint32 flags)
8349 {
8350 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8351 	if (pathBuffer.InitCheck() != B_OK)
8352 		return B_NO_MEMORY;
8353 
8354 	char* path = pathBuffer.LockBuffer();
8355 
8356 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8357 		return B_BAD_ADDRESS;
8358 
8359 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8360 }
8361 
8362 
8363 status_t
8364 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8365 {
8366 	struct fs_info info;
8367 	status_t status;
8368 
8369 	if (userInfo == NULL)
8370 		return B_BAD_VALUE;
8371 
8372 	if (!IS_USER_ADDRESS(userInfo))
8373 		return B_BAD_ADDRESS;
8374 
8375 	status = fs_read_info(device, &info);
8376 	if (status != B_OK)
8377 		return status;
8378 
8379 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8380 		return B_BAD_ADDRESS;
8381 
8382 	return B_OK;
8383 }
8384 
8385 
8386 status_t
8387 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8388 {
8389 	struct fs_info info;
8390 
8391 	if (userInfo == NULL)
8392 		return B_BAD_VALUE;
8393 
8394 	if (!IS_USER_ADDRESS(userInfo)
8395 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8396 		return B_BAD_ADDRESS;
8397 
8398 	return fs_write_info(device, &info, mask);
8399 }
8400 
8401 
8402 dev_t
8403 _user_next_device(int32* _userCookie)
8404 {
8405 	int32 cookie;
8406 	dev_t device;
8407 
8408 	if (!IS_USER_ADDRESS(_userCookie)
8409 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8410 		return B_BAD_ADDRESS;
8411 
8412 	device = fs_next_device(&cookie);
8413 
8414 	if (device >= B_OK) {
8415 		// update user cookie
8416 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8417 			return B_BAD_ADDRESS;
8418 	}
8419 
8420 	return device;
8421 }
8422 
8423 
8424 status_t
8425 _user_sync(void)
8426 {
8427 	return _kern_sync();
8428 }
8429 
8430 
8431 status_t
8432 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8433 	size_t infoSize)
8434 {
8435 	struct fd_info info;
8436 	uint32 cookie;
8437 
8438 	// only root can do this (or should root's group be enough?)
8439 	if (geteuid() != 0)
8440 		return B_NOT_ALLOWED;
8441 
8442 	if (infoSize != sizeof(fd_info))
8443 		return B_BAD_VALUE;
8444 
8445 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8446 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8447 		return B_BAD_ADDRESS;
8448 
8449 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8450 	if (status != B_OK)
8451 		return status;
8452 
8453 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8454 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8455 		return B_BAD_ADDRESS;
8456 
8457 	return status;
8458 }
8459 
8460 
8461 status_t
8462 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8463 	char* userPath, size_t pathLength)
8464 {
8465 	if (!IS_USER_ADDRESS(userPath))
8466 		return B_BAD_ADDRESS;
8467 
8468 	KPath path(B_PATH_NAME_LENGTH + 1);
8469 	if (path.InitCheck() != B_OK)
8470 		return B_NO_MEMORY;
8471 
8472 	// copy the leaf name onto the stack
8473 	char stackLeaf[B_FILE_NAME_LENGTH];
8474 	if (leaf) {
8475 		if (!IS_USER_ADDRESS(leaf))
8476 			return B_BAD_ADDRESS;
8477 
8478 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8479 		if (length < 0)
8480 			return length;
8481 		if (length >= B_FILE_NAME_LENGTH)
8482 			return B_NAME_TOO_LONG;
8483 
8484 		leaf = stackLeaf;
8485 	}
8486 
8487 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8488 		path.LockBuffer(), path.BufferSize());
8489 	if (status != B_OK)
8490 		return status;
8491 
8492 	path.UnlockBuffer();
8493 
8494 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8495 	if (length < 0)
8496 		return length;
8497 	if (length >= (int)pathLength)
8498 		return B_BUFFER_OVERFLOW;
8499 
8500 	return B_OK;
8501 }
8502 
8503 
8504 status_t
8505 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8506 {
8507 	if (userPath == NULL || buffer == NULL)
8508 		return B_BAD_VALUE;
8509 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8510 		return B_BAD_ADDRESS;
8511 
8512 	// copy path from userland
8513 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8514 	if (pathBuffer.InitCheck() != B_OK)
8515 		return B_NO_MEMORY;
8516 	char* path = pathBuffer.LockBuffer();
8517 
8518 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8519 		return B_BAD_ADDRESS;
8520 
8521 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8522 		false);
8523 	if (error != B_OK)
8524 		return error;
8525 
8526 	// copy back to userland
8527 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8528 	if (len < 0)
8529 		return len;
8530 	if (len >= B_PATH_NAME_LENGTH)
8531 		return B_BUFFER_OVERFLOW;
8532 
8533 	return B_OK;
8534 }
8535 
8536 
8537 int
8538 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8539 	int openMode, int perms)
8540 {
8541 	char name[B_FILE_NAME_LENGTH];
8542 
8543 	if (userName == NULL || device < 0 || inode < 0)
8544 		return B_BAD_VALUE;
8545 	if (!IS_USER_ADDRESS(userName)
8546 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8547 		return B_BAD_ADDRESS;
8548 
8549 	if ((openMode & O_CREAT) != 0) {
8550 		return file_create_entry_ref(device, inode, name, openMode, perms,
8551 			false);
8552 	}
8553 
8554 	return file_open_entry_ref(device, inode, name, openMode, false);
8555 }
8556 
8557 
8558 int
8559 _user_open(int fd, const char* userPath, int openMode, int perms)
8560 {
8561 	KPath path(B_PATH_NAME_LENGTH + 1);
8562 	if (path.InitCheck() != B_OK)
8563 		return B_NO_MEMORY;
8564 
8565 	char* buffer = path.LockBuffer();
8566 
8567 	if (!IS_USER_ADDRESS(userPath)
8568 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8569 		return B_BAD_ADDRESS;
8570 
8571 	if ((openMode & O_CREAT) != 0)
8572 		return file_create(fd, buffer, openMode, perms, false);
8573 
8574 	return file_open(fd, buffer, openMode, false);
8575 }
8576 
8577 
8578 int
8579 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8580 {
8581 	if (userName != NULL) {
8582 		char name[B_FILE_NAME_LENGTH];
8583 
8584 		if (!IS_USER_ADDRESS(userName)
8585 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8586 			return B_BAD_ADDRESS;
8587 
8588 		return dir_open_entry_ref(device, inode, name, false);
8589 	}
8590 	return dir_open_entry_ref(device, inode, NULL, false);
8591 }
8592 
8593 
8594 int
8595 _user_open_dir(int fd, const char* userPath)
8596 {
8597 	if (userPath == NULL)
8598 		return dir_open(fd, NULL, false);
8599 
8600 	KPath path(B_PATH_NAME_LENGTH + 1);
8601 	if (path.InitCheck() != B_OK)
8602 		return B_NO_MEMORY;
8603 
8604 	char* buffer = path.LockBuffer();
8605 
8606 	if (!IS_USER_ADDRESS(userPath)
8607 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8608 		return B_BAD_ADDRESS;
8609 
8610 	return dir_open(fd, buffer, false);
8611 }
8612 
8613 
8614 /*!	\brief Opens a directory's parent directory and returns the entry name
8615 		   of the former.
8616 
8617 	Aside from that it returns the directory's entry name, this method is
8618 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8619 	equivalent if \a userName is \c NULL.
8620 
8621 	If a name buffer is supplied and the name does not fit the buffer, the
8622 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8623 
8624 	\param fd A FD referring to a directory.
8625 	\param userName Buffer the directory's entry name shall be written into.
8626 		   May be \c NULL.
8627 	\param nameLength Size of the name buffer.
8628 	\return The file descriptor of the opened parent directory, if everything
8629 			went fine, an error code otherwise.
8630 */
8631 int
8632 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8633 {
8634 	bool kernel = false;
8635 
8636 	if (userName && !IS_USER_ADDRESS(userName))
8637 		return B_BAD_ADDRESS;
8638 
8639 	// open the parent dir
8640 	int parentFD = dir_open(fd, (char*)"..", kernel);
8641 	if (parentFD < 0)
8642 		return parentFD;
8643 	FDCloser fdCloser(parentFD, kernel);
8644 
8645 	if (userName) {
8646 		// get the vnodes
8647 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8648 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8649 		VNodePutter parentVNodePutter(parentVNode);
8650 		VNodePutter dirVNodePutter(dirVNode);
8651 		if (!parentVNode || !dirVNode)
8652 			return B_FILE_ERROR;
8653 
8654 		// get the vnode name
8655 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8656 		struct dirent* buffer = (struct dirent*)_buffer;
8657 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8658 			sizeof(_buffer), get_current_io_context(false));
8659 		if (status != B_OK)
8660 			return status;
8661 
8662 		// copy the name to the userland buffer
8663 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8664 		if (len < 0)
8665 			return len;
8666 		if (len >= (int)nameLength)
8667 			return B_BUFFER_OVERFLOW;
8668 	}
8669 
8670 	return fdCloser.Detach();
8671 }
8672 
8673 
8674 status_t
8675 _user_fcntl(int fd, int op, uint32 argument)
8676 {
8677 	status_t status = common_fcntl(fd, op, argument, false);
8678 	if (op == F_SETLKW)
8679 		syscall_restart_handle_post(status);
8680 
8681 	return status;
8682 }
8683 
8684 
8685 status_t
8686 _user_fsync(int fd)
8687 {
8688 	return common_sync(fd, false);
8689 }
8690 
8691 
8692 status_t
8693 _user_flock(int fd, int operation)
8694 {
8695 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
8696 
8697 	// Check if the operation is valid
8698 	switch (operation & ~LOCK_NB) {
8699 		case LOCK_UN:
8700 		case LOCK_SH:
8701 		case LOCK_EX:
8702 			break;
8703 
8704 		default:
8705 			return B_BAD_VALUE;
8706 	}
8707 
8708 	struct file_descriptor* descriptor;
8709 	struct vnode* vnode;
8710 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8711 	if (descriptor == NULL)
8712 		return B_FILE_ERROR;
8713 
8714 	if (descriptor->type != FDTYPE_FILE) {
8715 		put_fd(descriptor);
8716 		return B_BAD_VALUE;
8717 	}
8718 
8719 	struct flock flock;
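	// translate the flock() operation into an advisory lock covering the
	// whole file (l_start = 0, l_len = OFF_MAX)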
8720 	flock.l_start = 0;
8721 	flock.l_len = OFF_MAX;
8722 	flock.l_whence = 0;
8723 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8724 
8725 	status_t status;
8726 	if ((operation & LOCK_UN) != 0)
8727 		status = release_advisory_lock(vnode, &flock);
8728 	else {
8729 		status = acquire_advisory_lock(vnode,
8730 			thread_get_current_thread()->team->session_id, &flock,
8731 			(operation & LOCK_NB) == 0);
8732 	}
8733 
8734 	syscall_restart_handle_post(status);
8735 
8736 	put_fd(descriptor);
8737 	return status;
8738 }
8739 
8740 
8741 status_t
8742 _user_lock_node(int fd)
8743 {
8744 	return common_lock_node(fd, false);
8745 }
8746 
8747 
8748 status_t
8749 _user_unlock_node(int fd)
8750 {
8751 	return common_unlock_node(fd, false);
8752 }
8753 
8754 
8755 status_t
8756 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8757 	int perms)
8758 {
8759 	char name[B_FILE_NAME_LENGTH];
8760 	status_t status;
8761 
8762 	if (!IS_USER_ADDRESS(userName))
8763 		return B_BAD_ADDRESS;
8764 
8765 	status = user_strlcpy(name, userName, sizeof(name));
8766 	if (status < 0)
8767 		return status;
8768 
8769 	return dir_create_entry_ref(device, inode, name, perms, false);
8770 }
8771 
8772 
8773 status_t
8774 _user_create_dir(int fd, const char* userPath, int perms)
8775 {
8776 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8777 	if (pathBuffer.InitCheck() != B_OK)
8778 		return B_NO_MEMORY;
8779 
8780 	char* path = pathBuffer.LockBuffer();
8781 
8782 	if (!IS_USER_ADDRESS(userPath)
8783 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8784 		return B_BAD_ADDRESS;
8785 
8786 	return dir_create(fd, path, perms, false);
8787 }
8788 
8789 
8790 status_t
8791 _user_remove_dir(int fd, const char* userPath)
8792 {
8793 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8794 	if (pathBuffer.InitCheck() != B_OK)
8795 		return B_NO_MEMORY;
8796 
8797 	char* path = pathBuffer.LockBuffer();
8798 
8799 	if (userPath != NULL) {
8800 		if (!IS_USER_ADDRESS(userPath)
8801 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8802 			return B_BAD_ADDRESS;
8803 	}
8804 
8805 	return dir_remove(fd, userPath ? path : NULL, false);
8806 }
8807 
8808 
8809 status_t
8810 _user_read_link(int fd, const char* userPath, char* userBuffer,
8811 	size_t* userBufferSize)
8812 {
8813 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8814 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8815 		return B_NO_MEMORY;
8816 
8817 	size_t bufferSize;
8818 
8819 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8820 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8821 		return B_BAD_ADDRESS;
8822 
8823 	char* path = pathBuffer.LockBuffer();
8824 	char* buffer = linkBuffer.LockBuffer();
8825 
8826 	if (userPath) {
8827 		if (!IS_USER_ADDRESS(userPath)
8828 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8829 			return B_BAD_ADDRESS;
8830 
8831 		if (bufferSize > B_PATH_NAME_LENGTH)
8832 			bufferSize = B_PATH_NAME_LENGTH;
8833 	}
8834 
8835 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8836 		&bufferSize, false);
8837 
8838 	// we also update the bufferSize in case of errors
8839 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8840 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8841 		return B_BAD_ADDRESS;
8842 
8843 	if (status != B_OK)
8844 		return status;
8845 
8846 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8847 		return B_BAD_ADDRESS;
8848 
8849 	return B_OK;
8850 }
8851 
8852 
8853 status_t
8854 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8855 	int mode)
8856 {
8857 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8858 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8859 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8860 		return B_NO_MEMORY;
8861 
8862 	char* path = pathBuffer.LockBuffer();
8863 	char* toPath = toPathBuffer.LockBuffer();
8864 
8865 	if (!IS_USER_ADDRESS(userPath)
8866 		|| !IS_USER_ADDRESS(userToPath)
8867 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8868 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8869 		return B_BAD_ADDRESS;
8870 
8871 	return common_create_symlink(fd, path, toPath, mode, false);
8872 }
8873 
8874 
8875 status_t
8876 _user_create_link(int pathFD, const char* userPath, int toFD,
8877 	const char* userToPath, bool traverseLeafLink)
8878 {
8879 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8880 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8881 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8882 		return B_NO_MEMORY;
8883 
8884 	char* path = pathBuffer.LockBuffer();
8885 	char* toPath = toPathBuffer.LockBuffer();
8886 
8887 	if (!IS_USER_ADDRESS(userPath)
8888 		|| !IS_USER_ADDRESS(userToPath)
8889 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8890 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8891 		return B_BAD_ADDRESS;
8892 
8893 	status_t status = check_path(toPath);
8894 	if (status != B_OK)
8895 		return status;
8896 
8897 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
8898 		false);
8899 }
8900 
8901 
8902 status_t
8903 _user_unlink(int fd, const char* userPath)
8904 {
8905 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8906 	if (pathBuffer.InitCheck() != B_OK)
8907 		return B_NO_MEMORY;
8908 
8909 	char* path = pathBuffer.LockBuffer();
8910 
8911 	if (!IS_USER_ADDRESS(userPath)
8912 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8913 		return B_BAD_ADDRESS;
8914 
8915 	return common_unlink(fd, path, false);
8916 }
8917 
8918 
8919 status_t
8920 _user_rename(int oldFD, const char* userOldPath, int newFD,
8921 	const char* userNewPath)
8922 {
8923 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8924 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8925 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8926 		return B_NO_MEMORY;
8927 
8928 	char* oldPath = oldPathBuffer.LockBuffer();
8929 	char* newPath = newPathBuffer.LockBuffer();
8930 
8931 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8932 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8933 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8934 		return B_BAD_ADDRESS;
8935 
8936 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8937 }
8938 
8939 
8940 status_t
8941 _user_create_fifo(int fd, const char* userPath, mode_t perms)
8942 {
8943 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8944 	if (pathBuffer.InitCheck() != B_OK)
8945 		return B_NO_MEMORY;
8946 
8947 	char* path = pathBuffer.LockBuffer();
8948 
8949 	if (!IS_USER_ADDRESS(userPath)
8950 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
8951 		return B_BAD_ADDRESS;
8952 	}
8953 
8954 	// split into directory vnode and filename path
8955 	char filename[B_FILE_NAME_LENGTH];
8956 	struct vnode* dir;
8957 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
8958 	if (status != B_OK)
8959 		return status;
8960 
8961 	VNodePutter _(dir);
8962 
8963 	// the underlying FS needs to support creating FIFOs
8964 	if (!HAS_FS_CALL(dir, create_special_node))
8965 		return B_UNSUPPORTED;
8966 
8967 	// create the entry	-- the FIFO sub node is set up automatically
8968 	fs_vnode superVnode;
8969 	ino_t nodeID;
8970 	status = FS_CALL(dir, create_special_node, filename, NULL,
8971 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
8972 
8973 	// create_special_node() acquired a reference for us that we don't need.
8974 	if (status == B_OK)
8975 		put_vnode(dir->mount->volume, nodeID);
8976 
8977 	return status;
8978 }
8979 
8980 
8981 status_t
8982 _user_create_pipe(int* userFDs)
8983 {
8984 	// rootfs should support creating FIFOs, but let's be sure
8985 	if (!HAS_FS_CALL(sRoot, create_special_node))
8986 		return B_UNSUPPORTED;
8987 
8988 	// create the node	-- the FIFO sub node is set up automatically
8989 	fs_vnode superVnode;
8990 	ino_t nodeID;
8991 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
8992 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
8993 	if (status != B_OK)
8994 		return status;
8995 
8996 	// We've got one reference to the node and need another one.
8997 	struct vnode* vnode;
8998 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
8999 	if (status != B_OK) {
9000 		// that should not happen
9001 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9002 			sRoot->mount->id, nodeID);
9003 		return status;
9004 	}
9005 
9006 	// Everything looks good so far. Open two FDs, one for reading and one
9007 	// for writing, matching the usual pipe() semantics.
9008 	int fds[2];
9009 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9010 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9011 
9012 	FDCloser closer0(fds[0], false);
9013 	FDCloser closer1(fds[1], false);
9014 
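	// if opening either end failed, hand its (negative) error code through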
9015 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9016 
9017 	// copy FDs to userland
9018 	if (status == B_OK) {
9019 		if (!IS_USER_ADDRESS(userFDs)
9020 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9021 			status = B_BAD_ADDRESS;
9022 		}
9023 	}
9024 
9025 	// keep FDs, if everything went fine
9026 	if (status == B_OK) {
9027 		closer0.Detach();
9028 		closer1.Detach();
9029 	}
9030 
9031 	return status;
9032 }
9033 
9034 
9035 status_t
9036 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9037 {
9038 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9039 	if (pathBuffer.InitCheck() != B_OK)
9040 		return B_NO_MEMORY;
9041 
9042 	char* path = pathBuffer.LockBuffer();
9043 
9044 	if (!IS_USER_ADDRESS(userPath)
9045 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9046 		return B_BAD_ADDRESS;
9047 
9048 	return common_access(fd, path, mode, effectiveUserGroup, false);
9049 }
9050 
9051 
9052 status_t
9053 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9054 	struct stat* userStat, size_t statSize)
9055 {
9056 	struct stat stat;
9057 	status_t status;
9058 
9059 	if (statSize > sizeof(struct stat))
9060 		return B_BAD_VALUE;
9061 
9062 	if (!IS_USER_ADDRESS(userStat))
9063 		return B_BAD_ADDRESS;
9064 
9065 	if (userPath) {
9066 		// path given: get the stat of the node referred to by (fd, path)
9067 		if (!IS_USER_ADDRESS(userPath))
9068 			return B_BAD_ADDRESS;
9069 
9070 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9071 		if (pathBuffer.InitCheck() != B_OK)
9072 			return B_NO_MEMORY;
9073 
9074 		char* path = pathBuffer.LockBuffer();
9075 
9076 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9077 		if (length < B_OK)
9078 			return length;
9079 		if (length >= B_PATH_NAME_LENGTH)
9080 			return B_NAME_TOO_LONG;
9081 
9082 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9083 	} else {
9084 		// no path given: get the FD and use the FD operation
9085 		struct file_descriptor* descriptor
9086 			= get_fd(get_current_io_context(false), fd);
9087 		if (descriptor == NULL)
9088 			return B_FILE_ERROR;
9089 
9090 		if (descriptor->ops->fd_read_stat)
9091 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9092 		else
9093 			status = EOPNOTSUPP;
9094 
9095 		put_fd(descriptor);
9096 	}
9097 
9098 	if (status != B_OK)
9099 		return status;
9100 
9101 	return user_memcpy(userStat, &stat, statSize);
9102 }
9103 
9104 
9105 status_t
9106 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9107 	const struct stat* userStat, size_t statSize, int statMask)
9108 {
9109 	if (statSize > sizeof(struct stat))
9110 		return B_BAD_VALUE;
9111 
9112 	struct stat stat;
9113 
9114 	if (!IS_USER_ADDRESS(userStat)
9115 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9116 		return B_BAD_ADDRESS;
9117 
9118 	// clear additional stat fields
9119 	if (statSize < sizeof(struct stat))
9120 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9121 
9122 	status_t status;
9123 
9124 	if (userPath) {
9125 		// path given: write the stat of the node referred to by (fd, path)
9126 		if (!IS_USER_ADDRESS(userPath))
9127 			return B_BAD_ADDRESS;
9128 
9129 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9130 		if (pathBuffer.InitCheck() != B_OK)
9131 			return B_NO_MEMORY;
9132 
9133 		char* path = pathBuffer.LockBuffer();
9134 
9135 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9136 		if (length < B_OK)
9137 			return length;
9138 		if (length >= B_PATH_NAME_LENGTH)
9139 			return B_NAME_TOO_LONG;
9140 
9141 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9142 			statMask, false);
9143 	} else {
9144 		// no path given: get the FD and use the FD operation
9145 		struct file_descriptor* descriptor
9146 			= get_fd(get_current_io_context(false), fd);
9147 		if (descriptor == NULL)
9148 			return B_FILE_ERROR;
9149 
9150 		if (descriptor->ops->fd_write_stat) {
9151 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9152 				statMask);
9153 		} else
9154 			status = EOPNOTSUPP;
9155 
9156 		put_fd(descriptor);
9157 	}
9158 
9159 	return status;
9160 }
9161 
9162 
9163 int
9164 _user_open_attr_dir(int fd, const char* userPath)
9165 {
9166 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9167 	if (pathBuffer.InitCheck() != B_OK)
9168 		return B_NO_MEMORY;
9169 
9170 	char* path = pathBuffer.LockBuffer();
9171 
9172 	if (userPath != NULL) {
9173 		if (!IS_USER_ADDRESS(userPath)
9174 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9175 			return B_BAD_ADDRESS;
9176 	}
9177 
9178 	return attr_dir_open(fd, userPath ? path : NULL, false);
9179 }
9180 
9181 
9182 ssize_t
9183 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9184 	size_t readBytes)
9185 {
9186 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9187 	if (attr < 0)
9188 		return attr;
9189 
9190 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9191 	_user_close(attr);
9192 
9193 	return bytes;
9194 }
9195 
9196 
9197 ssize_t
9198 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9199 	const void* buffer, size_t writeBytes)
9200 {
9201 	// Try to support the BeOS-typical truncation as well as the position
9202 	// argument
9203 	int attr = attr_create(fd, NULL, attribute, type,
9204 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9205 	if (attr < 0)
9206 		return attr;
9207 
9208 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9209 	_user_close(attr);
9210 
9211 	return bytes;
9212 }
9213 
9214 
9215 status_t
9216 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9217 {
9218 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9219 	if (attr < 0)
9220 		return attr;
9221 
9222 	struct file_descriptor* descriptor
9223 		= get_fd(get_current_io_context(false), attr);
9224 	if (descriptor == NULL) {
9225 		_user_close(attr);
9226 		return B_FILE_ERROR;
9227 	}
9228 
9229 	struct stat stat;
9230 	status_t status;
9231 	if (descriptor->ops->fd_read_stat)
9232 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9233 	else
9234 		status = EOPNOTSUPP;
9235 
9236 	put_fd(descriptor);
9237 	_user_close(attr);
9238 
9239 	if (status == B_OK) {
9240 		attr_info info;
9241 		info.type = stat.st_type;
9242 		info.size = stat.st_size;
9243 
9244 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9245 			return B_BAD_ADDRESS;
9246 	}
9247 
9248 	return status;
9249 }
9250 
9251 
9252 int
9253 _user_open_attr(int fd, const char* userPath, const char* userName,
9254 	uint32 type, int openMode)
9255 {
9256 	char name[B_FILE_NAME_LENGTH];
9257 
9258 	if (!IS_USER_ADDRESS(userName)
9259 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9260 		return B_BAD_ADDRESS;
9261 
9262 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9263 	if (pathBuffer.InitCheck() != B_OK)
9264 		return B_NO_MEMORY;
9265 
9266 	char* path = pathBuffer.LockBuffer();
9267 
9268 	if (userPath != NULL) {
9269 		if (!IS_USER_ADDRESS(userPath)
9270 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9271 			return B_BAD_ADDRESS;
9272 	}
9273 
9274 	if ((openMode & O_CREAT) != 0) {
9275 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9276 			false);
9277 	}
9278 
9279 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9280 }
9281 
9282 
9283 status_t
9284 _user_remove_attr(int fd, const char* userName)
9285 {
9286 	char name[B_FILE_NAME_LENGTH];
9287 
9288 	if (!IS_USER_ADDRESS(userName)
9289 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9290 		return B_BAD_ADDRESS;
9291 
9292 	return attr_remove(fd, name, false);
9293 }
9294 
9295 
9296 status_t
9297 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9298 	const char* userToName)
9299 {
9300 	if (!IS_USER_ADDRESS(userFromName)
9301 		|| !IS_USER_ADDRESS(userToName))
9302 		return B_BAD_ADDRESS;
9303 
9304 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9305 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9306 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9307 		return B_NO_MEMORY;
9308 
9309 	char* fromName = fromNameBuffer.LockBuffer();
9310 	char* toName = toNameBuffer.LockBuffer();
9311 
9312 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9313 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9314 		return B_BAD_ADDRESS;
9315 
9316 	return attr_rename(fromFile, fromName, toFile, toName, false);
9317 }
9318 
9319 
9320 int
9321 _user_open_index_dir(dev_t device)
9322 {
9323 	return index_dir_open(device, false);
9324 }
9325 
9326 
9327 status_t
9328 _user_create_index(dev_t device, const char* userName, uint32 type,
9329 	uint32 flags)
9330 {
9331 	char name[B_FILE_NAME_LENGTH];
9332 
9333 	if (!IS_USER_ADDRESS(userName)
9334 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9335 		return B_BAD_ADDRESS;
9336 
9337 	return index_create(device, name, type, flags, false);
9338 }
9339 
9340 
9341 status_t
9342 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9343 {
9344 	char name[B_FILE_NAME_LENGTH];
9345 	struct stat stat;
9346 	status_t status;
9347 
9348 	if (!IS_USER_ADDRESS(userName)
9349 		|| !IS_USER_ADDRESS(userStat)
9350 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9351 		return B_BAD_ADDRESS;
9352 
9353 	status = index_name_read_stat(device, name, &stat, false);
9354 	if (status == B_OK) {
9355 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9356 			return B_BAD_ADDRESS;
9357 	}
9358 
9359 	return status;
9360 }
9361 
9362 
9363 status_t
9364 _user_remove_index(dev_t device, const char* userName)
9365 {
9366 	char name[B_FILE_NAME_LENGTH];
9367 
9368 	if (!IS_USER_ADDRESS(userName)
9369 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9370 		return B_BAD_ADDRESS;
9371 
9372 	return index_remove(device, name, false);
9373 }
9374 
9375 
9376 status_t
9377 _user_getcwd(char* userBuffer, size_t size)
9378 {
9379 	if (size == 0)
9380 		return B_BAD_VALUE;
9381 	if (!IS_USER_ADDRESS(userBuffer))
9382 		return B_BAD_ADDRESS;
9383 
9384 	if (size > kMaxPathLength)
9385 		size = kMaxPathLength;
9386 
9387 	KPath pathBuffer(size);
9388 	if (pathBuffer.InitCheck() != B_OK)
9389 		return B_NO_MEMORY;
9390 
9391 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9392 
9393 	char* path = pathBuffer.LockBuffer();
9394 
9395 	status_t status = get_cwd(path, size, false);
9396 	if (status != B_OK)
9397 		return status;
9398 
9399 	// Copy back the result
9400 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9401 		return B_BAD_ADDRESS;
9402 
9403 	return status;
9404 }
9405 
9406 
9407 status_t
9408 _user_setcwd(int fd, const char* userPath)
9409 {
9410 	TRACE(("user_setcwd: path = %p\n", userPath));
9411 
9412 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9413 	if (pathBuffer.InitCheck() != B_OK)
9414 		return B_NO_MEMORY;
9415 
9416 	char* path = pathBuffer.LockBuffer();
9417 
9418 	if (userPath != NULL) {
9419 		if (!IS_USER_ADDRESS(userPath)
9420 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9421 			return B_BAD_ADDRESS;
9422 	}
9423 
9424 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9425 }
9426 
9427 
9428 status_t
9429 _user_change_root(const char* userPath)
9430 {
9431 	// only root is allowed to chroot()
9432 	if (geteuid() != 0)
9433 		return EPERM;
9434 
9435 	// alloc path buffer
9436 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9437 	if (pathBuffer.InitCheck() != B_OK)
9438 		return B_NO_MEMORY;
9439 
9440 	// copy userland path to kernel
9441 	char* path = pathBuffer.LockBuffer();
9442 	if (userPath != NULL) {
9443 		if (!IS_USER_ADDRESS(userPath)
9444 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9445 			return B_BAD_ADDRESS;
9446 	}
9447 
9448 	// get the vnode
9449 	struct vnode* vnode;
9450 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9451 	if (status != B_OK)
9452 		return status;
9453 
9454 	// set the new root
9455 	struct io_context* context = get_current_io_context(false);
9456 	mutex_lock(&sIOContextRootLock);
9457 	struct vnode* oldRoot = context->root;
9458 	context->root = vnode;
9459 	mutex_unlock(&sIOContextRootLock);
9460 
9461 	put_vnode(oldRoot);
9462 
9463 	return B_OK;
9464 }
9465 
9466 
9467 int
9468 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9469 	uint32 flags, port_id port, int32 token)
9470 {
9471 	char* query;
9472 
9473 	if (device < 0 || userQuery == NULL || queryLength == 0)
9474 		return B_BAD_VALUE;
9475 
9476 	// this is a safety restriction
9477 	if (queryLength >= 65536)
9478 		return B_NAME_TOO_LONG;
9479 
9480 	query = (char*)malloc(queryLength + 1);
9481 	if (query == NULL)
9482 		return B_NO_MEMORY;
9483 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9484 		free(query);
9485 		return B_BAD_ADDRESS;
9486 	}
9487 
9488 	int fd = query_open(device, query, flags, port, token, false);
9489 
9490 	free(query);
9491 	return fd;
9492 }
9493 
9494 
9495 #include "vfs_request_io.cpp"
9496