xref: /haiku/src/system/kernel/fs/vfs.cpp (revision c28bcbdf58b7a03ea773ae82f2e0a3befcc96ac4)
1 /*
2  * Copyright 2005-2008, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 /*! Virtual File System and File System Interface Layer */
11 
12 
13 #include <ctype.h>
14 #include <fcntl.h>
15 #include <limits.h>
16 #include <stddef.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/file.h>
20 #include <sys/resource.h>
21 #include <sys/stat.h>
22 #include <unistd.h>
23 
24 #include <fs_info.h>
25 #include <fs_interface.h>
26 #include <fs_volume.h>
27 #include <OS.h>
28 #include <StorageDefs.h>
29 
30 #include <AutoDeleter.h>
31 #include <block_cache.h>
32 #include <boot/kernel_args.h>
33 #include <disk_device_manager/KDiskDevice.h>
34 #include <disk_device_manager/KDiskDeviceManager.h>
35 #include <disk_device_manager/KDiskDeviceUtils.h>
36 #include <disk_device_manager/KDiskSystem.h>
37 #include <fd.h>
38 #include <file_cache.h>
39 #include <fs/node_monitor.h>
40 #include <khash.h>
41 #include <KPath.h>
42 #include <lock.h>
43 #include <low_resource_manager.h>
44 #include <syscalls.h>
45 #include <syscall_restart.h>
46 #include <tracing.h>
47 #include <util/atomic.h>
48 #include <util/AutoLock.h>
49 #include <util/DoublyLinkedList.h>
50 #include <util/OpenHashTable.h>
51 #include <vfs.h>
52 #include <vm.h>
53 #include <vm_cache.h>
54 
55 #include "fifo.h"
56 #include "io_requests.h"
57 
58 
59 //#define TRACE_VFS
60 #ifdef TRACE_VFS
61 #	define TRACE(x) dprintf x
62 #	define FUNCTION(x) dprintf x
63 #else
64 #	define TRACE(x) ;
65 #	define FUNCTION(x) ;
66 #endif
67 
68 #define ADD_DEBUGGER_COMMANDS
69 
70 
71 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
72 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
73 
74 #ifdef KDEBUG
75 #	define FS_CALL(vnode, op, params...) \
76 		( HAS_FS_CALL(vnode, op) ? \
77 			vnode->ops->op(vnode->mount->volume, vnode, params) \
78 			: (panic("FS_CALL op " #op " is NULL"), 0))
79 #	define FS_CALL_NO_PARAMS(vnode, op) \
80 		( HAS_FS_CALL(vnode, op) ? \
81 			vnode->ops->op(vnode->mount->volume, vnode) \
82 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
83 #	define FS_MOUNT_CALL(mount, op, params...) \
84 		( HAS_FS_MOUNT_CALL(mount, op) ? \
85 			mount->volume->ops->op(mount->volume, params) \
86 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
87 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
88 		( HAS_FS_MOUNT_CALL(mount, op) ? \
89 			mount->volume->ops->op(mount->volume) \
90 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
91 #else
92 #	define FS_CALL(vnode, op, params...) \
93 			vnode->ops->op(vnode->mount->volume, vnode, params)
94 #	define FS_CALL_NO_PARAMS(vnode, op) \
95 			vnode->ops->op(vnode->mount->volume, vnode)
96 #	define FS_MOUNT_CALL(mount, op, params...) \
97 			mount->volume->ops->op(mount->volume, params)
98 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
99 			mount->volume->ops->op(mount->volume)
100 #endif
101 
102 
103 const static uint32 kMaxUnusedVnodes = 8192;
104 	// This is the maximum number of unused vnodes that the system
105 	// will keep around (weak limit, if there is enough memory left,
106 	// they won't get flushed even when hitting that limit).
107 	// It may be chosen with respect to the available memory or enhanced
108 	// by some timestamp/frequency heurism.
109 
110 const static uint32 kMaxEntryCacheEntryCount = 8192;
111 	// Maximum number of entries per entry cache. It's a hard limit ATM.
112 
113 struct EntryCacheKey {
114 	EntryCacheKey(ino_t dirID, const char* name)
115 		:
116 		dir_id(dirID),
117 		name(name)
118 	{
119 	}
120 
121 	ino_t		dir_id;
122 	const char*	name;
123 };
124 
125 
126 struct EntryCacheEntry : HashTableLink<EntryCacheEntry>,
127 		DoublyLinkedListLinkImpl<EntryCacheEntry> {
128 	ino_t	node_id;
129 	ino_t	dir_id;
130 	char	name[1];
131 };
132 
133 
134 struct EntryCacheHashDefinition {
135 	typedef EntryCacheKey	KeyType;
136 	typedef EntryCacheEntry	ValueType;
137 
138 	uint32 HashKey(const EntryCacheKey& key) const
139 	{
140 		return (uint32)key.dir_id ^ (uint32)(key.dir_id >> 32)
141 			^ hash_hash_string(key.name);
142 	}
143 
144 	size_t Hash(const EntryCacheEntry* value) const
145 	{
146 		return (uint32)value->dir_id ^ (uint32)(value->dir_id >> 32)
147 			^ hash_hash_string(value->name);
148 	}
149 
150 	bool Compare(const EntryCacheKey& key, const EntryCacheEntry* value) const
151 	{
152 		return value->dir_id == key.dir_id
153 			&& strcmp(value->name, key.name) == 0;
154 	}
155 
156 	HashTableLink<EntryCacheEntry>* GetLink(EntryCacheEntry* value) const
157 	{
158 		return value;
159 	}
160 };
161 
162 
163 class EntryCache {
164 public:
165 	// Note: Constructor and destructor are never invoked, since instances of
166 	// this class are member of the fs_mount C structure. Hence we do all
167 	// initialization/uninitialization in Init()/Uninit() explicitly.
168 
169 	status_t Init()
170 	{
171 		mutex_init(&fLock, "entry cache");
172 
173 		new(&fEntries) EntryTable;
174 		new(&fUsedEntries) EntryList;
175 		fEntryCount = 0;
176 
177 		return fEntries.Init();
178 	}
179 
180 	void Uninit()
181 	{
182 		while (EntryCacheEntry* entry = fUsedEntries.Head())
183 			_Remove(entry);
184 
185 		fEntries.~EntryTable();
186 
187 		mutex_destroy(&fLock);
188 	}
189 
190 	status_t Add(ino_t dirID, const char* name, ino_t nodeID)
191 	{
192 		MutexLocker _(fLock);
193 
194 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
195 		if (entry != NULL) {
196 			entry->node_id = nodeID;
197 			return B_OK;
198 		}
199 
200 		if (fEntryCount >= kMaxEntryCacheEntryCount)
201 			_Remove(fUsedEntries.Head());
202 
203 		entry = (EntryCacheEntry*)malloc(sizeof(EntryCacheEntry)
204 			+ strlen(name));
205 		if (entry == NULL)
206 			return B_NO_MEMORY;
207 
208 		entry->node_id = nodeID;
209 		entry->dir_id = dirID;
210 		strcpy(entry->name, name);
211 
212 		fEntries.Insert(entry);
213 		fUsedEntries.Add(entry);
214 		fEntryCount++;
215 
216 		return B_OK;
217 	}
218 
219 	status_t Remove(ino_t dirID, const char* name)
220 	{
221 		MutexLocker _(fLock);
222 
223 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
224 		if (entry == NULL)
225 			return B_ENTRY_NOT_FOUND;
226 
227 		_Remove(entry);
228 
229 		return B_OK;
230 	}
231 
232 	bool Lookup(ino_t dirID, const char* name, ino_t& nodeID)
233 	{
234 		MutexLocker _(fLock);
235 
236 		EntryCacheEntry* entry = fEntries.Lookup(EntryCacheKey(dirID, name));
237 		if (entry == NULL)
238 			return false;
239 
240 		// requeue at the end
241 		fUsedEntries.Remove(entry);
242 		fUsedEntries.Add(entry);
243 
244 		nodeID = entry->node_id;
245 		return true;
246 	}
247 
248 	void _Remove(EntryCacheEntry* entry)
249 	{
250 		fEntries.Remove(entry);
251 		fUsedEntries.Remove(entry);
252 		free(entry);
253 		fEntryCount--;
254 	}
255 
256 private:
257 	typedef OpenHashTable<EntryCacheHashDefinition> EntryTable;
258 	typedef DoublyLinkedList<EntryCacheEntry> EntryList;
259 
260 	mutex		fLock;
261 	EntryTable	fEntries;
262 	EntryList	fUsedEntries;	// LRU queue (LRU entry at the head)
263 	uint32		fEntryCount;
264 };
265 
266 
267 struct vnode : fs_vnode {
268 	struct vnode	*next;
269 	vm_cache		*cache;
270 	dev_t			device;
271 	list_link		mount_link;
272 	list_link		unused_link;
273 	ino_t			id;
274 	struct fs_mount	*mount;
275 	struct vnode	*covered_by;
276 	int32			ref_count;
277 	uint32			type : 29;
278 						// TODO: S_INDEX_DIR actually needs another bit.
279 						// Better combine this field with the following ones.
280 	uint32			remove : 1;
281 	uint32			busy : 1;
282 	uint32			unpublished : 1;
283 	struct advisory_locking	*advisory_locking;
284 	struct file_descriptor *mandatory_locked_by;
285 };
286 
287 struct vnode_hash_key {
288 	dev_t	device;
289 	ino_t	vnode;
290 };
291 
292 /*!	\brief Structure to manage a mounted file system
293 
294 	Note: The root_vnode and covers_vnode fields (what others?) are
295 	initialized in fs_mount() and not changed afterwards. That is as soon
296 	as the mount is mounted and it is made sure it won't be unmounted
297 	(e.g. by holding a reference to a vnode of that mount) (read) access
298 	to those fields is always safe, even without additional locking. Morever
299 	while mounted the mount holds a reference to the covers_vnode, and thus
300 	making the access path vnode->mount->covers_vnode->mount->... safe if a
301 	reference to vnode is held (note that for the root mount covers_vnode
302 	is NULL, though).
303 */
304 struct fs_mount {
305 	struct fs_mount	*next;
306 	file_system_module_info *fs;
307 	dev_t			id;
308 	fs_volume		*volume;
309 	char			*device_name;
310 	char			*fs_name;
311 	recursive_lock	rlock;	// guards the vnodes list
312 	struct vnode	*root_vnode;
313 	struct vnode	*covers_vnode;
314 	KPartition		*partition;
315 	struct list		vnodes;
316 	EntryCache		entry_cache;
317 	bool			unmounting;
318 	bool			owns_file_device;
319 };
320 
321 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
322 	list_link		link;
323 	team_id			team;
324 	pid_t			session;
325 	off_t			start;
326 	off_t			end;
327 	bool			shared;
328 };
329 
330 typedef DoublyLinkedList<advisory_lock> LockList;
331 
332 struct advisory_locking {
333 	sem_id			lock;
334 	sem_id			wait_sem;
335 	LockList		locks;
336 
337 	advisory_locking()
338 		:
339 		lock(-1),
340 		wait_sem(-1)
341 	{
342 	}
343 
344 	~advisory_locking()
345 	{
346 		if (lock >= 0)
347 			delete_sem(lock);
348 		if (wait_sem >= 0)
349 			delete_sem(wait_sem);
350 	}
351 };
352 
353 /*!	\brief Guards sMountsTable.
354 
355 	The holder is allowed to read/write access the sMountsTable.
356 	Manipulation of the fs_mount structures themselves
357 	(and their destruction) requires different locks though.
358 */
359 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
360 
361 /*!	\brief Guards mount/unmount operations.
362 
363 	The fs_mount() and fs_unmount() hold the lock during their whole operation.
364 	That is locking the lock ensures that no FS is mounted/unmounted. In
365 	particular this means that
366 	- sMountsTable will not be modified,
367 	- the fields immutable after initialization of the fs_mount structures in
368 	  sMountsTable will not be modified,
369 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
370 
371 	The thread trying to lock the lock must not hold sVnodeMutex or
372 	sMountMutex.
373 */
374 static recursive_lock sMountOpLock;
375 
376 /*!	\brief Guards the vnode::covered_by field of any vnode
377 
378 	The holder is allowed to read access the vnode::covered_by field of any
379 	vnode. Additionally holding sMountOpLock allows for write access.
380 
381 	The thread trying to lock the must not hold sVnodeMutex.
382 */
383 static mutex sVnodeCoveredByMutex
384 	= MUTEX_INITIALIZER("vfs_vnode_covered_by_lock");
385 
386 /*!	\brief Guards sVnodeTable.
387 
388 	The holder is allowed read/write access to sVnodeTable and to
389 	any unbusy vnode in that table, save to the immutable fields (device, id,
390 	private_node, mount) to which
391 	only read-only access is allowed, and to the field covered_by, which is
392 	guarded by sMountOpLock and sVnodeCoveredByMutex.
393 
394 	The thread trying to lock the mutex must not hold sMountMutex.
395 	You must not have this mutex held when calling create_sem(), as this
396 	might call vfs_free_unused_vnodes().
397 */
398 static mutex sVnodeMutex = MUTEX_INITIALIZER("vfs_vnode_lock");
399 
400 /*!	\brief Guards io_context::root.
401 
402 	Must be held when setting or getting the io_context::root field.
403 	The only operation allowed while holding this lock besides getting or
404 	setting the field is inc_vnode_ref_count() on io_context::root.
405 */
406 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
407 
408 #define VNODE_HASH_TABLE_SIZE 1024
409 static hash_table *sVnodeTable;
410 static list sUnusedVnodeList;
411 static uint32 sUnusedVnodes = 0;
412 static struct vnode *sRoot;
413 
414 #define MOUNTS_HASH_TABLE_SIZE 16
415 static hash_table *sMountsTable;
416 static dev_t sNextMountID = 1;
417 
418 #define MAX_TEMP_IO_VECS 8
419 
420 mode_t __gUmask = 022;
421 
422 /* function declarations */
423 
424 // file descriptor operation prototypes
425 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
426 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
427 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
428 static void file_free_fd(struct file_descriptor *);
429 static status_t file_close(struct file_descriptor *);
430 static status_t file_select(struct file_descriptor *, uint8 event,
431 	struct selectsync *sync);
432 static status_t file_deselect(struct file_descriptor *, uint8 event,
433 	struct selectsync *sync);
434 static status_t dir_read(struct io_context *, struct file_descriptor *,
435 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
436 static status_t dir_read(struct io_context* ioContext, struct vnode *vnode,
437 	void *cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
438 static status_t dir_rewind(struct file_descriptor *);
439 static void dir_free_fd(struct file_descriptor *);
440 static status_t dir_close(struct file_descriptor *);
441 static status_t attr_dir_read(struct io_context *, struct file_descriptor *,
442 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
443 static status_t attr_dir_rewind(struct file_descriptor *);
444 static void attr_dir_free_fd(struct file_descriptor *);
445 static status_t attr_dir_close(struct file_descriptor *);
446 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
447 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
448 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
449 static void attr_free_fd(struct file_descriptor *);
450 static status_t attr_close(struct file_descriptor *);
451 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
452 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
453 static status_t index_dir_read(struct io_context *, struct file_descriptor *,
454 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
455 static status_t index_dir_rewind(struct file_descriptor *);
456 static void index_dir_free_fd(struct file_descriptor *);
457 static status_t index_dir_close(struct file_descriptor *);
458 static status_t query_read(struct io_context *, struct file_descriptor *,
459 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
460 static status_t query_rewind(struct file_descriptor *);
461 static void query_free_fd(struct file_descriptor *);
462 static status_t query_close(struct file_descriptor *);
463 
464 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
465 static status_t common_read_stat(struct file_descriptor *, struct stat *);
466 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
467 
468 static status_t common_path_read_stat(int fd, char *path, bool traverseLeafLink,
469 	struct stat *stat, bool kernel);
470 
471 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
472 	bool traverseLeafLink, int count, bool kernel,
473 	struct vnode **_vnode, ino_t *_parentID);
474 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer,
475 	size_t bufferSize, bool kernel);
476 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
477 	struct vnode **_vnode, ino_t *_parentID, bool kernel);
478 static void inc_vnode_ref_count(struct vnode *vnode);
479 static status_t dec_vnode_ref_count(struct vnode *vnode, bool alwaysFree,
480 	bool reenter);
481 static inline void put_vnode(struct vnode *vnode);
482 static status_t fs_unmount(char *path, dev_t mountID, uint32 flags,
483 	bool kernel);
484 
485 
486 static struct fd_ops sFileOps = {
487 	file_read,
488 	file_write,
489 	file_seek,
490 	common_ioctl,
491 	NULL,		// set_flags
492 	file_select,
493 	file_deselect,
494 	NULL,		// read_dir()
495 	NULL,		// rewind_dir()
496 	common_read_stat,
497 	common_write_stat,
498 	file_close,
499 	file_free_fd
500 };
501 
502 static struct fd_ops sDirectoryOps = {
503 	NULL,		// read()
504 	NULL,		// write()
505 	NULL,		// seek()
506 	common_ioctl,
507 	NULL,		// set_flags
508 	NULL,		// select()
509 	NULL,		// deselect()
510 	dir_read,
511 	dir_rewind,
512 	common_read_stat,
513 	common_write_stat,
514 	dir_close,
515 	dir_free_fd
516 };
517 
518 static struct fd_ops sAttributeDirectoryOps = {
519 	NULL,		// read()
520 	NULL,		// write()
521 	NULL,		// seek()
522 	common_ioctl,
523 	NULL,		// set_flags
524 	NULL,		// select()
525 	NULL,		// deselect()
526 	attr_dir_read,
527 	attr_dir_rewind,
528 	common_read_stat,
529 	common_write_stat,
530 	attr_dir_close,
531 	attr_dir_free_fd
532 };
533 
534 static struct fd_ops sAttributeOps = {
535 	attr_read,
536 	attr_write,
537 	attr_seek,
538 	common_ioctl,
539 	NULL,		// set_flags
540 	NULL,		// select()
541 	NULL,		// deselect()
542 	NULL,		// read_dir()
543 	NULL,		// rewind_dir()
544 	attr_read_stat,
545 	attr_write_stat,
546 	attr_close,
547 	attr_free_fd
548 };
549 
550 static struct fd_ops sIndexDirectoryOps = {
551 	NULL,		// read()
552 	NULL,		// write()
553 	NULL,		// seek()
554 	NULL,		// ioctl()
555 	NULL,		// set_flags
556 	NULL,		// select()
557 	NULL,		// deselect()
558 	index_dir_read,
559 	index_dir_rewind,
560 	NULL,		// read_stat()
561 	NULL,		// write_stat()
562 	index_dir_close,
563 	index_dir_free_fd
564 };
565 
566 #if 0
567 static struct fd_ops sIndexOps = {
568 	NULL,		// read()
569 	NULL,		// write()
570 	NULL,		// seek()
571 	NULL,		// ioctl()
572 	NULL,		// set_flags
573 	NULL,		// select()
574 	NULL,		// deselect()
575 	NULL,		// dir_read()
576 	NULL,		// dir_rewind()
577 	index_read_stat,	// read_stat()
578 	NULL,		// write_stat()
579 	NULL,		// dir_close()
580 	NULL		// free_fd()
581 };
582 #endif
583 
584 static struct fd_ops sQueryOps = {
585 	NULL,		// read()
586 	NULL,		// write()
587 	NULL,		// seek()
588 	NULL,		// ioctl()
589 	NULL,		// set_flags
590 	NULL,		// select()
591 	NULL,		// deselect()
592 	query_read,
593 	query_rewind,
594 	NULL,		// read_stat()
595 	NULL,		// write_stat()
596 	query_close,
597 	query_free_fd
598 };
599 
600 
601 // VNodePutter
602 class VNodePutter {
603 public:
604 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
605 
606 	~VNodePutter()
607 	{
608 		Put();
609 	}
610 
611 	void SetTo(struct vnode *vnode)
612 	{
613 		Put();
614 		fVNode = vnode;
615 	}
616 
617 	void Put()
618 	{
619 		if (fVNode) {
620 			put_vnode(fVNode);
621 			fVNode = NULL;
622 		}
623 	}
624 
625 	struct vnode *Detach()
626 	{
627 		struct vnode *vnode = fVNode;
628 		fVNode = NULL;
629 		return vnode;
630 	}
631 
632 private:
633 	struct vnode *fVNode;
634 };
635 
636 
637 class FDCloser {
638 public:
639 	FDCloser() : fFD(-1), fKernel(true) {}
640 
641 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
642 
643 	~FDCloser()
644 	{
645 		Close();
646 	}
647 
648 	void SetTo(int fd, bool kernel)
649 	{
650 		Close();
651 		fFD = fd;
652 		fKernel = kernel;
653 	}
654 
655 	void Close()
656 	{
657 		if (fFD >= 0) {
658 			if (fKernel)
659 				_kern_close(fFD);
660 			else
661 				_user_close(fFD);
662 			fFD = -1;
663 		}
664 	}
665 
666 	int Detach()
667 	{
668 		int fd = fFD;
669 		fFD = -1;
670 		return fd;
671 	}
672 
673 private:
674 	int		fFD;
675 	bool	fKernel;
676 };
677 
678 
679 #if VFS_PAGES_IO_TRACING
680 
681 namespace VFSPagesIOTracing {
682 
683 class PagesIOTraceEntry : public AbstractTraceEntry {
684 protected:
685 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
686 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
687 		status_t status, size_t bytesTransferred)
688 		:
689 		fVnode(vnode),
690 		fMountID(vnode->mount->id),
691 		fNodeID(vnode->id),
692 		fCookie(cookie),
693 		fPos(pos),
694 		fCount(count),
695 		fFlags(flags),
696 		fBytesRequested(bytesRequested),
697 		fStatus(status),
698 		fBytesTransferred(bytesTransferred)
699 	{
700 		fVecs = (iovec*)alloc_tracing_buffer_memcpy(vecs, sizeof(iovec) * count,
701 			false);
702 	}
703 
704 	void AddDump(TraceOutput& out, const char* mode)
705 	{
706 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
707 			"pos: %lld, size: %lu, vecs: {", mode, fVnode, fMountID, fNodeID,
708 			fCookie, fPos, fBytesRequested);
709 
710 		if (fVecs != NULL) {
711 			for (uint32 i = 0; i < fCount; i++) {
712 				if (i > 0)
713 					out.Print(", ");
714 				out.Print("(%p, %lu)", fVecs[i].iov_base, fVecs[i].iov_len);
715 			}
716 		}
717 
718 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %lu",
719 			fFlags, fStatus, fBytesTransferred);
720 	}
721 
722 protected:
723 	struct vnode*	fVnode;
724 	dev_t			fMountID;
725 	ino_t			fNodeID;
726 	void*			fCookie;
727 	off_t			fPos;
728 	iovec*			fVecs;
729 	uint32			fCount;
730 	uint32			fFlags;
731 	size_t			fBytesRequested;
732 	status_t		fStatus;
733 	size_t			fBytesTransferred;
734 };
735 
736 
737 class ReadPages : public PagesIOTraceEntry {
738 public:
739 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
740 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
741 		status_t status, size_t bytesTransferred)
742 		:
743 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
744 			bytesRequested, status, bytesTransferred)
745 	{
746 		Initialized();
747 	}
748 
749 	virtual void AddDump(TraceOutput& out)
750 	{
751 		PagesIOTraceEntry::AddDump(out, "read");
752 	}
753 };
754 
755 
756 class WritePages : public PagesIOTraceEntry {
757 public:
758 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
759 		const iovec* vecs, uint32 count, uint32 flags, size_t bytesRequested,
760 		status_t status, size_t bytesTransferred)
761 		:
762 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
763 			bytesRequested, status, bytesTransferred)
764 	{
765 		Initialized();
766 	}
767 
768 	virtual void AddDump(TraceOutput& out)
769 	{
770 		PagesIOTraceEntry::AddDump(out, "write");
771 	}
772 };
773 
774 }	// namespace VFSPagesIOTracing
775 
776 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
777 #else
778 #	define TPIO(x) ;
779 #endif	// VFS_PAGES_IO_TRACING
780 
781 
782 static int
783 mount_compare(void *_m, const void *_key)
784 {
785 	struct fs_mount *mount = (fs_mount *)_m;
786 	const dev_t *id = (dev_t *)_key;
787 
788 	if (mount->id == *id)
789 		return 0;
790 
791 	return -1;
792 }
793 
794 
795 static uint32
796 mount_hash(void *_m, const void *_key, uint32 range)
797 {
798 	struct fs_mount *mount = (fs_mount *)_m;
799 	const dev_t *id = (dev_t *)_key;
800 
801 	if (mount)
802 		return mount->id % range;
803 
804 	return (uint32)*id % range;
805 }
806 
807 
808 /*! Finds the mounted device (the fs_mount structure) with the given ID.
809 	Note, you must hold the gMountMutex lock when you call this function.
810 */
811 static struct fs_mount *
812 find_mount(dev_t id)
813 {
814 	ASSERT_LOCKED_MUTEX(&sMountMutex);
815 
816 	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
817 }
818 
819 
820 static status_t
821 get_mount(dev_t id, struct fs_mount **_mount)
822 {
823 	struct fs_mount *mount;
824 
825 	MutexLocker nodeLocker(sVnodeMutex);
826 	MutexLocker mountLocker(sMountMutex);
827 
828 	mount = find_mount(id);
829 	if (mount == NULL)
830 		return B_BAD_VALUE;
831 
832 	struct vnode* rootNode = mount->root_vnode;
833 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
834 		// might have been called during a mount/unmount operation
835 		return B_BUSY;
836 	}
837 
838 	inc_vnode_ref_count(mount->root_vnode);
839 	*_mount = mount;
840 	return B_OK;
841 }
842 
843 
844 static void
845 put_mount(struct fs_mount *mount)
846 {
847 	if (mount)
848 		put_vnode(mount->root_vnode);
849 }
850 
851 
852 static status_t
853 put_file_system(file_system_module_info *fs)
854 {
855 	return put_module(fs->info.name);
856 }
857 
858 
859 /*!	Tries to open the specified file system module.
860 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
861 	Returns a pointer to file system module interface, or NULL if it
862 	could not open the module.
863 */
864 static file_system_module_info *
865 get_file_system(const char *fsName)
866 {
867 	char name[B_FILE_NAME_LENGTH];
868 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
869 		// construct module name if we didn't get one
870 		// (we currently support only one API)
871 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
872 		fsName = NULL;
873 	}
874 
875 	file_system_module_info *info;
876 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
877 		return NULL;
878 
879 	return info;
880 }
881 
882 
883 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
884 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
885 	The name is allocated for you, and you have to free() it when you're
886 	done with it.
887 	Returns NULL if the required memory is no available.
888 */
889 static char *
890 get_file_system_name(const char *fsName)
891 {
892 	const size_t length = strlen("file_systems/");
893 
894 	if (strncmp(fsName, "file_systems/", length)) {
895 		// the name already seems to be the module's file name
896 		return strdup(fsName);
897 	}
898 
899 	fsName += length;
900 	const char *end = strchr(fsName, '/');
901 	if (end == NULL) {
902 		// this doesn't seem to be a valid name, but well...
903 		return strdup(fsName);
904 	}
905 
906 	// cut off the trailing /v1
907 
908 	char *name = (char *)malloc(end + 1 - fsName);
909 	if (name == NULL)
910 		return NULL;
911 
912 	strlcpy(name, fsName, end + 1 - fsName);
913 	return name;
914 }
915 
916 
917 static int
918 vnode_compare(void *_vnode, const void *_key)
919 {
920 	struct vnode *vnode = (struct vnode *)_vnode;
921 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
922 
923 	if (vnode->device == key->device && vnode->id == key->vnode)
924 		return 0;
925 
926 	return -1;
927 }
928 
929 
930 static uint32
931 vnode_hash(void *_vnode, const void *_key, uint32 range)
932 {
933 	struct vnode *vnode = (struct vnode *)_vnode;
934 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
935 
936 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
937 
938 	if (vnode != NULL)
939 		return VHASH(vnode->device, vnode->id) % range;
940 
941 	return VHASH(key->device, key->vnode) % range;
942 
943 #undef VHASH
944 }
945 
946 
947 static void
948 add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
949 {
950 	recursive_lock_lock(&mount->rlock);
951 
952 	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);
953 
954 	recursive_lock_unlock(&mount->rlock);
955 }
956 
957 
958 static void
959 remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
960 {
961 	recursive_lock_lock(&mount->rlock);
962 
963 	list_remove_link(&vnode->mount_link);
964 	vnode->mount_link.next = vnode->mount_link.prev = NULL;
965 
966 	recursive_lock_unlock(&mount->rlock);
967 }
968 
969 
970 static status_t
971 create_new_vnode(struct vnode **_vnode, dev_t mountID, ino_t vnodeID)
972 {
973 	FUNCTION(("create_new_vnode()\n"));
974 
975 	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
976 	if (vnode == NULL)
977 		return B_NO_MEMORY;
978 
979 	// initialize basic values
980 	memset(vnode, 0, sizeof(struct vnode));
981 	vnode->device = mountID;
982 	vnode->id = vnodeID;
983 
984 	// add the vnode to the mount structure
985 	mutex_lock(&sMountMutex);
986 	vnode->mount = find_mount(mountID);
987 	if (!vnode->mount || vnode->mount->unmounting) {
988 		mutex_unlock(&sMountMutex);
989 		free(vnode);
990 		return B_ENTRY_NOT_FOUND;
991 	}
992 
993 	hash_insert(sVnodeTable, vnode);
994 	add_vnode_to_mount_list(vnode, vnode->mount);
995 
996 	mutex_unlock(&sMountMutex);
997 
998 	vnode->ref_count = 1;
999 	*_vnode = vnode;
1000 
1001 	return B_OK;
1002 }
1003 
1004 
1005 /*!	Frees the vnode and all resources it has acquired, and removes
1006 	it from the vnode hash as well as from its mount structure.
1007 	Will also make sure that any cache modifications are written back.
1008 */
1009 static void
1010 free_vnode(struct vnode *vnode, bool reenter)
1011 {
1012 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);
1013 
1014 	// write back any changes in this vnode's cache -- but only
1015 	// if the vnode won't be deleted, in which case the changes
1016 	// will be discarded
1017 
1018 	if (!vnode->remove && HAS_FS_CALL(vnode, fsync))
1019 		FS_CALL_NO_PARAMS(vnode, fsync);
1020 
1021 	// Note: If this vnode has a cache attached, there will still be two
1022 	// references to that cache at this point. The last one belongs to the vnode
1023 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1024 	// cache. Each but the last reference to a cache also includes a reference
1025 	// to the vnode. The file cache, however, released its reference (cf.
1026 	// file_cache_create()), so that this vnode's ref count has the chance to
1027 	// ever drop to 0. Deleting the file cache now, will cause the next to last
1028 	// cache reference to be released, which will also release a (no longer
1029 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1030 	// count, so that it will neither become negative nor 0.
1031 	vnode->ref_count = 2;
1032 
1033 	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
1034 	// cache either (i.e. no one can get a cache reference while we're deleting
1035 	// the vnode).. This is, however, not the case for the page daemon. It gets
1036 	// its cache references via the pages it scans, so it can in fact get a
1037 	// vnode reference while we're deleting the vnode.
1038 
1039 	if (!vnode->unpublished) {
1040 		if (vnode->remove)
1041 			FS_CALL(vnode, remove_vnode, reenter);
1042 		else
1043 			FS_CALL(vnode, put_vnode, reenter);
1044 	}
1045 
1046 	// The file system has removed the resources of the vnode now, so we can
1047 	// make it available again (and remove the busy vnode from the hash)
1048 	mutex_lock(&sVnodeMutex);
1049 	hash_remove(sVnodeTable, vnode);
1050 	mutex_unlock(&sVnodeMutex);
1051 
1052 	// if we have a vm_cache attached, remove it
1053 	if (vnode->cache)
1054 		vnode->cache->ReleaseRef();
1055 
1056 	vnode->cache = NULL;
1057 
1058 	remove_vnode_from_mount_list(vnode, vnode->mount);
1059 
1060 	free(vnode);
1061 }
1062 
1063 
1064 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1065 	if the counter dropped to 0.
1066 
1067 	The caller must, of course, own a reference to the vnode to call this
1068 	function.
1069 	The caller must not hold the sVnodeMutex or the sMountMutex.
1070 
1071 	\param vnode the vnode.
1072 	\param reenter \c true, if this function is called (indirectly) from within
1073 		   a file system.
1074 	\return \c B_OK, if everything went fine, an error code otherwise.
1075 */
1076 static status_t
1077 dec_vnode_ref_count(struct vnode *vnode, bool alwaysFree, bool reenter)
1078 {
1079 	mutex_lock(&sVnodeMutex);
1080 
1081 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1082 
1083 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1084 
1085 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
1086 
1087 	if (oldRefCount == 1) {
1088 		if (vnode->busy)
1089 			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1090 
1091 		bool freeNode = false;
1092 
1093 		// Just insert the vnode into an unused list if we don't need
1094 		// to delete it
1095 		if (vnode->remove || alwaysFree) {
1096 			vnode->busy = true;
1097 			freeNode = true;
1098 		} else {
1099 			list_add_item(&sUnusedVnodeList, vnode);
1100 			if (++sUnusedVnodes > kMaxUnusedVnodes
1101 				&& low_resource_state(
1102 					B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY)
1103 						!= B_NO_LOW_RESOURCE) {
1104 				// there are too many unused vnodes so we free the oldest one
1105 				// TODO: evaluate this mechanism
1106 				vnode = (struct vnode*)list_remove_head_item(&sUnusedVnodeList);
1107 				vnode->busy = true;
1108 				freeNode = true;
1109 				sUnusedVnodes--;
1110 			}
1111 		}
1112 
1113 		mutex_unlock(&sVnodeMutex);
1114 
1115 		if (freeNode)
1116 			free_vnode(vnode, reenter);
1117 	} else
1118 		mutex_unlock(&sVnodeMutex);
1119 
1120 	return B_OK;
1121 }
1122 
1123 
1124 /*!	\brief Increments the reference counter of the given vnode.
1125 
1126 	The caller must either already have a reference to the vnode or hold
1127 	the sVnodeMutex.
1128 
1129 	\param vnode the vnode.
1130 */
1131 static void
1132 inc_vnode_ref_count(struct vnode *vnode)
1133 {
1134 	atomic_add(&vnode->ref_count, 1);
1135 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
1136 }
1137 
1138 
1139 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
1140 
1141 	The caller must hold the sVnodeMutex.
1142 
1143 	\param mountID the mount ID.
1144 	\param vnodeID the node ID.
1145 
1146 	\return The vnode structure, if it was found in the hash table, \c NULL
1147 			otherwise.
1148 */
1149 static struct vnode *
1150 lookup_vnode(dev_t mountID, ino_t vnodeID)
1151 {
1152 	struct vnode_hash_key key;
1153 
1154 	key.device = mountID;
1155 	key.vnode = vnodeID;
1156 
1157 	return (vnode *)hash_lookup(sVnodeTable, &key);
1158 }
1159 
1160 
1161 static bool
1162 is_special_node_type(int type)
1163 {
1164 	// at the moment only FIFOs are supported
1165 	return S_ISFIFO(type);
1166 }
1167 
1168 
1169 static status_t
1170 create_special_sub_node(struct vnode* vnode, uint32 flags)
1171 {
1172 	if (S_ISFIFO(vnode->type))
1173 		return create_fifo_vnode(vnode->mount->volume, vnode);
1174 
1175 	return B_BAD_VALUE;
1176 }
1177 
1178 
1179 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1180 
1181 	If the node is not yet in memory, it will be loaded.
1182 
1183 	The caller must not hold the sVnodeMutex or the sMountMutex.
1184 
1185 	\param mountID the mount ID.
1186 	\param vnodeID the node ID.
1187 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1188 		   retrieved vnode structure shall be written.
1189 	\param reenter \c true, if this function is called (indirectly) from within
1190 		   a file system.
1191 	\return \c B_OK, if everything when fine, an error code otherwise.
1192 */
1193 static status_t
1194 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode, bool canWait,
1195 	int reenter)
1196 {
1197 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));
1198 
1199 	mutex_lock(&sVnodeMutex);
1200 
1201 	int32 tries = 1000;
1202 		// try for 10 secs
1203 restart:
1204 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
1205 	if (vnode && vnode->busy) {
1206 		mutex_unlock(&sVnodeMutex);
1207 		if (!canWait || --tries < 0) {
1208 			// vnode doesn't seem to become unbusy
1209 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
1210 			return B_BUSY;
1211 		}
1212 		snooze(10000); // 10 ms
1213 		mutex_lock(&sVnodeMutex);
1214 		goto restart;
1215 	}
1216 
1217 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1218 
1219 	status_t status;
1220 
1221 	if (vnode) {
1222 		if (vnode->ref_count == 0) {
1223 			// this vnode has been unused before
1224 			list_remove_item(&sUnusedVnodeList, vnode);
1225 			sUnusedVnodes--;
1226 		}
1227 		inc_vnode_ref_count(vnode);
1228 	} else {
1229 		// we need to create a new vnode and read it in
1230 		status = create_new_vnode(&vnode, mountID, vnodeID);
1231 		if (status < B_OK)
1232 			goto err;
1233 
1234 		vnode->busy = true;
1235 		mutex_unlock(&sVnodeMutex);
1236 
1237 		int type;
1238 		uint32 flags;
1239 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1240 			&flags, reenter);
1241 		if (status == B_OK && vnode->private_node == NULL)
1242 			status = B_BAD_VALUE;
1243 
1244 		bool gotNode = status == B_OK;
1245 		bool publishSpecialSubNode = false;
1246 		if (gotNode) {
1247 			vnode->type = type;
1248 			publishSpecialSubNode = is_special_node_type(type)
1249 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1250 		}
1251 
1252 		if (gotNode && publishSpecialSubNode)
1253 			status = create_special_sub_node(vnode, flags);
1254 
1255 		mutex_lock(&sVnodeMutex);
1256 
1257 		if (status < B_OK) {
1258 			if (gotNode)
1259 				FS_CALL(vnode, put_vnode, reenter);
1260 
1261 			goto err1;
1262 		}
1263 
1264 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
1265 		vnode->busy = false;
1266 	}
1267 
1268 	mutex_unlock(&sVnodeMutex);
1269 
1270 	TRACE(("get_vnode: returning %p\n", vnode));
1271 
1272 	*_vnode = vnode;
1273 	return B_OK;
1274 
1275 err1:
1276 	hash_remove(sVnodeTable, vnode);
1277 	remove_vnode_from_mount_list(vnode, vnode->mount);
1278 err:
1279 	mutex_unlock(&sVnodeMutex);
1280 	if (vnode)
1281 		free(vnode);
1282 
1283 	return status;
1284 }
1285 
1286 
1287 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1288 	if the counter dropped to 0.
1289 
1290 	The caller must, of course, own a reference to the vnode to call this
1291 	function.
1292 	The caller must not hold the sVnodeMutex or the sMountMutex.
1293 
1294 	\param vnode the vnode.
1295 */
1296 static inline void
1297 put_vnode(struct vnode *vnode)
1298 {
1299 	dec_vnode_ref_count(vnode, false, false);
1300 }
1301 
1302 
1303 static void
1304 vnode_low_resource_handler(void */*data*/, uint32 resources, int32 level)
1305 {
1306 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1307 
1308 	uint32 count = 1;
1309 	switch (level) {
1310 		case B_NO_LOW_RESOURCE:
1311 			return;
1312 		case B_LOW_RESOURCE_NOTE:
1313 			count = sUnusedVnodes / 100;
1314 			break;
1315 		case B_LOW_RESOURCE_WARNING:
1316 			count = sUnusedVnodes / 10;
1317 			break;
1318 		case B_LOW_RESOURCE_CRITICAL:
1319 			count = sUnusedVnodes;
1320 			break;
1321 	}
1322 
1323 	if (count > sUnusedVnodes)
1324 		count = sUnusedVnodes;
1325 
1326 	// Write back the modified pages of some unused vnodes and free them
1327 
1328 	for (uint32 i = 0; i < count; i++) {
1329 		mutex_lock(&sVnodeMutex);
1330 		struct vnode *vnode = (struct vnode *)list_remove_head_item(
1331 			&sUnusedVnodeList);
1332 		if (vnode == NULL) {
1333 			mutex_unlock(&sVnodeMutex);
1334 			break;
1335 		}
1336 
1337 		inc_vnode_ref_count(vnode);
1338 		sUnusedVnodes--;
1339 
1340 		mutex_unlock(&sVnodeMutex);
1341 
1342 		if (vnode->cache != NULL)
1343 			vnode->cache->WriteModified();
1344 
1345 		dec_vnode_ref_count(vnode, true, false);
1346 			// this should free the vnode when it's still unused
1347 	}
1348 }
1349 
1350 
1351 static inline void
1352 put_advisory_locking(struct advisory_locking *locking)
1353 {
1354 	release_sem(locking->lock);
1355 }
1356 
1357 
1358 /*!	Returns the advisory_locking object of the \a vnode in case it
1359 	has one, and locks it.
1360 	You have to call put_advisory_locking() when you're done with
1361 	it.
1362 	Note, you must not have the vnode mutex locked when calling
1363 	this function.
1364 */
1365 static struct advisory_locking *
1366 get_advisory_locking(struct vnode *vnode)
1367 {
1368 	mutex_lock(&sVnodeMutex);
1369 
1370 	struct advisory_locking *locking = vnode->advisory_locking;
1371 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1372 
1373 	mutex_unlock(&sVnodeMutex);
1374 
1375 	if (lock >= B_OK)
1376 		lock = acquire_sem(lock);
1377 	if (lock < B_OK) {
1378 		// This means the locking has been deleted in the mean time
1379 		// or had never existed in the first place - otherwise, we
1380 		// would get the lock at some point.
1381 		return NULL;
1382 	}
1383 
1384 	return locking;
1385 }
1386 
1387 
1388 /*!	Creates a locked advisory_locking object, and attaches it to the
1389 	given \a vnode.
1390 	Returns B_OK in case of success - also if the vnode got such an
1391 	object from someone else in the mean time, you'll still get this
1392 	one locked then.
1393 */
1394 static status_t
1395 create_advisory_locking(struct vnode *vnode)
1396 {
1397 	if (vnode == NULL)
1398 		return B_FILE_ERROR;
1399 
1400 	ObjectDeleter<advisory_locking> lockingDeleter;
1401 	struct advisory_locking *locking = NULL;
1402 
1403 	while (get_advisory_locking(vnode) == NULL) {
1404 		// no locking object set on the vnode yet, create one
1405 		if (locking == NULL) {
1406 			locking = new(std::nothrow) advisory_locking;
1407 			if (locking == NULL)
1408 				return B_NO_MEMORY;
1409 			lockingDeleter.SetTo(locking);
1410 
1411 			locking->wait_sem = create_sem(0, "advisory lock");
1412 			if (locking->wait_sem < B_OK)
1413 				return locking->wait_sem;
1414 
1415 			locking->lock = create_sem(0, "advisory locking");
1416 			if (locking->lock < B_OK)
1417 				return locking->lock;
1418 		}
1419 
1420 		// set our newly created locking object
1421 		MutexLocker _(sVnodeMutex);
1422 		if (vnode->advisory_locking == NULL) {
1423 			vnode->advisory_locking = locking;
1424 			lockingDeleter.Detach();
1425 			return B_OK;
1426 		}
1427 	}
1428 
1429 	// The vnode already had a locking object. That's just as well.
1430 
1431 	return B_OK;
1432 }
1433 
1434 
1435 /*!	Retrieves the first lock that has been set by the current team.
1436 */
1437 static status_t
1438 get_advisory_lock(struct vnode *vnode, struct flock *flock)
1439 {
1440 	struct advisory_locking *locking = get_advisory_locking(vnode);
1441 	if (locking == NULL)
1442 		return B_BAD_VALUE;
1443 
1444 	// TODO: this should probably get the flock by its file descriptor!
1445 	team_id team = team_get_current_team_id();
1446 	status_t status = B_BAD_VALUE;
1447 
1448 	LockList::Iterator iterator = locking->locks.GetIterator();
1449 	while (iterator.HasNext()) {
1450 		struct advisory_lock *lock = iterator.Next();
1451 
1452 		if (lock->team == team) {
1453 			flock->l_start = lock->start;
1454 			flock->l_len = lock->end - lock->start + 1;
1455 			status = B_OK;
1456 			break;
1457 		}
1458 	}
1459 
1460 	put_advisory_locking(locking);
1461 	return status;
1462 }
1463 
1464 
1465 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1466 	with the advisory_lock \a lock.
1467 */
1468 static bool
1469 advisory_lock_intersects(struct advisory_lock *lock, struct flock *flock)
1470 {
1471 	if (flock == NULL)
1472 		return true;
1473 
1474 	return lock->start <= flock->l_start - 1 + flock->l_len
1475 		&& lock->end >= flock->l_start;
1476 }
1477 
1478 
1479 /*!	Removes the specified lock, or all locks of the calling team
1480 	if \a flock is NULL.
1481 */
1482 static status_t
1483 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1484 {
1485 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1486 
1487 	struct advisory_locking *locking = get_advisory_locking(vnode);
1488 	if (locking == NULL)
1489 		return B_OK;
1490 
1491 	// TODO: use the thread ID instead??
1492 	team_id team = team_get_current_team_id();
1493 	pid_t session = thread_get_current_thread()->team->session_id;
1494 
1495 	// find matching lock entries
1496 
1497 	LockList::Iterator iterator = locking->locks.GetIterator();
1498 	while (iterator.HasNext()) {
1499 		struct advisory_lock *lock = iterator.Next();
1500 		bool removeLock = false;
1501 
1502 		if (lock->session == session)
1503 			removeLock = true;
1504 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1505 			bool endsBeyond = false;
1506 			bool startsBefore = false;
1507 			if (flock != NULL) {
1508 				startsBefore = lock->start < flock->l_start;
1509 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1510 			}
1511 
1512 			if (!startsBefore && !endsBeyond) {
1513 				// lock is completely contained in flock
1514 				removeLock = true;
1515 			} else if (startsBefore && !endsBeyond) {
1516 				// cut the end of the lock
1517 				lock->end = flock->l_start - 1;
1518 			} else if (!startsBefore && endsBeyond) {
1519 				// cut the start of the lock
1520 				lock->start = flock->l_start + flock->l_len;
1521 			} else {
1522 				// divide the lock into two locks
1523 				struct advisory_lock *secondLock = new advisory_lock;
1524 				if (secondLock == NULL) {
1525 					// TODO: we should probably revert the locks we already
1526 					// changed... (ie. allocate upfront)
1527 					put_advisory_locking(locking);
1528 					return B_NO_MEMORY;
1529 				}
1530 
1531 				lock->end = flock->l_start - 1;
1532 
1533 				secondLock->team = lock->team;
1534 				secondLock->session = lock->session;
1535 				// values must already be normalized when getting here
1536 				secondLock->start = flock->l_start + flock->l_len;
1537 				secondLock->end = lock->end;
1538 				secondLock->shared = lock->shared;
1539 
1540 				locking->locks.Add(secondLock);
1541 			}
1542 		}
1543 
1544 		if (removeLock) {
1545 			// this lock is no longer used
1546 			iterator.Remove();
1547 			free(lock);
1548 		}
1549 	}
1550 
1551 	bool removeLocking = locking->locks.IsEmpty();
1552 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1553 
1554 	put_advisory_locking(locking);
1555 
1556 	if (removeLocking) {
1557 		// We can remove the whole advisory locking structure; it's no
1558 		// longer used
1559 		locking = get_advisory_locking(vnode);
1560 		if (locking != NULL) {
1561 			MutexLocker locker(sVnodeMutex);
1562 
1563 			// the locking could have been changed in the mean time
1564 			if (locking->locks.IsEmpty()) {
1565 				vnode->advisory_locking = NULL;
1566 				locker.Unlock();
1567 
1568 				// we've detached the locking from the vnode, so we can
1569 				// safely delete it
1570 				delete_sem(locking->lock);
1571 				delete_sem(locking->wait_sem);
1572 				delete locking;
1573 			} else {
1574 				// the locking is in use again
1575 				locker.Unlock();
1576 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1577 			}
1578 		}
1579 	}
1580 
1581 	return B_OK;
1582 }
1583 
1584 
1585 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1586 	will wait for the lock to become available, if there are any collisions
1587 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1588 
1589 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1590 	BSD flock() semantics are used, that is, all children can unlock the file
1591 	in question (we even allow parents to remove the lock, though, but that
1592 	seems to be in line to what the BSD's are doing).
1593 */
1594 static status_t
1595 acquire_advisory_lock(struct vnode *vnode, pid_t session, struct flock *flock,
1596 	bool wait)
1597 {
1598 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1599 		vnode, flock, wait ? "yes" : "no"));
1600 
1601 	bool shared = flock->l_type == F_RDLCK;
1602 	status_t status = B_OK;
1603 
1604 	// TODO: do deadlock detection!
1605 
1606 	struct advisory_locking *locking;
1607 	sem_id waitForLock;
1608 
1609 	while (true) {
1610 		// if this vnode has an advisory_locking structure attached,
1611 		// lock that one and search for any colliding file lock
1612 		status = create_advisory_locking(vnode);
1613 		if (status != B_OK)
1614 			return status;
1615 
1616 		locking = vnode->advisory_locking;
1617 		team_id team = team_get_current_team_id();
1618 		waitForLock = -1;
1619 
1620 		// test for collisions
1621 		LockList::Iterator iterator = locking->locks.GetIterator();
1622 		while (iterator.HasNext()) {
1623 			struct advisory_lock *lock = iterator.Next();
1624 
1625 			// TODO: locks from the same team might be joinable!
1626 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1627 				// locks do overlap
1628 				if (!shared || !lock->shared) {
1629 					// we need to wait
1630 					waitForLock = locking->wait_sem;
1631 					break;
1632 				}
1633 			}
1634 		}
1635 
1636 		if (waitForLock < 0)
1637 			break;
1638 
1639 		// We need to wait. Do that or fail now, if we've been asked not to.
1640 
1641 		if (!wait) {
1642 			put_advisory_locking(locking);
1643 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1644 		}
1645 
1646 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1647 			B_CAN_INTERRUPT, 0);
1648 		if (status != B_OK && status != B_BAD_SEM_ID)
1649 			return status;
1650 
1651 		// We have been notified, but we need to re-lock the locking object. So
1652 		// go another round...
1653 	}
1654 
1655 	// install new lock
1656 
1657 	struct advisory_lock *lock = (struct advisory_lock *)malloc(
1658 		sizeof(struct advisory_lock));
1659 	if (lock == NULL) {
1660 		if (waitForLock >= B_OK)
1661 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1662 		release_sem(locking->lock);
1663 		return B_NO_MEMORY;
1664 	}
1665 
1666 	lock->team = team_get_current_team_id();
1667 	lock->session = session;
1668 	// values must already be normalized when getting here
1669 	lock->start = flock->l_start;
1670 	lock->end = flock->l_start - 1 + flock->l_len;
1671 	lock->shared = shared;
1672 
1673 	locking->locks.Add(lock);
1674 	put_advisory_locking(locking);
1675 
1676 	return status;
1677 }
1678 
1679 
1680 /*!	Normalizes the \a flock structure to make it easier to compare the
1681 	structure with others. The l_start and l_len fields are set to absolute
1682 	values according to the l_whence field.
1683 */
1684 static status_t
1685 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1686 {
1687 	switch (flock->l_whence) {
1688 		case SEEK_SET:
1689 			break;
1690 		case SEEK_CUR:
1691 			flock->l_start += descriptor->pos;
1692 			break;
1693 		case SEEK_END:
1694 		{
1695 			struct vnode *vnode = descriptor->u.vnode;
1696 			struct stat stat;
1697 			status_t status;
1698 
1699 			if (!HAS_FS_CALL(vnode, read_stat))
1700 				return EOPNOTSUPP;
1701 
1702 			status = FS_CALL(vnode, read_stat, &stat);
1703 			if (status < B_OK)
1704 				return status;
1705 
1706 			flock->l_start += stat.st_size;
1707 			break;
1708 		}
1709 		default:
1710 			return B_BAD_VALUE;
1711 	}
1712 
1713 	if (flock->l_start < 0)
1714 		flock->l_start = 0;
1715 	if (flock->l_len == 0)
1716 		flock->l_len = OFF_MAX;
1717 
1718 	// don't let the offset and length overflow
1719 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1720 		flock->l_len = OFF_MAX - flock->l_start;
1721 
1722 	if (flock->l_len < 0) {
1723 		// a negative length reverses the region
1724 		flock->l_start += flock->l_len;
1725 		flock->l_len = -flock->l_len;
1726 	}
1727 
1728 	return B_OK;
1729 }
1730 
1731 
1732 static void
1733 replace_vnode_if_disconnected(struct fs_mount* mount,
1734 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1735 	struct vnode* fallBack, bool lockRootLock)
1736 {
1737 	if (lockRootLock)
1738 		mutex_lock(&sIOContextRootLock);
1739 
1740 	struct vnode* obsoleteVnode = NULL;
1741 
1742 	if (vnode != NULL && vnode->mount == mount
1743 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1744 		obsoleteVnode = vnode;
1745 
1746 		if (vnode == mount->root_vnode) {
1747 			// redirect the vnode to the covered vnode
1748 			vnode = mount->covers_vnode;
1749 		} else
1750 			vnode = fallBack;
1751 
1752 		if (vnode != NULL)
1753 			inc_vnode_ref_count(vnode);
1754 	}
1755 
1756 	if (lockRootLock)
1757 		mutex_unlock(&sIOContextRootLock);
1758 
1759 	if (obsoleteVnode != NULL)
1760 		put_vnode(obsoleteVnode);
1761 }
1762 
1763 
1764 /*!	Disconnects all file descriptors that are associated with the
1765 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1766 	\a mount object.
1767 
1768 	Note, after you've called this function, there might still be ongoing
1769 	accesses - they won't be interrupted if they already happened before.
1770 	However, any subsequent access will fail.
1771 
1772 	This is not a cheap function and should be used with care and rarely.
1773 	TODO: there is currently no means to stop a blocking read/write!
1774 */
1775 void
1776 disconnect_mount_or_vnode_fds(struct fs_mount *mount,
1777 	struct vnode *vnodeToDisconnect)
1778 {
1779 	// iterate over all teams and peek into their file descriptors
1780 	int32 nextTeamID = 0;
1781 
1782 	while (true) {
1783 		struct io_context *context = NULL;
1784 		bool contextLocked = false;
1785 		struct team *team = NULL;
1786 		team_id lastTeamID;
1787 
1788 		cpu_status state = disable_interrupts();
1789 		SpinLocker teamsLock(gTeamSpinlock);
1790 
1791 		lastTeamID = peek_next_thread_id();
1792 		if (nextTeamID < lastTeamID) {
1793 			// get next valid team
1794 			while (nextTeamID < lastTeamID
1795 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1796 				nextTeamID++;
1797 			}
1798 
1799 			if (team) {
1800 				context = (io_context *)team->io_context;
1801 
1802 				// Some acrobatics to lock the context in a safe way
1803 				// (cf. _kern_get_next_fd_info() for details).
1804 				GRAB_THREAD_LOCK();
1805 				teamsLock.Unlock();
1806 				contextLocked = mutex_lock_threads_locked(&context->io_mutex)
1807 					== B_OK;
1808 				RELEASE_THREAD_LOCK();
1809 
1810 				nextTeamID++;
1811 			}
1812 		}
1813 
1814 		teamsLock.Unlock();
1815 		restore_interrupts(state);
1816 
1817 		if (context == NULL)
1818 			break;
1819 
1820 		// we now have a context - since we couldn't lock it while having
1821 		// safe access to the team structure, we now need to lock the mutex
1822 		// manually
1823 
1824 		if (!contextLocked) {
1825 			// team seems to be gone, go over to the next team
1826 			continue;
1827 		}
1828 
1829 		// the team cannot be deleted completely while we're owning its
1830 		// io_context mutex, so we can safely play with it now
1831 
1832 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1833 			sRoot, true);
1834 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1835 			sRoot, false);
1836 
1837 		for (uint32 i = 0; i < context->table_size; i++) {
1838 			if (struct file_descriptor *descriptor = context->fds[i]) {
1839 				inc_fd_ref_count(descriptor);
1840 
1841 				// if this descriptor points at this mount, we
1842 				// need to disconnect it to be able to unmount
1843 				struct vnode *vnode = fd_vnode(descriptor);
1844 				if (vnodeToDisconnect != NULL) {
1845 					if (vnode == vnodeToDisconnect)
1846 						disconnect_fd(descriptor);
1847 				} else if (vnode != NULL && vnode->mount == mount
1848 					|| vnode == NULL && descriptor->u.mount == mount)
1849 					disconnect_fd(descriptor);
1850 
1851 				put_fd(descriptor);
1852 			}
1853 		}
1854 
1855 		mutex_unlock(&context->io_mutex);
1856 	}
1857 }
1858 
1859 
1860 /*!	\brief Gets the root node of the current IO context.
1861 	If \a kernel is \c true, the kernel IO context will be used.
1862 	The caller obtains a reference to the returned node.
1863 */
1864 struct vnode*
1865 get_root_vnode(bool kernel)
1866 {
1867 	if (!kernel) {
1868 		// Get current working directory from io context
1869 		struct io_context* context = get_current_io_context(kernel);
1870 
1871 		mutex_lock(&sIOContextRootLock);
1872 
1873 		struct vnode* root = context->root;
1874 		if (root != NULL)
1875 			inc_vnode_ref_count(root);
1876 
1877 		mutex_unlock(&sIOContextRootLock);
1878 
1879 		if (root != NULL)
1880 			return root;
1881 
1882 		// That should never happen.
1883 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1884 			"root\n", team_get_current_team_id());
1885 	}
1886 
1887 	inc_vnode_ref_count(sRoot);
1888 	return sRoot;
1889 }
1890 
1891 
1892 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1893 		   by.
1894 
1895 	Given an arbitrary vnode, the function checks, whether the node is covered
1896 	by the root of a volume. If it is the function obtains a reference to the
1897 	volume root node and returns it.
1898 
1899 	\param vnode The vnode in question.
1900 	\return The volume root vnode the vnode cover is covered by, if it is
1901 			indeed a mount point, or \c NULL otherwise.
1902 */
1903 static struct vnode *
1904 resolve_mount_point_to_volume_root(struct vnode *vnode)
1905 {
1906 	if (!vnode)
1907 		return NULL;
1908 
1909 	struct vnode *volumeRoot = NULL;
1910 
1911 	mutex_lock(&sVnodeCoveredByMutex);
1912 	if (vnode->covered_by) {
1913 		volumeRoot = vnode->covered_by;
1914 		inc_vnode_ref_count(volumeRoot);
1915 	}
1916 	mutex_unlock(&sVnodeCoveredByMutex);
1917 
1918 	return volumeRoot;
1919 }
1920 
1921 
1922 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1923 		   by.
1924 
1925 	Given an arbitrary vnode (identified by mount and node ID), the function
1926 	checks, whether the node is covered by the root of a volume. If it is the
1927 	function returns the mount and node ID of the volume root node. Otherwise
1928 	it simply returns the supplied mount and node ID.
1929 
1930 	In case of error (e.g. the supplied node could not be found) the variables
1931 	for storing the resolved mount and node ID remain untouched and an error
1932 	code is returned.
1933 
1934 	\param mountID The mount ID of the vnode in question.
1935 	\param nodeID The node ID of the vnode in question.
1936 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1937 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1938 	\return
1939 	- \c B_OK, if everything went fine,
1940 	- another error code, if something went wrong.
1941 */
1942 status_t
1943 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1944 	dev_t *resolvedMountID, ino_t *resolvedNodeID)
1945 {
1946 	// get the node
1947 	struct vnode *node;
1948 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1949 	if (error != B_OK)
1950 		return error;
1951 
1952 	// resolve the node
1953 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1954 	if (resolvedNode) {
1955 		put_vnode(node);
1956 		node = resolvedNode;
1957 	}
1958 
1959 	// set the return values
1960 	*resolvedMountID = node->device;
1961 	*resolvedNodeID = node->id;
1962 
1963 	put_vnode(node);
1964 
1965 	return B_OK;
1966 }
1967 
1968 
1969 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1970 
1971 	Given an arbitrary vnode, the function checks, whether the node is the
1972 	root of a volume. If it is (and if it is not "/"), the function obtains
1973 	a reference to the underlying mount point node and returns it.
1974 
1975 	\param vnode The vnode in question (caller must have a reference).
1976 	\return The mount point vnode the vnode covers, if it is indeed a volume
1977 			root and not "/", or \c NULL otherwise.
1978 */
1979 static struct vnode *
1980 resolve_volume_root_to_mount_point(struct vnode *vnode)
1981 {
1982 	if (!vnode)
1983 		return NULL;
1984 
1985 	struct vnode *mountPoint = NULL;
1986 
1987 	struct fs_mount *mount = vnode->mount;
1988 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1989 		mountPoint = mount->covers_vnode;
1990 		inc_vnode_ref_count(mountPoint);
1991 	}
1992 
1993 	return mountPoint;
1994 }
1995 
1996 
1997 /*!	\brief Gets the directory path and leaf name for a given path.
1998 
1999 	The supplied \a path is transformed to refer to the directory part of
2000 	the entry identified by the original path, and into the buffer \a filename
2001 	the leaf name of the original entry is written.
2002 	Neither the returned path nor the leaf name can be expected to be
2003 	canonical.
2004 
2005 	\param path The path to be analyzed. Must be able to store at least one
2006 		   additional character.
2007 	\param filename The buffer into which the leaf name will be written.
2008 		   Must be of size B_FILE_NAME_LENGTH at least.
2009 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2010 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2011 		   if the given path name is empty.
2012 */
2013 static status_t
2014 get_dir_path_and_leaf(char *path, char *filename)
2015 {
2016 	if (*path == '\0')
2017 		return B_ENTRY_NOT_FOUND;
2018 
2019 	char *p = strrchr(path, '/');
2020 		// '/' are not allowed in file names!
2021 
2022 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2023 
2024 	if (!p) {
2025 		// this path is single segment with no '/' in it
2026 		// ex. "foo"
2027 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2028 			return B_NAME_TOO_LONG;
2029 		strcpy(path, ".");
2030 	} else {
2031 		p++;
2032 		if (p[0] == '\0') {
2033 			// special case: the path ends in one or more '/' - remove them
2034 			while (*--p == '/' && p != path);
2035 			p[1] = '\0';
2036 
2037 			if (p == path && p[0] == '/') {
2038 				// This path points to the root of the file system
2039 				strcpy(filename, ".");
2040 				return B_OK;
2041 			}
2042 			for (; p != path && *(p - 1) != '/'; p--);
2043 				// rewind to the start of the leaf before the '/'
2044 		}
2045 
2046 		// normal leaf: replace the leaf portion of the path with a '.'
2047 		if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
2048 				>= B_FILE_NAME_LENGTH) {
2049 			return B_NAME_TOO_LONG;
2050 		}
2051 		p[0] = '.';
2052 		p[1] = '\0';
2053 	}
2054 	return B_OK;
2055 }
2056 
2057 
2058 static status_t
2059 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char *name,
2060 	bool traverse, bool kernel, struct vnode **_vnode)
2061 {
2062 	char clonedName[B_FILE_NAME_LENGTH + 1];
2063 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2064 		return B_NAME_TOO_LONG;
2065 
2066 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2067 	struct vnode *directory;
2068 
2069 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2070 	if (status < 0)
2071 		return status;
2072 
2073 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2074 		_vnode, NULL);
2075 }
2076 
2077 
2078 static status_t
2079 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2080 {
2081 	ino_t id;
2082 
2083 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2084 		return get_vnode(dir->device, id, _vnode, true, false);
2085 
2086 	status_t status = FS_CALL(dir, lookup, name, &id);
2087 	if (status < B_OK)
2088 		return status;
2089 
2090 	mutex_lock(&sVnodeMutex);
2091 	*_vnode = lookup_vnode(dir->device, id);
2092 	mutex_unlock(&sVnodeMutex);
2093 
2094 	if (*_vnode == NULL) {
2095 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2096 			"0x%Lx)\n", dir->device, id);
2097 		return B_ENTRY_NOT_FOUND;
2098 	}
2099 
2100 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2101 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2102 //		(*_vnode)->mount->id, (*_vnode)->id);
2103 
2104 	return B_OK;
2105 }
2106 
2107 
2108 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2109 	\a path must not be NULL.
2110 	If it returns successfully, \a path contains the name of the last path
2111 	component. This function clobbers the buffer pointed to by \a path only
2112 	if it does contain more than one component.
2113 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2114 	it is successful or not!
2115 */
2116 static status_t
2117 vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
2118 	int count, struct io_context *ioContext, struct vnode **_vnode,
2119 	ino_t *_parentID)
2120 {
2121 	status_t status = B_OK;
2122 	ino_t lastParentID = vnode->id;
2123 
2124 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2125 
2126 	if (path == NULL) {
2127 		put_vnode(vnode);
2128 		return B_BAD_VALUE;
2129 	}
2130 
2131 	if (*path == '\0') {
2132 		put_vnode(vnode);
2133 		return B_ENTRY_NOT_FOUND;
2134 	}
2135 
2136 	while (true) {
2137 		struct vnode *nextVnode;
2138 		char *nextPath;
2139 
2140 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));
2141 
2142 		// done?
2143 		if (path[0] == '\0')
2144 			break;
2145 
2146 		// walk to find the next path component ("path" will point to a single
2147 		// path component), and filter out multiple slashes
2148 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2149 				nextPath++);
2150 
2151 		if (*nextPath == '/') {
2152 			*nextPath = '\0';
2153 			do
2154 				nextPath++;
2155 			while (*nextPath == '/');
2156 		}
2157 
2158 		// See if the '..' is at the root of a mount and move to the covered
2159 		// vnode so we pass the '..' path to the underlying filesystem.
2160 		// Also prevent breaking the root of the IO context.
2161 		if (strcmp("..", path) == 0) {
2162 			if (vnode == ioContext->root) {
2163 				// Attempted prison break! Keep it contained.
2164 				path = nextPath;
2165 				continue;
2166 			} else if (vnode->mount->root_vnode == vnode
2167 				&& vnode->mount->covers_vnode) {
2168 				nextVnode = vnode->mount->covers_vnode;
2169 				inc_vnode_ref_count(nextVnode);
2170 				put_vnode(vnode);
2171 				vnode = nextVnode;
2172 			}
2173 		}
2174 
2175 		// check if vnode is really a directory
2176 		if (status == B_OK && !S_ISDIR(vnode->type))
2177 			status = B_NOT_A_DIRECTORY;
2178 
2179 		// Check if we have the right to search the current directory vnode.
2180 		// If a file system doesn't have the access() function, we assume that
2181 		// searching a directory is always allowed
2182 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2183 			status = FS_CALL(vnode, access, X_OK);
2184 
2185 		// Tell the filesystem to get the vnode of this path component (if we got the
2186 		// permission from the call above)
2187 		if (status >= B_OK)
2188 			status = lookup_dir_entry(vnode, path, &nextVnode);
2189 
2190 		if (status < B_OK) {
2191 			put_vnode(vnode);
2192 			return status;
2193 		}
2194 
2195 		// If the new node is a symbolic link, resolve it (if we've been told
2196 		// to do it)
2197 		if (S_ISLNK(nextVnode->type)
2198 			&& !(!traverseLeafLink && nextPath[0] == '\0')) {
2199 			size_t bufferSize;
2200 			char *buffer;
2201 
2202 			TRACE(("traverse link\n"));
2203 
2204 			// it's not exactly nice style using goto in this way, but hey, it works :-/
2205 			if (count + 1 > B_MAX_SYMLINKS) {
2206 				status = B_LINK_LIMIT;
2207 				goto resolve_link_error;
2208 			}
2209 
2210 			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
2211 			if (buffer == NULL) {
2212 				status = B_NO_MEMORY;
2213 				goto resolve_link_error;
2214 			}
2215 
2216 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2217 				bufferSize--;
2218 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2219 				// null-terminate
2220 				if (status >= 0)
2221 					buffer[bufferSize] = '\0';
2222 			} else
2223 				status = B_BAD_VALUE;
2224 
2225 			if (status < B_OK) {
2226 				free(buffer);
2227 
2228 		resolve_link_error:
2229 				put_vnode(vnode);
2230 				put_vnode(nextVnode);
2231 
2232 				return status;
2233 			}
2234 			put_vnode(nextVnode);
2235 
2236 			// Check if we start from the root directory or the current
2237 			// directory ("vnode" still points to that one).
2238 			// Cut off all leading slashes if it's the root directory
2239 			path = buffer;
2240 			bool absoluteSymlink = false;
2241 			if (path[0] == '/') {
2242 				// we don't need the old directory anymore
2243 				put_vnode(vnode);
2244 
2245 				while (*++path == '/')
2246 					;
2247 
2248 				mutex_lock(&sIOContextRootLock);
2249 				vnode = ioContext->root;
2250 				inc_vnode_ref_count(vnode);
2251 				mutex_unlock(&sIOContextRootLock);
2252 
2253 				absoluteSymlink = true;
2254 			}
2255 
2256 			inc_vnode_ref_count(vnode);
2257 				// balance the next recursion - we will decrement the
2258 				// ref_count of the vnode, no matter if we succeeded or not
2259 
2260 			if (absoluteSymlink && *path == '\0') {
2261 				// symlink was just "/"
2262 				nextVnode = vnode;
2263 			} else {
2264 				status = vnode_path_to_vnode(vnode, path, traverseLeafLink,
2265 					count + 1, ioContext, &nextVnode, &lastParentID);
2266 			}
2267 
2268 			free(buffer);
2269 
2270 			if (status < B_OK) {
2271 				put_vnode(vnode);
2272 				return status;
2273 			}
2274 		} else
2275 			lastParentID = vnode->id;
2276 
2277 		// decrease the ref count on the old dir we just looked up into
2278 		put_vnode(vnode);
2279 
2280 		path = nextPath;
2281 		vnode = nextVnode;
2282 
2283 		// see if we hit a mount point
2284 		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
2285 		if (mountPoint) {
2286 			put_vnode(vnode);
2287 			vnode = mountPoint;
2288 		}
2289 	}
2290 
2291 	*_vnode = vnode;
2292 	if (_parentID)
2293 		*_parentID = lastParentID;
2294 
2295 	return B_OK;
2296 }
2297 
2298 
2299 static status_t
2300 vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
2301 	int count, bool kernel, struct vnode **_vnode, ino_t *_parentID)
2302 {
2303 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2304 		get_current_io_context(kernel), _vnode, _parentID);
2305 }
2306 
2307 
2308 static status_t
2309 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
2310 	ino_t *_parentID, bool kernel)
2311 {
2312 	struct vnode *start = NULL;
2313 
2314 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2315 
2316 	if (!path)
2317 		return B_BAD_VALUE;
2318 
2319 	if (*path == '\0')
2320 		return B_ENTRY_NOT_FOUND;
2321 
2322 	// figure out if we need to start at root or at cwd
2323 	if (*path == '/') {
2324 		if (sRoot == NULL) {
2325 			// we're a bit early, aren't we?
2326 			return B_ERROR;
2327 		}
2328 
2329 		while (*++path == '/')
2330 			;
2331 		start = get_root_vnode(kernel);
2332 
2333 		if (*path == '\0') {
2334 			*_vnode = start;
2335 			return B_OK;
2336 		}
2337 
2338 	} else {
2339 		struct io_context *context = get_current_io_context(kernel);
2340 
2341 		mutex_lock(&context->io_mutex);
2342 		start = context->cwd;
2343 		if (start != NULL)
2344 			inc_vnode_ref_count(start);
2345 		mutex_unlock(&context->io_mutex);
2346 
2347 		if (start == NULL)
2348 			return B_ERROR;
2349 	}
2350 
2351 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2352 		_parentID);
2353 }
2354 
2355 
2356 /*! Returns the vnode in the next to last segment of the path, and returns
2357 	the last portion in filename.
2358 	The path buffer must be able to store at least one additional character.
2359 */
2360 static status_t
2361 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
2362 {
2363 	status_t status = get_dir_path_and_leaf(path, filename);
2364 	if (status != B_OK)
2365 		return status;
2366 
2367 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2368 }
2369 
2370 
2371 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2372 		   to by a FD + path pair.
2373 
2374 	\a path must be given in either case. \a fd might be omitted, in which
2375 	case \a path is either an absolute path or one relative to the current
2376 	directory. If both a supplied and \a path is relative it is reckoned off
2377 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2378 	ignored.
2379 
2380 	The caller has the responsibility to call put_vnode() on the returned
2381 	directory vnode.
2382 
2383 	\param fd The FD. May be < 0.
2384 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2385 	       is modified by this function. It must have at least room for a
2386 	       string one character longer than the path it contains.
2387 	\param _vnode A pointer to a variable the directory vnode shall be written
2388 		   into.
2389 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2390 		   the leaf name of the specified entry will be written.
2391 	\param kernel \c true, if invoked from inside the kernel, \c false if
2392 		   invoked from userland.
2393 	\return \c B_OK, if everything went fine, another error code otherwise.
2394 */
2395 static status_t
2396 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
2397 	char *filename, bool kernel)
2398 {
2399 	if (!path)
2400 		return B_BAD_VALUE;
2401 	if (*path == '\0')
2402 		return B_ENTRY_NOT_FOUND;
2403 	if (fd < 0)
2404 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2405 
2406 	status_t status = get_dir_path_and_leaf(path, filename);
2407 	if (status != B_OK)
2408 		return status;
2409 
2410 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2411 }
2412 
2413 
2414 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2415 		   to by a vnode + path pair.
2416 
2417 	\a path must be given in either case. \a vnode might be omitted, in which
2418 	case \a path is either an absolute path or one relative to the current
2419 	directory. If both a supplied and \a path is relative it is reckoned off
2420 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2421 	ignored.
2422 
2423 	The caller has the responsibility to call put_vnode() on the returned
2424 	directory vnode.
2425 
2426 	\param vnode The vnode. May be \c NULL.
2427 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2428 	       is modified by this function. It must have at least room for a
2429 	       string one character longer than the path it contains.
2430 	\param _vnode A pointer to a variable the directory vnode shall be written
2431 		   into.
2432 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2433 		   the leaf name of the specified entry will be written.
2434 	\param kernel \c true, if invoked from inside the kernel, \c false if
2435 		   invoked from userland.
2436 	\return \c B_OK, if everything went fine, another error code otherwise.
2437 */
2438 static status_t
2439 vnode_and_path_to_dir_vnode(struct vnode* vnode, char *path,
2440 	struct vnode **_vnode, char *filename, bool kernel)
2441 {
2442 	if (!path)
2443 		return B_BAD_VALUE;
2444 	if (*path == '\0')
2445 		return B_ENTRY_NOT_FOUND;
2446 	if (vnode == NULL || path[0] == '/')
2447 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2448 
2449 	status_t status = get_dir_path_and_leaf(path, filename);
2450 	if (status != B_OK)
2451 		return status;
2452 
2453 	inc_vnode_ref_count(vnode);
2454 		// vnode_path_to_vnode() always decrements the ref count
2455 
2456 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2457 }
2458 
2459 
2460 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2461 */
2462 static status_t
2463 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
2464 	size_t bufferSize, struct io_context* ioContext)
2465 {
2466 	if (bufferSize < sizeof(struct dirent))
2467 		return B_BAD_VALUE;
2468 
2469 	// See if vnode is the root of a mount and move to the covered
2470 	// vnode so we get the underlying file system
2471 	VNodePutter vnodePutter;
2472 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
2473 		vnode = vnode->mount->covers_vnode;
2474 		inc_vnode_ref_count(vnode);
2475 		vnodePutter.SetTo(vnode);
2476 	}
2477 
2478 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2479 		// The FS supports getting the name of a vnode.
2480 		return FS_CALL(vnode, get_vnode_name, buffer->d_name,
2481 			(char*)buffer + bufferSize - buffer->d_name);
2482 	}
2483 
2484 	// The FS doesn't support getting the name of a vnode. So we search the
2485 	// parent directory for the vnode, if the caller let us.
2486 
2487 	if (parent == NULL)
2488 		return EOPNOTSUPP;
2489 
2490 	void *cookie;
2491 
2492 	status_t status = FS_CALL(parent, open_dir, &cookie);
2493 	if (status >= B_OK) {
2494 		while (true) {
2495 			uint32 num = 1;
2496 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2497 				&num);
2498 			if (status < B_OK)
2499 				break;
2500 			if (num == 0) {
2501 				status = B_ENTRY_NOT_FOUND;
2502 				break;
2503 			}
2504 
2505 			if (vnode->id == buffer->d_ino) {
2506 				// found correct entry!
2507 				break;
2508 			}
2509 		}
2510 
2511 		FS_CALL(vnode, close_dir, cookie);
2512 		FS_CALL(vnode, free_dir_cookie, cookie);
2513 	}
2514 	return status;
2515 }
2516 
2517 
2518 static status_t
2519 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
2520 	size_t nameSize, bool kernel)
2521 {
2522 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2523 	struct dirent *dirent = (struct dirent *)buffer;
2524 
2525 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2526 		get_current_io_context(kernel));
2527 	if (status != B_OK)
2528 		return status;
2529 
2530 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2531 		return B_BUFFER_OVERFLOW;
2532 
2533 	return B_OK;
2534 }
2535 
2536 
2537 /*!	Gets the full path to a given directory vnode.
2538 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2539 	file system doesn't support this call, it will fall back to iterating
2540 	through the parent directory to get the name of the child.
2541 
2542 	To protect against circular loops, it supports a maximum tree depth
2543 	of 256 levels.
2544 
2545 	Note that the path may not be correct the time this function returns!
2546 	It doesn't use any locking to prevent returning the correct path, as
2547 	paths aren't safe anyway: the path to a file can change at any time.
2548 
2549 	It might be a good idea, though, to check if the returned path exists
2550 	in the calling function (it's not done here because of efficiency)
2551 */
2552 static status_t
2553 dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize,
2554 	bool kernel)
2555 {
2556 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2557 
2558 	if (vnode == NULL || buffer == NULL)
2559 		return B_BAD_VALUE;
2560 
2561 	if (!S_ISDIR(vnode->type))
2562 		return B_NOT_A_DIRECTORY;
2563 
2564 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
2565 	KPath pathBuffer;
2566 	if (pathBuffer.InitCheck() != B_OK)
2567 		return B_NO_MEMORY;
2568 
2569 	char *path = pathBuffer.LockBuffer();
2570 	int32 insert = pathBuffer.BufferSize();
2571 	int32 maxLevel = 256;
2572 	int32 length;
2573 	status_t status;
2574 
2575 	// we don't use get_vnode() here because this call is more
2576 	// efficient and does all we need from get_vnode()
2577 	inc_vnode_ref_count(vnode);
2578 
2579 	// resolve a volume root to its mount point
2580 	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
2581 	if (mountPoint) {
2582 		put_vnode(vnode);
2583 		vnode = mountPoint;
2584 	}
2585 
2586 	path[--insert] = '\0';
2587 
2588 	struct io_context* ioContext = get_current_io_context(kernel);
2589 
2590 	while (true) {
2591 		// the name buffer is also used for fs_read_dir()
2592 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2593 		char *name = &((struct dirent *)nameBuffer)->d_name[0];
2594 		struct vnode *parentVnode;
2595 		ino_t parentID;
2596 
2597 		// lookup the parent vnode
2598 		if (vnode == ioContext->root) {
2599 			// we hit the IO context root
2600 			parentVnode = vnode;
2601 			inc_vnode_ref_count(vnode);
2602 		} else {
2603 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2604 			if (status < B_OK)
2605 				goto out;
2606 		}
2607 
2608 		// get the node's name
2609 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2610 			sizeof(nameBuffer), ioContext);
2611 
2612 		// resolve a volume root to its mount point
2613 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
2614 		if (mountPoint) {
2615 			put_vnode(parentVnode);
2616 			parentVnode = mountPoint;
2617 			parentID = parentVnode->id;
2618 		}
2619 
2620 		bool hitRoot = (parentVnode == vnode);
2621 
2622 		// release the current vnode, we only need its parent from now on
2623 		put_vnode(vnode);
2624 		vnode = parentVnode;
2625 
2626 		if (status < B_OK)
2627 			goto out;
2628 
2629 		if (hitRoot) {
2630 			// we have reached "/", which means we have constructed the full
2631 			// path
2632 			break;
2633 		}
2634 
2635 		// ToDo: add an explicit check for loops in about 10 levels to do
2636 		// real loop detection
2637 
2638 		// don't go deeper as 'maxLevel' to prevent circular loops
2639 		if (maxLevel-- < 0) {
2640 			status = ELOOP;
2641 			goto out;
2642 		}
2643 
2644 		// add the name in front of the current path
2645 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2646 		length = strlen(name);
2647 		insert -= length;
2648 		if (insert <= 0) {
2649 			status = ENOBUFS;
2650 			goto out;
2651 		}
2652 		memcpy(path + insert, name, length);
2653 		path[--insert] = '/';
2654 	}
2655 
2656 	// the root dir will result in an empty path: fix it
2657 	if (path[insert] == '\0')
2658 		path[--insert] = '/';
2659 
2660 	TRACE(("  path is: %s\n", path + insert));
2661 
2662 	// copy the path to the output buffer
2663 	length = pathBuffer.BufferSize() - insert;
2664 	if (length <= (int)bufferSize)
2665 		memcpy(buffer, path + insert, length);
2666 	else
2667 		status = ENOBUFS;
2668 
2669 out:
2670 	put_vnode(vnode);
2671 	return status;
2672 }
2673 
2674 
2675 /*!	Checks the length of every path component, and adds a '.'
2676 	if the path ends in a slash.
2677 	The given path buffer must be able to store at least one
2678 	additional character.
2679 */
2680 static status_t
2681 check_path(char *to)
2682 {
2683 	int32 length = 0;
2684 
2685 	// check length of every path component
2686 
2687 	while (*to) {
2688 		char *begin;
2689 		if (*to == '/')
2690 			to++, length++;
2691 
2692 		begin = to;
2693 		while (*to != '/' && *to)
2694 			to++, length++;
2695 
2696 		if (to - begin > B_FILE_NAME_LENGTH)
2697 			return B_NAME_TOO_LONG;
2698 	}
2699 
2700 	if (length == 0)
2701 		return B_ENTRY_NOT_FOUND;
2702 
2703 	// complete path if there is a slash at the end
2704 
2705 	if (*(to - 1) == '/') {
2706 		if (length > B_PATH_NAME_LENGTH - 2)
2707 			return B_NAME_TOO_LONG;
2708 
2709 		to[0] = '.';
2710 		to[1] = '\0';
2711 	}
2712 
2713 	return B_OK;
2714 }
2715 
2716 
2717 static struct file_descriptor *
2718 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2719 {
2720 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2721 	if (descriptor == NULL)
2722 		return NULL;
2723 
2724 	struct vnode* vnode = fd_vnode(descriptor);
2725 	if (vnode == NULL) {
2726 		put_fd(descriptor);
2727 		return NULL;
2728 	}
2729 
2730 	// ToDo: when we can close a file descriptor at any point, investigate
2731 	//	if this is still valid to do (accessing the vnode without ref_count
2732 	//	or locking)
2733 	*_vnode = vnode;
2734 	return descriptor;
2735 }
2736 
2737 
2738 static struct vnode *
2739 get_vnode_from_fd(int fd, bool kernel)
2740 {
2741 	struct file_descriptor *descriptor;
2742 	struct vnode *vnode;
2743 
2744 	descriptor = get_fd(get_current_io_context(kernel), fd);
2745 	if (descriptor == NULL)
2746 		return NULL;
2747 
2748 	vnode = fd_vnode(descriptor);
2749 	if (vnode != NULL)
2750 		inc_vnode_ref_count(vnode);
2751 
2752 	put_fd(descriptor);
2753 	return vnode;
2754 }
2755 
2756 
2757 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2758 	only the path will be considered. In this case, the \a path must not be
2759 	NULL.
2760 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2761 	and should be NULL for files.
2762 */
2763 static status_t
2764 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2765 	struct vnode **_vnode, ino_t *_parentID, bool kernel)
2766 {
2767 	if (fd < 0 && !path)
2768 		return B_BAD_VALUE;
2769 
2770 	if (path != NULL && *path == '\0')
2771 		return B_ENTRY_NOT_FOUND;
2772 
2773 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2774 		// no FD or absolute path
2775 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2776 	}
2777 
2778 	// FD only, or FD + relative path
2779 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2780 	if (!vnode)
2781 		return B_FILE_ERROR;
2782 
2783 	if (path != NULL) {
2784 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2785 			_vnode, _parentID);
2786 	}
2787 
2788 	// there is no relative path to take into account
2789 
2790 	*_vnode = vnode;
2791 	if (_parentID)
2792 		*_parentID = -1;
2793 
2794 	return B_OK;
2795 }
2796 
2797 
2798 static int
2799 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2800 	void *cookie, int openMode, bool kernel)
2801 {
2802 	struct file_descriptor *descriptor;
2803 	int fd;
2804 
2805 	// If the vnode is locked, we don't allow creating a new file/directory
2806 	// file_descriptor for it
2807 	if (vnode && vnode->mandatory_locked_by != NULL
2808 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2809 		return B_BUSY;
2810 
2811 	descriptor = alloc_fd();
2812 	if (!descriptor)
2813 		return B_NO_MEMORY;
2814 
2815 	if (vnode)
2816 		descriptor->u.vnode = vnode;
2817 	else
2818 		descriptor->u.mount = mount;
2819 	descriptor->cookie = cookie;
2820 
2821 	switch (type) {
2822 		// vnode types
2823 		case FDTYPE_FILE:
2824 			descriptor->ops = &sFileOps;
2825 			break;
2826 		case FDTYPE_DIR:
2827 			descriptor->ops = &sDirectoryOps;
2828 			break;
2829 		case FDTYPE_ATTR:
2830 			descriptor->ops = &sAttributeOps;
2831 			break;
2832 		case FDTYPE_ATTR_DIR:
2833 			descriptor->ops = &sAttributeDirectoryOps;
2834 			break;
2835 
2836 		// mount types
2837 		case FDTYPE_INDEX_DIR:
2838 			descriptor->ops = &sIndexDirectoryOps;
2839 			break;
2840 		case FDTYPE_QUERY:
2841 			descriptor->ops = &sQueryOps;
2842 			break;
2843 
2844 		default:
2845 			panic("get_new_fd() called with unknown type %d\n", type);
2846 			break;
2847 	}
2848 	descriptor->type = type;
2849 	descriptor->open_mode = openMode;
2850 
2851 	fd = new_fd(get_current_io_context(kernel), descriptor);
2852 	if (fd < 0) {
2853 		free(descriptor);
2854 		return B_NO_MORE_FDS;
2855 	}
2856 
2857 	return fd;
2858 }
2859 
2860 
2861 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2862 	vfs_normalize_path(). See there for more documentation.
2863 */
2864 static status_t
2865 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2866 {
2867 	VNodePutter dirPutter;
2868 	struct vnode* dir = NULL;
2869 	status_t error;
2870 
2871 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2872 		// get dir vnode + leaf name
2873 		struct vnode* nextDir;
2874 		char leaf[B_FILE_NAME_LENGTH];
2875 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2876 		if (error != B_OK)
2877 			return error;
2878 
2879 		dir = nextDir;
2880 		strcpy(path, leaf);
2881 		dirPutter.SetTo(dir);
2882 
2883 		// get file vnode, if we shall resolve links
2884 		bool fileExists = false;
2885 		struct vnode* fileVnode;
2886 		VNodePutter fileVnodePutter;
2887 		if (traverseLink) {
2888 			inc_vnode_ref_count(dir);
2889 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2890 					NULL) == B_OK) {
2891 				fileVnodePutter.SetTo(fileVnode);
2892 				fileExists = true;
2893 			}
2894 		}
2895 
2896 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->type)) {
2897 			// we're done -- construct the path
2898 			bool hasLeaf = true;
2899 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2900 				// special cases "." and ".." -- get the dir, forget the leaf
2901 				inc_vnode_ref_count(dir);
2902 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2903 					&nextDir, NULL);
2904 				if (error != B_OK)
2905 					return error;
2906 				dir = nextDir;
2907 				dirPutter.SetTo(dir);
2908 				hasLeaf = false;
2909 			}
2910 
2911 			// get the directory path
2912 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2913 			if (error != B_OK)
2914 				return error;
2915 
2916 			// append the leaf name
2917 			if (hasLeaf) {
2918 				// insert a directory separator if this is not the file system
2919 				// root
2920 				if ((strcmp(path, "/") != 0
2921 					&& strlcat(path, "/", pathSize) >= pathSize)
2922 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2923 					return B_NAME_TOO_LONG;
2924 				}
2925 			}
2926 
2927 			return B_OK;
2928 		}
2929 
2930 		// read link
2931 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2932 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2933 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2934 			if (error != B_OK)
2935 				return error;
2936 			path[bufferSize] = '\0';
2937 		} else
2938 			return B_BAD_VALUE;
2939 	}
2940 
2941 	return B_LINK_LIMIT;
2942 }
2943 
2944 
2945 #ifdef ADD_DEBUGGER_COMMANDS
2946 
2947 
2948 static void
2949 _dump_advisory_locking(advisory_locking *locking)
2950 {
2951 	if (locking == NULL)
2952 		return;
2953 
2954 	kprintf("   lock:        %ld", locking->lock);
2955 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2956 
2957 	int32 index = 0;
2958 	LockList::Iterator iterator = locking->locks.GetIterator();
2959 	while (iterator.HasNext()) {
2960 		struct advisory_lock *lock = iterator.Next();
2961 
2962 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2963 		kprintf("        start:  %Ld\n", lock->start);
2964 		kprintf("        end:    %Ld\n", lock->end);
2965 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2966 	}
2967 }
2968 
2969 
2970 static void
2971 _dump_mount(struct fs_mount *mount)
2972 {
2973 	kprintf("MOUNT: %p\n", mount);
2974 	kprintf(" id:            %ld\n", mount->id);
2975 	kprintf(" device_name:   %s\n", mount->device_name);
2976 	kprintf(" fs_name:       %s\n", mount->fs_name);
2977 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2978 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2979 	kprintf(" partition:     %p\n", mount->partition);
2980 	kprintf(" lock:          %p\n", &mount->rlock);
2981 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2982 		mount->owns_file_device ? " owns_file_device" : "");
2983 
2984 	fs_volume *volume = mount->volume;
2985 	while (volume != NULL) {
2986 		kprintf(" volume %p:\n", volume);
2987 		kprintf("  layer:          %ld\n", volume->layer);
2988 		kprintf("  private_volume: %p\n", volume->private_volume);
2989 		kprintf("  ops:            %p\n", volume->ops);
2990 		volume = volume->super_volume;
2991 	}
2992 
2993 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2994 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2995 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
2996 	set_debug_variable("_partition", (addr_t)mount->partition);
2997 }
2998 
2999 
3000 static void
3001 _dump_vnode(struct vnode *vnode)
3002 {
3003 	kprintf("VNODE: %p\n", vnode);
3004 	kprintf(" device:        %ld\n", vnode->device);
3005 	kprintf(" id:            %Ld\n", vnode->id);
3006 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3007 	kprintf(" private_node:  %p\n", vnode->private_node);
3008 	kprintf(" mount:         %p\n", vnode->mount);
3009 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3010 	kprintf(" cache:         %p\n", vnode->cache);
3011 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
3012 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3013 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3014 
3015 	_dump_advisory_locking(vnode->advisory_locking);
3016 
3017 	set_debug_variable("_node", (addr_t)vnode->private_node);
3018 	set_debug_variable("_mount", (addr_t)vnode->mount);
3019 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3020 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3021 }
3022 
3023 
3024 static int
3025 dump_mount(int argc, char **argv)
3026 {
3027 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3028 		kprintf("usage: %s [id|address]\n", argv[0]);
3029 		return 0;
3030 	}
3031 
3032 	uint32 id = parse_expression(argv[1]);
3033 	struct fs_mount *mount = NULL;
3034 
3035 	mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
3036 	if (mount == NULL) {
3037 		if (IS_USER_ADDRESS(id)) {
3038 			kprintf("fs_mount not found\n");
3039 			return 0;
3040 		}
3041 		mount = (fs_mount *)id;
3042 	}
3043 
3044 	_dump_mount(mount);
3045 	return 0;
3046 }
3047 
3048 
3049 static int
3050 dump_mounts(int argc, char **argv)
3051 {
3052 	if (argc != 1) {
3053 		kprintf("usage: %s\n", argv[0]);
3054 		return 0;
3055 	}
3056 
3057 	kprintf("address     id root       covers     cookie     fs_name\n");
3058 
3059 	struct hash_iterator iterator;
3060 	struct fs_mount *mount;
3061 
3062 	hash_open(sMountsTable, &iterator);
3063 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
3064 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3065 			mount->covers_vnode, mount->volume->private_volume, mount->fs_name);
3066 	}
3067 
3068 	hash_close(sMountsTable, &iterator, false);
3069 	return 0;
3070 }
3071 
3072 
3073 static int
3074 dump_vnode(int argc, char **argv)
3075 {
3076 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
3077 		kprintf("usage: %s <device> <id>\n"
3078 			"   or: %s <address>\n", argv[0], argv[0]);
3079 		return 0;
3080 	}
3081 
3082 	struct vnode *vnode = NULL;
3083 
3084 	if (argc == 2) {
3085 		vnode = (struct vnode *)parse_expression(argv[1]);
3086 		if (IS_USER_ADDRESS(vnode)) {
3087 			kprintf("invalid vnode address\n");
3088 			return 0;
3089 		}
3090 		_dump_vnode(vnode);
3091 		return 0;
3092 	}
3093 
3094 	struct hash_iterator iterator;
3095 	dev_t device = parse_expression(argv[1]);
3096 	ino_t id = atoll(argv[2]);
3097 
3098 	hash_open(sVnodeTable, &iterator);
3099 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
3100 		if (vnode->id != id || vnode->device != device)
3101 			continue;
3102 
3103 		_dump_vnode(vnode);
3104 	}
3105 
3106 	hash_close(sVnodeTable, &iterator, false);
3107 	return 0;
3108 }
3109 
3110 
3111 static int
3112 dump_vnodes(int argc, char **argv)
3113 {
3114 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3115 		kprintf("usage: %s [device]\n", argv[0]);
3116 		return 0;
3117 	}
3118 
3119 	// restrict dumped nodes to a certain device if requested
3120 	dev_t device = parse_expression(argv[1]);
3121 
3122 	struct hash_iterator iterator;
3123 	struct vnode *vnode;
3124 
3125 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3126 		"flags\n");
3127 
3128 	hash_open(sVnodeTable, &iterator);
3129 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
3130 		if (vnode->device != device)
3131 			continue;
3132 
3133 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3134 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3135 			vnode->advisory_locking, vnode->remove ? "r" : "-",
3136 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
3137 	}
3138 
3139 	hash_close(sVnodeTable, &iterator, false);
3140 	return 0;
3141 }
3142 
3143 
3144 static int
3145 dump_vnode_caches(int argc, char **argv)
3146 {
3147 	struct hash_iterator iterator;
3148 	struct vnode *vnode;
3149 
3150 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3151 		kprintf("usage: %s [device]\n", argv[0]);
3152 		return 0;
3153 	}
3154 
3155 	// restrict dumped nodes to a certain device if requested
3156 	dev_t device = -1;
3157 	if (argc > 1)
3158 		device = atoi(argv[1]);
3159 
3160 	kprintf("address    dev     inode cache          size   pages\n");
3161 
3162 	hash_open(sVnodeTable, &iterator);
3163 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
3164 		if (vnode->cache == NULL)
3165 			continue;
3166 		if (device != -1 && vnode->device != device)
3167 			continue;
3168 
3169 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3170 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3171 				/ B_PAGE_SIZE, vnode->cache->page_count);
3172 	}
3173 
3174 	hash_close(sVnodeTable, &iterator, false);
3175 	return 0;
3176 }
3177 
3178 
3179 int
3180 dump_io_context(int argc, char **argv)
3181 {
3182 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3183 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3184 		return 0;
3185 	}
3186 
3187 	struct io_context *context = NULL;
3188 
3189 	if (argc > 1) {
3190 		uint32 num = parse_expression(argv[1]);
3191 		if (IS_KERNEL_ADDRESS(num))
3192 			context = (struct io_context *)num;
3193 		else {
3194 			struct team *team = team_get_team_struct_locked(num);
3195 			if (team == NULL) {
3196 				kprintf("could not find team with ID %ld\n", num);
3197 				return 0;
3198 			}
3199 			context = (struct io_context *)team->io_context;
3200 		}
3201 	} else
3202 		context = get_current_io_context(true);
3203 
3204 	kprintf("I/O CONTEXT: %p\n", context);
3205 	kprintf(" root vnode:\t%p\n", context->root);
3206 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3207 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3208 	kprintf(" max fds:\t%lu\n", context->table_size);
3209 
3210 	if (context->num_used_fds)
3211 		kprintf("   no. type     ops ref open mode        pos cookie\n");
3212 
3213 	for (uint32 i = 0; i < context->table_size; i++) {
3214 		struct file_descriptor *fd = context->fds[i];
3215 		if (fd == NULL)
3216 			continue;
3217 
3218 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
3219 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
3220 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
3221 			fd->u.vnode);
3222 	}
3223 
3224 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3225 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3226 
3227 	set_debug_variable("_cwd", (addr_t)context->cwd);
3228 
3229 	return 0;
3230 }
3231 
3232 
3233 int
3234 dump_vnode_usage(int argc, char **argv)
3235 {
3236 	if (argc != 1) {
3237 		kprintf("usage: %s\n", argv[0]);
3238 		return 0;
3239 	}
3240 
3241 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3242 		kMaxUnusedVnodes);
3243 
3244 	struct hash_iterator iterator;
3245 	hash_open(sVnodeTable, &iterator);
3246 
3247 	uint32 count = 0;
3248 	struct vnode *vnode;
3249 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
3250 		count++;
3251 	}
3252 
3253 	hash_close(sVnodeTable, &iterator, false);
3254 
3255 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3256 	return 0;
3257 }
3258 
3259 #endif	// ADD_DEBUGGER_COMMANDS
3260 
3261 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3262 	and calls the file system hooks to read/write the request to disk.
3263 */
3264 static status_t
3265 common_file_io_vec_pages(struct vnode *vnode, void *cookie,
3266 	const file_io_vec *fileVecs, size_t fileVecCount, const iovec *vecs,
3267 	size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset, size_t *_numBytes,
3268 	bool doWrite)
3269 {
3270 	if (fileVecCount == 0) {
3271 		// There are no file vecs at this offset, so we're obviously trying
3272 		// to access the file outside of its bounds
3273 		return B_BAD_VALUE;
3274 	}
3275 
3276 	size_t numBytes = *_numBytes;
3277 	uint32 fileVecIndex;
3278 	size_t vecOffset = *_vecOffset;
3279 	uint32 vecIndex = *_vecIndex;
3280 	status_t status;
3281 	size_t size;
3282 
3283 	if (!doWrite && vecOffset == 0) {
3284 		// now directly read the data from the device
3285 		// the first file_io_vec can be read directly
3286 
3287 		size = fileVecs[0].length;
3288 		if (size > numBytes)
3289 			size = numBytes;
3290 
3291 		status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3292 			&vecs[vecIndex], vecCount - vecIndex, &size);
3293 		if (status < B_OK)
3294 			return status;
3295 
3296 		// TODO: this is a work-around for buggy device drivers!
3297 		//	When our own drivers honour the length, we can:
3298 		//	a) also use this direct I/O for writes (otherwise, it would
3299 		//	   overwrite precious data)
3300 		//	b) panic if the term below is true (at least for writes)
3301 		if (size > fileVecs[0].length) {
3302 			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
3303 			size = fileVecs[0].length;
3304 		}
3305 
3306 		ASSERT(size <= fileVecs[0].length);
3307 
3308 		// If the file portion was contiguous, we're already done now
3309 		if (size == numBytes)
3310 			return B_OK;
3311 
3312 		// if we reached the end of the file, we can return as well
3313 		if (size != fileVecs[0].length) {
3314 			*_numBytes = size;
3315 			return B_OK;
3316 		}
3317 
3318 		fileVecIndex = 1;
3319 
3320 		// first, find out where we have to continue in our iovecs
3321 		for (; vecIndex < vecCount; vecIndex++) {
3322 			if (size < vecs[vecIndex].iov_len)
3323 				break;
3324 
3325 			size -= vecs[vecIndex].iov_len;
3326 		}
3327 
3328 		vecOffset = size;
3329 	} else {
3330 		fileVecIndex = 0;
3331 		size = 0;
3332 	}
3333 
3334 	// Too bad, let's process the rest of the file_io_vecs
3335 
3336 	size_t totalSize = size;
3337 	size_t bytesLeft = numBytes - size;
3338 
3339 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3340 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3341 		off_t fileOffset = fileVec.offset;
3342 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3343 
3344 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3345 
3346 		// process the complete fileVec
3347 		while (fileLeft > 0) {
3348 			iovec tempVecs[MAX_TEMP_IO_VECS];
3349 			uint32 tempCount = 0;
3350 
3351 			// size tracks how much of what is left of the current fileVec
3352 			// (fileLeft) has been assigned to tempVecs
3353 			size = 0;
3354 
3355 			// assign what is left of the current fileVec to the tempVecs
3356 			for (size = 0; size < fileLeft && vecIndex < vecCount
3357 					&& tempCount < MAX_TEMP_IO_VECS;) {
3358 				// try to satisfy one iovec per iteration (or as much as
3359 				// possible)
3360 
3361 				// bytes left of the current iovec
3362 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3363 				if (vecLeft == 0) {
3364 					vecOffset = 0;
3365 					vecIndex++;
3366 					continue;
3367 				}
3368 
3369 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3370 					vecIndex, vecOffset, size));
3371 
3372 				// actually available bytes
3373 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3374 
3375 				tempVecs[tempCount].iov_base
3376 					= (void *)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3377 				tempVecs[tempCount].iov_len = tempVecSize;
3378 				tempCount++;
3379 
3380 				size += tempVecSize;
3381 				vecOffset += tempVecSize;
3382 			}
3383 
3384 			size_t bytes = size;
3385 			if (doWrite) {
3386 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3387 					tempVecs, tempCount, &bytes);
3388 			} else {
3389 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3390 					tempVecs, tempCount, &bytes);
3391 			}
3392 			if (status < B_OK)
3393 				return status;
3394 
3395 			totalSize += bytes;
3396 			bytesLeft -= size;
3397 			fileOffset += size;
3398 			fileLeft -= size;
3399 			//dprintf("-> file left = %Lu\n", fileLeft);
3400 
3401 			if (size != bytes || vecIndex >= vecCount) {
3402 				// there are no more bytes or iovecs, let's bail out
3403 				*_numBytes = totalSize;
3404 				return B_OK;
3405 			}
3406 		}
3407 	}
3408 
3409 	*_vecIndex = vecIndex;
3410 	*_vecOffset = vecOffset;
3411 	*_numBytes = totalSize;
3412 	return B_OK;
3413 }
3414 
3415 
3416 //	#pragma mark - public API for file systems
3417 
3418 
3419 extern "C" status_t
3420 new_vnode(fs_volume *volume, ino_t vnodeID, void *privateNode,
3421 	fs_vnode_ops *ops)
3422 {
3423 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3424 		volume, volume->id, vnodeID, privateNode));
3425 
3426 	if (privateNode == NULL)
3427 		return B_BAD_VALUE;
3428 
3429 	mutex_lock(&sVnodeMutex);
3430 
3431 	// file system integrity check:
3432 	// test if the vnode already exists and bail out if this is the case!
3433 
3434 	// ToDo: the R5 implementation obviously checks for a different cookie
3435 	//	and doesn't panic if they are equal
3436 
3437 	struct vnode *vnode = lookup_vnode(volume->id, vnodeID);
3438 	if (vnode != NULL) {
3439 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3440 			volume->id, vnodeID, privateNode, vnode->private_node);
3441 	}
3442 
3443 	status_t status = create_new_vnode(&vnode, volume->id, vnodeID);
3444 	if (status == B_OK) {
3445 		vnode->private_node = privateNode;
3446 		vnode->ops = ops;
3447 		vnode->busy = true;
3448 		vnode->unpublished = true;
3449 	}
3450 
3451 	TRACE(("returns: %s\n", strerror(status)));
3452 
3453 	mutex_unlock(&sVnodeMutex);
3454 	return status;
3455 }
3456 
3457 
3458 extern "C" status_t
3459 publish_vnode(fs_volume *volume, ino_t vnodeID, void *privateNode,
3460 	fs_vnode_ops *ops, int type, uint32 flags)
3461 {
3462 	FUNCTION(("publish_vnode()\n"));
3463 
3464 	MutexLocker locker(sVnodeMutex);
3465 
3466 	struct vnode *vnode = lookup_vnode(volume->id, vnodeID);
3467 	status_t status = B_OK;
3468 
3469 	if (vnode != NULL && vnode->busy && vnode->unpublished
3470 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3471 		// already known, but not published
3472 	} else if (vnode == NULL && privateNode != NULL) {
3473 		status = create_new_vnode(&vnode, volume->id, vnodeID);
3474 		if (status == B_OK) {
3475 			vnode->private_node = privateNode;
3476 			vnode->ops = ops;
3477 			vnode->busy = true;
3478 			vnode->unpublished = true;
3479 		}
3480 	} else
3481 		status = B_BAD_VALUE;
3482 
3483 	bool publishSpecialSubNode = false;
3484 
3485 	if (status == B_OK) {
3486 		vnode->type = type;
3487 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
3488 		publishSpecialSubNode = is_special_node_type(type)
3489 			&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3490 	}
3491 
3492 
3493 	// create sub vnodes, if necessary
3494 	if (status == B_OK
3495 			&& (volume->sub_volume != NULL || publishSpecialSubNode)) {
3496 		locker.Unlock();
3497 
3498 		fs_volume *subVolume = volume;
3499 		if (volume->sub_volume != NULL) {
3500 			while (status == B_OK && subVolume->sub_volume != NULL) {
3501 				subVolume = subVolume->sub_volume;
3502 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3503 					vnode);
3504 			}
3505 		}
3506 
3507 		if (status == B_OK && publishSpecialSubNode)
3508 			status = create_special_sub_node(vnode, flags);
3509 
3510 		if (status != B_OK) {
3511 			// error -- clean up the created sub vnodes
3512 			while (subVolume->super_volume != volume) {
3513 				subVolume = subVolume->super_volume;
3514 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3515 			}
3516 		}
3517 
3518 		locker.Lock();
3519 
3520 		if (status != B_OK) {
3521 			hash_remove(sVnodeTable, vnode);
3522 			remove_vnode_from_mount_list(vnode, vnode->mount);
3523 			free(vnode);
3524 		}
3525 	}
3526 
3527 	if (status == B_OK) {
3528 		vnode->busy = false;
3529 		vnode->unpublished = false;
3530 	}
3531 
3532 	TRACE(("returns: %s\n", strerror(status)));
3533 
3534 	return status;
3535 }
3536 
3537 
3538 extern "C" status_t
3539 get_vnode(fs_volume *volume, ino_t vnodeID, void **_fsNode)
3540 {
3541 	struct vnode *vnode;
3542 
3543 	if (volume == NULL)
3544 		return B_BAD_VALUE;
3545 
3546 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3547 	if (status < B_OK)
3548 		return status;
3549 
3550 	// If this is a layered FS, we need to get the node cookie for the requested
3551 	// layer.
3552 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3553 		fs_vnode resolvedNode;
3554 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3555 			&resolvedNode);
3556 		if (status != B_OK) {
3557 			panic("get_vnode(): Failed to get super node for vnode %p, "
3558 				"volume: %p", vnode, volume);
3559 			put_vnode(vnode);
3560 			return status;
3561 		}
3562 
3563 		if (_fsNode != NULL)
3564 			*_fsNode = resolvedNode.private_node;
3565 	} else if (_fsNode != NULL)
3566 		*_fsNode = vnode->private_node;
3567 
3568 	return B_OK;
3569 }
3570 
3571 
3572 extern "C" status_t
3573 acquire_vnode(fs_volume *volume, ino_t vnodeID)
3574 {
3575 	struct vnode *vnode;
3576 
3577 	mutex_lock(&sVnodeMutex);
3578 	vnode = lookup_vnode(volume->id, vnodeID);
3579 	mutex_unlock(&sVnodeMutex);
3580 
3581 	if (vnode == NULL)
3582 		return B_BAD_VALUE;
3583 
3584 	inc_vnode_ref_count(vnode);
3585 	return B_OK;
3586 }
3587 
3588 
3589 extern "C" status_t
3590 put_vnode(fs_volume *volume, ino_t vnodeID)
3591 {
3592 	struct vnode *vnode;
3593 
3594 	mutex_lock(&sVnodeMutex);
3595 	vnode = lookup_vnode(volume->id, vnodeID);
3596 	mutex_unlock(&sVnodeMutex);
3597 
3598 	if (vnode == NULL)
3599 		return B_BAD_VALUE;
3600 
3601 	dec_vnode_ref_count(vnode, false, true);
3602 	return B_OK;
3603 }
3604 
3605 
3606 extern "C" status_t
3607 remove_vnode(fs_volume *volume, ino_t vnodeID)
3608 {
3609 	struct vnode *vnode;
3610 	bool remove = false;
3611 
3612 	MutexLocker locker(sVnodeMutex);
3613 
3614 	vnode = lookup_vnode(volume->id, vnodeID);
3615 	if (vnode == NULL)
3616 		return B_ENTRY_NOT_FOUND;
3617 
3618 	if (vnode->covered_by != NULL) {
3619 		// this vnode is in use
3620 		mutex_unlock(&sVnodeMutex);
3621 		return B_BUSY;
3622 	}
3623 
3624 	vnode->remove = true;
3625 	if (vnode->unpublished) {
3626 		// prepare the vnode for deletion
3627 		vnode->busy = true;
3628 		remove = true;
3629 	}
3630 
3631 	locker.Unlock();
3632 
3633 	if (remove) {
3634 		// if the vnode hasn't been published yet, we delete it here
3635 		atomic_add(&vnode->ref_count, -1);
3636 		free_vnode(vnode, true);
3637 	}
3638 
3639 	return B_OK;
3640 }
3641 
3642 
3643 extern "C" status_t
3644 unremove_vnode(fs_volume *volume, ino_t vnodeID)
3645 {
3646 	struct vnode *vnode;
3647 
3648 	mutex_lock(&sVnodeMutex);
3649 
3650 	vnode = lookup_vnode(volume->id, vnodeID);
3651 	if (vnode)
3652 		vnode->remove = false;
3653 
3654 	mutex_unlock(&sVnodeMutex);
3655 	return B_OK;
3656 }
3657 
3658 
3659 extern "C" status_t
3660 get_vnode_removed(fs_volume *volume, ino_t vnodeID, bool* removed)
3661 {
3662 	mutex_lock(&sVnodeMutex);
3663 
3664 	status_t result;
3665 
3666 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3667 		if (removed)
3668 			*removed = vnode->remove;
3669 		result = B_OK;
3670 	} else
3671 		result = B_BAD_VALUE;
3672 
3673 	mutex_unlock(&sVnodeMutex);
3674 	return result;
3675 }
3676 
3677 
3678 extern "C" fs_volume*
3679 volume_for_vnode(fs_vnode *_vnode)
3680 {
3681 	if (_vnode == NULL)
3682 		return NULL;
3683 
3684 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3685 	return vnode->mount->volume;
3686 }
3687 
3688 
3689 extern "C" status_t
3690 read_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3691 	size_t *_numBytes)
3692 {
3693 	struct file_descriptor *descriptor;
3694 	struct vnode *vnode;
3695 
3696 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3697 	if (descriptor == NULL)
3698 		return B_FILE_ERROR;
3699 
3700 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3701 		count, 0, _numBytes);
3702 
3703 	put_fd(descriptor);
3704 	return status;
3705 }
3706 
3707 
3708 extern "C" status_t
3709 write_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3710 	size_t *_numBytes)
3711 {
3712 	struct file_descriptor *descriptor;
3713 	struct vnode *vnode;
3714 
3715 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3716 	if (descriptor == NULL)
3717 		return B_FILE_ERROR;
3718 
3719 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3720 		count, 0, _numBytes);
3721 
3722 	put_fd(descriptor);
3723 	return status;
3724 }
3725 
3726 
3727 extern "C" status_t
3728 read_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3729 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3730 	size_t *_bytes)
3731 {
3732 	struct file_descriptor *descriptor;
3733 	struct vnode *vnode;
3734 
3735 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3736 	if (descriptor == NULL)
3737 		return B_FILE_ERROR;
3738 
3739 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3740 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3741 		false);
3742 
3743 	put_fd(descriptor);
3744 	return status;
3745 }
3746 
3747 
3748 extern "C" status_t
3749 write_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3750 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3751 	size_t *_bytes)
3752 {
3753 	struct file_descriptor *descriptor;
3754 	struct vnode *vnode;
3755 
3756 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3757 	if (descriptor == NULL)
3758 		return B_FILE_ERROR;
3759 
3760 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3761 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3762 		true);
3763 
3764 	put_fd(descriptor);
3765 	return status;
3766 }
3767 
3768 
3769 extern "C" status_t
3770 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3771 {
3772 	// lookup mount -- the caller is required to make sure that the mount
3773 	// won't go away
3774 	MutexLocker locker(sMountMutex);
3775 	struct fs_mount* mount = find_mount(mountID);
3776 	if (mount == NULL)
3777 		return B_BAD_VALUE;
3778 	locker.Unlock();
3779 
3780 	return mount->entry_cache.Add(dirID, name, nodeID);
3781 }
3782 
3783 
3784 extern "C" status_t
3785 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3786 {
3787 	// lookup mount -- the caller is required to make sure that the mount
3788 	// won't go away
3789 	MutexLocker locker(sMountMutex);
3790 	struct fs_mount* mount = find_mount(mountID);
3791 	if (mount == NULL)
3792 		return B_BAD_VALUE;
3793 	locker.Unlock();
3794 
3795 	return mount->entry_cache.Remove(dirID, name);
3796 }
3797 
3798 
3799 //	#pragma mark - private VFS API
3800 //	Functions the VFS exports for other parts of the kernel
3801 
3802 
3803 /*! Acquires another reference to the vnode that has to be released
3804 	by calling vfs_put_vnode().
3805 */
3806 void
3807 vfs_acquire_vnode(struct vnode *vnode)
3808 {
3809 	inc_vnode_ref_count(vnode);
3810 }
3811 
3812 
3813 /*! This is currently called from file_cache_create() only.
3814 	It's probably a temporary solution as long as devfs requires that
3815 	fs_read_pages()/fs_write_pages() are called with the standard
3816 	open cookie and not with a device cookie.
3817 	If that's done differently, remove this call; it has no other
3818 	purpose.
3819 */
3820 extern "C" status_t
3821 vfs_get_cookie_from_fd(int fd, void **_cookie)
3822 {
3823 	struct file_descriptor *descriptor;
3824 
3825 	descriptor = get_fd(get_current_io_context(true), fd);
3826 	if (descriptor == NULL)
3827 		return B_FILE_ERROR;
3828 
3829 	*_cookie = descriptor->cookie;
3830 	return B_OK;
3831 }
3832 
3833 
3834 extern "C" int
3835 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode **vnode)
3836 {
3837 	*vnode = get_vnode_from_fd(fd, kernel);
3838 
3839 	if (*vnode == NULL)
3840 		return B_FILE_ERROR;
3841 
3842 	return B_NO_ERROR;
3843 }
3844 
3845 
3846 extern "C" status_t
3847 vfs_get_vnode_from_path(const char *path, bool kernel, struct vnode **_vnode)
3848 {
3849 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3850 		path, kernel));
3851 
3852 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3853 	if (pathBuffer.InitCheck() != B_OK)
3854 		return B_NO_MEMORY;
3855 
3856 	char *buffer = pathBuffer.LockBuffer();
3857 	strlcpy(buffer, path, pathBuffer.BufferSize());
3858 
3859 	struct vnode *vnode;
3860 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3861 	if (status < B_OK)
3862 		return status;
3863 
3864 	*_vnode = vnode;
3865 	return B_OK;
3866 }
3867 
3868 
3869 extern "C" status_t
3870 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode **_vnode)
3871 {
3872 	struct vnode *vnode;
3873 
3874 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3875 	if (status < B_OK)
3876 		return status;
3877 
3878 	*_vnode = vnode;
3879 	return B_OK;
3880 }
3881 
3882 
3883 extern "C" status_t
3884 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
3885 	const char *name, struct vnode **_vnode)
3886 {
3887 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
3888 }
3889 
3890 
3891 extern "C" void
3892 vfs_vnode_to_node_ref(struct vnode *vnode, dev_t *_mountID, ino_t *_vnodeID)
3893 {
3894 	*_mountID = vnode->device;
3895 	*_vnodeID = vnode->id;
3896 }
3897 
3898 
3899 /*!	Looks up a vnode with the given mount and vnode ID.
3900 	Must only be used with "in-use" vnodes as it doesn't grab a reference
3901 	to the node.
3902 	It's currently only be used by file_cache_create().
3903 */
3904 extern "C" status_t
3905 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode)
3906 {
3907 	mutex_lock(&sVnodeMutex);
3908 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
3909 	mutex_unlock(&sVnodeMutex);
3910 
3911 	if (vnode == NULL)
3912 		return B_ERROR;
3913 
3914 	*_vnode = vnode;
3915 	return B_OK;
3916 }
3917 
3918 
3919 extern "C" status_t
3920 vfs_get_fs_node_from_path(fs_volume *volume, const char *path, bool kernel,
3921 	void **_node)
3922 {
3923 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
3924 		volume, path, kernel));
3925 
3926 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3927 	if (pathBuffer.InitCheck() != B_OK)
3928 		return B_NO_MEMORY;
3929 
3930 	fs_mount *mount;
3931 	status_t status = get_mount(volume->id, &mount);
3932 	if (status < B_OK)
3933 		return status;
3934 
3935 	char *buffer = pathBuffer.LockBuffer();
3936 	strlcpy(buffer, path, pathBuffer.BufferSize());
3937 
3938 	struct vnode *vnode = mount->root_vnode;
3939 
3940 	if (buffer[0] == '/')
3941 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
3942 	else {
3943 		inc_vnode_ref_count(vnode);
3944 			// vnode_path_to_vnode() releases a reference to the starting vnode
3945 		status = vnode_path_to_vnode(vnode, buffer, true, 0, kernel, &vnode,
3946 			NULL);
3947 	}
3948 
3949 	put_mount(mount);
3950 
3951 	if (status < B_OK)
3952 		return status;
3953 
3954 	if (vnode->device != volume->id) {
3955 		// wrong mount ID - must not gain access on foreign file system nodes
3956 		put_vnode(vnode);
3957 		return B_BAD_VALUE;
3958 	}
3959 
3960 	// Use get_vnode() to resolve the cookie for the right layer.
3961 	status = get_vnode(volume, vnode->id, _node);
3962 	put_vnode(vnode);
3963 
3964 	return status;
3965 }
3966 
3967 
3968 status_t
3969 vfs_read_stat(int fd, const char *path, bool traverseLeafLink,
3970 	struct stat *stat, bool kernel)
3971 {
3972 	status_t status;
3973 
3974 	if (path) {
3975 		// path given: get the stat of the node referred to by (fd, path)
3976 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
3977 		if (pathBuffer.InitCheck() != B_OK)
3978 			return B_NO_MEMORY;
3979 
3980 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
3981 			traverseLeafLink, stat, kernel);
3982 	} else {
3983 		// no path given: get the FD and use the FD operation
3984 		struct file_descriptor *descriptor
3985 			= get_fd(get_current_io_context(kernel), fd);
3986 		if (descriptor == NULL)
3987 			return B_FILE_ERROR;
3988 
3989 		if (descriptor->ops->fd_read_stat)
3990 			status = descriptor->ops->fd_read_stat(descriptor, stat);
3991 		else
3992 			status = EOPNOTSUPP;
3993 
3994 		put_fd(descriptor);
3995 	}
3996 
3997 	return status;
3998 }
3999 
4000 
4001 /*!	Finds the full path to the file that contains the module \a moduleName,
4002 	puts it into \a pathBuffer, and returns B_OK for success.
4003 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4004 	\c B_ENTRY_NOT_FOUNT if no file could be found.
4005 	\a pathBuffer is clobbered in any case and must not be relied on if this
4006 	functions returns unsuccessfully.
4007 	\a basePath and \a pathBuffer must not point to the same space.
4008 */
4009 status_t
4010 vfs_get_module_path(const char *basePath, const char *moduleName,
4011 	char *pathBuffer, size_t bufferSize)
4012 {
4013 	struct vnode *dir, *file;
4014 	status_t status;
4015 	size_t length;
4016 	char *path;
4017 
4018 	if (bufferSize == 0
4019 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4020 		return B_BUFFER_OVERFLOW;
4021 
4022 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4023 	if (status < B_OK)
4024 		return status;
4025 
4026 	// the path buffer had been clobbered by the above call
4027 	length = strlcpy(pathBuffer, basePath, bufferSize);
4028 	if (pathBuffer[length - 1] != '/')
4029 		pathBuffer[length++] = '/';
4030 
4031 	path = pathBuffer + length;
4032 	bufferSize -= length;
4033 
4034 	while (moduleName) {
4035 		char *nextPath = strchr(moduleName, '/');
4036 		if (nextPath == NULL)
4037 			length = strlen(moduleName);
4038 		else {
4039 			length = nextPath - moduleName;
4040 			nextPath++;
4041 		}
4042 
4043 		if (length + 1 >= bufferSize) {
4044 			status = B_BUFFER_OVERFLOW;
4045 			goto err;
4046 		}
4047 
4048 		memcpy(path, moduleName, length);
4049 		path[length] = '\0';
4050 		moduleName = nextPath;
4051 
4052 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4053 		if (status < B_OK) {
4054 			// vnode_path_to_vnode() has already released the reference to dir
4055 			return status;
4056 		}
4057 
4058 		if (S_ISDIR(file->type)) {
4059 			// goto the next directory
4060 			path[length] = '/';
4061 			path[length + 1] = '\0';
4062 			path += length + 1;
4063 			bufferSize -= length + 1;
4064 
4065 			dir = file;
4066 		} else if (S_ISREG(file->type)) {
4067 			// it's a file so it should be what we've searched for
4068 			put_vnode(file);
4069 
4070 			return B_OK;
4071 		} else {
4072 			TRACE(("vfs_get_module_path(): something is strange here: 0x%08lx...\n",
4073 				file->type));
4074 			status = B_ERROR;
4075 			dir = file;
4076 			goto err;
4077 		}
4078 	}
4079 
4080 	// if we got here, the moduleName just pointed to a directory, not to
4081 	// a real module - what should we do in this case?
4082 	status = B_ENTRY_NOT_FOUND;
4083 
4084 err:
4085 	put_vnode(dir);
4086 	return status;
4087 }
4088 
4089 
4090 /*!	\brief Normalizes a given path.
4091 
4092 	The path must refer to an existing or non-existing entry in an existing
4093 	directory, that is chopping off the leaf component the remaining path must
4094 	refer to an existing directory.
4095 
4096 	The returned will be canonical in that it will be absolute, will not
4097 	contain any "." or ".." components or duplicate occurrences of '/'s,
4098 	and none of the directory components will by symbolic links.
4099 
4100 	Any two paths referring to the same entry, will result in the same
4101 	normalized path (well, that is pretty much the definition of `normalized',
4102 	isn't it :-).
4103 
4104 	\param path The path to be normalized.
4105 	\param buffer The buffer into which the normalized path will be written.
4106 		   May be the same one as \a path.
4107 	\param bufferSize The size of \a buffer.
4108 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4109 	\param kernel \c true, if the IO context of the kernel shall be used,
4110 		   otherwise that of the team this thread belongs to. Only relevant,
4111 		   if the path is relative (to get the CWD).
4112 	\return \c B_OK if everything went fine, another error code otherwise.
4113 */
4114 status_t
4115 vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
4116 	bool traverseLink, bool kernel)
4117 {
4118 	if (!path || !buffer || bufferSize < 1)
4119 		return B_BAD_VALUE;
4120 
4121 	if (path != buffer) {
4122 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4123 			return B_BUFFER_OVERFLOW;
4124 	}
4125 
4126 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4127 }
4128 
4129 
4130 /*!	\brief Creates a special node in the file system.
4131 
4132 	The caller gets a reference to the newly created node (which is passed
4133 	back through \a _createdVnode) and is responsible for releasing it.
4134 
4135 	\param path The path where to create the entry for the node. Can be \c NULL,
4136 		in which case the node is created without an entry in the root FS -- it
4137 		will automatically be deleted when the last reference has been released.
4138 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4139 		the target file system will just create the node with its standard
4140 		operations. Depending on the type of the node a subnode might be created
4141 		automatically, though.
4142 	\param mode The type and permissions for the node to be created.
4143 	\param flags Flags to be passed to the creating FS.
4144 	\param kernel \c true, if called in the kernel context (relevant only if
4145 		\a path is not \c NULL and not absolute).
4146 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4147 		file system creating the node, with the private data pointer and
4148 		operations for the super node. Can be \c NULL.
4149 	\param _createVnode Pointer to pre-allocated storage where to store the
4150 		pointer to the newly created node.
4151 	\return \c B_OK, if everything went fine, another error code otherwise.
4152 */
4153 status_t
4154 vfs_create_special_node(const char *path, fs_vnode *subVnode, mode_t mode,
4155 	uint32 flags, bool kernel, fs_vnode *_superVnode,
4156 	struct vnode **_createdVnode)
4157 {
4158 	struct vnode* dirNode;
4159 	char _leaf[B_FILE_NAME_LENGTH];
4160 	char* leaf = NULL;
4161 
4162 	if (path) {
4163 		// We've got a path. Get the dir vnode and the leaf name.
4164 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4165 		if (tmpPathBuffer.InitCheck() != B_OK)
4166 			return B_NO_MEMORY;
4167 
4168 		char* tmpPath = tmpPathBuffer.LockBuffer();
4169 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4170 			return B_NAME_TOO_LONG;
4171 
4172 		// get the dir vnode and the leaf name
4173 		leaf = _leaf;
4174 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4175 		if (error != B_OK)
4176 			return error;
4177 	} else {
4178 		// No path. Create the node in the root FS.
4179 		dirNode = sRoot;
4180 		inc_vnode_ref_count(dirNode);
4181 	}
4182 
4183 	VNodePutter _(dirNode);
4184 
4185 	// check support for creating special nodes
4186 	if (!HAS_FS_CALL(dirNode, create_special_node))
4187 		return B_UNSUPPORTED;
4188 
4189 	// create the node
4190 	fs_vnode superVnode;
4191 	ino_t nodeID;
4192 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4193 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4194 	if (status != B_OK)
4195 		return status;
4196 
4197 	// lookup the node
4198 	mutex_lock(&sVnodeMutex);
4199 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4200 	mutex_unlock(&sVnodeMutex);
4201 
4202 	if (*_createdVnode == NULL) {
4203 		panic("vfs_create_special_node(): lookup of node failed");
4204 		return B_ERROR;
4205 	}
4206 
4207 	return B_OK;
4208 }
4209 
4210 
4211 extern "C" void
4212 vfs_put_vnode(struct vnode *vnode)
4213 {
4214 	put_vnode(vnode);
4215 }
4216 
4217 
4218 extern "C" status_t
4219 vfs_get_cwd(dev_t *_mountID, ino_t *_vnodeID)
4220 {
4221 	// Get current working directory from io context
4222 	struct io_context *context = get_current_io_context(false);
4223 	status_t status = B_OK;
4224 
4225 	mutex_lock(&context->io_mutex);
4226 
4227 	if (context->cwd != NULL) {
4228 		*_mountID = context->cwd->device;
4229 		*_vnodeID = context->cwd->id;
4230 	} else
4231 		status = B_ERROR;
4232 
4233 	mutex_unlock(&context->io_mutex);
4234 	return status;
4235 }
4236 
4237 
4238 status_t
4239 vfs_unmount(dev_t mountID, uint32 flags)
4240 {
4241 	return fs_unmount(NULL, mountID, flags, true);
4242 }
4243 
4244 
4245 extern "C" status_t
4246 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4247 {
4248 	struct vnode *vnode;
4249 
4250 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4251 	if (status < B_OK)
4252 		return status;
4253 
4254 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4255 	put_vnode(vnode);
4256 	return B_OK;
4257 }
4258 
4259 
4260 extern "C" void
4261 vfs_free_unused_vnodes(int32 level)
4262 {
4263 	vnode_low_resource_handler(NULL,
4264 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, level);
4265 }
4266 
4267 
4268 extern "C" bool
4269 vfs_can_page(struct vnode *vnode, void *cookie)
4270 {
4271 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4272 
4273 	if (HAS_FS_CALL(vnode, can_page))
4274 		return FS_CALL(vnode, can_page, cookie);
4275 	return false;
4276 }
4277 
4278 
4279 extern "C" status_t
4280 vfs_read_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
4281 	size_t count, uint32 flags, size_t *_numBytes)
4282 {
4283 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4284 		pos));
4285 
4286 #if VFS_PAGES_IO_TRACING
4287 	size_t bytesRequested = *_numBytes;
4288 #endif
4289 
4290 	IORequest request;
4291 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4292 	if (status == B_OK) {
4293 		status = vfs_vnode_io(vnode, cookie, &request);
4294 		if (status == B_OK)
4295 			status = request.Wait();
4296 		*_numBytes = request.TransferredBytes();
4297 	}
4298 
4299 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4300 		status, *_numBytes));
4301 
4302 	return status;
4303 }
4304 
4305 
4306 extern "C" status_t
4307 vfs_write_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
4308 	size_t count, uint32 flags, size_t *_numBytes)
4309 {
4310 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4311 		pos));
4312 
4313 #if VFS_PAGES_IO_TRACING
4314 	size_t bytesRequested = *_numBytes;
4315 #endif
4316 
4317 	IORequest request;
4318 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4319 	if (status == B_OK) {
4320 		status = vfs_vnode_io(vnode, cookie, &request);
4321 		if (status == B_OK)
4322 			status = request.Wait();
4323 		*_numBytes = request.TransferredBytes();
4324 	}
4325 
4326 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4327 		status, *_numBytes));
4328 
4329 	return status;
4330 }
4331 
4332 
4333 /*!	Gets the vnode's vm_cache object. If it didn't have one, it will be
4334 	created if \a allocate is \c true.
4335 	In case it's successful, it will also grab a reference to the cache
4336 	it returns.
4337 */
4338 extern "C" status_t
4339 vfs_get_vnode_cache(struct vnode *vnode, vm_cache **_cache, bool allocate)
4340 {
4341 	if (vnode->cache != NULL) {
4342 		vnode->cache->AcquireRef();
4343 		*_cache = vnode->cache;
4344 		return B_OK;
4345 	}
4346 
4347 	mutex_lock(&sVnodeMutex);
4348 
4349 	status_t status = B_OK;
4350 
4351 	// The cache could have been created in the meantime
4352 	if (vnode->cache == NULL) {
4353 		if (allocate) {
4354 			// TODO: actually the vnode need to be busy already here, or
4355 			//	else this won't work...
4356 			bool wasBusy = vnode->busy;
4357 			vnode->busy = true;
4358 			mutex_unlock(&sVnodeMutex);
4359 
4360 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4361 
4362 			mutex_lock(&sVnodeMutex);
4363 			vnode->busy = wasBusy;
4364 		} else
4365 			status = B_BAD_VALUE;
4366 	}
4367 
4368 	mutex_unlock(&sVnodeMutex);
4369 
4370 	if (status == B_OK) {
4371 		vnode->cache->AcquireRef();
4372 		*_cache = vnode->cache;
4373 	}
4374 
4375 	return status;
4376 }
4377 
4378 
4379 status_t
4380 vfs_get_file_map(struct vnode *vnode, off_t offset, size_t size,
4381 	file_io_vec *vecs, size_t *_count)
4382 {
4383 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
4384 
4385 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4386 }
4387 
4388 
4389 status_t
4390 vfs_stat_vnode(struct vnode *vnode, struct stat *stat)
4391 {
4392 	status_t status = FS_CALL(vnode, read_stat, stat);
4393 
4394 	// fill in the st_dev and st_ino fields
4395 	if (status == B_OK) {
4396 		stat->st_dev = vnode->device;
4397 		stat->st_ino = vnode->id;
4398 	}
4399 
4400 	return status;
4401 }
4402 
4403 
4404 status_t
4405 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat *stat)
4406 {
4407 	struct vnode *vnode;
4408 	status_t status = get_vnode(device, inode, &vnode, true, false);
4409 	if (status < B_OK)
4410 		return status;
4411 
4412 	status = FS_CALL(vnode, read_stat, stat);
4413 	put_vnode(vnode);
4414 	return status;
4415 }
4416 
4417 
4418 status_t
4419 vfs_get_vnode_name(struct vnode *vnode, char *name, size_t nameSize)
4420 {
4421 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4422 }
4423 
4424 
4425 status_t
4426 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
4427 	char *path, size_t pathLength)
4428 {
4429 	struct vnode *vnode;
4430 	status_t status;
4431 
4432 	// filter invalid leaf names
4433 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4434 		return B_BAD_VALUE;
4435 
4436 	// get the vnode matching the dir's node_ref
4437 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4438 		// special cases "." and "..": we can directly get the vnode of the
4439 		// referenced directory
4440 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4441 		leaf = NULL;
4442 	} else
4443 		status = get_vnode(device, inode, &vnode, true, false);
4444 	if (status < B_OK)
4445 		return status;
4446 
4447 	// get the directory path
4448 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4449 	put_vnode(vnode);
4450 		// we don't need the vnode anymore
4451 	if (status < B_OK)
4452 		return status;
4453 
4454 	// append the leaf name
4455 	if (leaf) {
4456 		// insert a directory separator if this is not the file system root
4457 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4458 				>= pathLength)
4459 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4460 			return B_NAME_TOO_LONG;
4461 		}
4462 	}
4463 
4464 	return B_OK;
4465 }
4466 
4467 
4468 /*!	If the given descriptor locked its vnode, that lock will be released. */
4469 void
4470 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
4471 {
4472 	struct vnode *vnode = fd_vnode(descriptor);
4473 
4474 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4475 		vnode->mandatory_locked_by = NULL;
4476 }
4477 
4478 
4479 /*!	Closes all file descriptors of the specified I/O context that
4480 	have the O_CLOEXEC flag set.
4481 */
4482 void
4483 vfs_exec_io_context(void *_context)
4484 {
4485 	struct io_context *context = (struct io_context *)_context;
4486 	uint32 i;
4487 
4488 	for (i = 0; i < context->table_size; i++) {
4489 		mutex_lock(&context->io_mutex);
4490 
4491 		struct file_descriptor *descriptor = context->fds[i];
4492 		bool remove = false;
4493 
4494 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4495 			context->fds[i] = NULL;
4496 			context->num_used_fds--;
4497 
4498 			remove = true;
4499 		}
4500 
4501 		mutex_unlock(&context->io_mutex);
4502 
4503 		if (remove) {
4504 			close_fd(descriptor);
4505 			put_fd(descriptor);
4506 		}
4507 	}
4508 }
4509 
4510 
4511 /*! Sets up a new io_control structure, and inherits the properties
4512 	of the parent io_control if it is given.
4513 */
4514 void *
4515 vfs_new_io_context(void *_parentContext)
4516 {
4517 	size_t tableSize;
4518 	struct io_context *context;
4519 	struct io_context *parentContext;
4520 
4521 	context = (io_context *)malloc(sizeof(struct io_context));
4522 	if (context == NULL)
4523 		return NULL;
4524 
4525 	memset(context, 0, sizeof(struct io_context));
4526 
4527 	parentContext = (struct io_context *)_parentContext;
4528 	if (parentContext)
4529 		tableSize = parentContext->table_size;
4530 	else
4531 		tableSize = DEFAULT_FD_TABLE_SIZE;
4532 
4533 	// allocate space for FDs and their close-on-exec flag
4534 	context->fds = (file_descriptor**)malloc(
4535 		sizeof(struct file_descriptor*) * tableSize
4536 		+ sizeof(struct select_sync*) * tableSize
4537 		+ (tableSize + 7) / 8);
4538 	if (context->fds == NULL) {
4539 		free(context);
4540 		return NULL;
4541 	}
4542 
4543 	context->select_infos = (select_info**)(context->fds + tableSize);
4544 	context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize);
4545 
4546 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4547 		+ sizeof(struct select_sync*) * tableSize
4548 		+ (tableSize + 7) / 8);
4549 
4550 	mutex_init(&context->io_mutex, "I/O context");
4551 
4552 	// Copy all parent file descriptors
4553 
4554 	if (parentContext) {
4555 		size_t i;
4556 
4557 		mutex_lock(&parentContext->io_mutex);
4558 
4559 		mutex_lock(&sIOContextRootLock);
4560 		context->root = parentContext->root;
4561 		if (context->root)
4562 			inc_vnode_ref_count(context->root);
4563 		mutex_unlock(&sIOContextRootLock);
4564 
4565 		context->cwd = parentContext->cwd;
4566 		if (context->cwd)
4567 			inc_vnode_ref_count(context->cwd);
4568 
4569 		for (i = 0; i < tableSize; i++) {
4570 			struct file_descriptor *descriptor = parentContext->fds[i];
4571 
4572 			if (descriptor != NULL) {
4573 				context->fds[i] = descriptor;
4574 				context->num_used_fds++;
4575 				atomic_add(&descriptor->ref_count, 1);
4576 				atomic_add(&descriptor->open_count, 1);
4577 
4578 				if (fd_close_on_exec(parentContext, i))
4579 					fd_set_close_on_exec(context, i, true);
4580 			}
4581 		}
4582 
4583 		mutex_unlock(&parentContext->io_mutex);
4584 	} else {
4585 		context->root = sRoot;
4586 		context->cwd = sRoot;
4587 
4588 		if (context->root)
4589 			inc_vnode_ref_count(context->root);
4590 
4591 		if (context->cwd)
4592 			inc_vnode_ref_count(context->cwd);
4593 	}
4594 
4595 	context->table_size = tableSize;
4596 
4597 	list_init(&context->node_monitors);
4598 	context->max_monitors = DEFAULT_NODE_MONITORS;
4599 
4600 	return context;
4601 }
4602 
4603 
4604 status_t
4605 vfs_free_io_context(void *_ioContext)
4606 {
4607 	struct io_context *context = (struct io_context *)_ioContext;
4608 	uint32 i;
4609 
4610 	if (context->root)
4611 		put_vnode(context->root);
4612 
4613 	if (context->cwd)
4614 		put_vnode(context->cwd);
4615 
4616 	mutex_lock(&context->io_mutex);
4617 
4618 	for (i = 0; i < context->table_size; i++) {
4619 		if (struct file_descriptor *descriptor = context->fds[i]) {
4620 			close_fd(descriptor);
4621 			put_fd(descriptor);
4622 		}
4623 	}
4624 
4625 	mutex_destroy(&context->io_mutex);
4626 
4627 	remove_node_monitors(context);
4628 	free(context->fds);
4629 	free(context);
4630 
4631 	return B_OK;
4632 }
4633 
4634 
4635 static status_t
4636 vfs_resize_fd_table(struct io_context *context, const int newSize)
4637 {
4638 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4639 		return EINVAL;
4640 
4641 	MutexLocker(context->io_mutex);
4642 
4643 	int oldSize = context->table_size;
4644 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4645 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4646 
4647 	// If the tables shrink, make sure none of the fds being dropped are in use.
4648 	if (newSize < oldSize) {
4649 		for (int i = oldSize; i-- > newSize;) {
4650 			if (context->fds[i])
4651 				return EBUSY;
4652 		}
4653 	}
4654 
4655 	// store pointers to the old tables
4656 	file_descriptor** oldFDs = context->fds;
4657 	select_info** oldSelectInfos = context->select_infos;
4658 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4659 
4660 	// allocate new tables
4661 	file_descriptor** newFDs = (file_descriptor**)malloc(
4662 		sizeof(struct file_descriptor*) * newSize
4663 		+ sizeof(struct select_sync*) * newSize
4664 		+ newCloseOnExitBitmapSize);
4665 	if (newFDs == NULL)
4666 		return ENOMEM;
4667 
4668 	context->fds = newFDs;
4669 	context->select_infos = (select_info**)(context->fds + newSize);
4670 	context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize);
4671 	context->table_size = newSize;
4672 
4673 	// copy entries from old tables
4674 	int toCopy = min_c(oldSize, newSize);
4675 
4676 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4677 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4678 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4679 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4680 
4681 	// clear additional entries, if the tables grow
4682 	if (newSize > oldSize) {
4683 		memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
4684 		memset(context->select_infos + oldSize, 0,
4685 			sizeof(void *) * (newSize - oldSize));
4686 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4687 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4688 	}
4689 
4690 	free(oldFDs);
4691 
4692 	return B_OK;
4693 }
4694 
4695 
4696 static status_t
4697 vfs_resize_monitor_table(struct io_context *context, const int newSize)
4698 {
4699 	int	status = B_OK;
4700 
4701 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4702 		return EINVAL;
4703 
4704 	mutex_lock(&context->io_mutex);
4705 
4706 	if ((size_t)newSize < context->num_monitors) {
4707 		status = EBUSY;
4708 		goto out;
4709 	}
4710 	context->max_monitors = newSize;
4711 
4712 out:
4713 	mutex_unlock(&context->io_mutex);
4714 	return status;
4715 }
4716 
4717 
4718 int
4719 vfs_getrlimit(int resource, struct rlimit * rlp)
4720 {
4721 	if (!rlp)
4722 		return B_BAD_ADDRESS;
4723 
4724 	switch (resource) {
4725 		case RLIMIT_NOFILE:
4726 		{
4727 			struct io_context *ioctx = get_current_io_context(false);
4728 
4729 			mutex_lock(&ioctx->io_mutex);
4730 
4731 			rlp->rlim_cur = ioctx->table_size;
4732 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4733 
4734 			mutex_unlock(&ioctx->io_mutex);
4735 
4736 			return 0;
4737 		}
4738 
4739 		case RLIMIT_NOVMON:
4740 		{
4741 			struct io_context *ioctx = get_current_io_context(false);
4742 
4743 			mutex_lock(&ioctx->io_mutex);
4744 
4745 			rlp->rlim_cur = ioctx->max_monitors;
4746 			rlp->rlim_max = MAX_NODE_MONITORS;
4747 
4748 			mutex_unlock(&ioctx->io_mutex);
4749 
4750 			return 0;
4751 		}
4752 
4753 		default:
4754 			return EINVAL;
4755 	}
4756 }
4757 
4758 
4759 int
4760 vfs_setrlimit(int resource, const struct rlimit * rlp)
4761 {
4762 	if (!rlp)
4763 		return B_BAD_ADDRESS;
4764 
4765 	switch (resource) {
4766 		case RLIMIT_NOFILE:
4767 			/* TODO: check getuid() */
4768 			if (rlp->rlim_max != RLIM_SAVED_MAX
4769 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4770 				return EPERM;
4771 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
4772 
4773 		case RLIMIT_NOVMON:
4774 			/* TODO: check getuid() */
4775 			if (rlp->rlim_max != RLIM_SAVED_MAX
4776 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4777 				return EPERM;
4778 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
4779 
4780 		default:
4781 			return EINVAL;
4782 	}
4783 }
4784 
4785 
4786 status_t
4787 vfs_init(kernel_args *args)
4788 {
4789 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
4790 		&vnode_compare, &vnode_hash);
4791 	if (sVnodeTable == NULL)
4792 		panic("vfs_init: error creating vnode hash table\n");
4793 
4794 	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));
4795 
4796 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
4797 		&mount_compare, &mount_hash);
4798 	if (sMountsTable == NULL)
4799 		panic("vfs_init: error creating mounts hash table\n");
4800 
4801 	node_monitor_init();
4802 
4803 	sRoot = NULL;
4804 
4805 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
4806 
4807 	if (block_cache_init() != B_OK)
4808 		return B_ERROR;
4809 
4810 #ifdef ADD_DEBUGGER_COMMANDS
4811 	// add some debugger commands
4812 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
4813 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
4814 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
4815 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
4816 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
4817 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
4818 	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
4819 #endif
4820 
4821 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
4822 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY, 0);
4823 
4824 	file_map_init();
4825 
4826 	return file_cache_init();
4827 }
4828 
4829 
4830 //	#pragma mark - fd_ops implementations
4831 
4832 
4833 /*!
4834 	Calls fs_open() on the given vnode and returns a new
4835 	file descriptor for it
4836 */
4837 static int
4838 open_vnode(struct vnode *vnode, int openMode, bool kernel)
4839 {
4840 	void *cookie;
4841 	int status;
4842 
4843 	status = FS_CALL(vnode, open, openMode, &cookie);
4844 	if (status < 0)
4845 		return status;
4846 
4847 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4848 	if (status < 0) {
4849 		FS_CALL(vnode, close, cookie);
4850 		FS_CALL(vnode, free_cookie, cookie);
4851 	}
4852 	return status;
4853 }
4854 
4855 
4856 /*!
4857 	Calls fs_open() on the given vnode and returns a new
4858 	file descriptor for it
4859 */
4860 static int
4861 create_vnode(struct vnode *directory, const char *name, int openMode,
4862 	int perms, bool kernel)
4863 {
4864 	void *cookie;
4865 	ino_t newID;
4866 	int status = B_ERROR;
4867 	struct vnode *vnode;
4868 
4869 	// This is somewhat tricky: If the entry already exists, the FS responsible
4870 	// for the directory might not necessarily the one also responsible for the
4871 	// node the entry refers to. So we can actually never call the create() hook
4872 	// without O_EXCL. Instead we try to look the entry up first. If it already
4873 	// exists, we just open the node (unless O_EXCL), otherwise we call create()
4874 	// with O_EXCL. This introduces a race condition, since we someone else
4875 	// might have created the entry in the meantime. We hope the respective
4876 	// FS returns the correct error code and retry (up to 3 times) again.
4877 
4878 	for (int i = 0; i < 3 && status != B_OK; i++) {
4879 		// look the node up
4880 		status = lookup_dir_entry(directory, name, &vnode);
4881 		if (status == B_OK) {
4882 			VNodePutter putter(vnode);
4883 
4884 			if ((openMode & O_EXCL) != 0)
4885 				return B_FILE_EXISTS;
4886 
4887 			// If the node is a symlink, we have to follow it, unless
4888 			// O_NOTRAVERSE is set.
4889 			if (S_ISLNK(vnode->type) && (openMode & O_NOTRAVERSE) == 0) {
4890 				putter.Put();
4891 				char clonedName[B_FILE_NAME_LENGTH + 1];
4892 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
4893 						>= B_FILE_NAME_LENGTH) {
4894 					return B_NAME_TOO_LONG;
4895 				}
4896 
4897 				inc_vnode_ref_count(directory);
4898 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
4899 					kernel, &vnode, NULL);
4900 				if (status != B_OK)
4901 					return status;
4902 
4903 				putter.SetTo(vnode);
4904 			}
4905 
4906 			status = open_vnode(vnode, openMode & ~O_CREAT, kernel);
4907 			// on success keep the vnode reference for the FD
4908 			if (status >= 0)
4909 				putter.Detach();
4910 
4911 			return status;
4912 		}
4913 
4914 		// it doesn't exist yet -- try to create it
4915 
4916 		if (!HAS_FS_CALL(directory, create))
4917 			return EROFS;
4918 
4919 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
4920 			&cookie, &newID);
4921 		if (status != B_OK
4922 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
4923 			return status;
4924 		}
4925 	}
4926 
4927 	if (status != B_OK)
4928 		return status;
4929 
4930 	// the node has been created successfully
4931 
4932 	mutex_lock(&sVnodeMutex);
4933 	vnode = lookup_vnode(directory->device, newID);
4934 	mutex_unlock(&sVnodeMutex);
4935 
4936 	if (vnode == NULL) {
4937 		panic("vfs: fs_create() returned success but there is no vnode, "
4938 			"mount ID %ld!\n", directory->device);
4939 		return B_BAD_VALUE;
4940 	}
4941 
4942 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
4943 		return status;
4944 
4945 	// something went wrong, clean up
4946 
4947 	FS_CALL(vnode, close, cookie);
4948 	FS_CALL(vnode, free_cookie, cookie);
4949 	put_vnode(vnode);
4950 
4951 	FS_CALL(directory, unlink, name);
4952 
4953 	return status;
4954 }
4955 
4956 
4957 /*! Calls fs open_dir() on the given vnode and returns a new
4958 	file descriptor for it
4959 */
4960 static int
4961 open_dir_vnode(struct vnode *vnode, bool kernel)
4962 {
4963 	void *cookie;
4964 	int status;
4965 
4966 	status = FS_CALL(vnode, open_dir, &cookie);
4967 	if (status < B_OK)
4968 		return status;
4969 
4970 	// file is opened, create a fd
4971 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
4972 	if (status >= 0)
4973 		return status;
4974 
4975 	FS_CALL(vnode, close_dir, cookie);
4976 	FS_CALL(vnode, free_dir_cookie, cookie);
4977 
4978 	return status;
4979 }
4980 
4981 
4982 /*! Calls fs open_attr_dir() on the given vnode and returns a new
4983 	file descriptor for it.
4984 	Used by attr_dir_open(), and attr_dir_open_fd().
4985 */
4986 static int
4987 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
4988 {
4989 	void *cookie;
4990 	int status;
4991 
4992 	if (!HAS_FS_CALL(vnode, open_attr_dir))
4993 		return EOPNOTSUPP;
4994 
4995 	status = FS_CALL(vnode, open_attr_dir, &cookie);
4996 	if (status < 0)
4997 		return status;
4998 
4999 	// file is opened, create a fd
5000 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
5001 	if (status >= 0)
5002 		return status;
5003 
5004 	FS_CALL(vnode, close_attr_dir, cookie);
5005 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5006 
5007 	return status;
5008 }
5009 
5010 
5011 static int
5012 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
5013 	int openMode, int perms, bool kernel)
5014 {
5015 	struct vnode *directory;
5016 	int status;
5017 
5018 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
5019 
5020 	// get directory to put the new file in
5021 	status = get_vnode(mountID, directoryID, &directory, true, false);
5022 	if (status < B_OK)
5023 		return status;
5024 
5025 	status = create_vnode(directory, name, openMode, perms, kernel);
5026 	put_vnode(directory);
5027 
5028 	return status;
5029 }
5030 
5031 
5032 static int
5033 file_create(int fd, char *path, int openMode, int perms, bool kernel)
5034 {
5035 	char name[B_FILE_NAME_LENGTH];
5036 	struct vnode *directory;
5037 	int status;
5038 
5039 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
5040 
5041 	// get directory to put the new file in
5042 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5043 	if (status < 0)
5044 		return status;
5045 
5046 	status = create_vnode(directory, name, openMode, perms, kernel);
5047 
5048 	put_vnode(directory);
5049 	return status;
5050 }
5051 
5052 
5053 static int
5054 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
5055 	int openMode, bool kernel)
5056 {
5057 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
5058 	struct vnode *vnode;
5059 	int status;
5060 
5061 	if (name == NULL || *name == '\0')
5062 		return B_BAD_VALUE;
5063 
5064 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5065 		mountID, directoryID, name, openMode));
5066 
5067 	// get the vnode matching the entry_ref
5068 	status = entry_ref_to_vnode(mountID, directoryID, name, traverse, kernel,
5069 		&vnode);
5070 	if (status < B_OK)
5071 		return status;
5072 
5073 	status = open_vnode(vnode, openMode, kernel);
5074 	if (status < B_OK)
5075 		put_vnode(vnode);
5076 
5077 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
5078 		vnode->id, name);
5079 	return status;
5080 }
5081 
5082 
5083 static int
5084 file_open(int fd, char *path, int openMode, bool kernel)
5085 {
5086 	int status = B_OK;
5087 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
5088 
5089 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5090 		fd, path, openMode, kernel));
5091 
5092 	// get the vnode matching the vnode + path combination
5093 	struct vnode *vnode = NULL;
5094 	ino_t parentID;
5095 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
5096 	if (status != B_OK)
5097 		return status;
5098 
5099 	// open the vnode
5100 	status = open_vnode(vnode, openMode, kernel);
5101 	// put only on error -- otherwise our reference was transferred to the FD
5102 	if (status < B_OK)
5103 		put_vnode(vnode);
5104 
5105 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5106 		vnode->device, parentID, vnode->id, NULL);
5107 
5108 	return status;
5109 }
5110 
5111 
5112 static status_t
5113 file_close(struct file_descriptor *descriptor)
5114 {
5115 	struct vnode *vnode = descriptor->u.vnode;
5116 	status_t status = B_OK;
5117 
5118 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5119 
5120 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
5121 	if (HAS_FS_CALL(vnode, close)) {
5122 		status = FS_CALL(vnode, close, descriptor->cookie);
5123 	}
5124 
5125 	if (status == B_OK) {
5126 		// remove all outstanding locks for this team
5127 		release_advisory_lock(vnode, NULL);
5128 	}
5129 	return status;
5130 }
5131 
5132 
5133 static void
5134 file_free_fd(struct file_descriptor *descriptor)
5135 {
5136 	struct vnode *vnode = descriptor->u.vnode;
5137 
5138 	if (vnode != NULL) {
5139 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5140 		put_vnode(vnode);
5141 	}
5142 }
5143 
5144 
5145 static status_t
5146 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer,
5147 	size_t *length)
5148 {
5149 	struct vnode *vnode = descriptor->u.vnode;
5150 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
5151 
5152 	if (S_ISDIR(vnode->type))
5153 		return B_IS_A_DIRECTORY;
5154 
5155 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5156 }
5157 
5158 
5159 static status_t
5160 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer,
5161 	size_t *length)
5162 {
5163 	struct vnode *vnode = descriptor->u.vnode;
5164 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5165 
5166 	if (S_ISDIR(vnode->type))
5167 		return B_IS_A_DIRECTORY;
5168 
5169 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5170 }
5171 
5172 
5173 static off_t
5174 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
5175 {
5176 	struct vnode *vnode = descriptor->u.vnode;
5177 	off_t offset;
5178 
5179 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5180 
5181 	// some kinds of files are not seekable
5182 	switch (vnode->type & S_IFMT) {
5183 		case S_IFIFO:
5184 		case S_IFSOCK:
5185 			return ESPIPE;
5186 
5187 		// The Open Group Base Specs don't mention any file types besides pipes,
5188 		// fifos, and sockets specially, so we allow seeking them.
5189 		case S_IFREG:
5190 		case S_IFBLK:
5191 		case S_IFDIR:
5192 		case S_IFLNK:
5193 		case S_IFCHR:
5194 			break;
5195 	}
5196 
5197 	switch (seekType) {
5198 		case SEEK_SET:
5199 			offset = 0;
5200 			break;
5201 		case SEEK_CUR:
5202 			offset = descriptor->pos;
5203 			break;
5204 		case SEEK_END:
5205 		{
5206 			// stat() the node
5207 			if (!HAS_FS_CALL(vnode, read_stat))
5208 				return EOPNOTSUPP;
5209 
5210 			struct stat stat;
5211 			status_t status = FS_CALL(vnode, read_stat, &stat);
5212 			if (status < B_OK)
5213 				return status;
5214 
5215 			offset = stat.st_size;
5216 			break;
5217 		}
5218 		default:
5219 			return B_BAD_VALUE;
5220 	}
5221 
5222 	// assumes off_t is 64 bits wide
5223 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5224 		return EOVERFLOW;
5225 
5226 	pos += offset;
5227 	if (pos < 0)
5228 		return B_BAD_VALUE;
5229 
5230 	return descriptor->pos = pos;
5231 }
5232 
5233 
5234 static status_t
5235 file_select(struct file_descriptor *descriptor, uint8 event,
5236 	struct selectsync *sync)
5237 {
5238 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5239 
5240 	struct vnode *vnode = descriptor->u.vnode;
5241 
5242 	// If the FS has no select() hook, notify select() now.
5243 	if (!HAS_FS_CALL(vnode, select))
5244 		return notify_select_event(sync, event);
5245 
5246 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5247 }
5248 
5249 
5250 static status_t
5251 file_deselect(struct file_descriptor *descriptor, uint8 event,
5252 	struct selectsync *sync)
5253 {
5254 	struct vnode *vnode = descriptor->u.vnode;
5255 
5256 	if (!HAS_FS_CALL(vnode, deselect))
5257 		return B_OK;
5258 
5259 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5260 }
5261 
5262 
5263 static status_t
5264 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char *name, int perms, bool kernel)
5265 {
5266 	struct vnode *vnode;
5267 	ino_t newID;
5268 	status_t status;
5269 
5270 	if (name == NULL || *name == '\0')
5271 		return B_BAD_VALUE;
5272 
5273 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5274 
5275 	status = get_vnode(mountID, parentID, &vnode, true, false);
5276 	if (status < B_OK)
5277 		return status;
5278 
5279 	if (HAS_FS_CALL(vnode, create_dir))
5280 		status = FS_CALL(vnode, create_dir, name, perms, &newID);
5281 	else
5282 		status = EROFS;
5283 
5284 	put_vnode(vnode);
5285 	return status;
5286 }
5287 
5288 
5289 static status_t
5290 dir_create(int fd, char *path, int perms, bool kernel)
5291 {
5292 	char filename[B_FILE_NAME_LENGTH];
5293 	struct vnode *vnode;
5294 	ino_t newID;
5295 	status_t status;
5296 
5297 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
5298 
5299 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5300 	if (status < 0)
5301 		return status;
5302 
5303 	if (HAS_FS_CALL(vnode, create_dir)) {
5304 		status = FS_CALL(vnode, create_dir, filename, perms, &newID);
5305 	} else
5306 		status = EROFS;
5307 
5308 	put_vnode(vnode);
5309 	return status;
5310 }
5311 
5312 
5313 static int
5314 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char *name, bool kernel)
5315 {
5316 	struct vnode *vnode;
5317 	int status;
5318 
5319 	FUNCTION(("dir_open_entry_ref()\n"));
5320 
5321 	if (name && *name == '\0')
5322 		return B_BAD_VALUE;
5323 
5324 	// get the vnode matching the entry_ref/node_ref
5325 	if (name) {
5326 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5327 			&vnode);
5328 	} else
5329 		status = get_vnode(mountID, parentID, &vnode, true, false);
5330 	if (status < B_OK)
5331 		return status;
5332 
5333 	status = open_dir_vnode(vnode, kernel);
5334 	if (status < B_OK)
5335 		put_vnode(vnode);
5336 
5337 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5338 		vnode->id, name);
5339 	return status;
5340 }
5341 
5342 
5343 static int
5344 dir_open(int fd, char *path, bool kernel)
5345 {
5346 	int status = B_OK;
5347 
5348 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
5349 
5350 	// get the vnode matching the vnode + path combination
5351 	struct vnode *vnode = NULL;
5352 	ino_t parentID;
5353 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
5354 	if (status != B_OK)
5355 		return status;
5356 
5357 	// open the dir
5358 	status = open_dir_vnode(vnode, kernel);
5359 	if (status < B_OK)
5360 		put_vnode(vnode);
5361 
5362 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
5363 	return status;
5364 }
5365 
5366 
5367 static status_t
5368 dir_close(struct file_descriptor *descriptor)
5369 {
5370 	struct vnode *vnode = descriptor->u.vnode;
5371 
5372 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5373 
5374 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
5375 	if (HAS_FS_CALL(vnode, close_dir))
5376 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5377 
5378 	return B_OK;
5379 }
5380 
5381 
5382 static void
5383 dir_free_fd(struct file_descriptor *descriptor)
5384 {
5385 	struct vnode *vnode = descriptor->u.vnode;
5386 
5387 	if (vnode != NULL) {
5388 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5389 		put_vnode(vnode);
5390 	}
5391 }
5392 
5393 
5394 static status_t
5395 dir_read(struct io_context* ioContext, struct file_descriptor *descriptor,
5396 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
5397 {
5398 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5399 		bufferSize, _count);
5400 }
5401 
5402 
5403 static status_t
5404 fix_dirent(struct vnode *parent, struct dirent *userEntry,
5405 	struct io_context* ioContext, uint32* _length)
5406 {
5407 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
5408 	struct dirent* entry;
5409 
5410 	if (IS_USER_ADDRESS(userEntry)) {
5411 		entry = (struct dirent*)buffer;
5412 		if (user_memcpy(entry, userEntry, sizeof(struct dirent) - 1) != B_OK)
5413 			return B_BAD_ADDRESS;
5414 
5415 		ASSERT(entry->d_reclen >= sizeof(struct dirent));
5416 
5417 		if (user_memcpy(entry->d_name, userEntry->d_name,
5418 				entry->d_reclen - sizeof(struct dirent)) != B_OK)
5419 			return B_BAD_ADDRESS;
5420 	} else
5421 		entry = userEntry;
5422 
5423 	*_length = entry->d_reclen;
5424 
5425 	// set d_pdev and d_pino
5426 	entry->d_pdev = parent->device;
5427 	entry->d_pino = parent->id;
5428 
5429 	// If this is the ".." entry and the directory is the root of a FS,
5430 	// we need to replace d_dev and d_ino with the actual values.
5431 	if (strcmp(entry->d_name, "..") == 0
5432 		&& parent->mount->root_vnode == parent
5433 		&& parent->mount->covers_vnode) {
5434 		inc_vnode_ref_count(parent);
5435 			// vnode_path_to_vnode() puts the node
5436 
5437 		// Make sure the IO context root is not bypassed.
5438 		if (parent == ioContext->root) {
5439 			entry->d_dev = parent->device;
5440 			entry->d_ino = parent->id;
5441 		} else {
5442 			// ".." is guaranteed not to be clobbered by this call
5443 			struct vnode *vnode;
5444 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5445 				ioContext, &vnode, NULL);
5446 
5447 			if (status == B_OK) {
5448 				entry->d_dev = vnode->device;
5449 				entry->d_ino = vnode->id;
5450 			}
5451 		}
5452 	} else {
5453 		// resolve mount points
5454 		struct vnode *vnode = NULL;
5455 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, true,
5456 			false);
5457 		if (status != B_OK)
5458 			return status;
5459 
5460 		mutex_lock(&sVnodeCoveredByMutex);
5461 		if (vnode->covered_by) {
5462 			entry->d_dev = vnode->covered_by->device;
5463 			entry->d_ino = vnode->covered_by->id;
5464 		}
5465 		mutex_unlock(&sVnodeCoveredByMutex);
5466 
5467 		put_vnode(vnode);
5468 	}
5469 
5470 	// copy back from userland buffer if needed
5471 	if (entry != userEntry)
5472 		return user_memcpy(userEntry, entry, sizeof(struct dirent) - 1);
5473 
5474 	return B_OK;
5475 }
5476 
5477 
5478 static status_t
5479 dir_read(struct io_context* ioContext, struct vnode *vnode, void *cookie,
5480 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
5481 {
5482 	if (!HAS_FS_CALL(vnode, read_dir))
5483 		return EOPNOTSUPP;
5484 
5485 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5486 		_count);
5487 	if (error != B_OK)
5488 		return error;
5489 
5490 	// we need to adjust the read dirents
5491 	uint32 count = *_count;
5492 	if (count > 0) {
5493 		for (uint32 i = 0; i < count; i++) {
5494 			uint32 length;
5495 			error = fix_dirent(vnode, buffer, ioContext, &length);
5496 			if (error != B_OK)
5497 				return error;
5498 
5499 			buffer = (struct dirent*)((uint8*)buffer + length);
5500 		}
5501 	}
5502 
5503 	return error;
5504 }
5505 
5506 
5507 static status_t
5508 dir_rewind(struct file_descriptor *descriptor)
5509 {
5510 	struct vnode *vnode = descriptor->u.vnode;
5511 
5512 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5513 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5514 	}
5515 
5516 	return EOPNOTSUPP;
5517 }
5518 
5519 
5520 static status_t
5521 dir_remove(int fd, char *path, bool kernel)
5522 {
5523 	char name[B_FILE_NAME_LENGTH];
5524 	struct vnode *directory;
5525 	status_t status;
5526 
5527 	if (path != NULL) {
5528 		// we need to make sure our path name doesn't stop with "/", ".", or ".."
5529 		char *lastSlash = strrchr(path, '/');
5530 		if (lastSlash != NULL) {
5531 			char *leaf = lastSlash + 1;
5532 			if (!strcmp(leaf, ".."))
5533 				return B_NOT_ALLOWED;
5534 
5535 			// omit multiple slashes
5536 			while (lastSlash > path && lastSlash[-1] == '/') {
5537 				lastSlash--;
5538 			}
5539 
5540 			if (!leaf[0]
5541 				|| !strcmp(leaf, ".")) {
5542 				// "name/" -> "name", or "name/." -> "name"
5543 				lastSlash[0] = '\0';
5544 			}
5545 		} else if (!strcmp(path, ".."))
5546 			return B_NOT_ALLOWED;
5547 	}
5548 
5549 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5550 	if (status < B_OK)
5551 		return status;
5552 
5553 	if (HAS_FS_CALL(directory, remove_dir))
5554 		status = FS_CALL(directory, remove_dir, name);
5555 	else
5556 		status = EROFS;
5557 
5558 	put_vnode(directory);
5559 	return status;
5560 }
5561 
5562 
5563 static status_t
5564 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer,
5565 	size_t length)
5566 {
5567 	struct vnode *vnode = descriptor->u.vnode;
5568 
5569 	if (HAS_FS_CALL(vnode, ioctl))
5570 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5571 
5572 	return EOPNOTSUPP;
5573 }
5574 
5575 
5576 static status_t
5577 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5578 {
5579 	struct flock flock;
5580 
5581 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5582 		fd, op, argument, kernel ? "kernel" : "user"));
5583 
5584 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel),
5585 		fd);
5586 	if (descriptor == NULL)
5587 		return B_FILE_ERROR;
5588 
5589 	struct vnode* vnode = fd_vnode(descriptor);
5590 
5591 	status_t status = B_OK;
5592 
5593 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5594 		if (descriptor->type != FDTYPE_FILE)
5595 			status = B_BAD_VALUE;
5596 		else if (user_memcpy(&flock, (struct flock *)argument,
5597 				sizeof(struct flock)) < B_OK)
5598 			status = B_BAD_ADDRESS;
5599 
5600 		if (status != B_OK) {
5601 			put_fd(descriptor);
5602 			return status;
5603 		}
5604 	}
5605 
5606 	switch (op) {
5607 		case F_SETFD:
5608 		{
5609 			struct io_context *context = get_current_io_context(kernel);
5610 			// Set file descriptor flags
5611 
5612 			// O_CLOEXEC is the only flag available at this time
5613 			mutex_lock(&context->io_mutex);
5614 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5615 			mutex_unlock(&context->io_mutex);
5616 
5617 			status = B_OK;
5618 			break;
5619 		}
5620 
5621 		case F_GETFD:
5622 		{
5623 			struct io_context *context = get_current_io_context(kernel);
5624 
5625 			// Get file descriptor flags
5626 			mutex_lock(&context->io_mutex);
5627 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5628 			mutex_unlock(&context->io_mutex);
5629 			break;
5630 		}
5631 
5632 		case F_SETFL:
5633 			// Set file descriptor open mode
5634 
5635 			// we only accept changes to O_APPEND and O_NONBLOCK
5636 			argument &= O_APPEND | O_NONBLOCK;
5637 			if (descriptor->ops->fd_set_flags != NULL) {
5638 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5639 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5640 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5641 					(int)argument);
5642 			} else
5643 				status = EOPNOTSUPP;
5644 
5645 			if (status == B_OK) {
5646 				// update this descriptor's open_mode field
5647 				descriptor->open_mode = (descriptor->open_mode
5648 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5649 			}
5650 
5651 			break;
5652 
5653 		case F_GETFL:
5654 			// Get file descriptor open mode
5655 			status = descriptor->open_mode;
5656 			break;
5657 
5658 		case F_DUPFD:
5659 		{
5660 			struct io_context *context = get_current_io_context(kernel);
5661 
5662 			status = new_fd_etc(context, descriptor, (int)argument);
5663 			if (status >= 0) {
5664 				mutex_lock(&context->io_mutex);
5665 				fd_set_close_on_exec(context, fd, false);
5666 				mutex_unlock(&context->io_mutex);
5667 
5668 				atomic_add(&descriptor->ref_count, 1);
5669 			}
5670 			break;
5671 		}
5672 
5673 		case F_GETLK:
5674 			if (vnode != NULL) {
5675 				status = get_advisory_lock(vnode, &flock);
5676 				if (status == B_OK) {
5677 					// copy back flock structure
5678 					status = user_memcpy((struct flock *)argument, &flock,
5679 						sizeof(struct flock));
5680 				}
5681 			} else
5682 				status = B_BAD_VALUE;
5683 			break;
5684 
5685 		case F_SETLK:
5686 		case F_SETLKW:
5687 			status = normalize_flock(descriptor, &flock);
5688 			if (status < B_OK)
5689 				break;
5690 
5691 			if (vnode == NULL) {
5692 				status = B_BAD_VALUE;
5693 			} else if (flock.l_type == F_UNLCK) {
5694 				status = release_advisory_lock(vnode, &flock);
5695 			} else {
5696 				// the open mode must match the lock type
5697 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
5698 						&& flock.l_type == F_WRLCK
5699 					|| (descriptor->open_mode & O_RWMASK) == O_WRONLY
5700 						&& flock.l_type == F_RDLCK)
5701 					status = B_FILE_ERROR;
5702 				else {
5703 					status = acquire_advisory_lock(vnode, -1,
5704 						&flock, op == F_SETLKW);
5705 				}
5706 			}
5707 			break;
5708 
5709 		// ToDo: add support for more ops?
5710 
5711 		default:
5712 			status = B_BAD_VALUE;
5713 	}
5714 
5715 	put_fd(descriptor);
5716 	return status;
5717 }
5718 
5719 
5720 static status_t
5721 common_sync(int fd, bool kernel)
5722 {
5723 	struct file_descriptor *descriptor;
5724 	struct vnode *vnode;
5725 	status_t status;
5726 
5727 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
5728 
5729 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5730 	if (descriptor == NULL)
5731 		return B_FILE_ERROR;
5732 
5733 	if (HAS_FS_CALL(vnode, fsync))
5734 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5735 	else
5736 		status = EOPNOTSUPP;
5737 
5738 	put_fd(descriptor);
5739 	return status;
5740 }
5741 
5742 
5743 static status_t
5744 common_lock_node(int fd, bool kernel)
5745 {
5746 	struct file_descriptor *descriptor;
5747 	struct vnode *vnode;
5748 
5749 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5750 	if (descriptor == NULL)
5751 		return B_FILE_ERROR;
5752 
5753 	status_t status = B_OK;
5754 
5755 	// We need to set the locking atomically - someone
5756 	// else might set one at the same time
5757 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
5758 			(file_descriptor*)NULL) != NULL)
5759 		status = B_BUSY;
5760 
5761 	put_fd(descriptor);
5762 	return status;
5763 }
5764 
5765 
5766 static status_t
5767 common_unlock_node(int fd, bool kernel)
5768 {
5769 	struct file_descriptor *descriptor;
5770 	struct vnode *vnode;
5771 
5772 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5773 	if (descriptor == NULL)
5774 		return B_FILE_ERROR;
5775 
5776 	status_t status = B_OK;
5777 
5778 	// We need to set the locking atomically - someone
5779 	// else might set one at the same time
5780 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
5781 			(file_descriptor*)NULL, descriptor) != descriptor)
5782 		status = B_BAD_VALUE;
5783 
5784 	put_fd(descriptor);
5785 	return status;
5786 }
5787 
5788 
5789 static status_t
5790 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
5791 	bool kernel)
5792 {
5793 	struct vnode *vnode;
5794 	status_t status;
5795 
5796 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
5797 	if (status < B_OK)
5798 		return status;
5799 
5800 	if (HAS_FS_CALL(vnode, read_symlink)) {
5801 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
5802 	} else
5803 		status = B_BAD_VALUE;
5804 
5805 	put_vnode(vnode);
5806 	return status;
5807 }
5808 
5809 
5810 static status_t
5811 common_create_symlink(int fd, char *path, const char *toPath, int mode,
5812 	bool kernel)
5813 {
5814 	// path validity checks have to be in the calling function!
5815 	char name[B_FILE_NAME_LENGTH];
5816 	struct vnode *vnode;
5817 	status_t status;
5818 
5819 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
5820 
5821 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
5822 	if (status < B_OK)
5823 		return status;
5824 
5825 	if (HAS_FS_CALL(vnode, create_symlink))
5826 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
5827 	else
5828 		status = EROFS;
5829 
5830 	put_vnode(vnode);
5831 
5832 	return status;
5833 }
5834 
5835 
5836 static status_t
5837 common_create_link(char *path, char *toPath, bool kernel)
5838 {
5839 	// path validity checks have to be in the calling function!
5840 	char name[B_FILE_NAME_LENGTH];
5841 	struct vnode *directory, *vnode;
5842 	status_t status;
5843 
5844 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
5845 
5846 	status = path_to_dir_vnode(path, &directory, name, kernel);
5847 	if (status < B_OK)
5848 		return status;
5849 
5850 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
5851 	if (status < B_OK)
5852 		goto err;
5853 
5854 	if (directory->mount != vnode->mount) {
5855 		status = B_CROSS_DEVICE_LINK;
5856 		goto err1;
5857 	}
5858 
5859 	if (HAS_FS_CALL(directory, link))
5860 		status = FS_CALL(directory, link, name, vnode);
5861 	else
5862 		status = EROFS;
5863 
5864 err1:
5865 	put_vnode(vnode);
5866 err:
5867 	put_vnode(directory);
5868 
5869 	return status;
5870 }
5871 
5872 
5873 static status_t
5874 common_unlink(int fd, char *path, bool kernel)
5875 {
5876 	char filename[B_FILE_NAME_LENGTH];
5877 	struct vnode *vnode;
5878 	status_t status;
5879 
5880 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
5881 
5882 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5883 	if (status < 0)
5884 		return status;
5885 
5886 	if (HAS_FS_CALL(vnode, unlink))
5887 		status = FS_CALL(vnode, unlink, filename);
5888 	else
5889 		status = EROFS;
5890 
5891 	put_vnode(vnode);
5892 
5893 	return status;
5894 }
5895 
5896 
5897 static status_t
5898 common_access(char *path, int mode, bool kernel)
5899 {
5900 	struct vnode *vnode;
5901 	status_t status;
5902 
5903 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
5904 	if (status < B_OK)
5905 		return status;
5906 
5907 	if (HAS_FS_CALL(vnode, access))
5908 		status = FS_CALL(vnode, access, mode);
5909 	else
5910 		status = B_OK;
5911 
5912 	put_vnode(vnode);
5913 
5914 	return status;
5915 }
5916 
5917 
5918 static status_t
5919 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
5920 {
5921 	struct vnode *fromVnode, *toVnode;
5922 	char fromName[B_FILE_NAME_LENGTH];
5923 	char toName[B_FILE_NAME_LENGTH];
5924 	status_t status;
5925 
5926 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
5927 
5928 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
5929 	if (status < B_OK)
5930 		return status;
5931 
5932 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
5933 	if (status < B_OK)
5934 		goto err1;
5935 
5936 	if (fromVnode->device != toVnode->device) {
5937 		status = B_CROSS_DEVICE_LINK;
5938 		goto err2;
5939 	}
5940 
5941 	if (HAS_FS_CALL(fromVnode, rename))
5942 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
5943 	else
5944 		status = EROFS;
5945 
5946 err2:
5947 	put_vnode(toVnode);
5948 err1:
5949 	put_vnode(fromVnode);
5950 
5951 	return status;
5952 }
5953 
5954 
5955 static status_t
5956 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5957 {
5958 	struct vnode *vnode = descriptor->u.vnode;
5959 
5960 	FUNCTION(("common_read_stat: stat %p\n", stat));
5961 
5962 	status_t status = FS_CALL(vnode, read_stat, stat);
5963 
5964 	// fill in the st_dev and st_ino fields
5965 	if (status == B_OK) {
5966 		stat->st_dev = vnode->device;
5967 		stat->st_ino = vnode->id;
5968 	}
5969 
5970 	return status;
5971 }
5972 
5973 
5974 static status_t
5975 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5976 {
5977 	struct vnode *vnode = descriptor->u.vnode;
5978 
5979 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
5980 	if (!HAS_FS_CALL(vnode, write_stat))
5981 		return EROFS;
5982 
5983 	return FS_CALL(vnode, write_stat, stat, statMask);
5984 }
5985 
5986 
5987 static status_t
5988 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
5989 	struct stat *stat, bool kernel)
5990 {
5991 	struct vnode *vnode;
5992 	status_t status;
5993 
5994 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
5995 
5996 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5997 	if (status < 0)
5998 		return status;
5999 
6000 	status = FS_CALL(vnode, read_stat, stat);
6001 
6002 	// fill in the st_dev and st_ino fields
6003 	if (status == B_OK) {
6004 		stat->st_dev = vnode->device;
6005 		stat->st_ino = vnode->id;
6006 	}
6007 
6008 	put_vnode(vnode);
6009 	return status;
6010 }
6011 
6012 
6013 static status_t
6014 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
6015 	const struct stat *stat, int statMask, bool kernel)
6016 {
6017 	struct vnode *vnode;
6018 	status_t status;
6019 
6020 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6021 
6022 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
6023 	if (status < 0)
6024 		return status;
6025 
6026 	if (HAS_FS_CALL(vnode, write_stat))
6027 		status = FS_CALL(vnode, write_stat, stat, statMask);
6028 	else
6029 		status = EROFS;
6030 
6031 	put_vnode(vnode);
6032 
6033 	return status;
6034 }
6035 
6036 
6037 static int
6038 attr_dir_open(int fd, char *path, bool kernel)
6039 {
6040 	struct vnode *vnode;
6041 	int status;
6042 
6043 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
6044 
6045 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6046 	if (status < B_OK)
6047 		return status;
6048 
6049 	status = open_attr_dir_vnode(vnode, kernel);
6050 	if (status < 0)
6051 		put_vnode(vnode);
6052 
6053 	return status;
6054 }
6055 
6056 
6057 static status_t
6058 attr_dir_close(struct file_descriptor *descriptor)
6059 {
6060 	struct vnode *vnode = descriptor->u.vnode;
6061 
6062 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6063 
6064 	if (HAS_FS_CALL(vnode, close_attr_dir))
6065 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6066 
6067 	return B_OK;
6068 }
6069 
6070 
6071 static void
6072 attr_dir_free_fd(struct file_descriptor *descriptor)
6073 {
6074 	struct vnode *vnode = descriptor->u.vnode;
6075 
6076 	if (vnode != NULL) {
6077 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6078 		put_vnode(vnode);
6079 	}
6080 }
6081 
6082 
6083 static status_t
6084 attr_dir_read(struct io_context* ioContext, struct file_descriptor *descriptor,
6085 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
6086 {
6087 	struct vnode *vnode = descriptor->u.vnode;
6088 
6089 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6090 
6091 	if (HAS_FS_CALL(vnode, read_attr_dir))
6092 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6093 			bufferSize, _count);
6094 
6095 	return EOPNOTSUPP;
6096 }
6097 
6098 
6099 static status_t
6100 attr_dir_rewind(struct file_descriptor *descriptor)
6101 {
6102 	struct vnode *vnode = descriptor->u.vnode;
6103 
6104 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6105 
6106 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6107 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6108 
6109 	return EOPNOTSUPP;
6110 }
6111 
6112 
6113 static int
6114 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
6115 {
6116 	struct vnode *vnode;
6117 	void *cookie;
6118 	int status;
6119 
6120 	if (name == NULL || *name == '\0')
6121 		return B_BAD_VALUE;
6122 
6123 	vnode = get_vnode_from_fd(fd, kernel);
6124 	if (vnode == NULL)
6125 		return B_FILE_ERROR;
6126 
6127 	if (!HAS_FS_CALL(vnode, create_attr)) {
6128 		status = EROFS;
6129 		goto err;
6130 	}
6131 
6132 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6133 	if (status < B_OK)
6134 		goto err;
6135 
6136 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
6137 		return status;
6138 
6139 	FS_CALL(vnode, close_attr, cookie);
6140 	FS_CALL(vnode, free_attr_cookie, cookie);
6141 
6142 	FS_CALL(vnode, remove_attr, name);
6143 
6144 err:
6145 	put_vnode(vnode);
6146 
6147 	return status;
6148 }
6149 
6150 
6151 static int
6152 attr_open(int fd, const char *name, int openMode, bool kernel)
6153 {
6154 	struct vnode *vnode;
6155 	void *cookie;
6156 	int status;
6157 
6158 	if (name == NULL || *name == '\0')
6159 		return B_BAD_VALUE;
6160 
6161 	vnode = get_vnode_from_fd(fd, kernel);
6162 	if (vnode == NULL)
6163 		return B_FILE_ERROR;
6164 
6165 	if (!HAS_FS_CALL(vnode, open_attr)) {
6166 		status = EOPNOTSUPP;
6167 		goto err;
6168 	}
6169 
6170 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6171 	if (status < B_OK)
6172 		goto err;
6173 
6174 	// now we only need a file descriptor for this attribute and we're done
6175 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
6176 		return status;
6177 
6178 	FS_CALL(vnode, close_attr, cookie);
6179 	FS_CALL(vnode, free_attr_cookie, cookie);
6180 
6181 err:
6182 	put_vnode(vnode);
6183 
6184 	return status;
6185 }
6186 
6187 
6188 static status_t
6189 attr_close(struct file_descriptor *descriptor)
6190 {
6191 	struct vnode *vnode = descriptor->u.vnode;
6192 
6193 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6194 
6195 	if (HAS_FS_CALL(vnode, close_attr))
6196 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6197 
6198 	return B_OK;
6199 }
6200 
6201 
6202 static void
6203 attr_free_fd(struct file_descriptor *descriptor)
6204 {
6205 	struct vnode *vnode = descriptor->u.vnode;
6206 
6207 	if (vnode != NULL) {
6208 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6209 		put_vnode(vnode);
6210 	}
6211 }
6212 
6213 
6214 static status_t
6215 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
6216 {
6217 	struct vnode *vnode = descriptor->u.vnode;
6218 
6219 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
6220 	if (!HAS_FS_CALL(vnode, read_attr))
6221 		return EOPNOTSUPP;
6222 
6223 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6224 }
6225 
6226 
6227 static status_t
6228 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
6229 {
6230 	struct vnode *vnode = descriptor->u.vnode;
6231 
6232 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6233 	if (!HAS_FS_CALL(vnode, write_attr))
6234 		return EOPNOTSUPP;
6235 
6236 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6237 }
6238 
6239 
6240 static off_t
6241 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
6242 {
6243 	off_t offset;
6244 
6245 	switch (seekType) {
6246 		case SEEK_SET:
6247 			offset = 0;
6248 			break;
6249 		case SEEK_CUR:
6250 			offset = descriptor->pos;
6251 			break;
6252 		case SEEK_END:
6253 		{
6254 			struct vnode *vnode = descriptor->u.vnode;
6255 			if (!HAS_FS_CALL(vnode, read_stat))
6256 				return EOPNOTSUPP;
6257 
6258 			struct stat stat;
6259 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6260 				&stat);
6261 			if (status < B_OK)
6262 				return status;
6263 
6264 			offset = stat.st_size;
6265 			break;
6266 		}
6267 		default:
6268 			return B_BAD_VALUE;
6269 	}
6270 
6271 	// assumes off_t is 64 bits wide
6272 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6273 		return EOVERFLOW;
6274 
6275 	pos += offset;
6276 	if (pos < 0)
6277 		return B_BAD_VALUE;
6278 
6279 	return descriptor->pos = pos;
6280 }
6281 
6282 
6283 static status_t
6284 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
6285 {
6286 	struct vnode *vnode = descriptor->u.vnode;
6287 
6288 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6289 
6290 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6291 		return EOPNOTSUPP;
6292 
6293 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6294 }
6295 
6296 
6297 static status_t
6298 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
6299 {
6300 	struct vnode *vnode = descriptor->u.vnode;
6301 
6302 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6303 
6304 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6305 		return EROFS;
6306 
6307 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6308 }
6309 
6310 
6311 static status_t
6312 attr_remove(int fd, const char *name, bool kernel)
6313 {
6314 	struct file_descriptor *descriptor;
6315 	struct vnode *vnode;
6316 	status_t status;
6317 
6318 	if (name == NULL || *name == '\0')
6319 		return B_BAD_VALUE;
6320 
6321 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
6322 
6323 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6324 	if (descriptor == NULL)
6325 		return B_FILE_ERROR;
6326 
6327 	if (HAS_FS_CALL(vnode, remove_attr))
6328 		status = FS_CALL(vnode, remove_attr, name);
6329 	else
6330 		status = EROFS;
6331 
6332 	put_fd(descriptor);
6333 
6334 	return status;
6335 }
6336 
6337 
6338 static status_t
6339 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
6340 {
6341 	struct file_descriptor *fromDescriptor, *toDescriptor;
6342 	struct vnode *fromVnode, *toVnode;
6343 	status_t status;
6344 
6345 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
6346 		return B_BAD_VALUE;
6347 
6348 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
6349 
6350 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
6351 	if (fromDescriptor == NULL)
6352 		return B_FILE_ERROR;
6353 
6354 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
6355 	if (toDescriptor == NULL) {
6356 		status = B_FILE_ERROR;
6357 		goto err;
6358 	}
6359 
6360 	// are the files on the same volume?
6361 	if (fromVnode->device != toVnode->device) {
6362 		status = B_CROSS_DEVICE_LINK;
6363 		goto err1;
6364 	}
6365 
6366 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6367 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6368 	} else
6369 		status = EROFS;
6370 
6371 err1:
6372 	put_fd(toDescriptor);
6373 err:
6374 	put_fd(fromDescriptor);
6375 
6376 	return status;
6377 }
6378 
6379 
6380 static status_t
6381 index_dir_open(dev_t mountID, bool kernel)
6382 {
6383 	struct fs_mount *mount;
6384 	void *cookie;
6385 
6386 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6387 
6388 	status_t status = get_mount(mountID, &mount);
6389 	if (status < B_OK)
6390 		return status;
6391 
6392 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6393 		status = EOPNOTSUPP;
6394 		goto out;
6395 	}
6396 
6397 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6398 	if (status < B_OK)
6399 		goto out;
6400 
6401 	// get fd for the index directory
6402 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
6403 	if (status >= 0)
6404 		goto out;
6405 
6406 	// something went wrong
6407 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6408 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6409 
6410 out:
6411 	put_mount(mount);
6412 	return status;
6413 }
6414 
6415 
6416 static status_t
6417 index_dir_close(struct file_descriptor *descriptor)
6418 {
6419 	struct fs_mount *mount = descriptor->u.mount;
6420 
6421 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6422 
6423 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6424 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6425 
6426 	return B_OK;
6427 }
6428 
6429 
6430 static void
6431 index_dir_free_fd(struct file_descriptor *descriptor)
6432 {
6433 	struct fs_mount *mount = descriptor->u.mount;
6434 
6435 	if (mount != NULL) {
6436 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6437 		// ToDo: find a replacement ref_count object - perhaps the root dir?
6438 		//put_vnode(vnode);
6439 	}
6440 }
6441 
6442 
6443 static status_t
6444 index_dir_read(struct io_context* ioContext, struct file_descriptor *descriptor,
6445 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
6446 {
6447 	struct fs_mount *mount = descriptor->u.mount;
6448 
6449 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6450 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6451 			bufferSize, _count);
6452 	}
6453 
6454 	return EOPNOTSUPP;
6455 }
6456 
6457 
6458 static status_t
6459 index_dir_rewind(struct file_descriptor *descriptor)
6460 {
6461 	struct fs_mount *mount = descriptor->u.mount;
6462 
6463 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6464 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6465 
6466 	return EOPNOTSUPP;
6467 }
6468 
6469 
6470 static status_t
6471 index_create(dev_t mountID, const char *name, uint32 type, uint32 flags, bool kernel)
6472 {
6473 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
6474 
6475 	struct fs_mount *mount;
6476 	status_t status = get_mount(mountID, &mount);
6477 	if (status < B_OK)
6478 		return status;
6479 
6480 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6481 		status = EROFS;
6482 		goto out;
6483 	}
6484 
6485 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6486 
6487 out:
6488 	put_mount(mount);
6489 	return status;
6490 }
6491 
6492 
6493 #if 0
6494 static status_t
6495 index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
6496 {
6497 	struct vnode *vnode = descriptor->u.vnode;
6498 
6499 	// ToDo: currently unused!
6500 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6501 	if (!HAS_FS_CALL(vnode, read_index_stat))
6502 		return EOPNOTSUPP;
6503 
6504 	return EOPNOTSUPP;
6505 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6506 }
6507 
6508 
6509 static void
6510 index_free_fd(struct file_descriptor *descriptor)
6511 {
6512 	struct vnode *vnode = descriptor->u.vnode;
6513 
6514 	if (vnode != NULL) {
6515 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6516 		put_vnode(vnode);
6517 	}
6518 }
6519 #endif
6520 
6521 
6522 static status_t
6523 index_name_read_stat(dev_t mountID, const char *name, struct stat *stat, bool kernel)
6524 {
6525 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
6526 
6527 	struct fs_mount *mount;
6528 	status_t status = get_mount(mountID, &mount);
6529 	if (status < B_OK)
6530 		return status;
6531 
6532 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6533 		status = EOPNOTSUPP;
6534 		goto out;
6535 	}
6536 
6537 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6538 
6539 out:
6540 	put_mount(mount);
6541 	return status;
6542 }
6543 
6544 
6545 static status_t
6546 index_remove(dev_t mountID, const char *name, bool kernel)
6547 {
6548 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
6549 
6550 	struct fs_mount *mount;
6551 	status_t status = get_mount(mountID, &mount);
6552 	if (status < B_OK)
6553 		return status;
6554 
6555 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6556 		status = EROFS;
6557 		goto out;
6558 	}
6559 
6560 	status = FS_MOUNT_CALL(mount, remove_index, name);
6561 
6562 out:
6563 	put_mount(mount);
6564 	return status;
6565 }
6566 
6567 
6568 /*!	ToDo: the query FS API is still the pretty much the same as in R5.
6569 		It would be nice if the FS would find some more kernel support
6570 		for them.
6571 		For example, query parsing should be moved into the kernel.
6572 */
6573 static int
6574 query_open(dev_t device, const char *query, uint32 flags,
6575 	port_id port, int32 token, bool kernel)
6576 {
6577 	struct fs_mount *mount;
6578 	void *cookie;
6579 
6580 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
6581 
6582 	status_t status = get_mount(device, &mount);
6583 	if (status < B_OK)
6584 		return status;
6585 
6586 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6587 		status = EOPNOTSUPP;
6588 		goto out;
6589 	}
6590 
6591 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6592 		&cookie);
6593 	if (status < B_OK)
6594 		goto out;
6595 
6596 	// get fd for the index directory
6597 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
6598 	if (status >= 0)
6599 		goto out;
6600 
6601 	// something went wrong
6602 	FS_MOUNT_CALL(mount, close_query, cookie);
6603 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6604 
6605 out:
6606 	put_mount(mount);
6607 	return status;
6608 }
6609 
6610 
6611 static status_t
6612 query_close(struct file_descriptor *descriptor)
6613 {
6614 	struct fs_mount *mount = descriptor->u.mount;
6615 
6616 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6617 
6618 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6619 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6620 
6621 	return B_OK;
6622 }
6623 
6624 
6625 static void
6626 query_free_fd(struct file_descriptor *descriptor)
6627 {
6628 	struct fs_mount *mount = descriptor->u.mount;
6629 
6630 	if (mount != NULL) {
6631 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6632 		// ToDo: find a replacement ref_count object - perhaps the root dir?
6633 		//put_vnode(vnode);
6634 	}
6635 }
6636 
6637 
6638 static status_t
6639 query_read(struct io_context *ioContext, struct file_descriptor *descriptor,
6640 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
6641 {
6642 	struct fs_mount *mount = descriptor->u.mount;
6643 
6644 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6645 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6646 			bufferSize, _count);
6647 	}
6648 
6649 	return EOPNOTSUPP;
6650 }
6651 
6652 
6653 static status_t
6654 query_rewind(struct file_descriptor *descriptor)
6655 {
6656 	struct fs_mount *mount = descriptor->u.mount;
6657 
6658 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6659 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6660 
6661 	return EOPNOTSUPP;
6662 }
6663 
6664 
6665 //	#pragma mark - General File System functions
6666 
6667 
6668 static dev_t
6669 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
6670 	const char *args, bool kernel)
6671 {
6672 	struct fs_mount *mount;
6673 	status_t status = 0;
6674 
6675 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6676 
6677 	// The path is always safe, we just have to make sure that fsName is
6678 	// almost valid - we can't make any assumptions about args, though.
6679 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
6680 	// We'll get it from the DDM later.
6681 	if (fsName == NULL) {
6682 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6683 			return B_BAD_VALUE;
6684 	} else if (fsName[0] == '\0')
6685 		return B_BAD_VALUE;
6686 
6687 	RecursiveLocker mountOpLocker(sMountOpLock);
6688 
6689 	// Helper to delete a newly created file device on failure.
6690 	// Not exactly beautiful, but helps to keep the code below cleaner.
6691 	struct FileDeviceDeleter {
6692 		FileDeviceDeleter() : id(-1) {}
6693 		~FileDeviceDeleter()
6694 		{
6695 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
6696 		}
6697 
6698 		partition_id id;
6699 	} fileDeviceDeleter;
6700 
6701 	// If the file system is not a "virtual" one, the device argument should
6702 	// point to a real file/device (if given at all).
6703 	// get the partition
6704 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
6705 	KPartition *partition = NULL;
6706 	KPath normalizedDevice;
6707 	bool newlyCreatedFileDevice = false;
6708 
6709 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
6710 		// normalize the device path
6711 		status = normalizedDevice.SetTo(device, true);
6712 		if (status != B_OK)
6713 			return status;
6714 
6715 		// get a corresponding partition from the DDM
6716 		partition = ddm->RegisterPartition(normalizedDevice.Path());
6717 
6718 		if (!partition) {
6719 			// Partition not found: This either means, the user supplied
6720 			// an invalid path, or the path refers to an image file. We try
6721 			// to let the DDM create a file device for the path.
6722 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
6723 				&newlyCreatedFileDevice);
6724 			if (deviceID >= 0) {
6725 				partition = ddm->RegisterPartition(deviceID);
6726 				if (newlyCreatedFileDevice)
6727 					fileDeviceDeleter.id = deviceID;
6728 			}
6729 		}
6730 
6731 		if (!partition) {
6732 			TRACE(("fs_mount(): Partition `%s' not found.\n",
6733 				normalizedDevice.Path()));
6734 			return B_ENTRY_NOT_FOUND;
6735 		}
6736 
6737 		device = normalizedDevice.Path();
6738 			// correct path to file device
6739 	}
6740 	PartitionRegistrar partitionRegistrar(partition, true);
6741 
6742 	// Write lock the partition's device. For the time being, we keep the lock
6743 	// until we're done mounting -- not nice, but ensure, that no-one is
6744 	// interfering.
6745 	// TODO: Just mark the partition busy while mounting!
6746 	KDiskDevice *diskDevice = NULL;
6747 	if (partition) {
6748 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6749 		if (!diskDevice) {
6750 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
6751 			return B_ERROR;
6752 		}
6753 	}
6754 
6755 	DeviceWriteLocker writeLocker(diskDevice, true);
6756 		// this takes over the write lock acquired before
6757 
6758 	if (partition) {
6759 		// make sure, that the partition is not busy
6760 		if (partition->IsBusy()) {
6761 			TRACE(("fs_mount(): Partition is busy.\n"));
6762 			return B_BUSY;
6763 		}
6764 
6765 		// if no FS name had been supplied, we get it from the partition
6766 		if (!fsName) {
6767 			KDiskSystem *diskSystem = partition->DiskSystem();
6768 			if (!diskSystem) {
6769 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
6770 					"recognize it.\n"));
6771 				return B_BAD_VALUE;
6772 			}
6773 
6774 			if (!diskSystem->IsFileSystem()) {
6775 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
6776 					"partitioning system.\n"));
6777 				return B_BAD_VALUE;
6778 			}
6779 
6780 			// The disk system name will not change, and the KDiskSystem
6781 			// object will not go away while the disk device is locked (and
6782 			// the partition has a reference to it), so this is safe.
6783 			fsName = diskSystem->Name();
6784 		}
6785 	}
6786 
6787 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
6788 	if (mount == NULL)
6789 		return B_NO_MEMORY;
6790 
6791 	mount->volume = (fs_volume*)malloc(sizeof(fs_volume));
6792 	if (mount->volume == NULL) {
6793 		free(mount);
6794 		return B_NO_MEMORY;
6795 	}
6796 
6797 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
6798 
6799 	mount->fs_name = get_file_system_name(fsName);
6800 	if (mount->fs_name == NULL) {
6801 		status = B_NO_MEMORY;
6802 		goto err1;
6803 	}
6804 
6805 	mount->device_name = strdup(device);
6806 		// "device" can be NULL
6807 
6808 	status = mount->entry_cache.Init();
6809 	if (status != B_OK)
6810 		goto err2;
6811 
6812 	mount->fs = get_file_system(fsName);
6813 	if (mount->fs == NULL) {
6814 		status = ENODEV;
6815 		goto err3;
6816 	}
6817 
6818 	recursive_lock_init(&mount->rlock, "mount rlock");
6819 
6820 	// initialize structure
6821 	mount->id = sNextMountID++;
6822 	mount->partition = NULL;
6823 	mount->root_vnode = NULL;
6824 	mount->covers_vnode = NULL;
6825 	mount->unmounting = false;
6826 	mount->owns_file_device = false;
6827 
6828 	mount->volume->id = mount->id;
6829 	mount->volume->partition = partition != NULL ? partition->ID() : -1;
6830 	mount->volume->layer = 0;
6831 	mount->volume->private_volume = NULL;
6832 	mount->volume->ops = NULL;
6833 	mount->volume->sub_volume = NULL;
6834 	mount->volume->super_volume = NULL;
6835 
6836 	// insert mount struct into list before we call FS's mount() function
6837 	// so that vnodes can be created for this mount
6838 	mutex_lock(&sMountMutex);
6839 	hash_insert(sMountsTable, mount);
6840 	mutex_unlock(&sMountMutex);
6841 
6842 	ino_t rootID;
6843 
6844 	if (!sRoot) {
6845 		// we haven't mounted anything yet
6846 		if (strcmp(path, "/") != 0) {
6847 			status = B_ERROR;
6848 			goto err5;
6849 		}
6850 
6851 		status = mount->fs->mount(mount->volume, device, flags, args, &rootID);
6852 		if (status < 0) {
6853 			// ToDo: why should we hide the error code from the file system here?
6854 			//status = ERR_VFS_GENERAL;
6855 			goto err5;
6856 		}
6857 	} else {
6858 		struct vnode *coveredVnode;
6859 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
6860 		if (status < B_OK)
6861 			goto err5;
6862 
6863 		// make sure covered_vnode is a directory
6864 		if (!S_ISDIR(coveredVnode->type)) {
6865 			status = B_NOT_A_DIRECTORY;
6866 			goto err5;
6867 		}
6868 
6869 		if (coveredVnode->mount->root_vnode == coveredVnode) {
6870 			// this is already a mount point
6871 			status = B_BUSY;
6872 			goto err5;
6873 		}
6874 
6875 		mount->covers_vnode = coveredVnode;
6876 
6877 		// mount it
6878 		status = mount->fs->mount(mount->volume, device, flags, args, &rootID);
6879 		if (status < B_OK)
6880 			goto err6;
6881 	}
6882 
6883 	// the root node is supposed to be owned by the file system - it must
6884 	// exist at this point
6885 	mount->root_vnode = lookup_vnode(mount->id, rootID);
6886 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
6887 		panic("fs_mount: file system does not own its root node!\n");
6888 		status = B_ERROR;
6889 		goto err7;
6890 	}
6891 
6892 	// No race here, since fs_mount() is the only function changing
6893 	// covers_vnode (and holds sMountOpLock at that time).
6894 	mutex_lock(&sVnodeCoveredByMutex);
6895 	if (mount->covers_vnode)
6896 		mount->covers_vnode->covered_by = mount->root_vnode;
6897 	mutex_unlock(&sVnodeCoveredByMutex);
6898 
6899 	if (!sRoot) {
6900 		sRoot = mount->root_vnode;
6901 		mutex_lock(&sIOContextRootLock);
6902 		get_current_io_context(true)->root = sRoot;
6903 		mutex_unlock(&sIOContextRootLock);
6904 		inc_vnode_ref_count(sRoot);
6905 	}
6906 
6907 	// supply the partition (if any) with the mount cookie and mark it mounted
6908 	if (partition) {
6909 		partition->SetMountCookie(mount->volume->private_volume);
6910 		partition->SetVolumeID(mount->id);
6911 
6912 		// keep a partition reference as long as the partition is mounted
6913 		partitionRegistrar.Detach();
6914 		mount->partition = partition;
6915 		mount->owns_file_device = newlyCreatedFileDevice;
6916 		fileDeviceDeleter.id = -1;
6917 	}
6918 
6919 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
6920 		mount->covers_vnode ? mount->covers_vnode->id : -1);
6921 
6922 	return mount->id;
6923 
6924 err7:
6925 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
6926 err6:
6927 	if (mount->covers_vnode)
6928 		put_vnode(mount->covers_vnode);
6929 err5:
6930 	mutex_lock(&sMountMutex);
6931 	hash_remove(sMountsTable, mount);
6932 	mutex_unlock(&sMountMutex);
6933 
6934 	recursive_lock_destroy(&mount->rlock);
6935 	put_file_system(mount->fs);
6936 	free(mount->device_name);
6937 err3:
6938 	mount->entry_cache.Uninit();
6939 err2:
6940 	free(mount->fs_name);
6941 err1:
6942 	free(mount->volume);
6943 	free(mount);
6944 
6945 	return status;
6946 }
6947 
6948 
6949 static status_t
6950 fs_unmount(char *path, dev_t mountID, uint32 flags, bool kernel)
6951 {
6952 	struct vnode *vnode = NULL;
6953 	struct fs_mount *mount;
6954 	status_t err;
6955 
6956 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
6957 		kernel));
6958 
6959 	if (path != NULL) {
6960 		err = path_to_vnode(path, true, &vnode, NULL, kernel);
6961 		if (err != B_OK)
6962 			return B_ENTRY_NOT_FOUND;
6963 	}
6964 
6965 	RecursiveLocker mountOpLocker(sMountOpLock);
6966 
6967 	// this lock is not strictly necessary, but here in case of KDEBUG
6968 	// to keep the ASSERT in find_mount() working.
6969 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
6970 	mount = find_mount(path != NULL ? vnode->device : mountID);
6971 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
6972 	if (mount == NULL) {
6973 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
6974 			vnode);
6975 	}
6976 
6977 	if (path != NULL) {
6978 		put_vnode(vnode);
6979 
6980 		if (mount->root_vnode != vnode) {
6981 			// not mountpoint
6982 			return B_BAD_VALUE;
6983 		}
6984 	}
6985 
6986 	// if the volume is associated with a partition, lock the device of the
6987 	// partition as long as we are unmounting
6988 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6989 	KPartition *partition = mount->partition;
6990 	KDiskDevice *diskDevice = NULL;
6991 	if (partition) {
6992 		if (partition->Device() == NULL) {
6993 			dprintf("fs_unmount(): There is no device!\n");
6994 			return B_ERROR;
6995 		}
6996 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6997 		if (!diskDevice) {
6998 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
6999 			return B_ERROR;
7000 		}
7001 	}
7002 	DeviceWriteLocker writeLocker(diskDevice, true);
7003 
7004 	// make sure, that the partition is not busy
7005 	if (partition) {
7006 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7007 			TRACE(("fs_unmount(): Partition is busy.\n"));
7008 			return B_BUSY;
7009 		}
7010 	}
7011 
7012 	// grab the vnode master mutex to keep someone from creating
7013 	// a vnode while we're figuring out if we can continue
7014 	mutex_lock(&sVnodeMutex);
7015 
7016 	bool disconnectedDescriptors = false;
7017 
7018 	while (true) {
7019 		bool busy = false;
7020 
7021 		// cycle through the list of vnodes associated with this mount and
7022 		// make sure all of them are not busy or have refs on them
7023 		vnode = NULL;
7024 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
7025 				vnode)) != NULL) {
7026 			// The root vnode ref_count needs to be 1 here (the mount has a
7027 			// reference).
7028 			if (vnode->busy
7029 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7030 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7031 				// there are still vnodes in use on this mount, so we cannot
7032 				// unmount yet
7033 				busy = true;
7034 				break;
7035 			}
7036 		}
7037 
7038 		if (!busy)
7039 			break;
7040 
7041 		if ((flags & B_FORCE_UNMOUNT) == 0) {
7042 			mutex_unlock(&sVnodeMutex);
7043 
7044 			return B_BUSY;
7045 		}
7046 
7047 		if (disconnectedDescriptors) {
7048 			// wait a bit until the last access is finished, and then try again
7049 			mutex_unlock(&sVnodeMutex);
7050 			snooze(100000);
7051 			// TODO: if there is some kind of bug that prevents the ref counts
7052 			//	from getting back to zero, this will fall into an endless loop...
7053 			mutex_lock(&sVnodeMutex);
7054 			continue;
7055 		}
7056 
7057 		// the file system is still busy - but we're forced to unmount it,
7058 		// so let's disconnect all open file descriptors
7059 
7060 		mount->unmounting = true;
7061 			// prevent new vnodes from being created
7062 
7063 		mutex_unlock(&sVnodeMutex);
7064 
7065 		disconnect_mount_or_vnode_fds(mount, NULL);
7066 		disconnectedDescriptors = true;
7067 
7068 		mutex_lock(&sVnodeMutex);
7069 	}
7070 
7071 	// we can safely continue, mark all of the vnodes busy and this mount
7072 	// structure in unmounting state
7073 	mount->unmounting = true;
7074 
7075 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
7076 		vnode->busy = true;
7077 
7078 		if (vnode->ref_count == 0) {
7079 			// this vnode has been unused before
7080 			list_remove_item(&sUnusedVnodeList, vnode);
7081 			sUnusedVnodes--;
7082 		}
7083 	}
7084 
7085 	// The ref_count of the root node is 1 at this point, see above why this is
7086 	mount->root_vnode->ref_count--;
7087 
7088 	mutex_unlock(&sVnodeMutex);
7089 
7090 	mutex_lock(&sVnodeCoveredByMutex);
7091 	mount->covers_vnode->covered_by = NULL;
7092 	mutex_unlock(&sVnodeCoveredByMutex);
7093 	put_vnode(mount->covers_vnode);
7094 
7095 	// Free all vnodes associated with this mount.
7096 	// They will be removed from the mount list by free_vnode(), so
7097 	// we don't have to do this.
7098 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes))
7099 			!= NULL) {
7100 		free_vnode(vnode, false);
7101 	}
7102 
7103 	// remove the mount structure from the hash table
7104 	mutex_lock(&sMountMutex);
7105 	hash_remove(sMountsTable, mount);
7106 	mutex_unlock(&sMountMutex);
7107 
7108 	mountOpLocker.Unlock();
7109 
7110 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7111 	notify_unmount(mount->id);
7112 
7113 	// release the file system
7114 	put_file_system(mount->fs);
7115 
7116 	// dereference the partition and mark it unmounted
7117 	if (partition) {
7118 		partition->SetVolumeID(-1);
7119 		partition->SetMountCookie(NULL);
7120 
7121 		if (mount->owns_file_device)
7122 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7123 		partition->Unregister();
7124 	}
7125 
7126 	mount->entry_cache.Uninit();
7127 
7128 	free(mount->device_name);
7129 	free(mount->fs_name);
7130 	free(mount->volume);
7131 	free(mount);
7132 
7133 	return B_OK;
7134 }
7135 
7136 
7137 static status_t
7138 fs_sync(dev_t device)
7139 {
7140 	struct fs_mount *mount;
7141 	status_t status = get_mount(device, &mount);
7142 	if (status < B_OK)
7143 		return status;
7144 
7145 	// First, synchronize all file caches
7146 
7147 	struct vnode *previousVnode = NULL;
7148 	while (true) {
7149 		// synchronize access to vnode list
7150 		recursive_lock_lock(&mount->rlock);
7151 
7152 		struct vnode *vnode = previousVnode;
7153 		do {
7154 			// TODO: we could track writes (and writable mapped vnodes)
7155 			//	and have a simple flag that we could test for here
7156 			vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode);
7157 		} while (vnode != NULL && vnode->cache == NULL);
7158 
7159 		ino_t id = -1;
7160 		if (vnode != NULL)
7161 			id = vnode->id;
7162 
7163 		recursive_lock_unlock(&mount->rlock);
7164 
7165 		if (vnode == NULL)
7166 			break;
7167 
7168 		// acquire a reference to the vnode
7169 
7170 		if (get_vnode(mount->id, id, &vnode, true, false) == B_OK) {
7171 			if (previousVnode != NULL)
7172 				put_vnode(previousVnode);
7173 
7174 			if (vnode->cache != NULL)
7175 				vnode->cache->WriteModified();
7176 
7177 			// the next vnode might change until we lock the vnode list again,
7178 			// but this vnode won't go away since we keep a reference to it.
7179 			previousVnode = vnode;
7180 		} else {
7181 			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n",
7182 				mount->id, id);
7183 			break;
7184 		}
7185 	}
7186 
7187 	if (previousVnode != NULL)
7188 		put_vnode(previousVnode);
7189 
7190 	// And then, let the file systems do their synchronizing work
7191 
7192 	if (HAS_FS_MOUNT_CALL(mount, sync))
7193 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7194 
7195 	put_mount(mount);
7196 	return status;
7197 }
7198 
7199 
7200 static status_t
7201 fs_read_info(dev_t device, struct fs_info *info)
7202 {
7203 	struct fs_mount *mount;
7204 	status_t status = get_mount(device, &mount);
7205 	if (status < B_OK)
7206 		return status;
7207 
7208 	memset(info, 0, sizeof(struct fs_info));
7209 
7210 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7211 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7212 
7213 	// fill in info the file system doesn't (have to) know about
7214 	if (status == B_OK) {
7215 		info->dev = mount->id;
7216 		info->root = mount->root_vnode->id;
7217 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
7218 		if (mount->device_name != NULL) {
7219 			strlcpy(info->device_name, mount->device_name,
7220 				sizeof(info->device_name));
7221 		}
7222 	}
7223 
7224 	// if the call is not supported by the file system, there are still
7225 	// the parts that we filled out ourselves
7226 
7227 	put_mount(mount);
7228 	return status;
7229 }
7230 
7231 
7232 static status_t
7233 fs_write_info(dev_t device, const struct fs_info *info, int mask)
7234 {
7235 	struct fs_mount *mount;
7236 	status_t status = get_mount(device, &mount);
7237 	if (status < B_OK)
7238 		return status;
7239 
7240 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7241 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7242 	else
7243 		status = EROFS;
7244 
7245 	put_mount(mount);
7246 	return status;
7247 }
7248 
7249 
7250 static dev_t
7251 fs_next_device(int32 *_cookie)
7252 {
7253 	struct fs_mount *mount = NULL;
7254 	dev_t device = *_cookie;
7255 
7256 	mutex_lock(&sMountMutex);
7257 
7258 	// Since device IDs are assigned sequentially, this algorithm
7259 	// does work good enough. It makes sure that the device list
7260 	// returned is sorted, and that no device is skipped when an
7261 	// already visited device got unmounted.
7262 
7263 	while (device < sNextMountID) {
7264 		mount = find_mount(device++);
7265 		if (mount != NULL && mount->volume->private_volume != NULL)
7266 			break;
7267 	}
7268 
7269 	*_cookie = device;
7270 
7271 	if (mount != NULL)
7272 		device = mount->id;
7273 	else
7274 		device = B_BAD_VALUE;
7275 
7276 	mutex_unlock(&sMountMutex);
7277 
7278 	return device;
7279 }
7280 
7281 
7282 static status_t
7283 get_cwd(char *buffer, size_t size, bool kernel)
7284 {
7285 	// Get current working directory from io context
7286 	struct io_context *context = get_current_io_context(kernel);
7287 	status_t status;
7288 
7289 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7290 
7291 	mutex_lock(&context->io_mutex);
7292 
7293 	struct vnode* vnode = context->cwd;
7294 	if (vnode)
7295 		inc_vnode_ref_count(vnode);
7296 
7297 	mutex_unlock(&context->io_mutex);
7298 
7299 	if (vnode) {
7300 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7301 		put_vnode(vnode);
7302 	} else
7303 		status = B_ERROR;
7304 
7305 	return status;
7306 }
7307 
7308 
7309 static status_t
7310 set_cwd(int fd, char *path, bool kernel)
7311 {
7312 	struct io_context *context;
7313 	struct vnode *vnode = NULL;
7314 	struct vnode *oldDirectory;
7315 	status_t status;
7316 
7317 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7318 
7319 	// Get vnode for passed path, and bail if it failed
7320 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7321 	if (status < 0)
7322 		return status;
7323 
7324 	if (!S_ISDIR(vnode->type)) {
7325 		// nope, can't cwd to here
7326 		status = B_NOT_A_DIRECTORY;
7327 		goto err;
7328 	}
7329 
7330 	// Get current io context and lock
7331 	context = get_current_io_context(kernel);
7332 	mutex_lock(&context->io_mutex);
7333 
7334 	// save the old current working directory first
7335 	oldDirectory = context->cwd;
7336 	context->cwd = vnode;
7337 
7338 	mutex_unlock(&context->io_mutex);
7339 
7340 	if (oldDirectory)
7341 		put_vnode(oldDirectory);
7342 
7343 	return B_NO_ERROR;
7344 
7345 err:
7346 	put_vnode(vnode);
7347 	return status;
7348 }
7349 
7350 
7351 //	#pragma mark - kernel mirrored syscalls
7352 
7353 
7354 dev_t
7355 _kern_mount(const char *path, const char *device, const char *fsName,
7356 	uint32 flags, const char *args, size_t argsLength)
7357 {
7358 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7359 	if (pathBuffer.InitCheck() != B_OK)
7360 		return B_NO_MEMORY;
7361 
7362 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7363 }
7364 
7365 
7366 status_t
7367 _kern_unmount(const char *path, uint32 flags)
7368 {
7369 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7370 	if (pathBuffer.InitCheck() != B_OK)
7371 		return B_NO_MEMORY;
7372 
7373 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7374 }
7375 
7376 
7377 status_t
7378 _kern_read_fs_info(dev_t device, struct fs_info *info)
7379 {
7380 	if (info == NULL)
7381 		return B_BAD_VALUE;
7382 
7383 	return fs_read_info(device, info);
7384 }
7385 
7386 
7387 status_t
7388 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
7389 {
7390 	if (info == NULL)
7391 		return B_BAD_VALUE;
7392 
7393 	return fs_write_info(device, info, mask);
7394 }
7395 
7396 
7397 status_t
7398 _kern_sync(void)
7399 {
7400 	// Note: _kern_sync() is also called from _user_sync()
7401 	int32 cookie = 0;
7402 	dev_t device;
7403 	while ((device = next_dev(&cookie)) >= 0) {
7404 		status_t status = fs_sync(device);
7405 		if (status != B_OK && status != B_BAD_VALUE)
7406 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
7407 	}
7408 
7409 	return B_OK;
7410 }
7411 
7412 
7413 dev_t
7414 _kern_next_device(int32 *_cookie)
7415 {
7416 	return fs_next_device(_cookie);
7417 }
7418 
7419 
7420 status_t
7421 _kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
7422 	size_t infoSize)
7423 {
7424 	if (infoSize != sizeof(fd_info))
7425 		return B_BAD_VALUE;
7426 
7427 	struct io_context *context = NULL;
7428 	struct team *team = NULL;
7429 
7430 	cpu_status state = disable_interrupts();
7431 	GRAB_TEAM_LOCK();
7432 
7433 	bool contextLocked = false;
7434 	team = team_get_team_struct_locked(teamID);
7435 	if (team) {
7436 		// We cannot lock the IO context while holding the team lock, nor can
7437 		// we just drop the team lock, since it might be deleted in the
7438 		// meantime. team_remove_team() acquires the thread lock when removing
7439 		// the team from the team hash table, though. Hence we switch to the
7440 		// thread lock and use mutex_lock_threads_locked().
7441 		context = (io_context *)team->io_context;
7442 
7443 		GRAB_THREAD_LOCK();
7444 		RELEASE_TEAM_LOCK();
7445 		contextLocked = mutex_lock_threads_locked(&context->io_mutex) == B_OK;
7446 		RELEASE_THREAD_LOCK();
7447 	} else
7448 		RELEASE_TEAM_LOCK();
7449 
7450 	restore_interrupts(state);
7451 
7452 	if (!contextLocked) {
7453 		// team doesn't exit or seems to be gone
7454 		return B_BAD_TEAM_ID;
7455 	}
7456 
7457 	// the team cannot be deleted completely while we're owning its
7458 	// io_context mutex, so we can safely play with it now
7459 
7460 	uint32 slot = *_cookie;
7461 
7462 	struct file_descriptor *descriptor;
7463 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
7464 		slot++;
7465 
7466 	if (slot >= context->table_size) {
7467 		mutex_unlock(&context->io_mutex);
7468 		return B_ENTRY_NOT_FOUND;
7469 	}
7470 
7471 	info->number = slot;
7472 	info->open_mode = descriptor->open_mode;
7473 
7474 	struct vnode *vnode = fd_vnode(descriptor);
7475 	if (vnode != NULL) {
7476 		info->device = vnode->device;
7477 		info->node = vnode->id;
7478 	} else if (descriptor->u.mount != NULL) {
7479 		info->device = descriptor->u.mount->id;
7480 		info->node = -1;
7481 	}
7482 
7483 	mutex_unlock(&context->io_mutex);
7484 
7485 	*_cookie = slot + 1;
7486 	return B_OK;
7487 }
7488 
7489 
7490 int
7491 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
7492 {
7493 	if (openMode & O_CREAT)
7494 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
7495 
7496 	return file_open_entry_ref(device, inode, name, openMode, true);
7497 }
7498 
7499 
7500 /*!	\brief Opens a node specified by a FD + path pair.
7501 
7502 	At least one of \a fd and \a path must be specified.
7503 	If only \a fd is given, the function opens the node identified by this
7504 	FD. If only a path is given, this path is opened. If both are given and
7505 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7506 	of the directory (!) identified by \a fd.
7507 
7508 	\param fd The FD. May be < 0.
7509 	\param path The absolute or relative path. May be \c NULL.
7510 	\param openMode The open mode.
7511 	\return A FD referring to the newly opened node, or an error code,
7512 			if an error occurs.
7513 */
7514 int
7515 _kern_open(int fd, const char *path, int openMode, int perms)
7516 {
7517 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7518 	if (pathBuffer.InitCheck() != B_OK)
7519 		return B_NO_MEMORY;
7520 
7521 	if (openMode & O_CREAT)
7522 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7523 
7524 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7525 }
7526 
7527 
7528 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7529 
7530 	The supplied name may be \c NULL, in which case directory identified
7531 	by \a device and \a inode will be opened. Otherwise \a device and
7532 	\a inode identify the parent directory of the directory to be opened
7533 	and \a name its entry name.
7534 
7535 	\param device If \a name is specified the ID of the device the parent
7536 		   directory of the directory to be opened resides on, otherwise
7537 		   the device of the directory itself.
7538 	\param inode If \a name is specified the node ID of the parent
7539 		   directory of the directory to be opened, otherwise node ID of the
7540 		   directory itself.
7541 	\param name The entry name of the directory to be opened. If \c NULL,
7542 		   the \a device + \a inode pair identify the node to be opened.
7543 	\return The FD of the newly opened directory or an error code, if
7544 			something went wrong.
7545 */
7546 int
7547 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
7548 {
7549 	return dir_open_entry_ref(device, inode, name, true);
7550 }
7551 
7552 
7553 /*!	\brief Opens a directory specified by a FD + path pair.
7554 
7555 	At least one of \a fd and \a path must be specified.
7556 	If only \a fd is given, the function opens the directory identified by this
7557 	FD. If only a path is given, this path is opened. If both are given and
7558 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7559 	of the directory (!) identified by \a fd.
7560 
7561 	\param fd The FD. May be < 0.
7562 	\param path The absolute or relative path. May be \c NULL.
7563 	\return A FD referring to the newly opened directory, or an error code,
7564 			if an error occurs.
7565 */
7566 int
7567 _kern_open_dir(int fd, const char *path)
7568 {
7569 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7570 	if (pathBuffer.InitCheck() != B_OK)
7571 		return B_NO_MEMORY;
7572 
7573 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7574 }
7575 
7576 
7577 status_t
7578 _kern_fcntl(int fd, int op, uint32 argument)
7579 {
7580 	return common_fcntl(fd, op, argument, true);
7581 }
7582 
7583 
7584 status_t
7585 _kern_fsync(int fd)
7586 {
7587 	return common_sync(fd, true);
7588 }
7589 
7590 
7591 status_t
7592 _kern_lock_node(int fd)
7593 {
7594 	return common_lock_node(fd, true);
7595 }
7596 
7597 
7598 status_t
7599 _kern_unlock_node(int fd)
7600 {
7601 	return common_unlock_node(fd, true);
7602 }
7603 
7604 
7605 status_t
7606 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
7607 {
7608 	return dir_create_entry_ref(device, inode, name, perms, true);
7609 }
7610 
7611 
7612 /*!	\brief Creates a directory specified by a FD + path pair.
7613 
7614 	\a path must always be specified (it contains the name of the new directory
7615 	at least). If only a path is given, this path identifies the location at
7616 	which the directory shall be created. If both \a fd and \a path are given and
7617 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7618 	of the directory (!) identified by \a fd.
7619 
7620 	\param fd The FD. May be < 0.
7621 	\param path The absolute or relative path. Must not be \c NULL.
7622 	\param perms The access permissions the new directory shall have.
7623 	\return \c B_OK, if the directory has been created successfully, another
7624 			error code otherwise.
7625 */
7626 status_t
7627 _kern_create_dir(int fd, const char *path, int perms)
7628 {
7629 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7630 	if (pathBuffer.InitCheck() != B_OK)
7631 		return B_NO_MEMORY;
7632 
7633 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7634 }
7635 
7636 
7637 status_t
7638 _kern_remove_dir(int fd, const char *path)
7639 {
7640 	if (path) {
7641 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7642 		if (pathBuffer.InitCheck() != B_OK)
7643 			return B_NO_MEMORY;
7644 
7645 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7646 	}
7647 
7648 	return dir_remove(fd, NULL, true);
7649 }
7650 
7651 
7652 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7653 
7654 	At least one of \a fd and \a path must be specified.
7655 	If only \a fd is given, the function the symlink to be read is the node
7656 	identified by this FD. If only a path is given, this path identifies the
7657 	symlink to be read. If both are given and the path is absolute, \a fd is
7658 	ignored; a relative path is reckoned off of the directory (!) identified
7659 	by \a fd.
7660 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7661 	will still be updated to reflect the required buffer size.
7662 
7663 	\param fd The FD. May be < 0.
7664 	\param path The absolute or relative path. May be \c NULL.
7665 	\param buffer The buffer into which the contents of the symlink shall be
7666 		   written.
7667 	\param _bufferSize A pointer to the size of the supplied buffer.
7668 	\return The length of the link on success or an appropriate error code
7669 */
7670 status_t
7671 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
7672 {
7673 	if (path) {
7674 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7675 		if (pathBuffer.InitCheck() != B_OK)
7676 			return B_NO_MEMORY;
7677 
7678 		return common_read_link(fd, pathBuffer.LockBuffer(),
7679 			buffer, _bufferSize, true);
7680 	}
7681 
7682 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
7683 }
7684 
7685 
7686 /*!	\brief Creates a symlink specified by a FD + path pair.
7687 
7688 	\a path must always be specified (it contains the name of the new symlink
7689 	at least). If only a path is given, this path identifies the location at
7690 	which the symlink shall be created. If both \a fd and \a path are given and
7691 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7692 	of the directory (!) identified by \a fd.
7693 
7694 	\param fd The FD. May be < 0.
7695 	\param toPath The absolute or relative path. Must not be \c NULL.
7696 	\param mode The access permissions the new symlink shall have.
7697 	\return \c B_OK, if the symlink has been created successfully, another
7698 			error code otherwise.
7699 */
7700 status_t
7701 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
7702 {
7703 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7704 	if (pathBuffer.InitCheck() != B_OK)
7705 		return B_NO_MEMORY;
7706 
7707 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
7708 		toPath, mode, true);
7709 }
7710 
7711 
7712 status_t
7713 _kern_create_link(const char *path, const char *toPath)
7714 {
7715 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7716 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
7717 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7718 		return B_NO_MEMORY;
7719 
7720 	return common_create_link(pathBuffer.LockBuffer(),
7721 		toPathBuffer.LockBuffer(), true);
7722 }
7723 
7724 
7725 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
7726 
7727 	\a path must always be specified (it contains at least the name of the entry
7728 	to be deleted). If only a path is given, this path identifies the entry
7729 	directly. If both \a fd and \a path are given and the path is absolute,
7730 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7731 	identified by \a fd.
7732 
7733 	\param fd The FD. May be < 0.
7734 	\param path The absolute or relative path. Must not be \c NULL.
7735 	\return \c B_OK, if the entry has been removed successfully, another
7736 			error code otherwise.
7737 */
7738 status_t
7739 _kern_unlink(int fd, const char *path)
7740 {
7741 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7742 	if (pathBuffer.InitCheck() != B_OK)
7743 		return B_NO_MEMORY;
7744 
7745 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
7746 }
7747 
7748 
7749 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
7750 		   by another FD + path pair.
7751 
7752 	\a oldPath and \a newPath must always be specified (they contain at least
7753 	the name of the entry). If only a path is given, this path identifies the
7754 	entry directly. If both a FD and a path are given and the path is absolute,
7755 	the FD is ignored; a relative path is reckoned off of the directory (!)
7756 	identified by the respective FD.
7757 
7758 	\param oldFD The FD of the old location. May be < 0.
7759 	\param oldPath The absolute or relative path of the old location. Must not
7760 		   be \c NULL.
7761 	\param newFD The FD of the new location. May be < 0.
7762 	\param newPath The absolute or relative path of the new location. Must not
7763 		   be \c NULL.
7764 	\return \c B_OK, if the entry has been moved successfully, another
7765 			error code otherwise.
7766 */
7767 status_t
7768 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
7769 {
7770 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
7771 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
7772 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7773 		return B_NO_MEMORY;
7774 
7775 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
7776 		newFD, newPathBuffer.LockBuffer(), true);
7777 }
7778 
7779 
7780 status_t
7781 _kern_access(const char *path, int mode)
7782 {
7783 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7784 	if (pathBuffer.InitCheck() != B_OK)
7785 		return B_NO_MEMORY;
7786 
7787 	return common_access(pathBuffer.LockBuffer(), mode, true);
7788 }
7789 
7790 
7791 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
7792 
7793 	If only \a fd is given, the stat operation associated with the type
7794 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
7795 	given, this path identifies the entry for whose node to retrieve the
7796 	stat data. If both \a fd and \a path are given and the path is absolute,
7797 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7798 	identified by \a fd and specifies the entry whose stat data shall be
7799 	retrieved.
7800 
7801 	\param fd The FD. May be < 0.
7802 	\param path The absolute or relative path. Must not be \c NULL.
7803 	\param traverseLeafLink If \a path is given, \c true specifies that the
7804 		   function shall not stick to symlinks, but traverse them.
7805 	\param stat The buffer the stat data shall be written into.
7806 	\param statSize The size of the supplied stat buffer.
7807 	\return \c B_OK, if the the stat data have been read successfully, another
7808 			error code otherwise.
7809 */
7810 status_t
7811 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
7812 	struct stat *stat, size_t statSize)
7813 {
7814 	struct stat completeStat;
7815 	struct stat *originalStat = NULL;
7816 	status_t status;
7817 
7818 	if (statSize > sizeof(struct stat))
7819 		return B_BAD_VALUE;
7820 
7821 	// this supports different stat extensions
7822 	if (statSize < sizeof(struct stat)) {
7823 		originalStat = stat;
7824 		stat = &completeStat;
7825 	}
7826 
7827 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
7828 
7829 	if (status == B_OK && originalStat != NULL)
7830 		memcpy(originalStat, stat, statSize);
7831 
7832 	return status;
7833 }
7834 
7835 
7836 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
7837 
7838 	If only \a fd is given, the stat operation associated with the type
7839 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
7840 	given, this path identifies the entry for whose node to write the
7841 	stat data. If both \a fd and \a path are given and the path is absolute,
7842 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7843 	identified by \a fd and specifies the entry whose stat data shall be
7844 	written.
7845 
7846 	\param fd The FD. May be < 0.
7847 	\param path The absolute or relative path. Must not be \c NULL.
7848 	\param traverseLeafLink If \a path is given, \c true specifies that the
7849 		   function shall not stick to symlinks, but traverse them.
7850 	\param stat The buffer containing the stat data to be written.
7851 	\param statSize The size of the supplied stat buffer.
7852 	\param statMask A mask specifying which parts of the stat data shall be
7853 		   written.
7854 	\return \c B_OK, if the the stat data have been written successfully,
7855 			another error code otherwise.
7856 */
7857 status_t
7858 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
7859 	const struct stat *stat, size_t statSize, int statMask)
7860 {
7861 	struct stat completeStat;
7862 
7863 	if (statSize > sizeof(struct stat))
7864 		return B_BAD_VALUE;
7865 
7866 	// this supports different stat extensions
7867 	if (statSize < sizeof(struct stat)) {
7868 		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
7869 		memcpy(&completeStat, stat, statSize);
7870 		stat = &completeStat;
7871 	}
7872 
7873 	status_t status;
7874 
7875 	if (path) {
7876 		// path given: write the stat of the node referred to by (fd, path)
7877 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7878 		if (pathBuffer.InitCheck() != B_OK)
7879 			return B_NO_MEMORY;
7880 
7881 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
7882 			traverseLeafLink, stat, statMask, true);
7883 	} else {
7884 		// no path given: get the FD and use the FD operation
7885 		struct file_descriptor *descriptor
7886 			= get_fd(get_current_io_context(true), fd);
7887 		if (descriptor == NULL)
7888 			return B_FILE_ERROR;
7889 
7890 		if (descriptor->ops->fd_write_stat)
7891 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
7892 		else
7893 			status = EOPNOTSUPP;
7894 
7895 		put_fd(descriptor);
7896 	}
7897 
7898 	return status;
7899 }
7900 
7901 
7902 int
7903 _kern_open_attr_dir(int fd, const char *path)
7904 {
7905 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7906 	if (pathBuffer.InitCheck() != B_OK)
7907 		return B_NO_MEMORY;
7908 
7909 	if (path != NULL)
7910 		pathBuffer.SetTo(path);
7911 
7912 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
7913 }
7914 
7915 
7916 int
7917 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
7918 {
7919 	return attr_create(fd, name, type, openMode, true);
7920 }
7921 
7922 
7923 int
7924 _kern_open_attr(int fd, const char *name, int openMode)
7925 {
7926 	return attr_open(fd, name, openMode, true);
7927 }
7928 
7929 
7930 status_t
7931 _kern_remove_attr(int fd, const char *name)
7932 {
7933 	return attr_remove(fd, name, true);
7934 }
7935 
7936 
7937 status_t
7938 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
7939 {
7940 	return attr_rename(fromFile, fromName, toFile, toName, true);
7941 }
7942 
7943 
7944 int
7945 _kern_open_index_dir(dev_t device)
7946 {
7947 	return index_dir_open(device, true);
7948 }
7949 
7950 
7951 status_t
7952 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
7953 {
7954 	return index_create(device, name, type, flags, true);
7955 }
7956 
7957 
7958 status_t
7959 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
7960 {
7961 	return index_name_read_stat(device, name, stat, true);
7962 }
7963 
7964 
7965 status_t
7966 _kern_remove_index(dev_t device, const char *name)
7967 {
7968 	return index_remove(device, name, true);
7969 }
7970 
7971 
7972 status_t
7973 _kern_getcwd(char *buffer, size_t size)
7974 {
7975 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
7976 
7977 	// Call vfs to get current working directory
7978 	return get_cwd(buffer, size, true);
7979 }
7980 
7981 
7982 status_t
7983 _kern_setcwd(int fd, const char *path)
7984 {
7985 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7986 	if (pathBuffer.InitCheck() != B_OK)
7987 		return B_NO_MEMORY;
7988 
7989 	if (path != NULL)
7990 		pathBuffer.SetTo(path);
7991 
7992 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
7993 }
7994 
7995 
7996 //	#pragma mark - userland syscalls
7997 
7998 
7999 dev_t
8000 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
8001 	uint32 flags, const char *userArgs, size_t argsLength)
8002 {
8003 	char fileSystem[B_OS_NAME_LENGTH];
8004 	KPath path, device;
8005 	char *args = NULL;
8006 	status_t status;
8007 
8008 	if (!IS_USER_ADDRESS(userPath)
8009 		|| !IS_USER_ADDRESS(userFileSystem)
8010 		|| !IS_USER_ADDRESS(userDevice))
8011 		return B_BAD_ADDRESS;
8012 
8013 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8014 		return B_NO_MEMORY;
8015 
8016 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8017 		return B_BAD_ADDRESS;
8018 
8019 	if (userFileSystem != NULL
8020 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8021 		return B_BAD_ADDRESS;
8022 
8023 	if (userDevice != NULL
8024 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
8025 		return B_BAD_ADDRESS;
8026 
8027 	if (userArgs != NULL && argsLength > 0) {
8028 		// this is a safety restriction
8029 		if (argsLength >= 65536)
8030 			return B_NAME_TOO_LONG;
8031 
8032 		args = (char *)malloc(argsLength + 1);
8033 		if (args == NULL)
8034 			return B_NO_MEMORY;
8035 
8036 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8037 			free(args);
8038 			return B_BAD_ADDRESS;
8039 		}
8040 	}
8041 	path.UnlockBuffer();
8042 	device.UnlockBuffer();
8043 
8044 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
8045 		userFileSystem ? fileSystem : NULL, flags, args, false);
8046 
8047 	free(args);
8048 	return status;
8049 }
8050 
8051 
8052 status_t
8053 _user_unmount(const char *userPath, uint32 flags)
8054 {
8055 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8056 	if (pathBuffer.InitCheck() != B_OK)
8057 		return B_NO_MEMORY;
8058 
8059 	char *path = pathBuffer.LockBuffer();
8060 
8061 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8062 		return B_BAD_ADDRESS;
8063 
8064 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8065 }
8066 
8067 
8068 status_t
8069 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
8070 {
8071 	struct fs_info info;
8072 	status_t status;
8073 
8074 	if (userInfo == NULL)
8075 		return B_BAD_VALUE;
8076 
8077 	if (!IS_USER_ADDRESS(userInfo))
8078 		return B_BAD_ADDRESS;
8079 
8080 	status = fs_read_info(device, &info);
8081 	if (status != B_OK)
8082 		return status;
8083 
8084 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
8085 		return B_BAD_ADDRESS;
8086 
8087 	return B_OK;
8088 }
8089 
8090 
8091 status_t
8092 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
8093 {
8094 	struct fs_info info;
8095 
8096 	if (userInfo == NULL)
8097 		return B_BAD_VALUE;
8098 
8099 	if (!IS_USER_ADDRESS(userInfo)
8100 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
8101 		return B_BAD_ADDRESS;
8102 
8103 	return fs_write_info(device, &info, mask);
8104 }
8105 
8106 
8107 dev_t
8108 _user_next_device(int32 *_userCookie)
8109 {
8110 	int32 cookie;
8111 	dev_t device;
8112 
8113 	if (!IS_USER_ADDRESS(_userCookie)
8114 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
8115 		return B_BAD_ADDRESS;
8116 
8117 	device = fs_next_device(&cookie);
8118 
8119 	if (device >= B_OK) {
8120 		// update user cookie
8121 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
8122 			return B_BAD_ADDRESS;
8123 	}
8124 
8125 	return device;
8126 }
8127 
8128 
8129 status_t
8130 _user_sync(void)
8131 {
8132 	return _kern_sync();
8133 }
8134 
8135 
8136 status_t
8137 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
8138 	size_t infoSize)
8139 {
8140 	struct fd_info info;
8141 	uint32 cookie;
8142 
8143 	// only root can do this (or should root's group be enough?)
8144 	if (geteuid() != 0)
8145 		return B_NOT_ALLOWED;
8146 
8147 	if (infoSize != sizeof(fd_info))
8148 		return B_BAD_VALUE;
8149 
8150 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8151 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
8152 		return B_BAD_ADDRESS;
8153 
8154 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8155 	if (status < B_OK)
8156 		return status;
8157 
8158 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
8159 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
8160 		return B_BAD_ADDRESS;
8161 
8162 	return status;
8163 }
8164 
8165 
8166 status_t
8167 _user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
8168 	char *userPath, size_t pathLength)
8169 {
8170 	if (!IS_USER_ADDRESS(userPath))
8171 		return B_BAD_ADDRESS;
8172 
8173 	KPath path(B_PATH_NAME_LENGTH + 1);
8174 	if (path.InitCheck() != B_OK)
8175 		return B_NO_MEMORY;
8176 
8177 	// copy the leaf name onto the stack
8178 	char stackLeaf[B_FILE_NAME_LENGTH];
8179 	if (leaf) {
8180 		if (!IS_USER_ADDRESS(leaf))
8181 			return B_BAD_ADDRESS;
8182 
8183 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8184 		if (length < 0)
8185 			return length;
8186 		if (length >= B_FILE_NAME_LENGTH)
8187 			return B_NAME_TOO_LONG;
8188 
8189 		leaf = stackLeaf;
8190 	}
8191 
8192 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8193 		path.LockBuffer(), path.BufferSize());
8194 	if (status < B_OK)
8195 		return status;
8196 
8197 	path.UnlockBuffer();
8198 
8199 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8200 	if (length < 0)
8201 		return length;
8202 	if (length >= (int)pathLength)
8203 		return B_BUFFER_OVERFLOW;
8204 
8205 	return B_OK;
8206 }
8207 
8208 
8209 status_t
8210 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8211 {
8212 	if (userPath == NULL || buffer == NULL)
8213 		return B_BAD_VALUE;
8214 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8215 		return B_BAD_ADDRESS;
8216 
8217 	// copy path from userland
8218 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8219 	if (pathBuffer.InitCheck() != B_OK)
8220 		return B_NO_MEMORY;
8221 	char* path = pathBuffer.LockBuffer();
8222 
8223 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8224 		return B_BAD_ADDRESS;
8225 
8226 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8227 		false);
8228 	if (error != B_OK)
8229 		return error;
8230 
8231 	// copy back to userland
8232 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8233 	if (len < 0)
8234 		return len;
8235 	if (len >= B_PATH_NAME_LENGTH)
8236 		return B_BUFFER_OVERFLOW;
8237 
8238 	return B_OK;
8239 }
8240 
8241 
8242 int
8243 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
8244 	int openMode, int perms)
8245 {
8246 	char name[B_FILE_NAME_LENGTH];
8247 
8248 	if (userName == NULL || device < 0 || inode < 0)
8249 		return B_BAD_VALUE;
8250 	if (!IS_USER_ADDRESS(userName)
8251 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8252 		return B_BAD_ADDRESS;
8253 
8254 	if (openMode & O_CREAT)
8255 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
8256 
8257 	return file_open_entry_ref(device, inode, name, openMode, false);
8258 }
8259 
8260 
8261 int
8262 _user_open(int fd, const char *userPath, int openMode, int perms)
8263 {
8264 	KPath path(B_PATH_NAME_LENGTH + 1);
8265 	if (path.InitCheck() != B_OK)
8266 		return B_NO_MEMORY;
8267 
8268 	char *buffer = path.LockBuffer();
8269 
8270 	if (!IS_USER_ADDRESS(userPath)
8271 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8272 		return B_BAD_ADDRESS;
8273 
8274 	if (openMode & O_CREAT)
8275 		return file_create(fd, buffer, openMode, perms, false);
8276 
8277 	return file_open(fd, buffer, openMode, false);
8278 }
8279 
8280 
8281 int
8282 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
8283 {
8284 	if (userName != NULL) {
8285 		char name[B_FILE_NAME_LENGTH];
8286 
8287 		if (!IS_USER_ADDRESS(userName)
8288 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8289 			return B_BAD_ADDRESS;
8290 
8291 		return dir_open_entry_ref(device, inode, name, false);
8292 	}
8293 	return dir_open_entry_ref(device, inode, NULL, false);
8294 }
8295 
8296 
8297 int
8298 _user_open_dir(int fd, const char *userPath)
8299 {
8300 	KPath path(B_PATH_NAME_LENGTH + 1);
8301 	if (path.InitCheck() != B_OK)
8302 		return B_NO_MEMORY;
8303 
8304 	char *buffer = path.LockBuffer();
8305 
8306 	if (!IS_USER_ADDRESS(userPath)
8307 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8308 		return B_BAD_ADDRESS;
8309 
8310 	return dir_open(fd, buffer, false);
8311 }
8312 
8313 
8314 /*!	\brief Opens a directory's parent directory and returns the entry name
8315 		   of the former.
8316 
8317 	Aside from that is returns the directory's entry name, this method is
8318 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
8319 	equivalent, if \a userName is \c NULL.
8320 
8321 	If a name buffer is supplied and the name does not fit the buffer, the
8322 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8323 
8324 	\param fd A FD referring to a directory.
8325 	\param userName Buffer the directory's entry name shall be written into.
8326 		   May be \c NULL.
8327 	\param nameLength Size of the name buffer.
8328 	\return The file descriptor of the opened parent directory, if everything
8329 			went fine, an error code otherwise.
8330 */
8331 int
8332 _user_open_parent_dir(int fd, char *userName, size_t nameLength)
8333 {
8334 	bool kernel = false;
8335 
8336 	if (userName && !IS_USER_ADDRESS(userName))
8337 		return B_BAD_ADDRESS;
8338 
8339 	// open the parent dir
8340 	int parentFD = dir_open(fd, "..", kernel);
8341 	if (parentFD < 0)
8342 		return parentFD;
8343 	FDCloser fdCloser(parentFD, kernel);
8344 
8345 	if (userName) {
8346 		// get the vnodes
8347 		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
8348 		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
8349 		VNodePutter parentVNodePutter(parentVNode);
8350 		VNodePutter dirVNodePutter(dirVNode);
8351 		if (!parentVNode || !dirVNode)
8352 			return B_FILE_ERROR;
8353 
8354 		// get the vnode name
8355 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8356 		struct dirent *buffer = (struct dirent*)_buffer;
8357 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8358 			sizeof(_buffer), get_current_io_context(false));
8359 		if (status != B_OK)
8360 			return status;
8361 
8362 		// copy the name to the userland buffer
8363 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8364 		if (len < 0)
8365 			return len;
8366 		if (len >= (int)nameLength)
8367 			return B_BUFFER_OVERFLOW;
8368 	}
8369 
8370 	return fdCloser.Detach();
8371 }
8372 
8373 
8374 status_t
8375 _user_fcntl(int fd, int op, uint32 argument)
8376 {
8377 	status_t status = common_fcntl(fd, op, argument, false);
8378 	if (op == F_SETLKW)
8379 		syscall_restart_handle_post(status);
8380 
8381 	return status;
8382 }
8383 
8384 
8385 status_t
8386 _user_fsync(int fd)
8387 {
8388 	return common_sync(fd, false);
8389 }
8390 
8391 
8392 status_t
8393 _user_flock(int fd, int op)
8394 {
8395 	struct file_descriptor *descriptor;
8396 	struct vnode *vnode;
8397 	struct flock flock;
8398 	status_t status;
8399 
8400 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, op));
8401 
8402 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8403 	if (descriptor == NULL)
8404 		return B_FILE_ERROR;
8405 
8406 	if (descriptor->type != FDTYPE_FILE) {
8407 		put_fd(descriptor);
8408 		return B_BAD_VALUE;
8409 	}
8410 
8411 	flock.l_start = 0;
8412 	flock.l_len = OFF_MAX;
8413 	flock.l_whence = 0;
8414 	flock.l_type = (op & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8415 
8416 	if ((op & LOCK_UN) != 0)
8417 		status = release_advisory_lock(vnode, &flock);
8418 	else {
8419 		status = acquire_advisory_lock(vnode,
8420 			thread_get_current_thread()->team->session_id, &flock,
8421 			(op & LOCK_NB) == 0);
8422 	}
8423 
8424 	syscall_restart_handle_post(status);
8425 
8426 	put_fd(descriptor);
8427 	return status;
8428 }
8429 
8430 
8431 status_t
8432 _user_lock_node(int fd)
8433 {
8434 	return common_lock_node(fd, false);
8435 }
8436 
8437 
8438 status_t
8439 _user_unlock_node(int fd)
8440 {
8441 	return common_unlock_node(fd, false);
8442 }
8443 
8444 
8445 status_t
8446 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
8447 {
8448 	char name[B_FILE_NAME_LENGTH];
8449 	status_t status;
8450 
8451 	if (!IS_USER_ADDRESS(userName))
8452 		return B_BAD_ADDRESS;
8453 
8454 	status = user_strlcpy(name, userName, sizeof(name));
8455 	if (status < 0)
8456 		return status;
8457 
8458 	return dir_create_entry_ref(device, inode, name, perms, false);
8459 }
8460 
8461 
8462 status_t
8463 _user_create_dir(int fd, const char *userPath, int perms)
8464 {
8465 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8466 	if (pathBuffer.InitCheck() != B_OK)
8467 		return B_NO_MEMORY;
8468 
8469 	char *path = pathBuffer.LockBuffer();
8470 
8471 	if (!IS_USER_ADDRESS(userPath)
8472 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8473 		return B_BAD_ADDRESS;
8474 
8475 	return dir_create(fd, path, perms, false);
8476 }
8477 
8478 
8479 status_t
8480 _user_remove_dir(int fd, const char *userPath)
8481 {
8482 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8483 	if (pathBuffer.InitCheck() != B_OK)
8484 		return B_NO_MEMORY;
8485 
8486 	char *path = pathBuffer.LockBuffer();
8487 
8488 	if (userPath != NULL) {
8489 		if (!IS_USER_ADDRESS(userPath)
8490 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8491 			return B_BAD_ADDRESS;
8492 	}
8493 
8494 	return dir_remove(fd, userPath ? path : NULL, false);
8495 }
8496 
8497 
8498 status_t
8499 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
8500 {
8501 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8502 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8503 		return B_NO_MEMORY;
8504 
8505 	size_t bufferSize;
8506 
8507 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8508 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
8509 		return B_BAD_ADDRESS;
8510 
8511 	char *path = pathBuffer.LockBuffer();
8512 	char *buffer = linkBuffer.LockBuffer();
8513 
8514 	if (userPath) {
8515 		if (!IS_USER_ADDRESS(userPath)
8516 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8517 			return B_BAD_ADDRESS;
8518 
8519 		if (bufferSize > B_PATH_NAME_LENGTH)
8520 			bufferSize = B_PATH_NAME_LENGTH;
8521 	}
8522 
8523 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8524 		&bufferSize, false);
8525 
8526 	// we also update the bufferSize in case of errors
8527 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8528 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
8529 		return B_BAD_ADDRESS;
8530 
8531 	if (status < B_OK)
8532 		return status;
8533 
8534 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8535 		return B_BAD_ADDRESS;
8536 
8537 	return B_OK;
8538 }
8539 
8540 
8541 status_t
8542 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
8543 	int mode)
8544 {
8545 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8546 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8547 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8548 		return B_NO_MEMORY;
8549 
8550 	char *path = pathBuffer.LockBuffer();
8551 	char *toPath = toPathBuffer.LockBuffer();
8552 
8553 	if (!IS_USER_ADDRESS(userPath)
8554 		|| !IS_USER_ADDRESS(userToPath)
8555 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8556 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8557 		return B_BAD_ADDRESS;
8558 
8559 	return common_create_symlink(fd, path, toPath, mode, false);
8560 }
8561 
8562 
8563 status_t
8564 _user_create_link(const char *userPath, const char *userToPath)
8565 {
8566 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8567 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8568 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8569 		return B_NO_MEMORY;
8570 
8571 	char *path = pathBuffer.LockBuffer();
8572 	char *toPath = toPathBuffer.LockBuffer();
8573 
8574 	if (!IS_USER_ADDRESS(userPath)
8575 		|| !IS_USER_ADDRESS(userToPath)
8576 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8577 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8578 		return B_BAD_ADDRESS;
8579 
8580 	status_t status = check_path(toPath);
8581 	if (status < B_OK)
8582 		return status;
8583 
8584 	return common_create_link(path, toPath, false);
8585 }
8586 
8587 
8588 status_t
8589 _user_unlink(int fd, const char *userPath)
8590 {
8591 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8592 	if (pathBuffer.InitCheck() != B_OK)
8593 		return B_NO_MEMORY;
8594 
8595 	char *path = pathBuffer.LockBuffer();
8596 
8597 	if (!IS_USER_ADDRESS(userPath)
8598 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8599 		return B_BAD_ADDRESS;
8600 
8601 	return common_unlink(fd, path, false);
8602 }
8603 
8604 
8605 status_t
8606 _user_rename(int oldFD, const char *userOldPath, int newFD,
8607 	const char *userNewPath)
8608 {
8609 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8610 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8611 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8612 		return B_NO_MEMORY;
8613 
8614 	char *oldPath = oldPathBuffer.LockBuffer();
8615 	char *newPath = newPathBuffer.LockBuffer();
8616 
8617 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8618 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8619 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8620 		return B_BAD_ADDRESS;
8621 
8622 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8623 }
8624 
8625 
8626 status_t
8627 _user_create_fifo(const char *userPath, mode_t perms)
8628 {
8629 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8630 	if (pathBuffer.InitCheck() != B_OK)
8631 		return B_NO_MEMORY;
8632 
8633 	char *path = pathBuffer.LockBuffer();
8634 
8635 	if (!IS_USER_ADDRESS(userPath)
8636 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
8637 		return B_BAD_ADDRESS;
8638 	}
8639 
8640 	// split into directory vnode and filename path
8641 	char filename[B_FILE_NAME_LENGTH];
8642 	struct vnode *dir;
8643 	status_t status = path_to_dir_vnode(path, &dir, filename, false);
8644 	if (status != B_OK)
8645 		return status;
8646 
8647 	VNodePutter _(dir);
8648 
8649 	// the underlying FS needs to support creating FIFOs
8650 	if (!HAS_FS_CALL(dir, create_special_node))
8651 		return B_UNSUPPORTED;
8652 
8653 	// create the entry	-- the FIFO sub node is set up automatically
8654 	fs_vnode superVnode;
8655 	ino_t nodeID;
8656 	status = FS_CALL(dir, create_special_node, filename, NULL,
8657 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
8658 
8659 	// create_special_node() acquired a reference for us that we don't need.
8660 	if (status == B_OK)
8661 		put_vnode(dir->mount->volume, nodeID);
8662 
8663 	return status;
8664 }
8665 
8666 
8667 status_t
8668 _user_create_pipe(int *userFDs)
8669 {
8670 	// rootfs should support creating FIFOs, but let's be sure
8671 	if (!HAS_FS_CALL(sRoot, create_special_node))
8672 		return B_UNSUPPORTED;
8673 
8674 	// create the node	-- the FIFO sub node is set up automatically
8675 	fs_vnode superVnode;
8676 	ino_t nodeID;
8677 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
8678 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
8679 	if (status != B_OK)
8680 		return status;
8681 
8682 	// We've got one reference to the node and need another one.
8683 	struct vnode* vnode;
8684 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
8685 	if (status != B_OK) {
8686 		// that should not happen
8687 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
8688 			sRoot->mount->id, sRoot->id);
8689 		return status;
8690 	}
8691 
8692 	// Everything looks good so far. Open two FDs for reading respectively
8693 	// writing.
8694 	int fds[2];
8695 	fds[0] = open_vnode(vnode, O_RDONLY, false);
8696 	fds[1] = open_vnode(vnode, O_WRONLY, false);
8697 
8698 	FDCloser closer0(fds[0], false);
8699 	FDCloser closer1(fds[1], false);
8700 
8701 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
8702 
8703 	// copy FDs to userland
8704 	if (status == B_OK) {
8705 		if (!IS_USER_ADDRESS(userFDs)
8706 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
8707 			status = B_BAD_ADDRESS;
8708 		}
8709 	}
8710 
8711 	// keep FDs, if everything went fine
8712 	if (status == B_OK) {
8713 		closer0.Detach();
8714 		closer1.Detach();
8715 	}
8716 
8717 	return status;
8718 }
8719 
8720 
8721 status_t
8722 _user_access(const char *userPath, int mode)
8723 {
8724 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8725 	if (pathBuffer.InitCheck() != B_OK)
8726 		return B_NO_MEMORY;
8727 
8728 	char *path = pathBuffer.LockBuffer();
8729 
8730 	if (!IS_USER_ADDRESS(userPath)
8731 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8732 		return B_BAD_ADDRESS;
8733 
8734 	return common_access(path, mode, false);
8735 }
8736 
8737 
8738 status_t
8739 _user_read_stat(int fd, const char *userPath, bool traverseLink,
8740 	struct stat *userStat, size_t statSize)
8741 {
8742 	struct stat stat;
8743 	status_t status;
8744 
8745 	if (statSize > sizeof(struct stat))
8746 		return B_BAD_VALUE;
8747 
8748 	if (!IS_USER_ADDRESS(userStat))
8749 		return B_BAD_ADDRESS;
8750 
8751 	if (userPath) {
8752 		// path given: get the stat of the node referred to by (fd, path)
8753 		if (!IS_USER_ADDRESS(userPath))
8754 			return B_BAD_ADDRESS;
8755 
8756 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8757 		if (pathBuffer.InitCheck() != B_OK)
8758 			return B_NO_MEMORY;
8759 
8760 		char *path = pathBuffer.LockBuffer();
8761 
8762 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
8763 		if (length < B_OK)
8764 			return length;
8765 		if (length >= B_PATH_NAME_LENGTH)
8766 			return B_NAME_TOO_LONG;
8767 
8768 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
8769 	} else {
8770 		// no path given: get the FD and use the FD operation
8771 		struct file_descriptor *descriptor
8772 			= get_fd(get_current_io_context(false), fd);
8773 		if (descriptor == NULL)
8774 			return B_FILE_ERROR;
8775 
8776 		if (descriptor->ops->fd_read_stat)
8777 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
8778 		else
8779 			status = EOPNOTSUPP;
8780 
8781 		put_fd(descriptor);
8782 	}
8783 
8784 	if (status < B_OK)
8785 		return status;
8786 
8787 	return user_memcpy(userStat, &stat, statSize);
8788 }
8789 
8790 
8791 status_t
8792 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
8793 	const struct stat *userStat, size_t statSize, int statMask)
8794 {
8795 	if (statSize > sizeof(struct stat))
8796 		return B_BAD_VALUE;
8797 
8798 	struct stat stat;
8799 
8800 	if (!IS_USER_ADDRESS(userStat)
8801 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
8802 		return B_BAD_ADDRESS;
8803 
8804 	// clear additional stat fields
8805 	if (statSize < sizeof(struct stat))
8806 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
8807 
8808 	status_t status;
8809 
8810 	if (userPath) {
8811 		// path given: write the stat of the node referred to by (fd, path)
8812 		if (!IS_USER_ADDRESS(userPath))
8813 			return B_BAD_ADDRESS;
8814 
8815 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8816 		if (pathBuffer.InitCheck() != B_OK)
8817 			return B_NO_MEMORY;
8818 
8819 		char *path = pathBuffer.LockBuffer();
8820 
8821 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
8822 		if (length < B_OK)
8823 			return length;
8824 		if (length >= B_PATH_NAME_LENGTH)
8825 			return B_NAME_TOO_LONG;
8826 
8827 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
8828 			statMask, false);
8829 	} else {
8830 		// no path given: get the FD and use the FD operation
8831 		struct file_descriptor *descriptor
8832 			= get_fd(get_current_io_context(false), fd);
8833 		if (descriptor == NULL)
8834 			return B_FILE_ERROR;
8835 
8836 		if (descriptor->ops->fd_write_stat)
8837 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
8838 		else
8839 			status = EOPNOTSUPP;
8840 
8841 		put_fd(descriptor);
8842 	}
8843 
8844 	return status;
8845 }
8846 
8847 
8848 int
8849 _user_open_attr_dir(int fd, const char *userPath)
8850 {
8851 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8852 	if (pathBuffer.InitCheck() != B_OK)
8853 		return B_NO_MEMORY;
8854 
8855 	char *path = pathBuffer.LockBuffer();
8856 
8857 	if (userPath != NULL) {
8858 		if (!IS_USER_ADDRESS(userPath)
8859 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8860 			return B_BAD_ADDRESS;
8861 	}
8862 
8863 	return attr_dir_open(fd, userPath ? path : NULL, false);
8864 }
8865 
8866 
8867 int
8868 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
8869 {
8870 	char name[B_FILE_NAME_LENGTH];
8871 
8872 	if (!IS_USER_ADDRESS(userName)
8873 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8874 		return B_BAD_ADDRESS;
8875 
8876 	return attr_create(fd, name, type, openMode, false);
8877 }
8878 
8879 
8880 int
8881 _user_open_attr(int fd, const char *userName, int openMode)
8882 {
8883 	char name[B_FILE_NAME_LENGTH];
8884 
8885 	if (!IS_USER_ADDRESS(userName)
8886 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8887 		return B_BAD_ADDRESS;
8888 
8889 	return attr_open(fd, name, openMode, false);
8890 }
8891 
8892 
8893 status_t
8894 _user_remove_attr(int fd, const char *userName)
8895 {
8896 	char name[B_FILE_NAME_LENGTH];
8897 
8898 	if (!IS_USER_ADDRESS(userName)
8899 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8900 		return B_BAD_ADDRESS;
8901 
8902 	return attr_remove(fd, name, false);
8903 }
8904 
8905 
8906 status_t
8907 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
8908 {
8909 	if (!IS_USER_ADDRESS(userFromName)
8910 		|| !IS_USER_ADDRESS(userToName))
8911 		return B_BAD_ADDRESS;
8912 
8913 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
8914 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
8915 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
8916 		return B_NO_MEMORY;
8917 
8918 	char *fromName = fromNameBuffer.LockBuffer();
8919 	char *toName = toNameBuffer.LockBuffer();
8920 
8921 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
8922 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
8923 		return B_BAD_ADDRESS;
8924 
8925 	return attr_rename(fromFile, fromName, toFile, toName, false);
8926 }
8927 
8928 
8929 int
8930 _user_open_index_dir(dev_t device)
8931 {
8932 	return index_dir_open(device, false);
8933 }
8934 
8935 
8936 status_t
8937 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
8938 {
8939 	char name[B_FILE_NAME_LENGTH];
8940 
8941 	if (!IS_USER_ADDRESS(userName)
8942 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8943 		return B_BAD_ADDRESS;
8944 
8945 	return index_create(device, name, type, flags, false);
8946 }
8947 
8948 
8949 status_t
8950 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
8951 {
8952 	char name[B_FILE_NAME_LENGTH];
8953 	struct stat stat;
8954 	status_t status;
8955 
8956 	if (!IS_USER_ADDRESS(userName)
8957 		|| !IS_USER_ADDRESS(userStat)
8958 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8959 		return B_BAD_ADDRESS;
8960 
8961 	status = index_name_read_stat(device, name, &stat, false);
8962 	if (status == B_OK) {
8963 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
8964 			return B_BAD_ADDRESS;
8965 	}
8966 
8967 	return status;
8968 }
8969 
8970 
8971 status_t
8972 _user_remove_index(dev_t device, const char *userName)
8973 {
8974 	char name[B_FILE_NAME_LENGTH];
8975 
8976 	if (!IS_USER_ADDRESS(userName)
8977 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8978 		return B_BAD_ADDRESS;
8979 
8980 	return index_remove(device, name, false);
8981 }
8982 
8983 
8984 status_t
8985 _user_getcwd(char *userBuffer, size_t size)
8986 {
8987 	if (!IS_USER_ADDRESS(userBuffer))
8988 		return B_BAD_ADDRESS;
8989 
8990 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8991 	if (pathBuffer.InitCheck() != B_OK)
8992 		return B_NO_MEMORY;
8993 
8994 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
8995 
8996 	if (size > B_PATH_NAME_LENGTH)
8997 		size = B_PATH_NAME_LENGTH;
8998 
8999 	char *path = pathBuffer.LockBuffer();
9000 
9001 	status_t status = get_cwd(path, size, false);
9002 	if (status < B_OK)
9003 		return status;
9004 
9005 	// Copy back the result
9006 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9007 		return B_BAD_ADDRESS;
9008 
9009 	return status;
9010 }
9011 
9012 
9013 status_t
9014 _user_setcwd(int fd, const char *userPath)
9015 {
9016 	TRACE(("user_setcwd: path = %p\n", userPath));
9017 
9018 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9019 	if (pathBuffer.InitCheck() != B_OK)
9020 		return B_NO_MEMORY;
9021 
9022 	char *path = pathBuffer.LockBuffer();
9023 
9024 	if (userPath != NULL) {
9025 		if (!IS_USER_ADDRESS(userPath)
9026 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9027 			return B_BAD_ADDRESS;
9028 	}
9029 
9030 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9031 }
9032 
9033 
9034 status_t
9035 _user_change_root(const char *userPath)
9036 {
9037 	// only root is allowed to chroot()
9038 	if (geteuid() != 0)
9039 		return EPERM;
9040 
9041 	// alloc path buffer
9042 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9043 	if (pathBuffer.InitCheck() != B_OK)
9044 		return B_NO_MEMORY;
9045 
9046 	// copy userland path to kernel
9047 	char *path = pathBuffer.LockBuffer();
9048 	if (userPath != NULL) {
9049 		if (!IS_USER_ADDRESS(userPath)
9050 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9051 			return B_BAD_ADDRESS;
9052 	}
9053 
9054 	// get the vnode
9055 	struct vnode* vnode;
9056 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9057 	if (status != B_OK)
9058 		return status;
9059 
9060 	// set the new root
9061 	struct io_context* context = get_current_io_context(false);
9062 	mutex_lock(&sIOContextRootLock);
9063 	struct vnode* oldRoot = context->root;
9064 	context->root = vnode;
9065 	mutex_unlock(&sIOContextRootLock);
9066 
9067 	put_vnode(oldRoot);
9068 
9069 	return B_OK;
9070 }
9071 
9072 
9073 int
9074 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
9075 	uint32 flags, port_id port, int32 token)
9076 {
9077 	char *query;
9078 
9079 	if (device < 0 || userQuery == NULL || queryLength == 0)
9080 		return B_BAD_VALUE;
9081 
9082 	// this is a safety restriction
9083 	if (queryLength >= 65536)
9084 		return B_NAME_TOO_LONG;
9085 
9086 	query = (char *)malloc(queryLength + 1);
9087 	if (query == NULL)
9088 		return B_NO_MEMORY;
9089 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9090 		free(query);
9091 		return B_BAD_ADDRESS;
9092 	}
9093 
9094 	int fd = query_open(device, query, flags, port, token, false);
9095 
9096 	free(query);
9097 	return fd;
9098 }
9099 
9100 
9101 #include "vfs_request_io.cpp"
9102