xref: /haiku/src/system/kernel/fs/vfs.cpp (revision cfc3fa87da824bdf593eb8b817a83b6376e77935)
1 /*
2  * Copyright 2005-2008, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 /*! Virtual File System and File System Interface Layer */
11 
12 
13 #include <ctype.h>
14 #include <fcntl.h>
15 #include <limits.h>
16 #include <stddef.h>
17 #include <stdio.h>
18 #include <string.h>
19 #include <sys/file.h>
20 #include <sys/resource.h>
21 #include <sys/stat.h>
22 #include <unistd.h>
23 
24 #include <fs_info.h>
25 #include <fs_interface.h>
26 #include <fs_volume.h>
27 #include <OS.h>
28 #include <StorageDefs.h>
29 
30 #include <util/AutoLock.h>
31 
32 #include <block_cache.h>
33 #include <fd.h>
34 #include <file_cache.h>
35 #include <khash.h>
36 #include <KPath.h>
37 #include <lock.h>
38 #include <syscalls.h>
39 #include <syscall_restart.h>
40 #include <vfs.h>
41 #include <vm.h>
42 #include <vm_cache.h>
43 #include <vm_low_memory.h>
44 
45 #include <boot/kernel_args.h>
46 #include <disk_device_manager/KDiskDevice.h>
47 #include <disk_device_manager/KDiskDeviceManager.h>
48 #include <disk_device_manager/KDiskDeviceUtils.h>
49 #include <disk_device_manager/KDiskSystem.h>
50 #include <fs/node_monitor.h>
51 
52 #include "fifo.h"
53 
54 
55 //#define TRACE_VFS
56 #ifdef TRACE_VFS
57 #	define TRACE(x) dprintf x
58 #	define FUNCTION(x) dprintf x
59 #else
60 #	define TRACE(x) ;
61 #	define FUNCTION(x) ;
62 #endif
63 
64 #define ADD_DEBUGGER_COMMANDS
65 
66 
67 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
68 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
69 
70 #define FS_CALL(vnode, op, params...) \
71 			vnode->ops->op(vnode->mount->volume, vnode, params)
72 #define FS_CALL_NO_PARAMS(vnode, op) \
73 			vnode->ops->op(vnode->mount->volume, vnode)
74 #define FS_MOUNT_CALL(mount, op, params...) \
75 			mount->volume->ops->op(mount->volume, params)
76 #define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
77 			mount->volume->ops->op(mount->volume)
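
// For example, FS_CALL(vnode, read_stat, &stat) expands to
// vnode->ops->read_stat(vnode->mount->volume, vnode, &stat) - the volume
// and the vnode are passed to the FS hook implicitly.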
78 
79 
80 const static uint32 kMaxUnusedVnodes = 8192;
81 	// This is the maximum number of unused vnodes that the system
82 	// will keep around (weak limit, if there is enough memory left,
83 	// they won't get flushed even when hitting that limit).
84 	// It may be chosen with respect to the available memory or enhanced
85 	// by some timestamp/frequency heuristic.
86 
87 struct vnode : fs_vnode {
88 	struct vnode	*next;
89 	vm_cache		*cache;
90 	dev_t			device;
91 	list_link		mount_link;
92 	list_link		unused_link;
93 	ino_t			id;
94 	struct fs_mount	*mount;
95 	struct vnode	*covered_by;
96 	int32			ref_count;
97 	uint32			type : 29;
98 						// TODO: S_INDEX_DIR actually needs another bit.
99 						// Better combine this field with the following ones.
100 	uint32			remove : 1;
101 	uint32			busy : 1;
102 	uint32			unpublished : 1;
103 	struct advisory_locking	*advisory_locking;
104 	struct file_descriptor *mandatory_locked_by;
105 };
106 
107 struct vnode_hash_key {
108 	dev_t	device;
109 	ino_t	vnode;
110 };
111 
112 /*!	\brief Structure to manage a mounted file system
113 
114 	Note: The root_vnode and covers_vnode fields (what others?) are
115 	initialized in fs_mount() and not changed afterwards. That is, as soon
116 	as the mount is mounted and it is ensured that it won't be unmounted
117 	(e.g. by holding a reference to a vnode of that mount), (read) access
118 	to those fields is always safe, even without additional locking. Moreover,
119 	while mounted the mount holds a reference to the covers_vnode, thus
120 	making the access path vnode->mount->covers_vnode->mount->... safe if a
121 	reference to vnode is held (note that for the root mount covers_vnode
122 	is NULL, though).
123 */
124 struct fs_mount {
125 	struct fs_mount	*next;
126 	file_system_module_info *fs;
127 	dev_t			id;
128 	fs_volume		*volume;
129 	char			*device_name;
130 	char			*fs_name;
131 	recursive_lock	rlock;	// guards the vnodes list
132 	struct vnode	*root_vnode;
133 	struct vnode	*covers_vnode;
134 	KPartition		*partition;
135 	struct list		vnodes;
136 	bool			unmounting;
137 	bool			owns_file_device;
138 };
139 
140 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
141 	list_link		link;
142 	team_id			team;
143 	pid_t			session;
144 	off_t			start;
145 	off_t			end;
146 	bool			shared;
147 };
148 
149 typedef DoublyLinkedList<advisory_lock> LockList;
150 
151 struct advisory_locking {
152 	sem_id			lock;
153 	sem_id			wait_sem;
154 	LockList		locks;
155 };
156 
157 static mutex sFileSystemsMutex;
158 
159 /*!	\brief Guards sMountsTable.
160 
161 	The holder is allowed read/write access to sMountsTable.
162 	Manipulation of the fs_mount structures themselves
163 	(and their destruction) requires different locks though.
164 */
165 static mutex sMountMutex;
166 
167 /*!	\brief Guards mount/unmount operations.
168 
169 	fs_mount() and fs_unmount() hold the lock during their whole operation.
170 	That is, locking the lock ensures that no FS is mounted/unmounted. In
171 	particular this means that
172 	- sMountsTable will not be modified,
173 	- the fields immutable after initialization of the fs_mount structures in
174 	  sMountsTable will not be modified,
175 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
176 
177 	The thread trying to lock the lock must not hold sVnodeMutex or
178 	sMountMutex.
179 */
180 static recursive_lock sMountOpLock;
181 
182 /*!	\brief Guards the vnode::covered_by field of any vnode
183 
184 	The holder is allowed to read access the vnode::covered_by field of any
185 	vnode. Additionally holding sMountOpLock allows for write access.
186 
187 	The thread trying to lock the mutex must not hold sVnodeMutex.
188 */
189 static mutex sVnodeCoveredByMutex;
190 
191 /*!	\brief Guards sVnodeTable.
192 
193 	The holder is allowed read/write access to sVnodeTable and to
194 	any unbusy vnode in that table, except for the immutable fields (device,
195 	id, private_node, mount), to which only read-only access is allowed,
196 	and for the covered_by field, which is guarded by sMountOpLock and
197 	sVnodeCoveredByMutex.
198 
199 	The thread trying to lock the mutex must not hold sMountMutex.
200 	You must not have this mutex held when calling create_sem(), as this
201 	might call vfs_free_unused_vnodes().
202 */
203 static mutex sVnodeMutex;
204 
205 /*!	\brief Guards io_context::root.
206 
207 	Must be held when setting or getting the io_context::root field.
208 	The only operation allowed while holding this lock besides getting or
209 	setting the field is inc_vnode_ref_count() on io_context::root.
210 */
211 static benaphore sIOContextRootLock;
212 
213 #define VNODE_HASH_TABLE_SIZE 1024
214 static hash_table *sVnodeTable;
215 static list sUnusedVnodeList;
216 static uint32 sUnusedVnodes = 0;
217 static struct vnode *sRoot;
218 
219 #define MOUNTS_HASH_TABLE_SIZE 16
220 static hash_table *sMountsTable;
221 static dev_t sNextMountID = 1;
222 
223 #define MAX_TEMP_IO_VECS 8
224 
225 mode_t __gUmask = 022;
226 
227 /* function declarations */
228 
229 // file descriptor operation prototypes
230 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
231 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
232 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
233 static void file_free_fd(struct file_descriptor *);
234 static status_t file_close(struct file_descriptor *);
235 static status_t file_select(struct file_descriptor *, uint8 event,
236 	struct selectsync *sync);
237 static status_t file_deselect(struct file_descriptor *, uint8 event,
238 	struct selectsync *sync);
239 static status_t dir_read(struct io_context *, struct file_descriptor *,
240 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
241 static status_t dir_read(struct io_context* ioContext, struct vnode *vnode,
242 	void *cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
243 static status_t dir_rewind(struct file_descriptor *);
244 static void dir_free_fd(struct file_descriptor *);
245 static status_t dir_close(struct file_descriptor *);
246 static status_t attr_dir_read(struct io_context *, struct file_descriptor *,
247 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
248 static status_t attr_dir_rewind(struct file_descriptor *);
249 static void attr_dir_free_fd(struct file_descriptor *);
250 static status_t attr_dir_close(struct file_descriptor *);
251 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
252 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
253 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
254 static void attr_free_fd(struct file_descriptor *);
255 static status_t attr_close(struct file_descriptor *);
256 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
257 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
258 static status_t index_dir_read(struct io_context *, struct file_descriptor *,
259 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
260 static status_t index_dir_rewind(struct file_descriptor *);
261 static void index_dir_free_fd(struct file_descriptor *);
262 static status_t index_dir_close(struct file_descriptor *);
263 static status_t query_read(struct io_context *, struct file_descriptor *,
264 	struct dirent *buffer, size_t bufferSize, uint32 *_count);
265 static status_t query_rewind(struct file_descriptor *);
266 static void query_free_fd(struct file_descriptor *);
267 static status_t query_close(struct file_descriptor *);
268 
269 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
270 static status_t common_read_stat(struct file_descriptor *, struct stat *);
271 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
272 
273 static status_t common_path_read_stat(int fd, char *path, bool traverseLeafLink,
274 	struct stat *stat, bool kernel);
275 
276 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
277 	bool traverseLeafLink, int count, bool kernel,
278 	struct vnode **_vnode, ino_t *_parentID);
279 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer,
280 	size_t bufferSize, bool kernel);
281 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
282 	struct vnode **_vnode, ino_t *_parentID, bool kernel);
283 static void inc_vnode_ref_count(struct vnode *vnode);
284 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
285 static inline void put_vnode(struct vnode *vnode);
286 static status_t fs_unmount(char *path, dev_t mountID, uint32 flags,
287 	bool kernel);
288 
289 
290 static struct fd_ops sFileOps = {
291 	file_read,
292 	file_write,
293 	file_seek,
294 	common_ioctl,
295 	file_select,
296 	file_deselect,
297 	NULL,		// read_dir()
298 	NULL,		// rewind_dir()
299 	common_read_stat,
300 	common_write_stat,
301 	file_close,
302 	file_free_fd
303 };
304 
305 static struct fd_ops sDirectoryOps = {
306 	NULL,		// read()
307 	NULL,		// write()
308 	NULL,		// seek()
309 	common_ioctl,
310 	NULL,		// select()
311 	NULL,		// deselect()
312 	dir_read,
313 	dir_rewind,
314 	common_read_stat,
315 	common_write_stat,
316 	dir_close,
317 	dir_free_fd
318 };
319 
320 static struct fd_ops sAttributeDirectoryOps = {
321 	NULL,		// read()
322 	NULL,		// write()
323 	NULL,		// seek()
324 	common_ioctl,
325 	NULL,		// select()
326 	NULL,		// deselect()
327 	attr_dir_read,
328 	attr_dir_rewind,
329 	common_read_stat,
330 	common_write_stat,
331 	attr_dir_close,
332 	attr_dir_free_fd
333 };
334 
335 static struct fd_ops sAttributeOps = {
336 	attr_read,
337 	attr_write,
338 	attr_seek,
339 	common_ioctl,
340 	NULL,		// select()
341 	NULL,		// deselect()
342 	NULL,		// read_dir()
343 	NULL,		// rewind_dir()
344 	attr_read_stat,
345 	attr_write_stat,
346 	attr_close,
347 	attr_free_fd
348 };
349 
350 static struct fd_ops sIndexDirectoryOps = {
351 	NULL,		// read()
352 	NULL,		// write()
353 	NULL,		// seek()
354 	NULL,		// ioctl()
355 	NULL,		// select()
356 	NULL,		// deselect()
357 	index_dir_read,
358 	index_dir_rewind,
359 	NULL,		// read_stat()
360 	NULL,		// write_stat()
361 	index_dir_close,
362 	index_dir_free_fd
363 };
364 
365 #if 0
366 static struct fd_ops sIndexOps = {
367 	NULL,		// read()
368 	NULL,		// write()
369 	NULL,		// seek()
370 	NULL,		// ioctl()
371 	NULL,		// select()
372 	NULL,		// deselect()
373 	NULL,		// dir_read()
374 	NULL,		// dir_rewind()
375 	index_read_stat,	// read_stat()
376 	NULL,		// write_stat()
377 	NULL,		// dir_close()
378 	NULL		// free_fd()
379 };
380 #endif
381 
382 static struct fd_ops sQueryOps = {
383 	NULL,		// read()
384 	NULL,		// write()
385 	NULL,		// seek()
386 	NULL,		// ioctl()
387 	NULL,		// select()
388 	NULL,		// deselect()
389 	query_read,
390 	query_rewind,
391 	NULL,		// read_stat()
392 	NULL,		// write_stat()
393 	query_close,
394 	query_free_fd
395 };
396 
397 
398 // VNodePutter
399 class VNodePutter {
400 public:
401 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
402 
403 	~VNodePutter()
404 	{
405 		Put();
406 	}
407 
408 	void SetTo(struct vnode *vnode)
409 	{
410 		Put();
411 		fVNode = vnode;
412 	}
413 
414 	void Put()
415 	{
416 		if (fVNode) {
417 			put_vnode(fVNode);
418 			fVNode = NULL;
419 		}
420 	}
421 
422 	struct vnode *Detach()
423 	{
424 		struct vnode *vnode = fVNode;
425 		fVNode = NULL;
426 		return vnode;
427 	}
428 
429 private:
430 	struct vnode *fVNode;
431 };
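
// Typical usage (a sketch; "device", "id", and do_something() are
// hypothetical): a VNodePutter puts its vnode when the scope is left,
// unless it has been Detach()ed first.
//
//	struct vnode *vnode;
//	if (get_vnode(device, id, &vnode, true, false) != B_OK)
//		return B_ERROR;
//	VNodePutter putter(vnode);
//	if (do_something(vnode) != B_OK)
//		return B_ERROR;		// putter calls put_vnode() here
//	putter.Detach();		// keep the reference on success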
432 
433 
434 class FDCloser {
435 public:
436 	FDCloser() : fFD(-1), fKernel(true) {}
437 
438 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
439 
440 	~FDCloser()
441 	{
442 		Close();
443 	}
444 
445 	void SetTo(int fd, bool kernel)
446 	{
447 		Close();
448 		fFD = fd;
449 		fKernel = kernel;
450 	}
451 
452 	void Close()
453 	{
454 		if (fFD >= 0) {
455 			if (fKernel)
456 				_kern_close(fFD);
457 			else
458 				_user_close(fFD);
459 			fFD = -1;
460 		}
461 	}
462 
463 	int Detach()
464 	{
465 		int fd = fFD;
466 		fFD = -1;
467 		return fd;
468 	}
469 
470 private:
471 	int		fFD;
472 	bool	fKernel;
473 };
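
// FDCloser works analogously for file descriptors: Close() calls either
// _kern_close() or _user_close(), depending on whether the descriptor lives
// in the kernel's or the calling team's IO context.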
474 
475 
476 static int
477 mount_compare(void *_m, const void *_key)
478 {
479 	struct fs_mount *mount = (fs_mount *)_m;
480 	const dev_t *id = (dev_t *)_key;
481 
482 	if (mount->id == *id)
483 		return 0;
484 
485 	return -1;
486 }
487 
488 
489 static uint32
490 mount_hash(void *_m, const void *_key, uint32 range)
491 {
492 	struct fs_mount *mount = (fs_mount *)_m;
493 	const dev_t *id = (dev_t *)_key;
494 
495 	if (mount)
496 		return mount->id % range;
497 
498 	return (uint32)*id % range;
499 }
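
// Note: the hash table invokes the compare/hash functions either with an
// element (first argument != NULL) to find its bucket, or with just a key
// when looking an element up - hence the two code paths above.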
500 
501 
502 /*! Finds the mounted device (the fs_mount structure) with the given ID.
503 	Note, you must hold the sMountMutex lock when you call this function.
504 */
505 static struct fs_mount *
506 find_mount(dev_t id)
507 {
508 	ASSERT_LOCKED_MUTEX(&sMountMutex);
509 
510 	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
511 }
512 
513 
514 static status_t
515 get_mount(dev_t id, struct fs_mount **_mount)
516 {
517 	struct fs_mount *mount;
518 	status_t status;
519 
520 	MutexLocker nodeLocker(sVnodeMutex);
521 	MutexLocker mountLocker(sMountMutex);
522 
523 	mount = find_mount(id);
524 	if (mount == NULL)
525 		return B_BAD_VALUE;
526 
527 	struct vnode* rootNode = mount->root_vnode;
528 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
529 		// might have been called during a mount/unmount operation
530 		return B_BUSY;
531 	}
532 
533 	inc_vnode_ref_count(mount->root_vnode);
534 	*_mount = mount;
535 	return B_OK;
536 }
537 
538 
539 static void
540 put_mount(struct fs_mount *mount)
541 {
542 	if (mount)
543 		put_vnode(mount->root_vnode);
544 }
545 
546 
547 static status_t
548 put_file_system(file_system_module_info *fs)
549 {
550 	return put_module(fs->info.name);
551 }
552 
553 
554 /*!	Tries to open the specified file system module.
555 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
556 	Returns a pointer to the file system module interface, or NULL if it
557 	could not open the module.
558 */
559 static file_system_module_info *
560 get_file_system(const char *fsName)
561 {
562 	char name[B_FILE_NAME_LENGTH];
563 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
564 		// construct module name if we didn't get one
565 		// (we currently support only one API)
566 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
567 		fsName = NULL;
568 	}
569 
570 	file_system_module_info *info;
571 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
572 		return NULL;
573 
574 	return info;
575 }
576 
577 
578 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
579 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
580 	The name is allocated for you, and you have to free() it when you're
581 	done with it.
582 	Returns NULL if the required memory is not available.
583 */
584 static char *
585 get_file_system_name(const char *fsName)
586 {
587 	const size_t length = strlen("file_systems/");
588 
589 	if (strncmp(fsName, "file_systems/", length)) {
590 		// the name already seems to be the module's file name
591 		return strdup(fsName);
592 	}
593 
594 	fsName += length;
595 	const char *end = strchr(fsName, '/');
596 	if (end == NULL) {
597 		// this doesn't seem to be a valid name, but well...
598 		return strdup(fsName);
599 	}
600 
601 	// cut off the trailing /v1
602 
603 	char *name = (char *)malloc(end + 1 - fsName);
604 	if (name == NULL)
605 		return NULL;
606 
607 	strlcpy(name, fsName, end + 1 - fsName);
608 	return name;
609 }
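
// Example: get_file_system_name("file_systems/bfs/v1") returns "bfs", while
// get_file_system_name("bfs") simply returns a copy of "bfs".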
610 
611 
612 static int
613 vnode_compare(void *_vnode, const void *_key)
614 {
615 	struct vnode *vnode = (struct vnode *)_vnode;
616 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
617 
618 	if (vnode->device == key->device && vnode->id == key->vnode)
619 		return 0;
620 
621 	return -1;
622 }
623 
624 
625 static uint32
626 vnode_hash(void *_vnode, const void *_key, uint32 range)
627 {
628 	struct vnode *vnode = (struct vnode *)_vnode;
629 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
630 
631 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
632 
633 	if (vnode != NULL)
634 		return VHASH(vnode->device, vnode->id) % range;
635 
636 	return VHASH(key->device, key->vnode) % range;
637 
638 #undef VHASH
639 }
640 
641 
642 static void
643 add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
644 {
645 	recursive_lock_lock(&mount->rlock);
646 
647 	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);
648 
649 	recursive_lock_unlock(&mount->rlock);
650 }
651 
652 
653 static void
654 remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
655 {
656 	recursive_lock_lock(&mount->rlock);
657 
658 	list_remove_link(&vnode->mount_link);
659 	vnode->mount_link.next = vnode->mount_link.prev = NULL;
660 
661 	recursive_lock_unlock(&mount->rlock);
662 }
663 
664 
665 static status_t
666 create_new_vnode(struct vnode **_vnode, dev_t mountID, ino_t vnodeID)
667 {
668 	FUNCTION(("create_new_vnode()\n"));
669 
670 	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
671 	if (vnode == NULL)
672 		return B_NO_MEMORY;
673 
674 	// initialize basic values
675 	memset(vnode, 0, sizeof(struct vnode));
676 	vnode->device = mountID;
677 	vnode->id = vnodeID;
678 
679 	// add the vnode to the mount structure
680 	mutex_lock(&sMountMutex);
681 	vnode->mount = find_mount(mountID);
682 	if (!vnode->mount || vnode->mount->unmounting) {
683 		mutex_unlock(&sMountMutex);
684 		free(vnode);
685 		return B_ENTRY_NOT_FOUND;
686 	}
687 
688 	hash_insert(sVnodeTable, vnode);
689 	add_vnode_to_mount_list(vnode, vnode->mount);
690 
691 	mutex_unlock(&sMountMutex);
692 
693 	vnode->ref_count = 1;
694 	*_vnode = vnode;
695 
696 	return B_OK;
697 }
698 
699 
700 /*!	Frees the vnode and all resources it has acquired, and removes
701 	it from the vnode hash as well as from its mount structure.
702 	Will also make sure that any cache modifications are written back.
703 */
704 static void
705 free_vnode(struct vnode *vnode, bool reenter)
706 {
707 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);
708 
709 	// write back any changes in this vnode's cache -- but only
710 	// if the vnode won't be deleted, in which case the changes
711 	// will be discarded
712 
713 	if (!vnode->remove && HAS_FS_CALL(vnode, fsync))
714 		FS_CALL_NO_PARAMS(vnode, fsync);
715 
716 	// Note: If this vnode has a cache attached, there will still be two
717 	// references to that cache at this point. The last one belongs to the vnode
718 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
719 	// cache. Each but the last reference to a cache also includes a reference
720 	// to the vnode. The file cache, however, released its reference (cf.
721 	// file_cache_create()), so that this vnode's ref count had a chance to
722 	// drop to 0 at all. Deleting the file cache now will cause the next to
723 	// last cache reference to be released, which will also release a (no
724 	// longer existing) vnode reference. To avoid problems, we set the vnode's
725 	// ref count, so that it will neither become negative nor 0.
726 	vnode->ref_count = 2;
727 
728 	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
729 	// cache either (i.e. no one can get a cache reference while we're deleting
730 	// the vnode). This is, however, not the case for the page daemon. It gets
731 	// its cache references via the pages it scans, so it can in fact get a
732 	// vnode reference while we're deleting the vnode.
733 
734 	if (!vnode->unpublished) {
735 		if (vnode->remove)
736 			FS_CALL(vnode, remove_vnode, reenter);
737 		else
738 			FS_CALL(vnode, put_vnode, reenter);
739 	}
740 
741 	// The file system has removed the resources of the vnode now, so we can
742 	// make it available again (and remove the busy vnode from the hash)
743 	mutex_lock(&sVnodeMutex);
744 	hash_remove(sVnodeTable, vnode);
745 	mutex_unlock(&sVnodeMutex);
746 
747 	// if we have a vm_cache attached, remove it
748 	if (vnode->cache)
749 		vm_cache_release_ref(vnode->cache);
750 
751 	vnode->cache = NULL;
752 
753 	remove_vnode_from_mount_list(vnode, vnode->mount);
754 
755 	free(vnode);
756 }
757 
758 
759 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
760 	if the counter dropped to 0.
761 
762 	The caller must, of course, own a reference to the vnode to call this
763 	function.
764 	The caller must not hold the sVnodeMutex or the sMountMutex.
765 
766 	\param vnode the vnode.
767 	\param reenter \c true, if this function is called (indirectly) from within
768 		   a file system.
769 	\return \c B_OK, if everything went fine, an error code otherwise.
770 */
771 static status_t
772 dec_vnode_ref_count(struct vnode *vnode, bool reenter)
773 {
774 	mutex_lock(&sVnodeMutex);
775 
776 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
777 
778 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
779 
780 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
781 
782 	if (oldRefCount == 1) {
783 		if (vnode->busy)
784 			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
785 
786 		bool freeNode = false;
787 
788 		// Just insert the vnode into an unused list if we don't need
789 		// to delete it
790 		if (vnode->remove) {
791 			vnode->busy = true;
792 			freeNode = true;
793 		} else {
794 			list_add_item(&sUnusedVnodeList, vnode);
795 			if (++sUnusedVnodes > kMaxUnusedVnodes
796 				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
797 				// there are too many unused vnodes so we free the oldest one
798 				// ToDo: evaluate this mechanism
799 				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
800 				vnode->busy = true;
801 				freeNode = true;
802 				sUnusedVnodes--;
803 			}
804 		}
805 
806 		mutex_unlock(&sVnodeMutex);
807 
808 		if (freeNode)
809 			free_vnode(vnode, reenter);
810 	} else
811 		mutex_unlock(&sVnodeMutex);
812 
813 	return B_OK;
814 }
815 
816 
817 /*!	\brief Increments the reference counter of the given vnode.
818 
819 	The caller must either already have a reference to the vnode or hold
820 	the sVnodeMutex.
821 
822 	\param vnode the vnode.
823 */
824 static void
825 inc_vnode_ref_count(struct vnode *vnode)
826 {
827 	atomic_add(&vnode->ref_count, 1);
828 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
829 }
830 
831 
832 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
833 
834 	The caller must hold the sVnodeMutex.
835 
836 	\param mountID the mount ID.
837 	\param vnodeID the node ID.
838 
839 	\return The vnode structure, if it was found in the hash table, \c NULL
840 			otherwise.
841 */
842 static struct vnode *
843 lookup_vnode(dev_t mountID, ino_t vnodeID)
844 {
845 	struct vnode_hash_key key;
846 
847 	key.device = mountID;
848 	key.vnode = vnodeID;
849 
850 	return (vnode *)hash_lookup(sVnodeTable, &key);
851 }
852 
853 
854 static bool
855 is_special_node_type(int type)
856 {
857 	// at the moment only FIFOs are supported
858 	return S_ISFIFO(type);
859 }
860 
861 
862 static status_t
863 create_special_sub_node(struct vnode* vnode, uint32 flags)
864 {
865 	if (S_ISFIFO(vnode->type))
866 		return create_fifo_vnode(vnode->mount->volume, vnode);
867 
868 	return B_BAD_VALUE;
869 }
870 
871 
872 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
873 
874 	If the node is not yet in memory, it will be loaded.
875 
876 	The caller must not hold the sVnodeMutex or the sMountMutex.
877 
878 	\param mountID the mount ID.
879 	\param vnodeID the node ID.
880 	\param _vnode Pointer to a vnode* variable into which the pointer to the
881 		   retrieved vnode structure shall be written.
882 	\param reenter \c true, if this function is called (indirectly) from within
883 		   a file system.
884 	\return \c B_OK, if everything went fine, an error code otherwise.
885 */
886 static status_t
887 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode, bool canWait,
888 	int reenter)
889 {
890 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));
891 
892 	mutex_lock(&sVnodeMutex);
893 
894 	int32 tries = 1000;
895 		// try for 10 secs
896 restart:
897 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
898 	if (vnode && vnode->busy) {
899 		mutex_unlock(&sVnodeMutex);
900 		if (!canWait || --tries < 0) {
901 			// vnode doesn't seem to become unbusy
902 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
903 			return B_BUSY;
904 		}
905 		snooze(10000); // 10 ms
906 		mutex_lock(&sVnodeMutex);
907 		goto restart;
908 	}
909 
910 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
911 
912 	status_t status;
913 
914 	if (vnode) {
915 		if (vnode->ref_count == 0) {
916 			// this vnode has been unused before
917 			list_remove_item(&sUnusedVnodeList, vnode);
918 			sUnusedVnodes--;
919 		}
920 		inc_vnode_ref_count(vnode);
921 	} else {
922 		// we need to create a new vnode and read it in
923 		status = create_new_vnode(&vnode, mountID, vnodeID);
924 		if (status < B_OK)
925 			goto err;
926 
927 		vnode->busy = true;
928 		mutex_unlock(&sVnodeMutex);
929 
930 		int type;
931 		uint32 flags;
932 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
933 			&flags, reenter);
934 		if (status == B_OK && vnode->private_node == NULL)
935 			status = B_BAD_VALUE;
936 
937 		bool gotNode = status == B_OK;
938 		bool publishSpecialSubNode = false;
939 		if (gotNode) {
940 			vnode->type = type;
941 			publishSpecialSubNode = is_special_node_type(type)
942 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
943 		}
944 
945 		if (gotNode && publishSpecialSubNode)
946 			status = create_special_sub_node(vnode, flags);
947 
948 		mutex_lock(&sVnodeMutex);
949 
950 		if (status < B_OK) {
951 			if (gotNode)
952 				FS_CALL(vnode, put_vnode, reenter);
953 
954 			goto err1;
955 		}
956 
957 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
958 		vnode->busy = false;
959 	}
960 
961 	mutex_unlock(&sVnodeMutex);
962 
963 	TRACE(("get_vnode: returning %p\n", vnode));
964 
965 	*_vnode = vnode;
966 	return B_OK;
967 
968 err1:
969 	hash_remove(sVnodeTable, vnode);
970 	remove_vnode_from_mount_list(vnode, vnode->mount);
971 err:
972 	mutex_unlock(&sVnodeMutex);
973 	if (vnode)
974 		free(vnode);
975 
976 	return status;
977 }
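
// get_vnode() implements the usual publish protocol: a freshly created vnode
// is inserted into the table marked busy, the FS's get_vnode() hook is called
// without sVnodeMutex held, and only then is the vnode marked unbusy.
// Concurrent lookups wait (up to 10 seconds) for the busy flag to clear.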
978 
979 
980 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
981 	if the counter dropped to 0.
982 
983 	The caller must, of course, own a reference to the vnode to call this
984 	function.
985 	The caller must not hold the sVnodeMutex or the sMountMutex.
986 
987 	\param vnode the vnode.
988 */
989 static inline void
990 put_vnode(struct vnode *vnode)
991 {
992 	dec_vnode_ref_count(vnode, false);
993 }
994 
995 
996 static void
997 vnode_low_memory_handler(void */*data*/, int32 level)
998 {
999 	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));
1000 
1001 	uint32 count = 1;
1002 	switch (level) {
1003 		case B_NO_LOW_MEMORY:
1004 			return;
1005 		case B_LOW_MEMORY_NOTE:
1006 			count = sUnusedVnodes / 100;
1007 			break;
1008 		case B_LOW_MEMORY_WARNING:
1009 			count = sUnusedVnodes / 10;
1010 			break;
1011 		case B_LOW_MEMORY_CRITICAL:
1012 			count = sUnusedVnodes;
1013 			break;
1014 	}
1015 
1016 	if (count > sUnusedVnodes)
1017 		count = sUnusedVnodes;
1018 
1019 	// first, write back the modified pages of some unused vnodes
1020 
1021 	uint32 freeCount = count;
1022 
1023 	for (uint32 i = 0; i < count; i++) {
1024 		mutex_lock(&sVnodeMutex);
1025 		struct vnode *vnode = (struct vnode *)list_remove_head_item(
1026 			&sUnusedVnodeList);
1027 		if (vnode == NULL) {
1028 			mutex_unlock(&sVnodeMutex);
1029 			break;
1030 		}
1031 
1032 		inc_vnode_ref_count(vnode);
1033 		sUnusedVnodes--;
1034 
1035 		mutex_unlock(&sVnodeMutex);
1036 
1037 		if (vnode->cache != NULL)
1038 			vm_cache_write_modified(vnode->cache, false);
1039 
1040 		dec_vnode_ref_count(vnode, false);
1041 	}
1042 
1043 	// and then free them
1044 
1045 	for (uint32 i = 0; i < freeCount; i++) {
1046 		mutex_lock(&sVnodeMutex);
1047 
1048 		// We're removing vnodes from the tail of the list - hoping it's
1049 		// one of those we have just written back; otherwise we'll write
1050 		// back the vnode with the busy flag turned on, and that might
1051 		// take some time.
1052 		struct vnode *vnode = (struct vnode *)list_remove_tail_item(
1053 			&sUnusedVnodeList);
1054 		if (vnode == NULL) {
1055 			mutex_unlock(&sVnodeMutex);
1056 			break;
1057 		}
1058 		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));
1059 
1060 		vnode->busy = true;
1061 		sUnusedVnodes--;
1062 
1063 		mutex_unlock(&sVnodeMutex);
1064 
1065 		free_vnode(vnode, false);
1066 	}
1067 }
1068 
1069 
1070 static inline void
1071 put_advisory_locking(struct advisory_locking *locking)
1072 {
1073 	release_sem(locking->lock);
1074 }
1075 
1076 
1077 /*!	Returns the advisory_locking object of the \a vnode in case it
1078 	has one, and locks it.
1079 	You have to call put_advisory_locking() when you're done with
1080 	it.
1081 	Note, you must not have the vnode mutex locked when calling
1082 	this function.
1083 */
1084 static struct advisory_locking *
1085 get_advisory_locking(struct vnode *vnode)
1086 {
1087 	mutex_lock(&sVnodeMutex);
1088 
1089 	struct advisory_locking *locking = vnode->advisory_locking;
1090 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1091 
1092 	mutex_unlock(&sVnodeMutex);
1093 
1094 	if (lock >= B_OK)
1095 		lock = acquire_sem(lock);
1096 	if (lock < B_OK) {
1097 		// This means the locking has been deleted in the meantime
1098 		// or had never existed in the first place - otherwise, we
1099 		// would get the lock at some point.
1100 		return NULL;
1101 	}
1102 
1103 	return locking;
1104 }
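
// Note: locking->lock serves as a binary lock here - get_advisory_locking()
// acquires it, and put_advisory_locking() releases it again.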
1105 
1106 
1107 /*!	Creates a locked advisory_locking object, and attaches it to the
1108 	given \a vnode.
1109 	Returns B_OK in case of success - also if the vnode got such an
1110 	object from someone else in the meantime; in that case you'll get
1111 	that one locked just the same.
1112 */
1113 static status_t
1114 create_advisory_locking(struct vnode *vnode)
1115 {
1116 	if (vnode == NULL)
1117 		return B_FILE_ERROR;
1118 
1119 	struct advisory_locking *locking = new(std::nothrow) advisory_locking;
1120 	if (locking == NULL)
1121 		return B_NO_MEMORY;
1122 
1123 	status_t status;
1124 
1125 	locking->wait_sem = create_sem(0, "advisory lock");
1126 	if (locking->wait_sem < B_OK) {
1127 		status = locking->wait_sem;
1128 		goto err1;
1129 	}
1130 
1131 	locking->lock = create_sem(0, "advisory locking");
1132 	if (locking->lock < B_OK) {
1133 		status = locking->lock;
1134 		goto err2;
1135 	}
1136 
1137 	// We need to set the locking structure atomically - someone
1138 	// else might set one at the same time
1139 	do {
1140 		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking,
1141 				(addr_t)locking, (addr_t)NULL) == (addr_t)NULL)
1142 			return B_OK;
1143 	} while (get_advisory_locking(vnode) == NULL);
1144 
1145 	status = B_OK;
1146 		// we delete the one we've just created, but nevertheless, the vnode
1147 		// does have a locking structure now
1148 
1149 	delete_sem(locking->lock);
1150 err2:
1151 	delete_sem(locking->wait_sem);
1152 err1:
1153 	delete locking;
1154 	return status;
1155 }
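
// The atomic_test_and_set() above resolves a creation race: if another
// thread attached an advisory_locking first, our compare-and-swap fails, we
// lock the existing structure via get_advisory_locking() instead, and the
// object created here is deleted again.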
1156 
1157 
1158 /*!	Retrieves the first lock that has been set by the current team.
1159 */
1160 static status_t
1161 get_advisory_lock(struct vnode *vnode, struct flock *flock)
1162 {
1163 	struct advisory_locking *locking = get_advisory_locking(vnode);
1164 	if (locking == NULL)
1165 		return B_BAD_VALUE;
1166 
1167 	// TODO: this should probably get the flock by its file descriptor!
1168 	team_id team = team_get_current_team_id();
1169 	status_t status = B_BAD_VALUE;
1170 
1171 	LockList::Iterator iterator = locking->locks.GetIterator();
1172 	while (iterator.HasNext()) {
1173 		struct advisory_lock *lock = iterator.Next();
1174 
1175 		if (lock->team == team) {
1176 			flock->l_start = lock->start;
1177 			flock->l_len = lock->end - lock->start + 1;
1178 			status = B_OK;
1179 			break;
1180 		}
1181 	}
1182 
1183 	put_advisory_locking(locking);
1184 	return status;
1185 }
1186 
1187 
1188 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1189 	with the advisory_lock \a lock.
1190 */
1191 static bool
1192 advisory_lock_intersects(struct advisory_lock *lock, struct flock *flock)
1193 {
1194 	if (flock == NULL)
1195 		return true;
1196 
1197 	return lock->start <= flock->l_start - 1 + flock->l_len
1198 		&& lock->end >= flock->l_start;
1199 }
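
// For example, a lock spanning [0, 9] intersects a flock with l_start 5 and
// l_len 10 (i.e. the range [5, 14]), since 0 <= 14 and 9 >= 5.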
1200 
1201 
1202 /*!	Removes the specified lock, or all locks of the calling team
1203 	if \a flock is NULL.
1204 */
1205 static status_t
1206 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1207 {
1208 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1209 
1210 	struct advisory_locking *locking = get_advisory_locking(vnode);
1211 	if (locking == NULL)
1212 		return B_OK;
1213 
1214 	// TODO: use the thread ID instead??
1215 	team_id team = team_get_current_team_id();
1216 	pid_t session = thread_get_current_thread()->team->session_id;
1217 
1218 	// find matching lock entries
1219 
1220 	LockList::Iterator iterator = locking->locks.GetIterator();
1221 	while (iterator.HasNext()) {
1222 		struct advisory_lock *lock = iterator.Next();
1223 		bool removeLock = false;
1224 
1225 		if (lock->session == session)
1226 			removeLock = true;
1227 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1228 			bool endsBeyond = false;
1229 			bool startsBefore = false;
1230 			if (flock != NULL) {
1231 				startsBefore = lock->start < flock->l_start;
1232 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1233 			}
1234 
1235 			if (!startsBefore && !endsBeyond) {
1236 				// lock is completely contained in flock
1237 				removeLock = true;
1238 			} else if (startsBefore && !endsBeyond) {
1239 				// cut the end of the lock
1240 				lock->end = flock->l_start - 1;
1241 			} else if (!startsBefore && endsBeyond) {
1242 				// cut the start of the lock
1243 				lock->start = flock->l_start + flock->l_len;
1244 			} else {
1245 				// divide the lock into two locks
1246 				struct advisory_lock *secondLock = new advisory_lock;
1247 				if (secondLock == NULL) {
1248 					// TODO: we should probably revert the locks we already
1249 					// changed... (ie. allocate upfront)
1250 					put_advisory_locking(locking);
1251 					return B_NO_MEMORY;
1252 				}
1253 
1254 				secondLock->team = lock->team;
1255 				secondLock->session = lock->session;
1256 				// values must already be normalized when getting here
1257 				secondLock->start = flock->l_start + flock->l_len;
1258 				secondLock->end = lock->end;
1259 				secondLock->shared = lock->shared;
1260 
1261 				lock->end = flock->l_start - 1;
1262 
1263 				locking->locks.Add(secondLock);
1264 			}
1265 		}
1266 
1267 		if (removeLock) {
1268 			// this lock is no longer used
1269 			iterator.Remove();
1270 			free(lock);
1271 		}
1272 	}
1273 
1274 	bool removeLocking = locking->locks.IsEmpty();
1275 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1276 
1277 	put_advisory_locking(locking);
1278 
1279 	if (removeLocking) {
1280 		// We can remove the whole advisory locking structure; it's no
1281 		// longer used
1282 		locking = get_advisory_locking(vnode);
1283 		if (locking != NULL) {
1284 			// the locking could have been changed in the mean time
1285 			if (locking->locks.IsEmpty()) {
1286 				vnode->advisory_locking = NULL;
1287 
1288 				// we've detached the locking from the vnode, so we can
1289 				// safely delete it
1290 				delete_sem(locking->lock);
1291 				delete_sem(locking->wait_sem);
1292 				delete locking;
1293 			} else {
1294 				// the locking is in use again
1295 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1296 			}
1297 		}
1298 	}
1299 
1300 	return B_OK;
1301 }
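
// The four overlap cases handled above, with F = [l_start, l_start + l_len
// - 1] denoting the range to unlock:
//	lock lies within F      -> remove the lock
//	lock starts before F    -> cut its end
//	lock ends beyond F      -> cut its start
//	lock encloses F         -> split it in two (copying the original end
//	                           into the second lock before cutting)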
1302 
1303 
1304 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1305 	will wait for the lock to become available, if there are any collisions
1306 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1307 
1308 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1309 	BSD flock() semantics are used, that is, all children can unlock the file
1310 	in question (we even allow parents to remove the lock, though, but that
1311 	seems to be in line with what the BSDs are doing).
1312 */
1313 static status_t
1314 acquire_advisory_lock(struct vnode *vnode, pid_t session, struct flock *flock,
1315 	bool wait)
1316 {
1317 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1318 		vnode, flock, wait ? "yes" : "no"));
1319 
1320 	bool shared = flock->l_type == F_RDLCK;
1321 	status_t status = B_OK;
1322 
1323 	// TODO: do deadlock detection!
1324 
1325 restart:
1326 	// if this vnode has an advisory_locking structure attached,
1327 	// lock that one and search for any colliding file lock
1328 	struct advisory_locking *locking = get_advisory_locking(vnode);
1329 	team_id team = team_get_current_team_id();
1330 	sem_id waitForLock = -1;
1331 
1332 	if (locking != NULL) {
1333 		// test for collisions
1334 		LockList::Iterator iterator = locking->locks.GetIterator();
1335 		while (iterator.HasNext()) {
1336 			struct advisory_lock *lock = iterator.Next();
1337 
1338 			// TODO: locks from the same team might be joinable!
1339 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1340 				// locks do overlap
1341 				if (!shared || !lock->shared) {
1342 					// we need to wait
1343 					waitForLock = locking->wait_sem;
1344 					break;
1345 				}
1346 			}
1347 		}
1348 
1349 		if (waitForLock < B_OK || !wait)
1350 			put_advisory_locking(locking);
1351 	}
1352 
1353 	// wait for the lock if we have to, or else return immediately
1354 
1355 	if (waitForLock >= B_OK) {
1356 		if (!wait)
1357 			status = session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1358 		else {
1359 			status = switch_sem_etc(locking->lock, waitForLock, 1,
1360 				B_CAN_INTERRUPT, 0);
1361 			if (status == B_OK) {
1362 				// see if we're still colliding
1363 				goto restart;
1364 			}
1365 		}
1366 	}
1367 
1368 	if (status < B_OK)
1369 		return status;
1370 
1371 	// install new lock
1372 
1373 	locking = get_advisory_locking(vnode);
1374 	if (locking == NULL) {
1375 		// we need to create a new locking object
1376 		status = create_advisory_locking(vnode);
1377 		if (status < B_OK)
1378 			return status;
1379 
1380 		locking = vnode->advisory_locking;
1381 			// we own the locking object, so it can't go away
1382 	}
1383 
1384 	struct advisory_lock *lock = (struct advisory_lock *)malloc(
1385 		sizeof(struct advisory_lock));
1386 	if (lock == NULL) {
1387 		if (waitForLock >= B_OK)
1388 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1389 		release_sem(locking->lock);
1390 		return B_NO_MEMORY;
1391 	}
1392 
1393 	lock->team = team_get_current_team_id();
1394 	lock->session = session;
1395 	// values must already be normalized when getting here
1396 	lock->start = flock->l_start;
1397 	lock->end = flock->l_start - 1 + flock->l_len;
1398 	lock->shared = shared;
1399 
1400 	locking->locks.Add(lock);
1401 	put_advisory_locking(locking);
1402 
1403 	return status;
1404 }
1405 
1406 
1407 /*!	Normalizes the \a flock structure to make it easier to compare the
1408 	structure with others. The l_start and l_len fields are set to absolute
1409 	values according to the l_whence field.
1410 */
1411 static status_t
1412 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1413 {
1414 	switch (flock->l_whence) {
1415 		case SEEK_SET:
1416 			break;
1417 		case SEEK_CUR:
1418 			flock->l_start += descriptor->pos;
1419 			break;
1420 		case SEEK_END:
1421 		{
1422 			struct vnode *vnode = descriptor->u.vnode;
1423 			struct stat stat;
1424 			status_t status;
1425 
1426 			if (!HAS_FS_CALL(vnode, read_stat))
1427 				return EOPNOTSUPP;
1428 
1429 			status = FS_CALL(vnode, read_stat, &stat);
1430 			if (status < B_OK)
1431 				return status;
1432 
1433 			flock->l_start += stat.st_size;
1434 			break;
1435 		}
1436 		default:
1437 			return B_BAD_VALUE;
1438 	}
1439 
1440 	if (flock->l_start < 0)
1441 		flock->l_start = 0;
1442 	if (flock->l_len == 0)
1443 		flock->l_len = OFF_MAX;
1444 
1445 	// don't let the offset and length overflow
1446 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1447 		flock->l_len = OFF_MAX - flock->l_start;
1448 
1449 	if (flock->l_len < 0) {
1450 		// a negative length reverses the region
1451 		flock->l_start += flock->l_len;
1452 		flock->l_len = -flock->l_len;
1453 	}
1454 
1455 	return B_OK;
1456 }
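
// Example: with descriptor->pos == 100, a flock of { l_whence = SEEK_CUR,
// l_start = 10, l_len = -20 } normalizes to the absolute range l_start = 90,
// l_len = 20 - the negative length reverses the region.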
1457 
1458 
1459 static void
1460 replace_vnode_if_disconnected(struct fs_mount* mount,
1461 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1462 	struct vnode* fallBack, bool lockRootLock)
1463 {
1464 	if (lockRootLock)
1465 		benaphore_lock(&sIOContextRootLock);
1466 
1467 	struct vnode* obsoleteVnode = NULL;
1468 
1469 	if (vnode != NULL && vnode->mount == mount
1470 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1471 		obsoleteVnode = vnode;
1472 
1473 		if (vnode == mount->root_vnode) {
1474 			// redirect the vnode to the covered vnode
1475 			vnode = mount->covers_vnode;
1476 		} else
1477 			vnode = fallBack;
1478 
1479 		if (vnode != NULL)
1480 			inc_vnode_ref_count(vnode);
1481 	}
1482 
1483 	if (lockRootLock)
1484 		benaphore_unlock(&sIOContextRootLock);
1485 
1486 	if (obsoleteVnode != NULL)
1487 		put_vnode(obsoleteVnode);
1488 }
1489 
1490 
1491 /*!	Disconnects all file descriptors that are associated with the
1492 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1493 	\a mount object.
1494 
1495 	Note, after you've called this function, there might still be ongoing
1496 	accesses - those that were already in progress won't be interrupted.
1497 	However, any subsequent access will fail.
1498 
1499 	This is not a cheap function and should be used with care and rarely.
1500 	TODO: there is currently no means to stop a blocking read/write!
1501 */
1502 void
1503 disconnect_mount_or_vnode_fds(struct fs_mount *mount,
1504 	struct vnode *vnodeToDisconnect)
1505 {
1506 	// iterate over all teams and peek into their file descriptors
1507 	int32 nextTeamID = 0;
1508 
1509 	while (true) {
1510 		struct io_context *context = NULL;
1511 		sem_id contextMutex = -1;
1512 		struct team *team = NULL;
1513 		team_id lastTeamID;
1514 
1515 		cpu_status state = disable_interrupts();
1516 		GRAB_TEAM_LOCK();
1517 
1518 		lastTeamID = peek_next_thread_id();
1519 		if (nextTeamID < lastTeamID) {
1520 			// get next valid team
1521 			while (nextTeamID < lastTeamID
1522 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1523 				nextTeamID++;
1524 			}
1525 
1526 			if (team) {
1527 				context = (io_context *)team->io_context;
1528 				contextMutex = context->io_mutex.sem;
1529 				nextTeamID++;
1530 			}
1531 		}
1532 
1533 		RELEASE_TEAM_LOCK();
1534 		restore_interrupts(state);
1535 
1536 		if (context == NULL)
1537 			break;
1538 
1539 		// we now have a context - since we couldn't lock it while having
1540 		// safe access to the team structure, we now need to lock the mutex
1541 		// manually
1542 
1543 		if (acquire_sem(contextMutex) != B_OK) {
1544 			// team seems to be gone, go over to the next team
1545 			continue;
1546 		}
1547 
1548 		// the team cannot be deleted completely while we're owning its
1549 		// io_context mutex, so we can safely play with it now
1550 
1551 		context->io_mutex.holder = thread_get_current_thread_id();
1552 
1553 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1554 			sRoot, true);
1555 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1556 			sRoot, false);
1557 
1558 		for (uint32 i = 0; i < context->table_size; i++) {
1559 			if (struct file_descriptor *descriptor = context->fds[i]) {
1560 				inc_fd_ref_count(descriptor);
1561 
1562 				// if this descriptor points at this mount, we
1563 				// need to disconnect it to be able to unmount
1564 				struct vnode *vnode = fd_vnode(descriptor);
1565 				if (vnodeToDisconnect != NULL) {
1566 					if (vnode == vnodeToDisconnect)
1567 						disconnect_fd(descriptor);
1568 				} else if ((vnode != NULL && vnode->mount == mount)
1569 					|| (vnode == NULL && descriptor->u.mount == mount))
1570 					disconnect_fd(descriptor);
1571 
1572 				put_fd(descriptor);
1573 			}
1574 		}
1575 
1576 		mutex_unlock(&context->io_mutex);
1577 	}
1578 }
1579 
1580 
1581 /*!	\brief Gets the root node of the current IO context.
1582 	If \a kernel is \c true, the kernel IO context will be used.
1583 	The caller obtains a reference to the returned node.
1584 */
1585 struct vnode*
1586 get_root_vnode(bool kernel)
1587 {
1588 	if (!kernel) {
1589 		// Get the root of the current team's IO context
1590 		struct io_context* context = get_current_io_context(kernel);
1591 
1592 		benaphore_lock(&sIOContextRootLock);
1593 
1594 		struct vnode* root = context->root;
1595 		if (root != NULL)
1596 			inc_vnode_ref_count(root);
1597 
1598 		benaphore_unlock(&sIOContextRootLock);
1599 
1600 		if (root != NULL)
1601 			return root;
1602 
1603 		// That should never happen.
1604 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1605 			"root\n", team_get_current_team_id());
1606 	}
1607 
1608 	inc_vnode_ref_count(sRoot);
1609 	return sRoot;
1610 }
1611 
1612 
1613 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1614 		   by.
1615 
1616 	Given an arbitrary vnode, the function checks whether the node is covered
1617 	by the root of a volume. If it is, the function obtains a reference to the
1618 	volume root node and returns it.
1619 
1620 	\param vnode The vnode in question.
1621 	\return The volume root vnode the vnode is covered by, if it is
1622 			indeed a mount point, or \c NULL otherwise.
1623 */
1624 static struct vnode *
1625 resolve_mount_point_to_volume_root(struct vnode *vnode)
1626 {
1627 	if (!vnode)
1628 		return NULL;
1629 
1630 	struct vnode *volumeRoot = NULL;
1631 
1632 	mutex_lock(&sVnodeCoveredByMutex);
1633 	if (vnode->covered_by) {
1634 		volumeRoot = vnode->covered_by;
1635 		inc_vnode_ref_count(volumeRoot);
1636 	}
1637 	mutex_unlock(&sVnodeCoveredByMutex);
1638 
1639 	return volumeRoot;
1640 }
1641 
1642 
1643 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1644 		   by.
1645 
1646 	Given an arbitrary vnode (identified by mount and node ID), the function
1647 	checks whether the node is covered by the root of a volume. If it is, the
1648 	function returns the mount and node ID of the volume root node. Otherwise
1649 	it simply returns the supplied mount and node ID.
1650 
1651 	In case of error (e.g. the supplied node could not be found) the variables
1652 	for storing the resolved mount and node ID remain untouched and an error
1653 	code is returned.
1654 
1655 	\param mountID The mount ID of the vnode in question.
1656 	\param nodeID The node ID of the vnode in question.
1657 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1658 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1659 	\return
1660 	- \c B_OK, if everything went fine,
1661 	- another error code, if something went wrong.
1662 */
1663 status_t
1664 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1665 	dev_t *resolvedMountID, ino_t *resolvedNodeID)
1666 {
1667 	// get the node
1668 	struct vnode *node;
1669 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1670 	if (error != B_OK)
1671 		return error;
1672 
1673 	// resolve the node
1674 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1675 	if (resolvedNode) {
1676 		put_vnode(node);
1677 		node = resolvedNode;
1678 	}
1679 
1680 	// set the return values
1681 	*resolvedMountID = node->device;
1682 	*resolvedNodeID = node->id;
1683 
1684 	put_vnode(node);
1685 
1686 	return B_OK;
1687 }
1688 
1689 
1690 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1691 
1692 	Given an arbitrary vnode, the function checks whether the node is the
1693 	root of a volume. If it is (and if it is not "/"), the function obtains
1694 	a reference to the underlying mount point node and returns it.
1695 
1696 	\param vnode The vnode in question (caller must have a reference).
1697 	\return The mount point vnode the vnode covers, if it is indeed a volume
1698 			root and not "/", or \c NULL otherwise.
1699 */
1700 static struct vnode *
1701 resolve_volume_root_to_mount_point(struct vnode *vnode)
1702 {
1703 	if (!vnode)
1704 		return NULL;
1705 
1706 	struct vnode *mountPoint = NULL;
1707 
1708 	struct fs_mount *mount = vnode->mount;
1709 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1710 		mountPoint = mount->covers_vnode;
1711 		inc_vnode_ref_count(mountPoint);
1712 	}
1713 
1714 	return mountPoint;
1715 }
1716 
1717 
1718 /*!	\brief Gets the directory path and leaf name for a given path.
1719 
1720 	The supplied \a path is transformed to refer to the directory part of
1721 	the entry identified by the original path, and into the buffer \a filename
1722 	the leaf name of the original entry is written.
1723 	Neither the returned path nor the leaf name can be expected to be
1724 	canonical.
1725 
1726 	\param path The path to be analyzed. Must be able to store at least one
1727 		   additional character.
1728 	\param filename The buffer into which the leaf name will be written.
1729 		   Must be of size B_FILE_NAME_LENGTH at least.
1730 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1731 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
1732 		   if the given path name is empty.
1733 */
1734 static status_t
1735 get_dir_path_and_leaf(char *path, char *filename)
1736 {
1737 	if (*path == '\0')
1738 		return B_ENTRY_NOT_FOUND;
1739 
1740 	char *p = strrchr(path, '/');
1741 		// '/' is not allowed in file names!
1742 
1743 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1744 
1745 	if (!p) {
1746 		// this path is single segment with no '/' in it
1747 		// ex. "foo"
1748 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1749 			return B_NAME_TOO_LONG;
1750 		strcpy(path, ".");
1751 	} else {
1752 		p++;
1753 		if (*p == '\0') {
1754 			// special case: the path ends in '/'
1755 			strcpy(filename, ".");
1756 		} else {
1757 			// normal leaf: replace the leaf portion of the path with a '.'
1758 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1759 				>= B_FILE_NAME_LENGTH) {
1760 				return B_NAME_TOO_LONG;
1761 			}
1762 		}
1763 		p[0] = '.';
1764 		p[1] = '\0';
1765 	}
1766 	return B_OK;
1767 }
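
// Examples: "/boot/home/foo" becomes "/boot/home/." with filename "foo";
// "foo" becomes "." with filename "foo"; a trailing slash as in
// "/boot/home/" yields "/boot/home/." with filename ".".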
1768 
1769 
1770 static status_t
1771 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char *name,
1772 	bool traverse, bool kernel, struct vnode **_vnode)
1773 {
1774 	char clonedName[B_FILE_NAME_LENGTH + 1];
1775 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1776 		return B_NAME_TOO_LONG;
1777 
1778 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1779 	struct vnode *directory;
1780 
1781 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
1782 	if (status < 0)
1783 		return status;
1784 
1785 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
1786 		_vnode, NULL);
1787 }
1788 
1789 
1790 static status_t
1791 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
1792 {
1793 	ino_t id;
1794 	status_t status = FS_CALL(dir, lookup, name, &id);
1795 	if (status < B_OK)
1796 		return status;
1797 
1798 	mutex_lock(&sVnodeMutex);
1799 	*_vnode = lookup_vnode(dir->device, id);
1800 	mutex_unlock(&sVnodeMutex);
1801 
1802 	if (*_vnode == NULL) {
1803 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
1804 			"0x%Lx)\n", dir->device, id);
1805 		return B_ENTRY_NOT_FOUND;
1806 	}
1807 
1808 	return B_OK;
1809 }
1810 
1811 
1812 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
1813 	\a path must not be NULL.
1814 	If it returns successfully, \a path contains the name of the last path
1815 	component. This function clobbers the buffer pointed to by \a path only
1816 	if it does contain more than one component.
1817 	Note: this function always consumes a reference to the starting \a vnode,
1818 	no matter whether it succeeds or not!
1819 */
1820 static status_t
1821 vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
1822 	int count, struct io_context *ioContext, struct vnode **_vnode,
1823 	ino_t *_parentID)
1824 {
1825 	status_t status = B_OK;
1826 	ino_t lastParentID = vnode->id;
1827 
1828 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
1829 
1830 	if (path == NULL) {
1831 		put_vnode(vnode);
1832 		return B_BAD_VALUE;
1833 	}
1834 
1835 	if (*path == '\0') {
1836 		put_vnode(vnode);
1837 		return B_ENTRY_NOT_FOUND;
1838 	}
1839 
1840 	while (true) {
1841 		struct vnode *nextVnode;
1842 		ino_t vnodeID;
1843 		char *nextPath;
1844 
1845 		TRACE(("vnode_path_to_vnode: top of loop. path = %p, path = '%s'\n", path, path));
1846 
1847 		// done?
1848 		if (path[0] == '\0')
1849 			break;
1850 
1851 		// walk to find the next path component ("path" will point to a single
1852 		// path component), and filter out multiple slashes
1853 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
1854 				nextPath++);
1855 
1856 		if (*nextPath == '/') {
1857 			*nextPath = '\0';
1858 			do
1859 				nextPath++;
1860 			while (*nextPath == '/');
1861 		}
1862 
1863 		// See if the '..' is at the root of a mount and move to the covered
1864 		// vnode so we pass the '..' path to the underlying filesystem.
1865 		// Also prevent breaking the root of the IO context.
1866 		if (strcmp("..", path) == 0) {
1867 			if (vnode == ioContext->root) {
1868 				// Attempted prison break! Keep it contained.
1869 				path = nextPath;
1870 				continue;
1871 			} else if (vnode->mount->root_vnode == vnode
1872 				&& vnode->mount->covers_vnode) {
1873 				nextVnode = vnode->mount->covers_vnode;
1874 				inc_vnode_ref_count(nextVnode);
1875 				put_vnode(vnode);
1876 				vnode = nextVnode;
1877 			}
1878 		}
1879 
1880 		// check if vnode is really a directory
1881 		if (status == B_OK && !S_ISDIR(vnode->type))
1882 			status = B_NOT_A_DIRECTORY;
1883 
1884 		// Check if we have the right to search the current directory vnode.
1885 		// If a file system doesn't have the access() function, we assume that
1886 		// searching a directory is always allowed
1887 		if (status == B_OK && HAS_FS_CALL(vnode, access))
1888 			status = FS_CALL(vnode, access, X_OK);
1889 
1890 		// Tell the filesystem to get the vnode of this path component (if we got the
1891 		// permission from the call above)
1892 		if (status >= B_OK)
1893 			status = lookup_dir_entry(vnode, path, &nextVnode);
1894 
1895 		if (status < B_OK) {
1896 			put_vnode(vnode);
1897 			return status;
1898 		}
1899 
1900 		// If the new node is a symbolic link, resolve it (if we've been told
1901 		// to do it)
1902 		if (S_ISLNK(nextVnode->type)
1903 			&& (traverseLeafLink || nextPath[0] != '\0')) {
1904 			size_t bufferSize;
1905 			char *buffer;
1906 
1907 			TRACE(("traverse link\n"));
1908 
1909 			// it's not exactly nice style using goto in this way, but hey, it works :-/
1910 			if (count + 1 > B_MAX_SYMLINKS) {
1911 				status = B_LINK_LIMIT;
1912 				goto resolve_link_error;
1913 			}
1914 
1915 			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
1916 			if (buffer == NULL) {
1917 				status = B_NO_MEMORY;
1918 				goto resolve_link_error;
1919 			}
1920 
1921 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
1922 				bufferSize--;
1923 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
1924 				// null-terminate
1925 				if (status >= 0)
1926 					buffer[bufferSize] = '\0';
1927 			} else
1928 				status = B_BAD_VALUE;
1929 
1930 			if (status < B_OK) {
1931 				free(buffer);
1932 
1933 		resolve_link_error:
1934 				put_vnode(vnode);
1935 				put_vnode(nextVnode);
1936 
1937 				return status;
1938 			}
1939 			put_vnode(nextVnode);
1940 
1941 			// Check if we start from the root directory or the current
1942 			// directory ("vnode" still points to that one).
1943 			// Cut off all leading slashes if it's the root directory
1944 			path = buffer;
1945 			bool absoluteSymlink = false;
1946 			if (path[0] == '/') {
1947 				// we don't need the old directory anymore
1948 				put_vnode(vnode);
1949 
1950 				while (*++path == '/')
1951 					;
1952 
1953 				benaphore_lock(&sIOContextRootLock);
1954 				vnode = ioContext->root;
1955 				inc_vnode_ref_count(vnode);
1956 				benaphore_unlock(&sIOContextRootLock);
1957 
1958 				absoluteSymlink = true;
1959 			}
1960 
1961 			inc_vnode_ref_count(vnode);
1962 				// balance the next recursion - we will decrement the
1963 				// ref_count of the vnode, no matter if we succeeded or not
1964 
1965 			if (absoluteSymlink && *path == '\0') {
1966 				// symlink was just "/"
1967 				nextVnode = vnode;
1968 			} else {
1969 				status = vnode_path_to_vnode(vnode, path, traverseLeafLink,
1970 					count + 1, ioContext, &nextVnode, &lastParentID);
1971 			}
1972 
1973 			free(buffer);
1974 
1975 			if (status < B_OK) {
1976 				put_vnode(vnode);
1977 				return status;
1978 			}
1979 		} else
1980 			lastParentID = vnode->id;
1981 
1982 		// decrease the ref count on the old dir we just looked up into
1983 		put_vnode(vnode);
1984 
1985 		path = nextPath;
1986 		vnode = nextVnode;
1987 
1988 		// see if we hit a mount point
1989 		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
1990 		if (mountPoint) {
1991 			put_vnode(vnode);
1992 			vnode = mountPoint;
1993 		}
1994 	}
1995 
1996 	*_vnode = vnode;
1997 	if (_parentID)
1998 		*_parentID = lastParentID;
1999 
2000 	return B_OK;
2001 }
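
// Editor's note with a minimal caller sketch: vnode_path_to_vnode() always
// consumes one reference to the starting vnode. A caller that wants to keep
// its own reference therefore acquires an extra one first, just as
// vnode_and_path_to_dir_vnode() below does:
//
//	inc_vnode_ref_count(dir);
//		// balances the reference vnode_path_to_vnode() will release
//	struct vnode *result;
//	status_t error = vnode_path_to_vnode(dir, mutablePath, true, 0, kernel,
//		&result, NULL);
//	if (error == B_OK)
//		put_vnode(result);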
2002 
2003 
2004 static status_t
2005 vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
2006 	int count, bool kernel, struct vnode **_vnode, ino_t *_parentID)
2007 {
2008 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2009 		get_current_io_context(kernel), _vnode, _parentID);
2010 }
2011 
2012 
2013 static status_t
2014 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
2015 	ino_t *_parentID, bool kernel)
2016 {
2017 	struct vnode *start = NULL;
2018 
2019 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2020 
2021 	if (!path)
2022 		return B_BAD_VALUE;
2023 
2024 	if (*path == '\0')
2025 		return B_ENTRY_NOT_FOUND;
2026 
2027 	// figure out if we need to start at root or at cwd
2028 	if (*path == '/') {
2029 		if (sRoot == NULL) {
2030 			// we're a bit early, aren't we?
2031 			return B_ERROR;
2032 		}
2033 
2034 		while (*++path == '/')
2035 			;
2036 		start = get_root_vnode(kernel);
2037 
2038 		if (*path == '\0') {
2039 			*_vnode = start;
2040 			return B_OK;
2041 		}
2042 
2043 	} else {
2044 		struct io_context *context = get_current_io_context(kernel);
2045 
2046 		mutex_lock(&context->io_mutex);
2047 		start = context->cwd;
2048 		if (start != NULL)
2049 			inc_vnode_ref_count(start);
2050 		mutex_unlock(&context->io_mutex);
2051 
2052 		if (start == NULL)
2053 			return B_ERROR;
2054 	}
2055 
2056 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2057 		_parentID);
2058 }
2059 
2060 
2061 /*! Returns the vnode in the next-to-last segment of the path, and returns
2062 	the last portion in \a filename.
2063 	The path buffer must be able to store at least one additional character.
2064 */
2065 static status_t
2066 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
2067 {
2068 	status_t status = get_dir_path_and_leaf(path, filename);
2069 	if (status != B_OK)
2070 		return status;
2071 
2072 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2073 }
2074 
2075 
2076 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2077 		   to by a FD + path pair.
2078 
2079 	\a path must be given in either case. \a fd might be omitted, in which
2080 	case \a path is either an absolute path or one relative to the current
2081 	directory. If both are supplied and \a path is relative, it is reckoned
2082 	off of the directory referred to by \a fd. If \a path is absolute, \a fd
2083 	is ignored.
2084 
2085 	The caller has the responsibility to call put_vnode() on the returned
2086 	directory vnode.
2087 
2088 	\param fd The FD. May be < 0.
2089 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2090 	       is modified by this function. It must have at least room for a
2091 	       string one character longer than the path it contains.
2092 	\param _vnode A pointer to a variable the directory vnode shall be written
2093 		   into.
2094 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2095 		   the leaf name of the specified entry will be written.
2096 	\param kernel \c true, if invoked from inside the kernel, \c false if
2097 		   invoked from userland.
2098 	\return \c B_OK, if everything went fine, another error code otherwise.
2099 */
2100 static status_t
2101 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
2102 	char *filename, bool kernel)
2103 {
2104 	if (!path)
2105 		return B_BAD_VALUE;
2106 	if (*path == '\0')
2107 		return B_ENTRY_NOT_FOUND;
2108 	if (fd < 0)
2109 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2110 
2111 	status_t status = get_dir_path_and_leaf(path, filename);
2112 	if (status != B_OK)
2113 		return status;
2114 
2115 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2116 }
2117 
2118 
2119 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2120 		   to by a vnode + path pair.
2121 
2122 	\a path must be given in either case. \a vnode might be omitted, in which
2123 	case \a path is either an absolute path or one relative to the current
2124 	directory. If both are supplied and \a path is relative, it is reckoned
2125 	off of the directory referred to by \a vnode. If \a path is absolute,
2126 	\a vnode is ignored.
2127 
2128 	The caller has the responsibility to call put_vnode() on the returned
2129 	directory vnode.
2130 
2131 	\param vnode The vnode. May be \c NULL.
2132 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2133 	       is modified by this function. It must have at least room for a
2134 	       string one character longer than the path it contains.
2135 	\param _vnode A pointer to a variable the directory vnode shall be written
2136 		   into.
2137 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2138 		   the leaf name of the specified entry will be written.
2139 	\param kernel \c true, if invoked from inside the kernel, \c false if
2140 		   invoked from userland.
2141 	\return \c B_OK, if everything went fine, another error code otherwise.
2142 */
2143 static status_t
2144 vnode_and_path_to_dir_vnode(struct vnode* vnode, char *path,
2145 	struct vnode **_vnode, char *filename, bool kernel)
2146 {
2147 	if (!path)
2148 		return B_BAD_VALUE;
2149 	if (*path == '\0')
2150 		return B_ENTRY_NOT_FOUND;
2151 	if (vnode == NULL || path[0] == '/')
2152 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2153 
2154 	status_t status = get_dir_path_and_leaf(path, filename);
2155 	if (status != B_OK)
2156 		return status;
2157 
2158 	inc_vnode_ref_count(vnode);
2159 		// vnode_path_to_vnode() always decrements the ref count
2160 
2161 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2162 }
2163 
2164 
2165 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2166 */
2167 static status_t
2168 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
2169 	size_t bufferSize, struct io_context* ioContext)
2170 {
2171 	if (bufferSize < sizeof(struct dirent))
2172 		return B_BAD_VALUE;
2173 
2174 	// See if vnode is the root of a mount and move to the covered
2175 	// vnode so we get the underlying file system
2176 	VNodePutter vnodePutter;
2177 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
2178 		vnode = vnode->mount->covers_vnode;
2179 		inc_vnode_ref_count(vnode);
2180 		vnodePutter.SetTo(vnode);
2181 	}
2182 
2183 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2184 		// The FS supports getting the name of a vnode.
2185 		return FS_CALL(vnode, get_vnode_name, buffer->d_name,
2186 			(char*)buffer + bufferSize - buffer->d_name);
2187 	}
2188 
2189 	// The FS doesn't support getting the name of a vnode. So we search the
2190 	// parent directory for the vnode, if the caller let us.
2191 
2192 	if (parent == NULL)
2193 		return EOPNOTSUPP;
2194 
2195 	void *cookie;
2196 
2197 	status_t status = FS_CALL(parent, open_dir, &cookie);
2198 	if (status >= B_OK) {
2199 		while (true) {
2200 			uint32 num = 1;
2201 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2202 				&num);
2203 			if (status < B_OK)
2204 				break;
2205 			if (num == 0) {
2206 				status = B_ENTRY_NOT_FOUND;
2207 				break;
2208 			}
2209 
2210 			if (vnode->id == buffer->d_ino) {
2211 				// found correct entry!
2212 				break;
2213 			}
2214 		}
2215 
2216 		FS_CALL(parent, close_dir, cookie);
2217 		FS_CALL(parent, free_dir_cookie, cookie);
2218 	}
2219 	return status;
2220 }
2221 
2222 
2223 static status_t
2224 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
2225 	size_t nameSize, bool kernel)
2226 {
2227 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2228 	struct dirent *dirent = (struct dirent *)buffer;
2229 
2230 	status_t status = get_vnode_name(vnode, parent, buffer, sizeof(buffer),
2231 		get_current_io_context(kernel));
2232 	if (status != B_OK)
2233 		return status;
2234 
2235 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2236 		return B_BUFFER_OVERFLOW;
2237 
2238 	return B_OK;
2239 }
2240 
2241 
2242 /*!	Gets the full path to a given directory vnode.
2243 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2244 	file system doesn't support this call, it will fall back to iterating
2245 	through the parent directory to get the name of the child.
2246 
2247 	To protect against circular loops, it supports a maximum tree depth
2248 	of 256 levels.
2249 
2250 	Note that the path may no longer be correct by the time this function
2251 	returns! It doesn't use locking to guarantee that the returned path is
2252 	still valid; the path to a file can change at any time.
2253 
2254 	It might be a good idea, though, for the caller to check whether the
2255 	returned path exists (it's not done here for efficiency reasons).
2256 */
2257 static status_t
2258 dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize,
2259 	bool kernel)
2260 {
2261 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2262 
2263 	if (vnode == NULL || buffer == NULL)
2264 		return B_BAD_VALUE;
2265 
2266 	if (!S_ISDIR(vnode->type))
2267 		return B_NOT_A_DIRECTORY;
2268 
2269 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
2270 	KPath pathBuffer;
2271 	if (pathBuffer.InitCheck() != B_OK)
2272 		return B_NO_MEMORY;
2273 
2274 	char *path = pathBuffer.LockBuffer();
2275 	int32 insert = pathBuffer.BufferSize();
2276 	int32 maxLevel = 256;
2277 	int32 length;
2278 	status_t status;
2279 
2280 	// we don't use get_vnode() here because this call is more
2281 	// efficient and does all we need from get_vnode()
2282 	inc_vnode_ref_count(vnode);
2283 
2284 	// resolve a volume root to its mount point
2285 	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
2286 	if (mountPoint) {
2287 		put_vnode(vnode);
2288 		vnode = mountPoint;
2289 	}
2290 
2291 	path[--insert] = '\0';
2292 
2293 	struct io_context* ioContext = get_current_io_context(kernel);
2294 
2295 	while (true) {
2296 		// the name buffer is also used for fs_read_dir()
2297 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2298 		char *name = &((struct dirent *)nameBuffer)->d_name[0];
2299 		struct vnode *parentVnode;
2300 		ino_t parentID;
2301 
2302 		// lookup the parent vnode
2303 		if (vnode == ioContext->root) {
2304 			// we hit the IO context root
2305 			parentVnode = vnode;
2306 			inc_vnode_ref_count(vnode);
2307 		} else {
2308 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2309 			if (status < B_OK)
2310 				goto out;
2311 		}
2312 
2313 		// get the node's name
2314 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2315 			sizeof(nameBuffer), ioContext);
2316 
2317 		// resolve a volume root to its mount point
2318 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
2319 		if (mountPoint) {
2320 			put_vnode(parentVnode);
2321 			parentVnode = mountPoint;
2322 			parentID = parentVnode->id;
2323 		}
2324 
2325 		bool hitRoot = (parentVnode == vnode);
2326 
2327 		// release the current vnode, we only need its parent from now on
2328 		put_vnode(vnode);
2329 		vnode = parentVnode;
2330 
2331 		if (status < B_OK)
2332 			goto out;
2333 
2334 		if (hitRoot) {
2335 			// we have reached "/", which means we have constructed the full
2336 			// path
2337 			break;
2338 		}
2339 
2340 		// ToDo: add an explicit check for loops in about 10 levels to do
2341 		// real loop detection
2342 
2343 		// don't go deeper than 'maxLevel' to prevent circular loops
2344 		if (maxLevel-- < 0) {
2345 			status = ELOOP;
2346 			goto out;
2347 		}
2348 
2349 		// add the name in front of the current path
2350 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2351 		length = strlen(name);
2352 		insert -= length;
2353 		if (insert <= 0) {
2354 			status = ENOBUFS;
2355 			goto out;
2356 		}
2357 		memcpy(path + insert, name, length);
2358 		path[--insert] = '/';
2359 	}
2360 
2361 	// the root dir will result in an empty path: fix it
2362 	if (path[insert] == '\0')
2363 		path[--insert] = '/';
2364 
2365 	TRACE(("  path is: %s\n", path + insert));
2366 
2367 	// copy the path to the output buffer
2368 	length = pathBuffer.BufferSize() - insert;
2369 	if (length <= (int)bufferSize)
2370 		memcpy(buffer, path + insert, length);
2371 	else
2372 		status = ENOBUFS;
2373 
2374 out:
2375 	put_vnode(vnode);
2376 	return status;
2377 }
2378 
2379 
2380 /*!	Checks the length of every path component, and adds a '.'
2381 	if the path ends in a slash.
2382 	The given path buffer must be able to store at least one
2383 	additional character.
2384 */
2385 static status_t
2386 check_path(char *to)
2387 {
2388 	int32 length = 0;
2389 
2390 	// check length of every path component
2391 
2392 	while (*to) {
2393 		char *begin;
2394 		if (*to == '/')
2395 			to++, length++;
2396 
2397 		begin = to;
2398 		while (*to != '/' && *to)
2399 			to++, length++;
2400 
2401 		if (to - begin > B_FILE_NAME_LENGTH)
2402 			return B_NAME_TOO_LONG;
2403 	}
2404 
2405 	if (length == 0)
2406 		return B_ENTRY_NOT_FOUND;
2407 
2408 	// complete path if there is a slash at the end
2409 
2410 	if (*(to - 1) == '/') {
2411 		if (length > B_PATH_NAME_LENGTH - 2)
2412 			return B_NAME_TOO_LONG;
2413 
2414 		to[0] = '.';
2415 		to[1] = '\0';
2416 	}
2417 
2418 	return B_OK;
2419 }
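
// Editor's example of the in-place rewrite performed by check_path()
// (hypothetical input, behavior as implemented above):
//
//	char path[B_PATH_NAME_LENGTH] = "/boot/home/";
//	status_t status = check_path(path);
//		// status is B_OK, path is now "/boot/home/." -- hence the
//		// requirement that the buffer can hold one additional character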
2420 
2421 
2422 static struct file_descriptor *
2423 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2424 {
2425 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2426 	if (descriptor == NULL)
2427 		return NULL;
2428 
2429 	if (fd_vnode(descriptor) == NULL) {
2430 		put_fd(descriptor);
2431 		return NULL;
2432 	}
2433 
2434 	// ToDo: when we can close a file descriptor at any point, investigate
2435 	//	if this is still valid to do (accessing the vnode without ref_count
2436 	//	or locking)
2437 	*_vnode = descriptor->u.vnode;
2438 	return descriptor;
2439 }
2440 
2441 
2442 static struct vnode *
2443 get_vnode_from_fd(int fd, bool kernel)
2444 {
2445 	struct file_descriptor *descriptor;
2446 	struct vnode *vnode;
2447 
2448 	descriptor = get_fd(get_current_io_context(kernel), fd);
2449 	if (descriptor == NULL)
2450 		return NULL;
2451 
2452 	vnode = fd_vnode(descriptor);
2453 	if (vnode != NULL)
2454 		inc_vnode_ref_count(vnode);
2455 
2456 	put_fd(descriptor);
2457 	return vnode;
2458 }
2459 
2460 
2461 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2462 	only the path will be considered. In this case, the \a path must not be
2463 	NULL.
2464 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2465 	and should be NULL for files.
2466 */
2467 static status_t
2468 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2469 	struct vnode **_vnode, ino_t *_parentID, bool kernel)
2470 {
2471 	if (fd < 0 && !path)
2472 		return B_BAD_VALUE;
2473 
2474 	if (path != NULL && *path == '\0')
2475 		return B_ENTRY_NOT_FOUND;
2476 
2477 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2478 		// no FD or absolute path
2479 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2480 	}
2481 
2482 	// FD only, or FD + relative path
2483 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2484 	if (!vnode)
2485 		return B_FILE_ERROR;
2486 
2487 	if (path != NULL) {
2488 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2489 			_vnode, _parentID);
2490 	}
2491 
2492 	// there is no relative path to take into account
2493 
2494 	*_vnode = vnode;
2495 	if (_parentID)
2496 		*_parentID = -1;
2497 
2498 	return B_OK;
2499 }
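
// Editor's summary of the input combinations accepted above:
//
//	fd_and_path_to_vnode(-1, "/x/y", ...);  // plain (absolute) path lookup
//	fd_and_path_to_vnode(fd, "x/y", ...);   // relative to the FD's directory
//	fd_and_path_to_vnode(fd, NULL, ...);    // the FD's own vnode, parent -1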
2500 
2501 
2502 static int
2503 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2504 	void *cookie, int openMode, bool kernel)
2505 {
2506 	struct file_descriptor *descriptor;
2507 	int fd;
2508 
2509 	// if the vnode is locked, we don't allow creating a new file descriptor for it
2510 	if (vnode && vnode->mandatory_locked_by != NULL)
2511 		return B_BUSY;
2512 
2513 	descriptor = alloc_fd();
2514 	if (!descriptor)
2515 		return B_NO_MEMORY;
2516 
2517 	if (vnode)
2518 		descriptor->u.vnode = vnode;
2519 	else
2520 		descriptor->u.mount = mount;
2521 	descriptor->cookie = cookie;
2522 
2523 	switch (type) {
2524 		// vnode types
2525 		case FDTYPE_FILE:
2526 			descriptor->ops = &sFileOps;
2527 			break;
2528 		case FDTYPE_DIR:
2529 			descriptor->ops = &sDirectoryOps;
2530 			break;
2531 		case FDTYPE_ATTR:
2532 			descriptor->ops = &sAttributeOps;
2533 			break;
2534 		case FDTYPE_ATTR_DIR:
2535 			descriptor->ops = &sAttributeDirectoryOps;
2536 			break;
2537 
2538 		// mount types
2539 		case FDTYPE_INDEX_DIR:
2540 			descriptor->ops = &sIndexDirectoryOps;
2541 			break;
2542 		case FDTYPE_QUERY:
2543 			descriptor->ops = &sQueryOps;
2544 			break;
2545 
2546 		default:
2547 			panic("get_new_fd() called with unknown type %d\n", type);
2548 			break;
2549 	}
2550 	descriptor->type = type;
2551 	descriptor->open_mode = openMode;
2552 
2553 	fd = new_fd(get_current_io_context(kernel), descriptor);
2554 	if (fd < 0) {
2555 		free(descriptor);
2556 		return B_NO_MORE_FDS;
2557 	}
2558 
2559 	return fd;
2560 }
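
// Editor's sketch of a typical caller (assumed shape, mirroring the open
// paths elsewhere in this file): after an FS open hook has produced a
// cookie, the descriptor is installed like this, with the cookie cleaned
// up again if no FD slot is available:
//
//	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
//	if (fd < 0) {
//		FS_CALL(vnode, close, cookie);
//		FS_CALL(vnode, free_cookie, cookie);
//	}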
2561 
2562 #ifdef ADD_DEBUGGER_COMMANDS
2563 
2564 
2565 static void
2566 _dump_advisory_locking(advisory_locking *locking)
2567 {
2568 	if (locking == NULL)
2569 		return;
2570 
2571 	kprintf("   lock:        %ld\n", locking->lock);
2572 	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2573 
2574 	int32 index = 0;
2575 	LockList::Iterator iterator = locking->locks.GetIterator();
2576 	while (iterator.HasNext()) {
2577 		struct advisory_lock *lock = iterator.Next();
2578 
2579 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2580 		kprintf("        start:  %Ld\n", lock->start);
2581 		kprintf("        end:    %Ld\n", lock->end);
2582 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2583 	}
2584 }
2585 
2586 
2587 static void
2588 _dump_mount(struct fs_mount *mount)
2589 {
2590 	kprintf("MOUNT: %p\n", mount);
2591 	kprintf(" id:            %ld\n", mount->id);
2592 	kprintf(" device_name:   %s\n", mount->device_name);
2593 	kprintf(" fs_name:       %s\n", mount->fs_name);
2594 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2595 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2596 	kprintf(" partition:     %p\n", mount->partition);
2597 	kprintf(" lock:          %ld\n", mount->rlock.sem);
2598 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2599 		mount->owns_file_device ? " owns_file_device" : "");
2600 
2601 	fs_volume *volume = mount->volume;
2602 	while (volume != NULL) {
2603 		kprintf(" volume %p:\n", volume);
2604 		kprintf("  layer:          %ld\n", volume->layer);
2605 		kprintf("  private_volume: %p\n", volume->private_volume);
2606 		kprintf("  ops:            %p\n", volume->ops);
2607 		volume = volume->super_volume;
2608 	}
2609 
2610 	set_debug_variable("_cookie", (addr_t)mount->volume->private_volume);
2611 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2612 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
2613 	set_debug_variable("_partition", (addr_t)mount->partition);
2614 }
2615 
2616 
2617 static void
2618 _dump_vnode(struct vnode *vnode)
2619 {
2620 	kprintf("VNODE: %p\n", vnode);
2621 	kprintf(" device:        %ld\n", vnode->device);
2622 	kprintf(" id:            %Ld\n", vnode->id);
2623 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
2624 	kprintf(" private_node:  %p\n", vnode->private_node);
2625 	kprintf(" mount:         %p\n", vnode->mount);
2626 	kprintf(" covered_by:    %p\n", vnode->covered_by);
2627 	kprintf(" cache:         %p\n", vnode->cache);
2628 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
2629 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2630 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
2631 
2632 	_dump_advisory_locking(vnode->advisory_locking);
2633 
2634 	set_debug_variable("_node", (addr_t)vnode->private_node);
2635 	set_debug_variable("_mount", (addr_t)vnode->mount);
2636 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
2637 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
2638 }
2639 
2640 
2641 static int
2642 dump_mount(int argc, char **argv)
2643 {
2644 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2645 		kprintf("usage: %s [id|address]\n", argv[0]);
2646 		return 0;
2647 	}
2648 
2649 	uint32 id = parse_expression(argv[1]);
2650 	struct fs_mount *mount = NULL;
2651 
2652 	mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2653 	if (mount == NULL) {
2654 		if (IS_USER_ADDRESS(id)) {
2655 			kprintf("fs_mount not found\n");
2656 			return 0;
2657 		}
2658 		mount = (fs_mount *)id;
2659 	}
2660 
2661 	_dump_mount(mount);
2662 	return 0;
2663 }
2664 
2665 
2666 static int
2667 dump_mounts(int argc, char **argv)
2668 {
2669 	if (argc != 1) {
2670 		kprintf("usage: %s\n", argv[0]);
2671 		return 0;
2672 	}
2673 
2674 	kprintf("address     id root       covers     cookie     fs_name\n");
2675 
2676 	struct hash_iterator iterator;
2677 	struct fs_mount *mount;
2678 
2679 	hash_open(sMountsTable, &iterator);
2680 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2681 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
2682 			mount->covers_vnode, mount->volume->private_volume, mount->fs_name);
2683 	}
2684 
2685 	hash_close(sMountsTable, &iterator, false);
2686 	return 0;
2687 }
2688 
2689 
2690 static int
2691 dump_vnode(int argc, char **argv)
2692 {
2693 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
2694 		kprintf("usage: %s <device> <id>\n"
2695 			"   or: %s <address>\n", argv[0], argv[0]);
2696 		return 0;
2697 	}
2698 
2699 	struct vnode *vnode = NULL;
2700 
2701 	if (argc == 2) {
2702 		vnode = (struct vnode *)parse_expression(argv[1]);
2703 		if (IS_USER_ADDRESS(vnode)) {
2704 			kprintf("invalid vnode address\n");
2705 			return 0;
2706 		}
2707 		_dump_vnode(vnode);
2708 		return 0;
2709 	}
2710 
2711 	struct hash_iterator iterator;
2712 	dev_t device = parse_expression(argv[1]);
2713 	ino_t id = atoll(argv[2]);
2714 
2715 	hash_open(sVnodeTable, &iterator);
2716 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2717 		if (vnode->id != id || vnode->device != device)
2718 			continue;
2719 
2720 		_dump_vnode(vnode);
2721 	}
2722 
2723 	hash_close(sVnodeTable, &iterator, false);
2724 	return 0;
2725 }
2726 
2727 
2728 static int
2729 dump_vnodes(int argc, char **argv)
2730 {
2731 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2732 		kprintf("usage: %s [device]\n", argv[0]);
2733 		return 0;
2734 	}
2735 
2736 	// restrict dumped nodes to a certain device if requested
2737 	dev_t device = parse_expression(argv[1]);
2738 
2739 	struct hash_iterator iterator;
2740 	struct vnode *vnode;
2741 
2742 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
2743 		"flags\n");
2744 
2745 	hash_open(sVnodeTable, &iterator);
2746 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2747 		if (vnode->device != device)
2748 			continue;
2749 
2750 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
2751 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
2752 			vnode->advisory_locking, vnode->remove ? "r" : "-",
2753 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2754 	}
2755 
2756 	hash_close(sVnodeTable, &iterator, false);
2757 	return 0;
2758 }
2759 
2760 
2761 static int
2762 dump_vnode_caches(int argc, char **argv)
2763 {
2764 	struct hash_iterator iterator;
2765 	struct vnode *vnode;
2766 
2767 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
2768 		kprintf("usage: %s [device]\n", argv[0]);
2769 		return 0;
2770 	}
2771 
2772 	// restrict dumped nodes to a certain device if requested
2773 	dev_t device = -1;
2774 	if (argc > 1)
2775 		device = atoi(argv[1]);
2776 
2777 	kprintf("address    dev     inode cache          size   pages\n");
2778 
2779 	hash_open(sVnodeTable, &iterator);
2780 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2781 		if (vnode->cache == NULL)
2782 			continue;
2783 		if (device != -1 && vnode->device != device)
2784 			continue;
2785 
2786 		// count pages in cache
2787 		size_t numPages = 0;
2788 		for (struct vm_page *page = vnode->cache->page_list;
2789 				page != NULL; page = page->cache_next) {
2790 			numPages++;
2791 		}
2792 
2793 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
2794 			vnode->cache, (vnode->cache->virtual_size + B_PAGE_SIZE - 1)
2795 				/ B_PAGE_SIZE, numPages);
2796 	}
2797 
2798 	hash_close(sVnodeTable, &iterator, false);
2799 	return 0;
2800 }
2801 
2802 
2803 int
2804 dump_io_context(int argc, char **argv)
2805 {
2806 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
2807 		kprintf("usage: %s [team-id|address]\n", argv[0]);
2808 		return 0;
2809 	}
2810 
2811 	struct io_context *context = NULL;
2812 
2813 	if (argc > 1) {
2814 		uint32 num = parse_expression(argv[1]);
2815 		if (IS_KERNEL_ADDRESS(num))
2816 			context = (struct io_context *)num;
2817 		else {
2818 			struct team *team = team_get_team_struct_locked(num);
2819 			if (team == NULL) {
2820 				kprintf("could not find team with ID %ld\n", num);
2821 				return 0;
2822 			}
2823 			context = (struct io_context *)team->io_context;
2824 		}
2825 	} else
2826 		context = get_current_io_context(true);
2827 
2828 	kprintf("I/O CONTEXT: %p\n", context);
2829 	kprintf(" root vnode:\t%p\n", context->root);
2830 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2831 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2832 	kprintf(" max fds:\t%lu\n", context->table_size);
2833 
2834 	if (context->num_used_fds)
2835 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2836 
2837 	for (uint32 i = 0; i < context->table_size; i++) {
2838 		struct file_descriptor *fd = context->fds[i];
2839 		if (fd == NULL)
2840 			continue;
2841 
2842 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2843 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2844 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2845 			fd->u.vnode);
2846 	}
2847 
2848 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2849 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2850 
2851 	set_debug_variable("_cwd", (addr_t)context->cwd);
2852 
2853 	return 0;
2854 }
2855 
2856 
2857 int
2858 dump_vnode_usage(int argc, char **argv)
2859 {
2860 	if (argc != 1) {
2861 		kprintf("usage: %s\n", argv[0]);
2862 		return 0;
2863 	}
2864 
2865 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
2866 		kMaxUnusedVnodes);
2867 
2868 	struct hash_iterator iterator;
2869 	hash_open(sVnodeTable, &iterator);
2870 
2871 	uint32 count = 0;
2872 	struct vnode *vnode;
2873 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2874 		count++;
2875 	}
2876 
2877 	hash_close(sVnodeTable, &iterator, false);
2878 
2879 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2880 	return 0;
2881 }
2882 
2883 #endif	// ADD_DEBUGGER_COMMANDS
2884 
2885 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
2886 	and calls the file system hooks to read/write the request to disk.
2887 */
2888 static status_t
2889 common_file_io_vec_pages(struct vnode *vnode, void *cookie,
2890 	const file_io_vec *fileVecs, size_t fileVecCount, const iovec *vecs,
2891 	size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset, size_t *_numBytes,
2892 	bool doWrite)
2893 {
2894 	if (fileVecCount == 0) {
2895 		// There are no file vecs at this offset, so we're obviously trying
2896 		// to access the file outside of its bounds
2897 		return B_BAD_VALUE;
2898 	}
2899 
2900 	size_t numBytes = *_numBytes;
2901 	uint32 fileVecIndex;
2902 	size_t vecOffset = *_vecOffset;
2903 	uint32 vecIndex = *_vecIndex;
2904 	status_t status;
2905 	size_t size;
2906 
2907 	if (!doWrite && vecOffset == 0) {
2908 		// now directly read the data from the device
2909 		// the first file_io_vec can be read directly
2910 
2911 		size = fileVecs[0].length;
2912 		if (size > numBytes)
2913 			size = numBytes;
2914 
2915 		status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
2916 			&vecs[vecIndex], vecCount - vecIndex, &size, false);
2917 		if (status < B_OK)
2918 			return status;
2919 
2920 		// TODO: this is a work-around for buggy device drivers!
2921 		//	When our own drivers honour the length, we can:
2922 		//	a) also use this direct I/O for writes (otherwise, it would
2923 		//	   overwrite precious data)
2924 		//	b) panic if the term below is true (at least for writes)
2925 		if (size > fileVecs[0].length) {
2926 			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
2927 			size = fileVecs[0].length;
2928 		}
2929 
2930 		ASSERT(size <= fileVecs[0].length);
2931 
2932 		// If the file portion was contiguous, we're already done now
2933 		if (size == numBytes)
2934 			return B_OK;
2935 
2936 		// if we reached the end of the file, we can return as well
2937 		if (size != fileVecs[0].length) {
2938 			*_numBytes = size;
2939 			return B_OK;
2940 		}
2941 
2942 		fileVecIndex = 1;
2943 
2944 		// first, find out where we have to continue in our iovecs
2945 		for (; vecIndex < vecCount; vecIndex++) {
2946 			if (size < vecs[vecIndex].iov_len)
2947 				break;
2948 
2949 			size -= vecs[vecIndex].iov_len;
2950 		}
2951 
2952 		vecOffset = size;
2953 	} else {
2954 		fileVecIndex = 0;
2955 		size = 0;
2956 	}
2957 
2958 	// Too bad, let's process the rest of the file_io_vecs
2959 
2960 	size_t totalSize = size;
2961 	size_t bytesLeft = numBytes - size;
2962 
2963 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
2964 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
2965 		off_t fileOffset = fileVec.offset;
2966 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
2967 
2968 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
2969 
2970 		// process the complete fileVec
2971 		while (fileLeft > 0) {
2972 			iovec tempVecs[MAX_TEMP_IO_VECS];
2973 			uint32 tempCount = 0;
2974 
2975 			// size tracks how much of what is left of the current fileVec
2976 			// (fileLeft) has been assigned to tempVecs
2977 			size = 0;
2978 
2979 			// assign what is left of the current fileVec to the tempVecs
2980 			for (size = 0; size < fileLeft && vecIndex < vecCount
2981 					&& tempCount < MAX_TEMP_IO_VECS;) {
2982 				// try to satisfy one iovec per iteration (or as much as
2983 				// possible)
2984 
2985 				// bytes left of the current iovec
2986 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
2987 				if (vecLeft == 0) {
2988 					vecOffset = 0;
2989 					vecIndex++;
2990 					continue;
2991 				}
2992 
2993 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
2994 					vecIndex, vecOffset, size));
2995 
2996 				// actually available bytes
2997 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
2998 
2999 				tempVecs[tempCount].iov_base
3000 					= (void *)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3001 				tempVecs[tempCount].iov_len = tempVecSize;
3002 				tempCount++;
3003 
3004 				size += tempVecSize;
3005 				vecOffset += tempVecSize;
3006 			}
3007 
3008 			size_t bytes = size;
3009 			if (doWrite) {
3010 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3011 					tempVecs, tempCount, &bytes, false);
3012 			} else {
3013 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3014 					tempVecs, tempCount, &bytes, false);
3015 			}
3016 			if (status < B_OK)
3017 				return status;
3018 
3019 			totalSize += bytes;
3020 			bytesLeft -= size;
3021 			fileOffset += size;
3022 			fileLeft -= size;
3023 			//dprintf("-> file left = %Lu\n", fileLeft);
3024 
3025 			if (size != bytes || vecIndex >= vecCount) {
3026 				// there are no more bytes or iovecs, let's bail out
3027 				*_numBytes = totalSize;
3028 				return B_OK;
3029 			}
3030 		}
3031 	}
3032 
3033 	*_vecIndex = vecIndex;
3034 	*_vecOffset = vecOffset;
3035 	*_numBytes = totalSize;
3036 	return B_OK;
3037 }
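
// Editor's worked example (made-up numbers) of the combining logic above:
// reading 6144 bytes that live in two on-disk extents,
//
//	file_io_vec fileVecs[] = { { 1024, 4096 }, { 9216, 2048 } };
//
// into three 2048-byte iovecs results in two read_pages() calls: the first
// reads the 4096-byte extent at offset 1024 directly into vecs[0] and
// vecs[1]; the second reads the 2048-byte extent at offset 9216 into
// vecs[2] via tempVecs. *_vecIndex and *_vecOffset afterwards tell a
// subsequent call where to continue in the iovec array.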
3038 
3039 
3040 //	#pragma mark - public API for file systems
3041 
3042 
3043 extern "C" status_t
3044 new_vnode(fs_volume *volume, ino_t vnodeID, void *privateNode,
3045 	fs_vnode_ops *ops)
3046 {
3047 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3048 		volume, volume->id, vnodeID, privateNode));
3049 
3050 	if (privateNode == NULL)
3051 		return B_BAD_VALUE;
3052 
3053 	mutex_lock(&sVnodeMutex);
3054 
3055 	// file system integrity check:
3056 	// test if the vnode already exists and bail out if this is the case!
3057 
3058 	// ToDo: the R5 implementation obviously checks for a different cookie
3059 	//	and doesn't panic if they are equal
3060 
3061 	struct vnode *vnode = lookup_vnode(volume->id, vnodeID);
3062 	if (vnode != NULL) {
3063 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3064 			volume->id, vnodeID, privateNode, vnode->private_node);
3065 	}
3066 
3067 	status_t status = create_new_vnode(&vnode, volume->id, vnodeID);
3068 	if (status == B_OK) {
3069 		vnode->private_node = privateNode;
3070 		vnode->ops = ops;
3071 		vnode->busy = true;
3072 		vnode->unpublished = true;
3073 	}
3074 
3075 	TRACE(("returns: %s\n", strerror(status)));
3076 
3077 	mutex_unlock(&sVnodeMutex);
3078 	return status;
3079 }
3080 
3081 
3082 extern "C" status_t
3083 publish_vnode(fs_volume *volume, ino_t vnodeID, void *privateNode,
3084 	fs_vnode_ops *ops, int type, uint32 flags)
3085 {
3086 	FUNCTION(("publish_vnode()\n"));
3087 
3088 	MutexLocker locker(sVnodeMutex);
3089 
3090 	struct vnode *vnode = lookup_vnode(volume->id, vnodeID);
3091 	status_t status = B_OK;
3092 
3093 	if (vnode != NULL && vnode->busy && vnode->unpublished
3094 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3095 		// already known, but not published
3096 	} else if (vnode == NULL && privateNode != NULL) {
3097 		status = create_new_vnode(&vnode, volume->id, vnodeID);
3098 		if (status == B_OK) {
3099 			vnode->private_node = privateNode;
3100 			vnode->ops = ops;
3101 			vnode->busy = true;
3102 			vnode->unpublished = true;
3103 		}
3104 	} else
3105 		status = B_BAD_VALUE;
3106 
3107 	bool publishSpecialSubNode = false;
3108 
3109 	if (status == B_OK) {
3110 		vnode->type = type;
3111 		vnode->remove = (flags & B_VNODE_PUBLISH_REMOVED) != 0;
3112 		publishSpecialSubNode = is_special_node_type(type)
3113 			&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3114 	}
3115 
3116 
3117 	// create sub vnodes, if necessary
3118 	if (status == B_OK
3119 			&& (volume->sub_volume != NULL || publishSpecialSubNode)) {
3120 		locker.Unlock();
3121 
3122 		fs_volume *subVolume = volume;
3123 		if (volume->sub_volume != NULL) {
3124 			while (status == B_OK && subVolume->sub_volume != NULL) {
3125 				subVolume = subVolume->sub_volume;
3126 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3127 					vnode);
3128 			}
3129 		}
3130 
3131 		if (status == B_OK && publishSpecialSubNode)
3132 			status = create_special_sub_node(vnode, flags);
3133 
3134 		if (status != B_OK) {
3135 			// error -- clean up the created sub vnodes
3136 			while (subVolume->super_volume != volume) {
3137 				subVolume = subVolume->super_volume;
3138 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3139 			}
3140 		}
3141 
3142 		locker.Lock();
3143 
3144 		if (status != B_OK) {
3145 			hash_remove(sVnodeTable, vnode);
3146 			remove_vnode_from_mount_list(vnode, vnode->mount);
3147 			free(vnode);
3148 		}
3149 	}
3150 
3151 	if (status == B_OK) {
3152 		vnode->busy = false;
3153 		vnode->unpublished = false;
3154 	}
3155 
3156 	TRACE(("returns: %s\n", strerror(status)));
3157 
3158 	return status;
3159 }
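
// Editor's usage sketch (hypothetical FS-side names, not from this file):
// a file system creating a brand-new node registers it with new_vnode()
// while it is still being set up, then makes it visible with
// publish_vnode(), which clears the busy/unpublished flags:
//
//	MyNode *node = my_allocate_node();	// hypothetical helper and type
//	status_t error = new_vnode(volume, node->id, node, &gMyVnodeOps);
//	if (error == B_OK) {
//		// ... write the on-disk structures ...
//		error = publish_vnode(volume, node->id, node, &gMyVnodeOps,
//			S_IFREG, 0);
//	}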
3160 
3161 
3162 extern "C" status_t
3163 get_vnode(fs_volume *volume, ino_t vnodeID, void **fsNode)
3164 {
3165 	struct vnode *vnode;
3166 
3167 	if (volume == NULL)
3168 		return B_BAD_VALUE;
3169 
3170 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3171 	if (status < B_OK)
3172 		return status;
3173 
3174 	// If this is a layered FS, we need to get the node cookie for the requested
3175 	// layer.
3176 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3177 		fs_vnode resolvedNode;
3178 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3179 			&resolvedNode);
3180 		if (status != B_OK) {
3181 			panic("get_vnode(): Failed to get super node for vnode %p, "
3182 				"volume: %p", vnode, volume);
3183 			put_vnode(vnode);
3184 			return status;
3185 		}
3186 
3187 		*fsNode = resolvedNode.private_node;
3188 	} else
3189 		*fsNode = vnode->private_node;
3190 
3191 	return B_OK;
3192 }
3193 
3194 
3195 extern "C" status_t
3196 put_vnode(fs_volume *volume, ino_t vnodeID)
3197 {
3198 	struct vnode *vnode;
3199 
3200 	mutex_lock(&sVnodeMutex);
3201 	vnode = lookup_vnode(volume->id, vnodeID);
3202 	mutex_unlock(&sVnodeMutex);
3203 
3204 	if (vnode)
3205 		dec_vnode_ref_count(vnode, true);
3206 
3207 	return B_OK;
3208 }
3209 
3210 
3211 extern "C" status_t
3212 remove_vnode(fs_volume *volume, ino_t vnodeID)
3213 {
3214 	struct vnode *vnode;
3215 	bool remove = false;
3216 
3217 	MutexLocker locker(sVnodeMutex);
3218 
3219 	vnode = lookup_vnode(volume->id, vnodeID);
3220 	if (vnode == NULL)
3221 		return B_ENTRY_NOT_FOUND;
3222 
3223 	if (vnode->covered_by != NULL) {
3224 		// this vnode is in use -- the MutexLocker will release
3225 		// sVnodeMutex when we return
3226 		return B_BUSY;
3227 	}
3228 
3229 	vnode->remove = true;
3230 	if (vnode->unpublished) {
3231 		// prepare the vnode for deletion
3232 		vnode->busy = true;
3233 		remove = true;
3234 	}
3235 
3236 	locker.Unlock();
3237 
3238 	if (remove) {
3239 		// if the vnode hasn't been published yet, we delete it here
3240 		atomic_add(&vnode->ref_count, -1);
3241 		free_vnode(vnode, true);
3242 	}
3243 
3244 	return B_OK;
3245 }
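
// Editor's note: a file system's unlink-style hook would typically call
// remove_vnode() after removing the directory entry; unless the node is
// still unpublished (handled above), it is only freed once the last
// reference to it is put:
//
//	status = remove_vnode(volume, nodeID);	// marks the vnode for deletion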
3246 
3247 
3248 extern "C" status_t
3249 unremove_vnode(fs_volume *volume, ino_t vnodeID)
3250 {
3251 	struct vnode *vnode;
3252 
3253 	mutex_lock(&sVnodeMutex);
3254 
3255 	vnode = lookup_vnode(volume->id, vnodeID);
3256 	if (vnode)
3257 		vnode->remove = false;
3258 
3259 	mutex_unlock(&sVnodeMutex);
3260 	return B_OK;
3261 }
3262 
3263 
3264 extern "C" status_t
3265 get_vnode_removed(fs_volume *volume, ino_t vnodeID, bool* removed)
3266 {
3267 	mutex_lock(&sVnodeMutex);
3268 
3269 	status_t result;
3270 
3271 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3272 		if (removed)
3273 			*removed = vnode->remove;
3274 		result = B_OK;
3275 	} else
3276 		result = B_BAD_VALUE;
3277 
3278 	mutex_unlock(&sVnodeMutex);
3279 	return result;
3280 }
3281 
3282 
3283 extern "C" status_t
3284 read_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3285 	size_t *_numBytes, bool fsReenter)
3286 {
3287 	struct file_descriptor *descriptor;
3288 	struct vnode *vnode;
3289 
3290 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3291 	if (descriptor == NULL)
3292 		return B_FILE_ERROR;
3293 
3294 	status_t status = FS_CALL(vnode, read_pages, descriptor->cookie, pos, vecs,
3295 		count, _numBytes, fsReenter);
3296 
3297 	put_fd(descriptor);
3298 	return status;
3299 }
3300 
3301 
3302 extern "C" status_t
3303 write_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3304 	size_t *_numBytes, bool fsReenter)
3305 {
3306 	struct file_descriptor *descriptor;
3307 	struct vnode *vnode;
3308 
3309 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3310 	if (descriptor == NULL)
3311 		return B_FILE_ERROR;
3312 
3313 	status_t status = FS_CALL(vnode, write_pages, descriptor->cookie, pos, vecs,
3314 		count, _numBytes, fsReenter);
3315 
3316 	put_fd(descriptor);
3317 	return status;
3318 }
3319 
3320 
3321 extern "C" status_t
3322 read_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3323 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3324 	size_t *_bytes)
3325 {
3326 	struct file_descriptor *descriptor;
3327 	struct vnode *vnode;
3328 
3329 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3330 	if (descriptor == NULL)
3331 		return B_FILE_ERROR;
3332 
3333 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3334 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3335 		false);
3336 
3337 	put_fd(descriptor);
3338 	return status;
3339 }
3340 
3341 
3342 extern "C" status_t
3343 write_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3344 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3345 	size_t *_bytes)
3346 {
3347 	struct file_descriptor *descriptor;
3348 	struct vnode *vnode;
3349 
3350 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3351 	if (descriptor == NULL)
3352 		return B_FILE_ERROR;
3353 
3354 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3355 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3356 		true);
3357 
3358 	put_fd(descriptor);
3359 	return status;
3360 }
3361 
3362 
3363 //	#pragma mark - private VFS API
3364 //	Functions the VFS exports for other parts of the kernel
3365 
3366 
3367 /*! Acquires another reference to the vnode that has to be released
3368 	by calling vfs_put_vnode().
3369 */
3370 void
3371 vfs_acquire_vnode(struct vnode *vnode)
3372 {
3373 	inc_vnode_ref_count(vnode);
3374 }
3375 
3376 
3377 /*! This is currently called from file_cache_create() only.
3378 	It's probably a temporary solution as long as devfs requires that
3379 	fs_read_pages()/fs_write_pages() are called with the standard
3380 	open cookie and not with a device cookie.
3381 	If that's done differently, remove this call; it has no other
3382 	purpose.
3383 */
3384 extern "C" status_t
3385 vfs_get_cookie_from_fd(int fd, void **_cookie)
3386 {
3387 	struct file_descriptor *descriptor;
3388 
3389 	descriptor = get_fd(get_current_io_context(true), fd);
3390 	if (descriptor == NULL)
3391 		return B_FILE_ERROR;
3392 
3393 	*_cookie = descriptor->cookie;
3394 	return B_OK;
3395 }
3396 
3397 
3398 extern "C" int
3399 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode **vnode)
3400 {
3401 	*vnode = get_vnode_from_fd(fd, kernel);
3402 
3403 	if (*vnode == NULL)
3404 		return B_FILE_ERROR;
3405 
3406 	return B_NO_ERROR;
3407 }
3408 
3409 
3410 extern "C" status_t
3411 vfs_get_vnode_from_path(const char *path, bool kernel, struct vnode **_vnode)
3412 {
3413 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3414 		path, kernel));
3415 
3416 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3417 	if (pathBuffer.InitCheck() != B_OK)
3418 		return B_NO_MEMORY;
3419 
3420 	char *buffer = pathBuffer.LockBuffer();
3421 	strlcpy(buffer, path, pathBuffer.BufferSize());
3422 
3423 	struct vnode *vnode;
3424 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3425 	if (status < B_OK)
3426 		return status;
3427 
3428 	*_vnode = vnode;
3429 	return B_OK;
3430 }
3431 
3432 
3433 extern "C" status_t
3434 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode **_vnode)
3435 {
3436 	struct vnode *vnode;
3437 
3438 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3439 	if (status < B_OK)
3440 		return status;
3441 
3442 	*_vnode = vnode;
3443 	return B_OK;
3444 }
3445 
3446 
3447 extern "C" status_t
3448 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
3449 	const char *name, struct vnode **_vnode)
3450 {
3451 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
3452 }
3453 
3454 
3455 extern "C" void
3456 vfs_vnode_to_node_ref(struct vnode *vnode, dev_t *_mountID, ino_t *_vnodeID)
3457 {
3458 	*_mountID = vnode->device;
3459 	*_vnodeID = vnode->id;
3460 }
3461 
3462 
3463 /*!	Looks up a vnode with the given mount and vnode ID.
3464 	Must only be used with "in-use" vnodes as it doesn't grab a reference
3465 	to the node.
3466 	It's currently only used by file_cache_create().
3467 */
3468 extern "C" status_t
3469 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode)
3470 {
3471 	mutex_lock(&sVnodeMutex);
3472 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
3473 	mutex_unlock(&sVnodeMutex);
3474 
3475 	if (vnode == NULL)
3476 		return B_ERROR;
3477 
3478 	*_vnode = vnode;
3479 	return B_OK;
3480 }
3481 
3482 
3483 extern "C" status_t
3484 vfs_get_fs_node_from_path(fs_volume *volume, const char *path, bool kernel,
3485 	void **_node)
3486 {
3487 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
3488 		volume, path, kernel));
3489 
3490 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3491 	if (pathBuffer.InitCheck() != B_OK)
3492 		return B_NO_MEMORY;
3493 
3494 	fs_mount *mount;
3495 	status_t status = get_mount(volume->id, &mount);
3496 	if (status < B_OK)
3497 		return status;
3498 
3499 	char *buffer = pathBuffer.LockBuffer();
3500 	strlcpy(buffer, path, pathBuffer.BufferSize());
3501 
3502 	struct vnode *vnode = mount->root_vnode;
3503 
3504 	if (buffer[0] == '/')
3505 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
3506 	else {
3507 		inc_vnode_ref_count(vnode);
3508 			// vnode_path_to_vnode() releases a reference to the starting vnode
3509 		status = vnode_path_to_vnode(vnode, buffer, true, 0, kernel, &vnode,
3510 			NULL);
3511 	}
3512 
3513 	put_mount(mount);
3514 
3515 	if (status < B_OK)
3516 		return status;
3517 
3518 	if (vnode->device != volume->id) {
3519 		// wrong mount ID - must not gain access on foreign file system nodes
3520 		put_vnode(vnode);
3521 		return B_BAD_VALUE;
3522 	}
3523 
3524 	// Use get_vnode() to resolve the cookie for the right layer.
3525 	status = get_vnode(volume, vnode->id, _node);
3526 	put_vnode(vnode);
3527 
3528 	return status;
3529 }
3530 
3531 
3532 status_t
3533 vfs_read_stat(int fd, const char *path, bool traverseLeafLink,
3534 	struct stat *stat, bool kernel)
3535 {
3536 	status_t status;
3537 
3538 	if (path) {
3539 		// path given: get the stat of the node referred to by (fd, path)
3540 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
3541 		if (pathBuffer.InitCheck() != B_OK)
3542 			return B_NO_MEMORY;
3543 
3544 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
3545 			traverseLeafLink, stat, kernel);
3546 	} else {
3547 		// no path given: get the FD and use the FD operation
3548 		struct file_descriptor *descriptor
3549 			= get_fd(get_current_io_context(kernel), fd);
3550 		if (descriptor == NULL)
3551 			return B_FILE_ERROR;
3552 
3553 		if (descriptor->ops->fd_read_stat)
3554 			status = descriptor->ops->fd_read_stat(descriptor, stat);
3555 		else
3556 			status = EOPNOTSUPP;
3557 
3558 		put_fd(descriptor);
3559 	}
3560 
3561 	return status;
3562 }
3563 
3564 
3565 /*!	Finds the full path to the file that contains the module \a moduleName,
3566 	puts it into \a pathBuffer, and returns B_OK for success.
3567 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
3568 	or \c B_ENTRY_NOT_FOUND if no file could be found.
3569 	\a pathBuffer is clobbered in any case and must not be relied on if this
3570 	function returns unsuccessfully.
3571 	\a basePath and \a pathBuffer must not point to the same space.
3572 */
3573 status_t
3574 vfs_get_module_path(const char *basePath, const char *moduleName,
3575 	char *pathBuffer, size_t bufferSize)
3576 {
3577 	struct vnode *dir, *file;
3578 	status_t status;
3579 	size_t length;
3580 	char *path;
3581 
3582 	if (bufferSize == 0
3583 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
3584 		return B_BUFFER_OVERFLOW;
3585 
3586 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
3587 	if (status < B_OK)
3588 		return status;
3589 
3590 	// the path buffer had been clobbered by the above call
3591 	length = strlcpy(pathBuffer, basePath, bufferSize);
3592 	if (pathBuffer[length - 1] != '/')
3593 		pathBuffer[length++] = '/';
3594 
3595 	path = pathBuffer + length;
3596 	bufferSize -= length;
3597 
3598 	while (moduleName) {
3599 		char *nextPath = strchr(moduleName, '/');
3600 		if (nextPath == NULL)
3601 			length = strlen(moduleName);
3602 		else {
3603 			length = nextPath - moduleName;
3604 			nextPath++;
3605 		}
3606 
3607 		if (length + 1 >= bufferSize) {
3608 			status = B_BUFFER_OVERFLOW;
3609 			goto err;
3610 		}
3611 
3612 		memcpy(path, moduleName, length);
3613 		path[length] = '\0';
3614 		moduleName = nextPath;
3615 
3616 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
3617 		if (status < B_OK) {
3618 			// vnode_path_to_vnode() has already released the reference to dir
3619 			return status;
3620 		}
3621 
3622 		if (S_ISDIR(file->type)) {
3623 			// goto the next directory
3624 			path[length] = '/';
3625 			path[length + 1] = '\0';
3626 			path += length + 1;
3627 			bufferSize -= length + 1;
3628 
3629 			dir = file;
3630 		} else if (S_ISREG(file->type)) {
3631 			// it's a file so it should be what we've searched for
3632 			put_vnode(file);
3633 
3634 			return B_OK;
3635 		} else {
3636 			TRACE(("vfs_get_module_path(): something is strange here: %d...\n",
3637 				file->type));
3638 			status = B_ERROR;
3639 			dir = file;
3640 			goto err;
3641 		}
3642 	}
3643 
3644 	// if we got here, the moduleName just pointed to a directory, not to
3645 	// a real module - what should we do in this case?
3646 	status = B_ENTRY_NOT_FOUND;
3647 
3648 err:
3649 	put_vnode(dir);
3650 	return status;
3651 }
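
// Editor's example (hypothetical paths): a request like
//
//	vfs_get_module_path("/boot/system/add-ons/kernel",
//		"bus_managers/pci/v1", buffer, sizeof(buffer));
//
// walks "bus_managers", then "pci"; as soon as a component resolves to a
// regular file, that file's path is the result -- a single module file may
// implement several sub-modules, so the remaining components need not
// exist as directories.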
3652 
3653 
3654 /*!	\brief Normalizes a given path.
3655 
3656 	The path must refer to an existing or non-existing entry in an existing
3657 	directory; that is, after chopping off the leaf component, the remaining
3658 	path must refer to an existing directory.
3659 
3660 	The returned path will be canonical in that it will be absolute, will not
3661 	contain any "." or ".." components or duplicate occurrences of '/'s,
3662 	and none of the directory components will be symbolic links.
3663 
3664 	Any two paths referring to the same entry will result in the same
3665 	normalized path (well, that is pretty much the definition of `normalized',
3666 	isn't it :-).
3667 
3668 	\param path The path to be normalized.
3669 	\param buffer The buffer into which the normalized path will be written.
3670 		   May be the same one as \a path.
3671 	\param bufferSize The size of \a buffer.
3672 	\param kernel \c true, if the IO context of the kernel shall be used,
3673 		   otherwise that of the team this thread belongs to. Only relevant,
3674 		   if the path is relative (to get the CWD).
3675 	\return \c B_OK if everything went fine, another error code otherwise.
3676 */
3677 status_t
3678 vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
3679 	bool kernel)
3680 {
3681 	if (!path || !buffer || bufferSize < 1)
3682 		return B_BAD_VALUE;
3683 
3684 	TRACE(("vfs_normalize_path(`%s')\n", path));
3685 
3686 	// copy the supplied path to the stack, so it can be modified
3687 	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
3688 	if (mutablePathBuffer.InitCheck() != B_OK)
3689 		return B_NO_MEMORY;
3690 
3691 	char *mutablePath = mutablePathBuffer.LockBuffer();
3692 	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
3693 		return B_NAME_TOO_LONG;
3694 
3695 	// get the dir vnode and the leaf name
3696 	struct vnode *dirNode;
3697 	char leaf[B_FILE_NAME_LENGTH];
3698 	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
3699 	if (error != B_OK) {
3700 		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
3701 		return error;
3702 	}
3703 
3704 	// if the leaf is "." or "..", we directly get the correct directory
3705 	// vnode and ignore the leaf later
3706 	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
3707 	if (isDir) {
3708 		error = vnode_path_to_vnode(dirNode, leaf, false, 0, kernel, &dirNode,
3709 			NULL);
3710 	}
3711 	if (error != B_OK) {
3712 		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
3713 			strerror(error)));
3714 		return error;
3715 	}
3716 
3717 	// get the directory path
3718 	error = dir_vnode_to_path(dirNode, buffer, bufferSize, kernel);
3719 	put_vnode(dirNode);
3720 	if (error < B_OK) {
3721 		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
3722 		return error;
3723 	}
3724 
3725 	// append the leaf name
3726 	if (!isDir) {
3727 		// insert a directory separator only if this is not the file system root
3728 		if ((strcmp(buffer, "/") != 0
3729 			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
3730 			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
3731 			return B_NAME_TOO_LONG;
3732 		}
3733 	}
3734 
3735 	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
3736 	return B_OK;
3737 }
3738 
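// Usage sketch (illustrative only; the literal path and the expected result
// are assumptions for this example, not taken from this file):
//
//	char normalized[B_PATH_NAME_LENGTH];
//	status_t error = vfs_normalize_path("home/../boot//home", normalized,
//		sizeof(normalized), true);
//	// on success, "normalized" holds an absolute path such as "/boot/home",
//	// free of "." or ".." components, duplicate slashes, and symlinked
//	// directory components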
3739 
3740 extern "C" void
3741 vfs_put_vnode(struct vnode *vnode)
3742 {
3743 	put_vnode(vnode);
3744 }
3745 
3746 
3747 extern "C" status_t
3748 vfs_get_cwd(dev_t *_mountID, ino_t *_vnodeID)
3749 {
3750 	// Get current working directory from io context
3751 	struct io_context *context = get_current_io_context(false);
3752 	status_t status = B_OK;
3753 
3754 	mutex_lock(&context->io_mutex);
3755 
3756 	if (context->cwd != NULL) {
3757 		*_mountID = context->cwd->device;
3758 		*_vnodeID = context->cwd->id;
3759 	} else
3760 		status = B_ERROR;
3761 
3762 	mutex_unlock(&context->io_mutex);
3763 	return status;
3764 }
3765 
3766 
3767 status_t
3768 vfs_unmount(dev_t mountID, uint32 flags)
3769 {
3770 	return fs_unmount(NULL, mountID, flags, true);
3771 }
3772 
3773 
3774 extern "C" status_t
3775 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
3776 {
3777 	struct vnode *vnode;
3778 
3779 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
3780 	if (status < B_OK)
3781 		return status;
3782 
3783 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3784 	put_vnode(vnode);
3785 	return B_OK;
3786 }
3787 
3788 
3789 extern "C" void
3790 vfs_free_unused_vnodes(int32 level)
3791 {
3792 	vnode_low_memory_handler(NULL, level);
3793 }
3794 
3795 
3796 extern "C" bool
3797 vfs_can_page(struct vnode *vnode, void *cookie)
3798 {
3799 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
3800 
3801 	if (HAS_FS_CALL(vnode, can_page))
3802 		return FS_CALL(vnode, can_page, cookie);
3803 	return false;
3804 }
3805 
3806 
3807 extern "C" status_t
3808 vfs_read_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
3809 	size_t count, size_t *_numBytes, bool fsReenter)
3810 {
3811 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3812 
3813 	return FS_CALL(vnode, read_pages, cookie, pos, vecs, count, _numBytes,
3814 		fsReenter);
3815 }
3816 
3817 
3818 extern "C" status_t
3819 vfs_write_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
3820 	size_t count, size_t *_numBytes, bool fsReenter)
3821 {
3822 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3823 
3824 	return FS_CALL(vnode, write_pages, cookie, pos, vecs, count, _numBytes,
3825 		fsReenter);
3826 }
3827 
3828 
3829 /*!	Gets the vnode's vm_cache object. If the vnode doesn't have one yet, it
3830 	will be created, provided \a allocate is \c true.
3831 	On success, this also acquires a reference to the cache it returns; the
3832 	caller is responsible for releasing it.
3833 */
3834 extern "C" status_t
3835 vfs_get_vnode_cache(struct vnode *vnode, vm_cache **_cache, bool allocate)
3836 {
3837 	if (vnode->cache != NULL) {
3838 		vm_cache_acquire_ref(vnode->cache);
3839 		*_cache = vnode->cache;
3840 		return B_OK;
3841 	}
3842 
3843 	mutex_lock(&sVnodeMutex);
3844 
3845 	status_t status = B_OK;
3846 
3847 	// The cache could have been created in the meantime
3848 	if (vnode->cache == NULL) {
3849 		if (allocate) {
3850 			// TODO: actually the vnode needs to be busy already here, or
3851 			//	else this won't work...
3852 			bool wasBusy = vnode->busy;
3853 			vnode->busy = true;
3854 			mutex_unlock(&sVnodeMutex);
3855 
3856 			status = vm_create_vnode_cache(vnode, &vnode->cache);
3857 
3858 			mutex_lock(&sVnodeMutex);
3859 			vnode->busy = wasBusy;
3860 		} else
3861 			status = B_BAD_VALUE;
3862 	}
3863 
3864 	if (status == B_OK) {
3865 		vm_cache_acquire_ref(vnode->cache);
3866 		*_cache = vnode->cache;
3867 	}
3868 
3869 	mutex_unlock(&sVnodeMutex);
3870 	return status;
3871 }
3872 
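// Usage sketch (illustrative): the caller owns one reference to the returned
// cache and is expected to release it; vm_cache_release_ref() is assumed here
// to be the matching release function.
//
//	vm_cache *cache;
//	if (vfs_get_vnode_cache(vnode, &cache, true) == B_OK) {
//		// ... use the cache ...
//		vm_cache_release_ref(cache);
//	}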
3873 
3874 status_t
3875 vfs_get_file_map(struct vnode *vnode, off_t offset, size_t size,
3876 	file_io_vec *vecs, size_t *_count)
3877 {
3878 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
3879 
3880 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
3881 }
3882 
3883 
3884 status_t
3885 vfs_stat_vnode(struct vnode *vnode, struct stat *stat)
3886 {
3887 	status_t status = FS_CALL(vnode, read_stat, stat);
3888 
3889 	// fill in the st_dev and st_ino fields
3890 	if (status == B_OK) {
3891 		stat->st_dev = vnode->device;
3892 		stat->st_ino = vnode->id;
3893 	}
3894 
3895 	return status;
3896 }
3897 
3898 
3899 status_t
3900 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat *stat)
3901 {
3902 	struct vnode *vnode;
3903 	status_t status = get_vnode(device, inode, &vnode, true, false);
3904 	if (status < B_OK)
3905 		return status;
3906 
3907 	status = FS_CALL(vnode, read_stat, stat);
3908 	put_vnode(vnode);
3909 	return status;
3910 }
3911 
3912 
3913 status_t
3914 vfs_get_vnode_name(struct vnode *vnode, char *name, size_t nameSize)
3915 {
3916 	return get_vnode_name(vnode, NULL, name, nameSize, true);
3917 }
3918 
3919 
3920 status_t
3921 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
3922 	char *path, size_t pathLength)
3923 {
3924 	struct vnode *vnode;
3925 	status_t status;
3926 
3927 	// filter invalid leaf names
3928 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
3929 		return B_BAD_VALUE;
3930 
3931 	// get the vnode matching the dir's node_ref
3932 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
3933 		// special cases "." and "..": we can directly get the vnode of the
3934 		// referenced directory
3935 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
3936 		leaf = NULL;
3937 	} else
3938 		status = get_vnode(device, inode, &vnode, true, false);
3939 	if (status < B_OK)
3940 		return status;
3941 
3942 	// get the directory path
3943 	status = dir_vnode_to_path(vnode, path, pathLength, true);
3944 	put_vnode(vnode);
3945 		// we don't need the vnode anymore
3946 	if (status < B_OK)
3947 		return status;
3948 
3949 	// append the leaf name
3950 	if (leaf) {
3951 		// insert a directory separator if this is not the file system root
3952 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
3953 				>= pathLength)
3954 			|| strlcat(path, leaf, pathLength) >= pathLength) {
3955 			return B_NAME_TOO_LONG;
3956 		}
3957 	}
3958 
3959 	return B_OK;
3960 }
3961 
3962 
3963 /*!	If the given descriptor locked its vnode, that lock will be released. */
3964 void
3965 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3966 {
3967 	struct vnode *vnode = fd_vnode(descriptor);
3968 
3969 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3970 		vnode->mandatory_locked_by = NULL;
3971 }
3972 
3973 
3974 /*!	Closes all file descriptors of the specified I/O context that
3975 	have the O_CLOEXEC flag set.
3976 */
3977 void
3978 vfs_exec_io_context(void *_context)
3979 {
3980 	struct io_context *context = (struct io_context *)_context;
3981 	uint32 i;
3982 
3983 	for (i = 0; i < context->table_size; i++) {
3984 		mutex_lock(&context->io_mutex);
3985 
3986 		struct file_descriptor *descriptor = context->fds[i];
3987 		bool remove = false;
3988 
3989 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
3990 			context->fds[i] = NULL;
3991 			context->num_used_fds--;
3992 
3993 			remove = true;
3994 		}
3995 
3996 		mutex_unlock(&context->io_mutex);
3997 
3998 		if (remove) {
3999 			close_fd(descriptor);
4000 			put_fd(descriptor);
4001 		}
4002 	}
4003 }
4004 
4005 
4006 /*! Sets up a new io_context structure, and inherits the properties
4007 	of the parent io_context if it is given.
4008 */
4009 void *
4010 vfs_new_io_context(void *_parentContext)
4011 {
4012 	size_t tableSize;
4013 	struct io_context *context;
4014 	struct io_context *parentContext;
4015 
4016 	context = (io_context *)malloc(sizeof(struct io_context));
4017 	if (context == NULL)
4018 		return NULL;
4019 
4020 	memset(context, 0, sizeof(struct io_context));
4021 
4022 	parentContext = (struct io_context *)_parentContext;
4023 	if (parentContext)
4024 		tableSize = parentContext->table_size;
4025 	else
4026 		tableSize = DEFAULT_FD_TABLE_SIZE;
4027 
4028 	// allocate space for FDs and their close-on-exec flag
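	// Memory layout of the single allocation (descriptive sketch):
	//   [file_descriptor* x tableSize][select_info* x tableSize]
	//   [close-on-exec bitmap: one bit per FD, (tableSize + 7) / 8 bytes]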
4029 	context->fds = (file_descriptor**)malloc(
4030 		sizeof(struct file_descriptor*) * tableSize
4031 		+ sizeof(struct select_sync*) * tableSize
4032 		+ (tableSize + 7) / 8);
4033 	if (context->fds == NULL) {
4034 		free(context);
4035 		return NULL;
4036 	}
4037 
4038 	context->select_infos = (select_info**)(context->fds + tableSize);
4039 	context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize);
4040 
4041 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4042 		+ sizeof(struct select_sync*) * tableSize
4043 		+ (tableSize + 7) / 8);
4044 
4045 	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
4046 		free(context->fds);
4047 		free(context);
4048 		return NULL;
4049 	}
4050 
4051 	// Copy all parent file descriptors
4052 
4053 	if (parentContext) {
4054 		size_t i;
4055 
4056 		mutex_lock(&parentContext->io_mutex);
4057 
4058 		benaphore_lock(&sIOContextRootLock);
4059 		context->root = parentContext->root;
4060 		if (context->root)
4061 			inc_vnode_ref_count(context->root);
4062 		benaphore_unlock(&sIOContextRootLock);
4063 
4064 		context->cwd = parentContext->cwd;
4065 		if (context->cwd)
4066 			inc_vnode_ref_count(context->cwd);
4067 
4068 		for (i = 0; i < tableSize; i++) {
4069 			struct file_descriptor *descriptor = parentContext->fds[i];
4070 
4071 			if (descriptor != NULL) {
4072 				context->fds[i] = descriptor;
4073 				context->num_used_fds++;
4074 				atomic_add(&descriptor->ref_count, 1);
4075 				atomic_add(&descriptor->open_count, 1);
4076 
4077 				if (fd_close_on_exec(parentContext, i))
4078 					fd_set_close_on_exec(context, i, true);
4079 			}
4080 		}
4081 
4082 		mutex_unlock(&parentContext->io_mutex);
4083 	} else {
4084 		context->root = sRoot;
4085 		context->cwd = sRoot;
4086 
4087 		if (context->root)
4088 			inc_vnode_ref_count(context->root);
4089 
4090 		if (context->cwd)
4091 			inc_vnode_ref_count(context->cwd);
4092 	}
4093 
4094 	context->table_size = tableSize;
4095 
4096 	list_init(&context->node_monitors);
4097 	context->max_monitors = DEFAULT_NODE_MONITORS;
4098 
4099 	return context;
4100 }
4101 
4102 
4103 status_t
4104 vfs_free_io_context(void *_ioContext)
4105 {
4106 	struct io_context *context = (struct io_context *)_ioContext;
4107 	uint32 i;
4108 
4109 	if (context->root)
4110 		dec_vnode_ref_count(context->root, false);
4111 
4112 	if (context->cwd)
4113 		dec_vnode_ref_count(context->cwd, false);
4114 
4115 	mutex_lock(&context->io_mutex);
4116 
4117 	for (i = 0; i < context->table_size; i++) {
4118 		if (struct file_descriptor *descriptor = context->fds[i]) {
4119 			close_fd(descriptor);
4120 			put_fd(descriptor);
4121 		}
4122 	}
4123 
4124 	mutex_destroy(&context->io_mutex);
4125 
4126 	remove_node_monitors(context);
4127 	free(context->fds);
4128 	free(context);
4129 
4130 	return B_OK;
4131 }
4132 
4133 
4134 static status_t
4135 vfs_resize_fd_table(struct io_context *context, const int newSize)
4136 {
4137 	struct file_descriptor **fds;
4138 
4139 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4140 		return EINVAL;
4141 
4142 	MutexLocker locker(context->io_mutex);
4143 
4144 	int oldSize = context->table_size;
4145 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4146 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
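	// the bitmaps hold one close-on-exec bit per FD, rounded up to whole bytes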
4147 
4148 	// If the tables shrink, make sure none of the fds being dropped are in use.
4149 	if (newSize < oldSize) {
4150 		for (int i = oldSize; i-- > newSize;) {
4151 			if (context->fds[i])
4152 				return EBUSY;
4153 		}
4154 	}
4155 
4156 	// store pointers to the old tables
4157 	file_descriptor** oldFDs = context->fds;
4158 	select_info** oldSelectInfos = context->select_infos;
4159 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4160 
4161 	// allocate new tables
4162 	file_descriptor** newFDs = (file_descriptor**)malloc(
4163 		sizeof(struct file_descriptor*) * newSize
4164 		+ sizeof(struct select_sync*) * newSize
4165 		+ newCloseOnExitBitmapSize);
4166 	if (newFDs == NULL)
4167 		return ENOMEM;
4168 
4169 	context->fds = newFDs;
4170 	context->select_infos = (select_info**)(context->fds + newSize);
4171 	context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize);
4172 	context->table_size = newSize;
4173 
4174 	// copy entries from old tables
4175 	int toCopy = min_c(oldSize, newSize);
4176 
4177 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4178 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4179 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4180 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4181 
4182 	// clear additional entries, if the tables grow
4183 	if (newSize > oldSize) {
4184 		memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
4185 		memset(context->select_infos + oldSize, 0,
4186 			sizeof(void *) * (newSize - oldSize));
4187 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4188 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4189 	}
4190 
4191 	free(oldFDs);
4192 
4193 	return B_OK;
4194 }
4195 
4196 
4197 static status_t
4198 vfs_resize_monitor_table(struct io_context *context, const int newSize)
4199 {
4201 	int	status = B_OK;
4202 
4203 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4204 		return EINVAL;
4205 
4206 	mutex_lock(&context->io_mutex);
4207 
4208 	if ((size_t)newSize < context->num_monitors) {
4209 		status = EBUSY;
4210 		goto out;
4211 	}
4212 	context->max_monitors = newSize;
4213 
4214 out:
4215 	mutex_unlock(&context->io_mutex);
4216 	return status;
4217 }
4218 
4219 
4220 int
4221 vfs_getrlimit(int resource, struct rlimit * rlp)
4222 {
4223 	if (!rlp)
4224 		return B_BAD_ADDRESS;
4225 
4226 	switch (resource) {
4227 		case RLIMIT_NOFILE:
4228 		{
4229 			struct io_context *ioctx = get_current_io_context(false);
4230 
4231 			mutex_lock(&ioctx->io_mutex);
4232 
4233 			rlp->rlim_cur = ioctx->table_size;
4234 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4235 
4236 			mutex_unlock(&ioctx->io_mutex);
4237 
4238 			return 0;
4239 		}
4240 
4241 		case RLIMIT_NOVMON:
4242 		{
4243 			struct io_context *ioctx = get_current_io_context(false);
4244 
4245 			mutex_lock(&ioctx->io_mutex);
4246 
4247 			rlp->rlim_cur = ioctx->max_monitors;
4248 			rlp->rlim_max = MAX_NODE_MONITORS;
4249 
4250 			mutex_unlock(&ioctx->io_mutex);
4251 
4252 			return 0;
4253 		}
4254 
4255 		default:
4256 			return EINVAL;
4257 	}
4258 }
4259 
4260 
4261 int
4262 vfs_setrlimit(int resource, const struct rlimit * rlp)
4263 {
4264 	if (!rlp)
4265 		return B_BAD_ADDRESS;
4266 
4267 	switch (resource) {
4268 		case RLIMIT_NOFILE:
4269 			/* TODO: check getuid() */
4270 			if (rlp->rlim_max != RLIM_SAVED_MAX
4271 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4272 				return EPERM;
4273 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
4274 
4275 		case RLIMIT_NOVMON:
4276 			/* TODO: check getuid() */
4277 			if (rlp->rlim_max != RLIM_SAVED_MAX
4278 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4279 				return EPERM;
4280 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
4281 
4282 		default:
4283 			return EINVAL;
4284 	}
4285 }
4286 
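// Usage sketch (illustrative; the chosen soft limit is an assumption): growing
// the FD table through the rlimit interface backed by the two functions above.
//
//	struct rlimit rl;
//	if (vfs_getrlimit(RLIMIT_NOFILE, &rl) == 0) {
//		rl.rlim_cur = 512;	// new soft limit; must stay <= rl.rlim_max
//		vfs_setrlimit(RLIMIT_NOFILE, &rl);
//	}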
4287 
4288 status_t
4289 vfs_init(kernel_args *args)
4290 {
4291 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
4292 		&vnode_compare, &vnode_hash);
4293 	if (sVnodeTable == NULL)
4294 		panic("vfs_init: error creating vnode hash table\n");
4295 
4296 	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));
4297 
4298 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
4299 		&mount_compare, &mount_hash);
4300 	if (sMountsTable == NULL)
4301 		panic("vfs_init: error creating mounts hash table\n");
4302 
4303 	node_monitor_init();
4304 
4305 	sRoot = NULL;
4306 
4307 	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
4308 		panic("vfs_init: error allocating file systems lock\n");
4309 
4310 	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
4311 		panic("vfs_init: error allocating mount op lock\n");
4312 
4313 	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
4314 		panic("vfs_init: error allocating mount lock\n");
4315 
4316 	if (mutex_init(&sVnodeCoveredByMutex, "vfs_vnode_covered_by_lock") < 0)
4317 		panic("vfs_init: error allocating vnode::covered_by lock\n");
4318 
4319 	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
4320 		panic("vfs_init: error allocating vnode lock\n");
4321 
4322 	if (benaphore_init(&sIOContextRootLock, "io_context::root lock") < 0)
4323 		panic("vfs_init: error allocating io_context::root lock\n");
4324 
4325 	if (block_cache_init() != B_OK)
4326 		return B_ERROR;
4327 
4328 #ifdef ADD_DEBUGGER_COMMANDS
4329 	// add some debugger commands
4330 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
4331 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
4332 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
4333 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
4334 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
4335 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
4336 	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
4337 #endif
4338 
4339 	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);
4340 
4341 	return file_cache_init();
4342 }
4343 
4344 
4345 //	#pragma mark - fd_ops implementations
4346 
4347 
4348 /*!
4349 	Calls fs_open() on the given vnode and returns a new
4350 	file descriptor for it
4351 */
4352 static int
4353 open_vnode(struct vnode *vnode, int openMode, bool kernel)
4354 {
4355 	void *cookie;
4356 	int status;
4357 
4358 	status = FS_CALL(vnode, open, openMode, &cookie);
4359 	if (status < 0)
4360 		return status;
4361 
4362 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4363 	if (status < 0) {
4364 		FS_CALL(vnode, close, cookie);
4365 		FS_CALL(vnode, free_cookie, cookie);
4366 	}
4367 	return status;
4368 }
4369 
4370 
4371 /*!
4372 	Creates the entry \a name in \a directory (or opens it, if it already
4373 	exists and O_EXCL is not set) and returns a new file descriptor for it
4374 */
4375 static int
4376 create_vnode(struct vnode *directory, const char *name, int openMode,
4377 	int perms, bool kernel)
4378 {
4379 	void *cookie;
4380 	ino_t newID;
4381 	int status = B_ERROR;
4382 	struct vnode *vnode;
4383 
4384 	// This is somewhat tricky: If the entry already exists, the FS responsible
4385 	// for the directory might not necessarily be the one also responsible for
4386 	// the node the entry refers to. So we can actually never call the create()
4387 	// hook without O_EXCL. Instead we try to look the entry up first. If it
4388 	// already exists, we just open the node (unless O_EXCL), otherwise we call
4389 	// create() with O_EXCL. This introduces a race condition, since someone
4390 	// else might have created the entry in the meantime. We hope the
4391 	// respective FS returns the correct error code and retry (up to 3 times).
4392 
4393 	for (int i = 0; i < 3 && status != B_OK; i++) {
4394 		// look the node up
4395 		status = lookup_dir_entry(directory, name, &vnode);
4396 		if (status == B_OK) {
4397 			VNodePutter putter(vnode);
4398 
4399 			if ((openMode & O_EXCL) != 0)
4400 				return B_FILE_EXISTS;
4401 
4402 			status = open_vnode(vnode, openMode & ~O_CREAT, kernel);
4403 			// on success keep the vnode reference for the FD
4404 			if (status >= 0)
4405 				putter.Detach();
4406 
4407 			return status;
4408 		}
4409 
4410 		// it doesn't exist yet -- try to create it
4411 
4412 		if (!HAS_FS_CALL(directory, create))
4413 			return EROFS;
4414 
4415 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
4416 			&cookie, &newID);
4417 		if (status != B_OK
4418 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
4419 			return status;
4420 		}
4421 	}
4422 
4423 	if (status != B_OK)
4424 		return status;
4425 
4426 	// the node has been created successfully
4427 
4428 	mutex_lock(&sVnodeMutex);
4429 	vnode = lookup_vnode(directory->device, newID);
4430 	mutex_unlock(&sVnodeMutex);
4431 
4432 	if (vnode == NULL) {
4433 		panic("vfs: fs_create() returned success but there is no vnode, "
4434 			"mount ID %ld!\n", directory->device);
4435 		return B_BAD_VALUE;
4436 	}
4437 
4438 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
4439 		return status;
4440 
4441 	// something went wrong, clean up
4442 
4443 	FS_CALL(vnode, close, cookie);
4444 	FS_CALL(vnode, free_cookie, cookie);
4445 	put_vnode(vnode);
4446 
4447 	FS_CALL(directory, unlink, name);
4448 
4449 	return status;
4450 }
4451 
4452 
4453 /*! Calls fs open_dir() on the given vnode and returns a new
4454 	file descriptor for it
4455 */
4456 static int
4457 open_dir_vnode(struct vnode *vnode, bool kernel)
4458 {
4459 	void *cookie;
4460 	int status;
4461 
4462 	status = FS_CALL(vnode, open_dir, &cookie);
4463 	if (status < B_OK)
4464 		return status;
4465 
4466 	// file is opened, create a fd
4467 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
4468 	if (status >= 0)
4469 		return status;
4470 
4471 	FS_CALL(vnode, close_dir, cookie);
4472 	FS_CALL(vnode, free_dir_cookie, cookie);
4473 
4474 	return status;
4475 }
4476 
4477 
4478 /*! Calls fs open_attr_dir() on the given vnode and returns a new
4479 	file descriptor for it.
4480 	Used by attr_dir_open(), and attr_dir_open_fd().
4481 */
4482 static int
4483 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
4484 {
4485 	void *cookie;
4486 	int status;
4487 
4488 	if (!HAS_FS_CALL(vnode, open_attr_dir))
4489 		return EOPNOTSUPP;
4490 
4491 	status = FS_CALL(vnode, open_attr_dir, &cookie);
4492 	if (status < 0)
4493 		return status;
4494 
4495 	// file is opened, create a fd
4496 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
4497 	if (status >= 0)
4498 		return status;
4499 
4500 	FS_CALL(vnode, close_attr_dir, cookie);
4501 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
4502 
4503 	return status;
4504 }
4505 
4506 
4507 static int
4508 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
4509 	int openMode, int perms, bool kernel)
4510 {
4511 	struct vnode *directory;
4512 	int status;
4513 
4514 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
4515 
4516 	// get directory to put the new file in
4517 	status = get_vnode(mountID, directoryID, &directory, true, false);
4518 	if (status < B_OK)
4519 		return status;
4520 
4521 	status = create_vnode(directory, name, openMode, perms, kernel);
4522 	put_vnode(directory);
4523 
4524 	return status;
4525 }
4526 
4527 
4528 static int
4529 file_create(int fd, char *path, int openMode, int perms, bool kernel)
4530 {
4531 	char name[B_FILE_NAME_LENGTH];
4532 	struct vnode *directory;
4533 	int status;
4534 
4535 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
4536 
4537 	// get directory to put the new file in
4538 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4539 	if (status < 0)
4540 		return status;
4541 
4542 	status = create_vnode(directory, name, openMode, perms, kernel);
4543 
4544 	put_vnode(directory);
4545 	return status;
4546 }
4547 
4548 
4549 static int
4550 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
4551 	int openMode, bool kernel)
4552 {
4553 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
4554 	struct vnode *vnode;
4555 	int status;
4556 
4557 	if (name == NULL || *name == '\0')
4558 		return B_BAD_VALUE;
4559 
4560 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
4561 		mountID, directoryID, name, openMode));
4562 
4563 	// get the vnode matching the entry_ref
4564 	status = entry_ref_to_vnode(mountID, directoryID, name, traverse, kernel,
4565 		&vnode);
4566 	if (status < B_OK)
4567 		return status;
4568 
4569 	status = open_vnode(vnode, openMode, kernel);
4570 	if (status < B_OK)
4571 		put_vnode(vnode);
4572 
4573 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
4574 		vnode->id, name);
4575 	return status;
4576 }
4577 
4578 
4579 static int
4580 file_open(int fd, char *path, int openMode, bool kernel)
4581 {
4582 	int status = B_OK;
4583 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
4584 
4585 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
4586 		fd, path, openMode, kernel));
4587 
4588 	// get the vnode matching the vnode + path combination
4589 	struct vnode *vnode = NULL;
4590 	ino_t parentID;
4591 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
4592 	if (status != B_OK)
4593 		return status;
4594 
4595 	// open the vnode
4596 	status = open_vnode(vnode, openMode, kernel);
4597 	// put only on error -- otherwise our reference was transferred to the FD
4598 	if (status < B_OK)
4599 		put_vnode(vnode);
4600 
4601 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
4602 		vnode->device, parentID, vnode->id, NULL);
4603 
4604 	return status;
4605 }
4606 
4607 
4608 static status_t
4609 file_close(struct file_descriptor *descriptor)
4610 {
4611 	struct vnode *vnode = descriptor->u.vnode;
4612 	status_t status = B_OK;
4613 
4614 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
4615 
4616 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
4617 	if (HAS_FS_CALL(vnode, close)) {
4618 		status = FS_CALL(vnode, close, descriptor->cookie);
4619 	}
4620 
4621 	if (status == B_OK) {
4622 		// remove all outstanding locks for this team
4623 		release_advisory_lock(vnode, NULL);
4624 	}
4625 	return status;
4626 }
4627 
4628 
4629 static void
4630 file_free_fd(struct file_descriptor *descriptor)
4631 {
4632 	struct vnode *vnode = descriptor->u.vnode;
4633 
4634 	if (vnode != NULL) {
4635 		FS_CALL(vnode, free_cookie, descriptor->cookie);
4636 		put_vnode(vnode);
4637 	}
4638 }
4639 
4640 
4641 static status_t
4642 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4643 {
4644 	struct vnode *vnode = descriptor->u.vnode;
4645 
4646 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4647 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
4648 }
4649 
4650 
4651 static status_t
4652 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4653 {
4654 	struct vnode *vnode = descriptor->u.vnode;
4655 
4656 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4657 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
4658 }
4659 
4660 
4661 static off_t
4662 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4663 {
4664 	off_t offset;
4665 
4666 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
4667 
4668 	// stat() the node
4669 	struct vnode *vnode = descriptor->u.vnode;
4670 	if (!HAS_FS_CALL(vnode, read_stat))
4671 		return EOPNOTSUPP;
4672 
4673 	struct stat stat;
4674 	status_t status = FS_CALL(vnode, read_stat, &stat);
4675 	if (status < B_OK)
4676 		return status;
4677 
4678 	// some kinds of files are not seekable
4679 	switch (stat.st_mode & S_IFMT) {
4680 		case S_IFIFO:
4681 			return ESPIPE;
4682 // TODO: We don't catch sockets here, but they are not seekable either (ESPIPE)!
4683 		// The Open Group Base Specs don't mention any file types besides pipes,
4684 		// FIFOs, and sockets specially, so we allow seeking all other types.
4685 		case S_IFREG:
4686 		case S_IFBLK:
4687 		case S_IFDIR:
4688 		case S_IFLNK:
4689 		case S_IFCHR:
4690 			break;
4691 	}
4692 
4693 	switch (seekType) {
4694 		case SEEK_SET:
4695 			offset = 0;
4696 			break;
4697 		case SEEK_CUR:
4698 			offset = descriptor->pos;
4699 			break;
4700 		case SEEK_END:
4701 			offset = stat.st_size;
4702 			break;
4703 		default:
4704 			return B_BAD_VALUE;
4705 	}
4706 
4707 	// assumes off_t is 64 bits wide
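	// (the condition is "offset + pos > LONGLONG_MAX", rearranged so that the
	// addition itself cannot overflow)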
4708 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4709 		return EOVERFLOW;
4710 
4711 	pos += offset;
4712 	if (pos < 0)
4713 		return B_BAD_VALUE;
4714 
4715 	return descriptor->pos = pos;
4716 }
4717 
4718 
4719 static status_t
4720 file_select(struct file_descriptor *descriptor, uint8 event,
4721 	struct selectsync *sync)
4722 {
4723 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
4724 
4725 	struct vnode *vnode = descriptor->u.vnode;
4726 
4727 	// If the FS has no select() hook, notify select() now.
4728 	if (!HAS_FS_CALL(vnode, select))
4729 		return notify_select_event(sync, event);
4730 
4731 	return FS_CALL(vnode, select, descriptor->cookie, event, 0, sync);
4732 }
4733 
4734 
4735 static status_t
4736 file_deselect(struct file_descriptor *descriptor, uint8 event,
4737 	struct selectsync *sync)
4738 {
4739 	struct vnode *vnode = descriptor->u.vnode;
4740 
4741 	if (!HAS_FS_CALL(vnode, deselect))
4742 		return B_OK;
4743 
4744 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
4745 }
4746 
4747 
4748 static status_t
4749 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char *name, int perms, bool kernel)
4750 {
4751 	struct vnode *vnode;
4752 	ino_t newID;
4753 	status_t status;
4754 
4755 	if (name == NULL || *name == '\0')
4756 		return B_BAD_VALUE;
4757 
4758 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
4759 
4760 	status = get_vnode(mountID, parentID, &vnode, true, false);
4761 	if (status < B_OK)
4762 		return status;
4763 
4764 	if (HAS_FS_CALL(vnode, create_dir))
4765 		status = FS_CALL(vnode, create_dir, name, perms, &newID);
4766 	else
4767 		status = EROFS;
4768 
4769 	put_vnode(vnode);
4770 	return status;
4771 }
4772 
4773 
4774 static status_t
4775 dir_create(int fd, char *path, int perms, bool kernel)
4776 {
4777 	char filename[B_FILE_NAME_LENGTH];
4778 	struct vnode *vnode;
4779 	ino_t newID;
4780 	status_t status;
4781 
4782 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
4783 
4784 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4785 	if (status < 0)
4786 		return status;
4787 
4788 	if (HAS_FS_CALL(vnode, create_dir)) {
4789 		status = FS_CALL(vnode, create_dir, filename, perms, &newID);
4790 	} else
4791 		status = EROFS;
4792 
4793 	put_vnode(vnode);
4794 	return status;
4795 }
4796 
4797 
4798 static int
4799 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char *name, bool kernel)
4800 {
4801 	struct vnode *vnode;
4802 	int status;
4803 
4804 	FUNCTION(("dir_open_entry_ref()\n"));
4805 
4806 	if (name && *name == '\0')
4807 		return B_BAD_VALUE;
4808 
4809 	// get the vnode matching the entry_ref/node_ref
4810 	if (name) {
4811 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
4812 			&vnode);
4813 	} else
4814 		status = get_vnode(mountID, parentID, &vnode, true, false);
4815 	if (status < B_OK)
4816 		return status;
4817 
4818 	status = open_dir_vnode(vnode, kernel);
4819 	if (status < B_OK)
4820 		put_vnode(vnode);
4821 
4822 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
4823 		vnode->id, name);
4824 	return status;
4825 }
4826 
4827 
4828 static int
4829 dir_open(int fd, char *path, bool kernel)
4830 {
4831 	int status = B_OK;
4832 
4833 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
4834 
4835 	// get the vnode matching the vnode + path combination
4836 	struct vnode *vnode = NULL;
4837 	ino_t parentID;
4838 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
4839 	if (status != B_OK)
4840 		return status;
4841 
4842 	// open the dir
4843 	status = open_dir_vnode(vnode, kernel);
4844 	if (status < B_OK)
4845 		put_vnode(vnode);
4846 
4847 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
4848 	return status;
4849 }
4850 
4851 
4852 static status_t
4853 dir_close(struct file_descriptor *descriptor)
4854 {
4855 	struct vnode *vnode = descriptor->u.vnode;
4856 
4857 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
4858 
4859 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
4860 	if (HAS_FS_CALL(vnode, close_dir))
4861 		return FS_CALL(vnode, close_dir, descriptor->cookie);
4862 
4863 	return B_OK;
4864 }
4865 
4866 
4867 static void
4868 dir_free_fd(struct file_descriptor *descriptor)
4869 {
4870 	struct vnode *vnode = descriptor->u.vnode;
4871 
4872 	if (vnode != NULL) {
4873 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
4874 		put_vnode(vnode);
4875 	}
4876 }
4877 
4878 
4879 static status_t
4880 dir_read(struct io_context* ioContext, struct file_descriptor *descriptor,
4881 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
4882 {
4883 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
4884 		bufferSize, _count);
4885 }
4886 
4887 
4888 static void
4889 fix_dirent(struct vnode *parent, struct dirent *entry,
4890 	struct io_context* ioContext)
4891 {
4892 	// set d_pdev and d_pino
4893 	entry->d_pdev = parent->device;
4894 	entry->d_pino = parent->id;
4895 
4896 	// If this is the ".." entry and the directory is the root of a FS,
4897 	// we need to replace d_dev and d_ino with the actual values.
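	// Example (hypothetical): reading ".." in the root directory of a volume
	// mounted at "/boot" must yield the entry's node in the covering file
	// system (here: "/"), not a node internal to the mounted volume.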
4898 	if (strcmp(entry->d_name, "..") == 0
4899 		&& parent->mount->root_vnode == parent
4900 		&& parent->mount->covers_vnode) {
4901 		inc_vnode_ref_count(parent);
4902 			// vnode_path_to_vnode() puts the node
4903 
4904 		// Make sure the IO context root is not bypassed.
4905 		if (parent == ioContext->root) {
4906 			entry->d_dev = parent->device;
4907 			entry->d_ino = parent->id;
4908 		} else {
4909 			// ".." is guaranteed not to be clobbered by this call
4910 			struct vnode *vnode;
4911 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
4912 				ioContext, &vnode, NULL);
4913 
4914 			if (status == B_OK) {
4915 				entry->d_dev = vnode->device;
4916 				entry->d_ino = vnode->id;
4917 			}
4918 		}
4919 	} else {
4920 		// resolve mount points
4921 		struct vnode *vnode = NULL;
4922 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, true,
4923 			false);
4924 		if (status != B_OK)
4925 			return;
4926 
4927 		mutex_lock(&sVnodeCoveredByMutex);
4928 		if (vnode->covered_by) {
4929 			entry->d_dev = vnode->covered_by->device;
4930 			entry->d_ino = vnode->covered_by->id;
4931 		}
4932 		mutex_unlock(&sVnodeCoveredByMutex);
4933 
4934 		put_vnode(vnode);
4935 	}
4936 }
4937 
4938 
4939 static status_t
4940 dir_read(struct io_context* ioContext, struct vnode *vnode, void *cookie,
4941 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
4942 {
4943 	if (!HAS_FS_CALL(vnode, read_dir))
4944 		return EOPNOTSUPP;
4945 
4946 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
4947 		_count);
4948 	if (error != B_OK)
4949 		return error;
4950 
4951 	// we need to adjust the read dirents
4952 	if (*_count > 0) {
4953 		// XXX: Currently reading only one dirent is supported. Make this a loop!
4954 		fix_dirent(vnode, buffer, ioContext);
4955 	}
4956 
4957 	return error;
4958 }
4959 
4960 
4961 static status_t
4962 dir_rewind(struct file_descriptor *descriptor)
4963 {
4964 	struct vnode *vnode = descriptor->u.vnode;
4965 
4966 	if (HAS_FS_CALL(vnode, rewind_dir)) {
4967 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
4968 	}
4969 
4970 	return EOPNOTSUPP;
4971 }
4972 
4973 
4974 static status_t
4975 dir_remove(int fd, char *path, bool kernel)
4976 {
4977 	char name[B_FILE_NAME_LENGTH];
4978 	struct vnode *directory;
4979 	status_t status;
4980 
4981 	if (path != NULL) {
4982 		// we need to make sure our path name doesn't end in "/", ".", or ".."
4983 		char *lastSlash = strrchr(path, '/');
4984 		if (lastSlash != NULL) {
4985 			char *leaf = lastSlash + 1;
4986 			if (!strcmp(leaf, ".."))
4987 				return B_NOT_ALLOWED;
4988 
4989 			// omit multiple slashes
4990 			while (lastSlash > path && lastSlash[-1] == '/') {
4991 				lastSlash--;
4992 			}
4993 
4994 			if (!leaf[0]
4995 				|| !strcmp(leaf, ".")) {
4996 				// "name/" -> "name", or "name/." -> "name"
4997 				lastSlash[0] = '\0';
4998 			}
4999 		} else if (!strcmp(path, ".."))
5000 			return B_NOT_ALLOWED;
5001 	}
5002 
5003 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5004 	if (status < B_OK)
5005 		return status;
5006 
5007 	if (HAS_FS_CALL(directory, remove_dir))
5008 		status = FS_CALL(directory, remove_dir, name);
5009 	else
5010 		status = EROFS;
5011 
5012 	put_vnode(directory);
5013 	return status;
5014 }
5015 
5016 
5017 static status_t
5018 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer,
5019 	size_t length)
5020 {
5021 	struct vnode *vnode = descriptor->u.vnode;
5022 
5023 	if (HAS_FS_CALL(vnode, ioctl))
5024 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5025 
5026 	return EOPNOTSUPP;
5027 }
5028 
5029 
5030 static status_t
5031 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5032 {
5033 	struct file_descriptor *descriptor;
5034 	struct vnode *vnode;
5035 	struct flock flock;
5036 
5037 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5038 		fd, op, argument, kernel ? "kernel" : "user"));
5039 
5040 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5041 	if (descriptor == NULL)
5042 		return B_FILE_ERROR;
5043 
5044 	status_t status = B_OK;
5045 
5046 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5047 		if (descriptor->type != FDTYPE_FILE)
5048 			status = B_BAD_VALUE;
5049 		else if (user_memcpy(&flock, (struct flock *)argument,
5050 				sizeof(struct flock)) < B_OK)
5051 			status = B_BAD_ADDRESS;
5052 
5053 		if (status != B_OK) {
5054 			put_fd(descriptor);
5055 			return status;
5056 		}
5057 	}
5058 
5059 	switch (op) {
5060 		case F_SETFD:
5061 		{
5062 			struct io_context *context = get_current_io_context(kernel);
5063 			// Set file descriptor flags
5064 
5065 			// O_CLOEXEC is the only flag available at this time
5066 			mutex_lock(&context->io_mutex);
5067 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5068 			mutex_unlock(&context->io_mutex);
5069 
5070 			status = B_OK;
5071 			break;
5072 		}
5073 
5074 		case F_GETFD:
5075 		{
5076 			struct io_context *context = get_current_io_context(kernel);
5077 
5078 			// Get file descriptor flags
5079 			mutex_lock(&context->io_mutex);
5080 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5081 			mutex_unlock(&context->io_mutex);
5082 			break;
5083 		}
5084 
5085 		case F_SETFL:
5086 			// Set file descriptor open mode
5087 			if (HAS_FS_CALL(vnode, set_flags)) {
5088 				// we only accept changes to O_APPEND and O_NONBLOCK
5089 				argument &= O_APPEND | O_NONBLOCK;
5090 
5091 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5092 					(int)argument);
5093 				if (status == B_OK) {
5094 					// update this descriptor's open_mode field
5095 					descriptor->open_mode = (descriptor->open_mode
5096 						& ~(O_APPEND | O_NONBLOCK)) | argument;
5097 				}
5098 			} else
5099 				status = EOPNOTSUPP;
5100 			break;
5101 
5102 		case F_GETFL:
5103 			// Get file descriptor open mode
5104 			status = descriptor->open_mode;
5105 			break;
5106 
5107 		case F_DUPFD:
5108 		{
5109 			struct io_context *context = get_current_io_context(kernel);
5110 
5111 			status = new_fd_etc(context, descriptor, (int)argument);
5112 			if (status >= 0) {
5113 				mutex_lock(&context->io_mutex);
5114 				fd_set_close_on_exec(context, fd, false);
5115 				mutex_unlock(&context->io_mutex);
5116 
5117 				atomic_add(&descriptor->ref_count, 1);
5118 			}
5119 			break;
5120 		}
5121 
5122 		case F_GETLK:
5123 			status = get_advisory_lock(descriptor->u.vnode, &flock);
5124 			if (status == B_OK) {
5125 				// copy back flock structure
5126 				status = user_memcpy((struct flock *)argument, &flock,
5127 					sizeof(struct flock));
5128 			}
5129 			break;
5130 
5131 		case F_SETLK:
5132 		case F_SETLKW:
5133 			status = normalize_flock(descriptor, &flock);
5134 			if (status < B_OK)
5135 				break;
5136 
5137 			if (flock.l_type == F_UNLCK)
5138 				status = release_advisory_lock(descriptor->u.vnode, &flock);
5139 			else {
5140 				// the open mode must match the lock type
5141 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5142 						&& flock.l_type == F_WRLCK)
5143 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5144 						&& flock.l_type == F_RDLCK))
5145 					status = B_FILE_ERROR;
5146 				else {
5147 					status = acquire_advisory_lock(descriptor->u.vnode, -1,
5148 						&flock, op == F_SETLKW);
5149 				}
5150 			}
5151 			break;
5152 
5153 		// ToDo: add support for more ops?
5154 
5155 		default:
5156 			status = B_BAD_VALUE;
5157 	}
5158 
5159 	put_fd(descriptor);
5160 	return status;
5161 }
5162 
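// Usage sketch (illustrative): the POSIX fcntl() syscall is backed by this
// function; e.g. marking an FD close-on-exec from kernel code would be
//
//	status_t status = common_fcntl(fd, F_SETFD, FD_CLOEXEC, true);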
5163 
5164 static status_t
5165 common_sync(int fd, bool kernel)
5166 {
5167 	struct file_descriptor *descriptor;
5168 	struct vnode *vnode;
5169 	status_t status;
5170 
5171 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
5172 
5173 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5174 	if (descriptor == NULL)
5175 		return B_FILE_ERROR;
5176 
5177 	if (HAS_FS_CALL(vnode, fsync))
5178 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5179 	else
5180 		status = EOPNOTSUPP;
5181 
5182 	put_fd(descriptor);
5183 	return status;
5184 }
5185 
5186 
5187 static status_t
5188 common_lock_node(int fd, bool kernel)
5189 {
5190 	struct file_descriptor *descriptor;
5191 	struct vnode *vnode;
5192 
5193 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5194 	if (descriptor == NULL)
5195 		return B_FILE_ERROR;
5196 
5197 	status_t status = B_OK;
5198 
5199 	// We need to set the locking atomically - someone
5200 	// else might set one at the same time
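	// atomic_test_and_set(value, newValue, testAgainst) stores newValue only
	// if *value equals testAgainst and returns the previous value; a non-NULL
	// result therefore means another descriptor already holds the node lock.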
5201 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
5202 			(addr_t)descriptor, (addr_t)NULL) != (addr_t)NULL)
5203 		status = B_BUSY;
5204 
5205 	put_fd(descriptor);
5206 	return status;
5207 }
5208 
5209 
5210 static status_t
5211 common_unlock_node(int fd, bool kernel)
5212 {
5213 	struct file_descriptor *descriptor;
5214 	struct vnode *vnode;
5215 
5216 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5217 	if (descriptor == NULL)
5218 		return B_FILE_ERROR;
5219 
5220 	status_t status = B_OK;
5221 
5222 	// We need to clear the lock atomically - someone
5223 	// else might change it at the same time
5224 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
5225 			(addr_t)NULL, (addr_t)descriptor) != (int32)descriptor)
5226 		status = B_BAD_VALUE;
5227 
5228 	put_fd(descriptor);
5229 	return status;
5230 }
5231 
5232 
5233 static status_t
5234 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
5235 	bool kernel)
5236 {
5237 	struct vnode *vnode;
5238 	status_t status;
5239 
5240 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
5241 	if (status < B_OK)
5242 		return status;
5243 
5244 	if (HAS_FS_CALL(vnode, read_symlink)) {
5245 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
5246 	} else
5247 		status = B_BAD_VALUE;
5248 
5249 	put_vnode(vnode);
5250 	return status;
5251 }
5252 
5253 
5254 static status_t
5255 common_create_symlink(int fd, char *path, const char *toPath, int mode,
5256 	bool kernel)
5257 {
5258 	// path validity checks have to be in the calling function!
5259 	char name[B_FILE_NAME_LENGTH];
5260 	struct vnode *vnode;
5261 	status_t status;
5262 
5263 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
5264 
5265 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
5266 	if (status < B_OK)
5267 		return status;
5268 
5269 	if (HAS_FS_CALL(vnode, create_symlink))
5270 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
5271 	else
5272 		status = EROFS;
5273 
5274 	put_vnode(vnode);
5275 
5276 	return status;
5277 }
5278 
5279 
5280 static status_t
5281 common_create_link(char *path, char *toPath, bool kernel)
5282 {
5283 	// path validity checks have to be in the calling function!
5284 	char name[B_FILE_NAME_LENGTH];
5285 	struct vnode *directory, *vnode;
5286 	status_t status;
5287 
5288 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
5289 
5290 	status = path_to_dir_vnode(path, &directory, name, kernel);
5291 	if (status < B_OK)
5292 		return status;
5293 
5294 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
5295 	if (status < B_OK)
5296 		goto err;
5297 
5298 	if (directory->mount != vnode->mount) {
5299 		status = B_CROSS_DEVICE_LINK;
5300 		goto err1;
5301 	}
5302 
5303 	if (HAS_FS_CALL(directory, link))
5304 		status = FS_CALL(directory, link, name, vnode);
5305 	else
5306 		status = EROFS;
5307 
5308 err1:
5309 	put_vnode(vnode);
5310 err:
5311 	put_vnode(directory);
5312 
5313 	return status;
5314 }
5315 
5316 
5317 static status_t
5318 common_unlink(int fd, char *path, bool kernel)
5319 {
5320 	char filename[B_FILE_NAME_LENGTH];
5321 	struct vnode *vnode;
5322 	status_t status;
5323 
5324 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
5325 
5326 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5327 	if (status < 0)
5328 		return status;
5329 
5330 	if (HAS_FS_CALL(vnode, unlink))
5331 		status = FS_CALL(vnode, unlink, filename);
5332 	else
5333 		status = EROFS;
5334 
5335 	put_vnode(vnode);
5336 
5337 	return status;
5338 }
5339 
5340 
5341 static status_t
5342 common_access(char *path, int mode, bool kernel)
5343 {
5344 	struct vnode *vnode;
5345 	status_t status;
5346 
5347 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
5348 	if (status < B_OK)
5349 		return status;
5350 
5351 	if (HAS_FS_CALL(vnode, access))
5352 		status = FS_CALL(vnode, access, mode);
5353 	else
5354 		status = B_OK;
5355 
5356 	put_vnode(vnode);
5357 
5358 	return status;
5359 }
5360 
5361 
5362 static status_t
5363 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
5364 {
5365 	struct vnode *fromVnode, *toVnode;
5366 	char fromName[B_FILE_NAME_LENGTH];
5367 	char toName[B_FILE_NAME_LENGTH];
5368 	status_t status;
5369 
5370 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
5371 
5372 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
5373 	if (status < 0)
5374 		return status;
5375 
5376 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
5377 	if (status < 0)
5378 		goto err;
5379 
5380 	if (fromVnode->device != toVnode->device) {
5381 		status = B_CROSS_DEVICE_LINK;
5382 		goto err1;
5383 	}
5384 
5385 	if (HAS_FS_CALL(fromVnode, rename))
5386 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
5387 	else
5388 		status = EROFS;
5389 
5390 err1:
5391 	put_vnode(toVnode);
5392 err:
5393 	put_vnode(fromVnode);
5394 
5395 	return status;
5396 }
5397 
5398 
5399 static status_t
5400 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5401 {
5402 	struct vnode *vnode = descriptor->u.vnode;
5403 
5404 	FUNCTION(("common_read_stat: stat %p\n", stat));
5405 
5406 	status_t status = FS_CALL(vnode, read_stat, stat);
5407 
5408 	// fill in the st_dev and st_ino fields
5409 	if (status == B_OK) {
5410 		stat->st_dev = vnode->device;
5411 		stat->st_ino = vnode->id;
5412 	}
5413 
5414 	return status;
5415 }
5416 
5417 
5418 static status_t
5419 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5420 {
5421 	struct vnode *vnode = descriptor->u.vnode;
5422 
5423 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
5424 	if (!HAS_FS_CALL(vnode, write_stat))
5425 		return EROFS;
5426 
5427 	return FS_CALL(vnode, write_stat, stat, statMask);
5428 }
5429 
5430 
5431 static status_t
5432 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
5433 	struct stat *stat, bool kernel)
5434 {
5435 	struct vnode *vnode;
5436 	status_t status;
5437 
5438 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path, stat));
5439 
5440 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5441 	if (status < 0)
5442 		return status;
5443 
5444 	status = FS_CALL(vnode, read_stat, stat);
5445 
5446 	// fill in the st_dev and st_ino fields
5447 	if (status == B_OK) {
5448 		stat->st_dev = vnode->device;
5449 		stat->st_ino = vnode->id;
5450 	}
5451 
5452 	put_vnode(vnode);
5453 	return status;
5454 }
5455 
5456 
5457 static status_t
5458 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
5459 	const struct stat *stat, int statMask, bool kernel)
5460 {
5461 	struct vnode *vnode;
5462 	status_t status;
5463 
5464 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
5465 
5466 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5467 	if (status < 0)
5468 		return status;
5469 
5470 	if (HAS_FS_CALL(vnode, write_stat))
5471 		status = FS_CALL(vnode, write_stat, stat, statMask);
5472 	else
5473 		status = EROFS;
5474 
5475 	put_vnode(vnode);
5476 
5477 	return status;
5478 }
5479 
5480 
5481 static int
5482 attr_dir_open(int fd, char *path, bool kernel)
5483 {
5484 	struct vnode *vnode;
5485 	int status;
5486 
5487 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
5488 
5489 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5490 	if (status < B_OK)
5491 		return status;
5492 
5493 	status = open_attr_dir_vnode(vnode, kernel);
5494 	if (status < 0)
5495 		put_vnode(vnode);
5496 
5497 	return status;
5498 }
5499 
5500 
5501 static status_t
5502 attr_dir_close(struct file_descriptor *descriptor)
5503 {
5504 	struct vnode *vnode = descriptor->u.vnode;
5505 
5506 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
5507 
5508 	if (HAS_FS_CALL(vnode, close_attr_dir))
5509 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
5510 
5511 	return B_OK;
5512 }
5513 
5514 
5515 static void
5516 attr_dir_free_fd(struct file_descriptor *descriptor)
5517 {
5518 	struct vnode *vnode = descriptor->u.vnode;
5519 
5520 	if (vnode != NULL) {
5521 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
5522 		put_vnode(vnode);
5523 	}
5524 }
5525 
5526 
5527 static status_t
5528 attr_dir_read(struct io_context* ioContext, struct file_descriptor *descriptor,
5529 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
5530 {
5531 	struct vnode *vnode = descriptor->u.vnode;
5532 
5533 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
5534 
5535 	if (HAS_FS_CALL(vnode, read_attr_dir))
5536 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
5537 			bufferSize, _count);
5538 
5539 	return EOPNOTSUPP;
5540 }
5541 
5542 
5543 static status_t
5544 attr_dir_rewind(struct file_descriptor *descriptor)
5545 {
5546 	struct vnode *vnode = descriptor->u.vnode;
5547 
5548 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
5549 
5550 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
5551 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
5552 
5553 	return EOPNOTSUPP;
5554 }
5555 
5556 
5557 static int
5558 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
5559 {
5560 	struct vnode *vnode;
5561 	void *cookie;
5562 	int status;
5563 
5564 	if (name == NULL || *name == '\0')
5565 		return B_BAD_VALUE;
5566 
5567 	vnode = get_vnode_from_fd(fd, kernel);
5568 	if (vnode == NULL)
5569 		return B_FILE_ERROR;
5570 
5571 	if (!HAS_FS_CALL(vnode, create_attr)) {
5572 		status = EROFS;
5573 		goto err;
5574 	}
5575 
5576 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
5577 	if (status < B_OK)
5578 		goto err;
5579 
5580 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5581 		return status;
5582 
5583 	FS_CALL(vnode, close_attr, cookie);
5584 	FS_CALL(vnode, free_attr_cookie, cookie);
5585 
5586 	FS_CALL(vnode, remove_attr, name);
5587 
5588 err:
5589 	put_vnode(vnode);
5590 
5591 	return status;
5592 }
5593 
5594 
5595 static int
5596 attr_open(int fd, const char *name, int openMode, bool kernel)
5597 {
5598 	struct vnode *vnode;
5599 	void *cookie;
5600 	int status;
5601 
5602 	if (name == NULL || *name == '\0')
5603 		return B_BAD_VALUE;
5604 
5605 	vnode = get_vnode_from_fd(fd, kernel);
5606 	if (vnode == NULL)
5607 		return B_FILE_ERROR;
5608 
5609 	if (!HAS_FS_CALL(vnode, open_attr)) {
5610 		status = EOPNOTSUPP;
5611 		goto err;
5612 	}
5613 
5614 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
5615 	if (status < B_OK)
5616 		goto err;
5617 
5618 	// now we only need a file descriptor for this attribute and we're done
5619 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5620 		return status;
5621 
5622 	FS_CALL(vnode, close_attr, cookie);
5623 	FS_CALL(vnode, free_attr_cookie, cookie);
5624 
5625 err:
5626 	put_vnode(vnode);
5627 
5628 	return status;
5629 }
5630 
5631 
5632 static status_t
5633 attr_close(struct file_descriptor *descriptor)
5634 {
5635 	struct vnode *vnode = descriptor->u.vnode;
5636 
5637 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
5638 
5639 	if (HAS_FS_CALL(vnode, close_attr))
5640 		return FS_CALL(vnode, close_attr, descriptor->cookie);
5641 
5642 	return B_OK;
5643 }
5644 
5645 
5646 static void
5647 attr_free_fd(struct file_descriptor *descriptor)
5648 {
5649 	struct vnode *vnode = descriptor->u.vnode;
5650 
5651 	if (vnode != NULL) {
5652 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
5653 		put_vnode(vnode);
5654 	}
5655 }
5656 
5657 
5658 static status_t
5659 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
5660 {
5661 	struct vnode *vnode = descriptor->u.vnode;
5662 
5663 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
5664 	if (!HAS_FS_CALL(vnode, read_attr))
5665 		return EOPNOTSUPP;
5666 
5667 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
5668 }
5669 
5670 
5671 static status_t
5672 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
5673 {
5674 	struct vnode *vnode = descriptor->u.vnode;
5675 
5676 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5677 	if (!HAS_FS_CALL(vnode, write_attr))
5678 		return EOPNOTSUPP;
5679 
5680 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
5681 }
5682 
5683 
5684 static off_t
5685 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
5686 {
5687 	off_t offset;
5688 
5689 	switch (seekType) {
5690 		case SEEK_SET:
5691 			offset = 0;
5692 			break;
5693 		case SEEK_CUR:
5694 			offset = descriptor->pos;
5695 			break;
5696 		case SEEK_END:
5697 		{
5698 			struct vnode *vnode = descriptor->u.vnode;
5699 			struct stat stat;
5700 			status_t status;
5701 
5702 			if (!HAS_FS_CALL(vnode, read_attr_stat))
5703 				return EOPNOTSUPP;
5704 
5705 			status = FS_CALL(vnode, read_attr_stat, descriptor->cookie, &stat);
5706 			if (status < B_OK)
5707 				return status;
5708 
5709 			offset = stat.st_size;
5710 			break;
5711 		}
5712 		default:
5713 			return B_BAD_VALUE;
5714 	}
5715 
5716 	// assumes off_t is 64 bits wide
5717 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5718 		return EOVERFLOW;
5719 
5720 	pos += offset;
5721 	if (pos < 0)
5722 		return B_BAD_VALUE;
5723 
5724 	return descriptor->pos = pos;
5725 }
5726 
5727 
5728 static status_t
5729 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5730 {
5731 	struct vnode *vnode = descriptor->u.vnode;
5732 
5733 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
5734 
5735 	if (!HAS_FS_CALL(vnode, read_attr_stat))
5736 		return EOPNOTSUPP;
5737 
5738 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
5739 }
5740 
5741 
5742 static status_t
5743 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5744 {
5745 	struct vnode *vnode = descriptor->u.vnode;
5746 
5747 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
5748 
5749 	if (!HAS_FS_CALL(vnode, write_attr_stat))
5750 		return EROFS;
5751 
5752 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
5753 }
5754 
5755 
5756 static status_t
5757 attr_remove(int fd, const char *name, bool kernel)
5758 {
5759 	struct file_descriptor *descriptor;
5760 	struct vnode *vnode;
5761 	status_t status;
5762 
5763 	if (name == NULL || *name == '\0')
5764 		return B_BAD_VALUE;
5765 
5766 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
5767 
5768 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5769 	if (descriptor == NULL)
5770 		return B_FILE_ERROR;
5771 
5772 	if (HAS_FS_CALL(vnode, remove_attr))
5773 		status = FS_CALL(vnode, remove_attr, name);
5774 	else
5775 		status = EROFS;
5776 
5777 	put_fd(descriptor);
5778 
5779 	return status;
5780 }
5781 
5782 
5783 static status_t
5784 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
5785 {
5786 	struct file_descriptor *fromDescriptor, *toDescriptor;
5787 	struct vnode *fromVnode, *toVnode;
5788 	status_t status;
5789 
5790 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
5791 		return B_BAD_VALUE;
5792 
5793 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
5794 
5795 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
5796 	if (fromDescriptor == NULL)
5797 		return B_FILE_ERROR;
5798 
5799 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
5800 	if (toDescriptor == NULL) {
5801 		status = B_FILE_ERROR;
5802 		goto err;
5803 	}
5804 
5805 	// are the files on the same volume?
5806 	if (fromVnode->device != toVnode->device) {
5807 		status = B_CROSS_DEVICE_LINK;
5808 		goto err1;
5809 	}
5810 
5811 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
5812 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
5813 	} else
5814 		status = EROFS;
5815 
5816 err1:
5817 	put_fd(toDescriptor);
5818 err:
5819 	put_fd(fromDescriptor);
5820 
5821 	return status;
5822 }
5823 
5824 
5825 static status_t
5826 index_dir_open(dev_t mountID, bool kernel)
5827 {
5828 	struct fs_mount *mount;
5829 	void *cookie;
5830 
5831 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
5832 
5833 	status_t status = get_mount(mountID, &mount);
5834 	if (status < B_OK)
5835 		return status;
5836 
5837 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
5838 		status = EOPNOTSUPP;
5839 		goto out;
5840 	}
5841 
5842 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
5843 	if (status < B_OK)
5844 		goto out;
5845 
5846 	// get fd for the index directory
5847 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
5848 	if (status >= 0)
5849 		goto out;
5850 
5851 	// something went wrong
5852 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
5853 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
5854 
5855 out:
5856 	put_mount(mount);
5857 	return status;
5858 }
5859 
5860 
5861 static status_t
5862 index_dir_close(struct file_descriptor *descriptor)
5863 {
5864 	struct fs_mount *mount = descriptor->u.mount;
5865 
5866 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
5867 
5868 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
5869 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
5870 
5871 	return B_OK;
5872 }
5873 
5874 
5875 static void
5876 index_dir_free_fd(struct file_descriptor *descriptor)
5877 {
5878 	struct fs_mount *mount = descriptor->u.mount;
5879 
5880 	if (mount != NULL) {
5881 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
5882 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5883 		//put_vnode(vnode);
5884 	}
5885 }
5886 
5887 
5888 static status_t
5889 index_dir_read(struct io_context* ioContext, struct file_descriptor *descriptor,
5890 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
5891 {
5892 	struct fs_mount *mount = descriptor->u.mount;
5893 
5894 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
5895 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
5896 			bufferSize, _count);
5897 	}
5898 
5899 	return EOPNOTSUPP;
5900 }
5901 
5902 
5903 static status_t
5904 index_dir_rewind(struct file_descriptor *descriptor)
5905 {
5906 	struct fs_mount *mount = descriptor->u.mount;
5907 
5908 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
5909 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
5910 
5911 	return EOPNOTSUPP;
5912 }
5913 
5914 
5915 static status_t
5916 index_create(dev_t mountID, const char *name, uint32 type, uint32 flags, bool kernel)
5917 {
5918 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5919 
5920 	struct fs_mount *mount;
5921 	status_t status = get_mount(mountID, &mount);
5922 	if (status < B_OK)
5923 		return status;
5924 
5925 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
5926 		status = EROFS;
5927 		goto out;
5928 	}
5929 
5930 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
5931 
5932 out:
5933 	put_mount(mount);
5934 	return status;
5935 }
5936 
5937 
5938 #if 0
5939 static status_t
5940 index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5941 {
5942 	struct vnode *vnode = descriptor->u.vnode;
5943 
5944 	// ToDo: currently unused!
5945 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
5946 	if (!HAS_FS_CALL(vnode, read_index_stat))
5947 		return EOPNOTSUPP;
5948 
5949 	return EOPNOTSUPP;
5950 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
5951 }
5952 
5953 
5954 static void
5955 index_free_fd(struct file_descriptor *descriptor)
5956 {
5957 	struct vnode *vnode = descriptor->u.vnode;
5958 
5959 	if (vnode != NULL) {
5960 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
5961 		put_vnode(vnode);
5962 	}
5963 }
5964 #endif
5965 
5966 
5967 static status_t
5968 index_name_read_stat(dev_t mountID, const char *name, struct stat *stat, bool kernel)
5969 {
5970 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5971 
5972 	struct fs_mount *mount;
5973 	status_t status = get_mount(mountID, &mount);
5974 	if (status < B_OK)
5975 		return status;
5976 
5977 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
5978 		status = EOPNOTSUPP;
5979 		goto out;
5980 	}
5981 
5982 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
5983 
5984 out:
5985 	put_mount(mount);
5986 	return status;
5987 }
5988 
5989 
5990 static status_t
5991 index_remove(dev_t mountID, const char *name, bool kernel)
5992 {
5993 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5994 
5995 	struct fs_mount *mount;
5996 	status_t status = get_mount(mountID, &mount);
5997 	if (status < B_OK)
5998 		return status;
5999 
6000 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6001 		status = EROFS;
6002 		goto out;
6003 	}
6004 
6005 	status = FS_MOUNT_CALL(mount, remove_index, name);
6006 
6007 out:
6008 	put_mount(mount);
6009 	return status;
6010 }
6011 
6012 
6013 /*!	ToDo: the query FS API is still pretty much the same as in R5.
6014 		It would be nice if queries would get some more kernel
6015 		support.
6016 		For example, query parsing should be moved into the kernel.
6017 */
6018 static int
6019 query_open(dev_t device, const char *query, uint32 flags,
6020 	port_id port, int32 token, bool kernel)
6021 {
6022 	struct fs_mount *mount;
6023 	void *cookie;
6024 
6025 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
6026 
6027 	status_t status = get_mount(device, &mount);
6028 	if (status < B_OK)
6029 		return status;
6030 
6031 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6032 		status = EOPNOTSUPP;
6033 		goto out;
6034 	}
6035 
6036 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6037 		&cookie);
6038 	if (status < B_OK)
6039 		goto out;
6040 
6041 	// get fd for the query
6042 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
6043 	if (status >= 0)
6044 		goto out;
6045 
6046 	// something went wrong
6047 	FS_MOUNT_CALL(mount, close_query, cookie);
6048 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6049 
6050 out:
6051 	put_mount(mount);
6052 	return status;
6053 }
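
// Illustrative sketch (not compiled): what a caller of query_open() above
// might look like. The query string syntax shown is the BFS one and purely
// illustrative; with B_LIVE_QUERY set, "port" and "token" tell the file
// system where to send subsequent update notifications.
#if 0
static int
open_live_query_example(dev_t device, port_id port, int32 token)
{
	return query_open(device, "name==\"*.cpp\"", B_LIVE_QUERY, port, token,
		true);
		// the returned FD serves the static result set; the FS keeps
		// sending live updates to "port" as long as it stays open
}
#endif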
6054 
6055 
6056 static status_t
6057 query_close(struct file_descriptor *descriptor)
6058 {
6059 	struct fs_mount *mount = descriptor->u.mount;
6060 
6061 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6062 
6063 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6064 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6065 
6066 	return B_OK;
6067 }
6068 
6069 
6070 static void
6071 query_free_fd(struct file_descriptor *descriptor)
6072 {
6073 	struct fs_mount *mount = descriptor->u.mount;
6074 
6075 	if (mount != NULL) {
6076 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6077 		// ToDo: find a replacement ref_count object - perhaps the root dir?
6078 		//put_vnode(vnode);
6079 	}
6080 }
6081 
6082 
6083 static status_t
6084 query_read(struct io_context *ioContext, struct file_descriptor *descriptor,
6085 	struct dirent *buffer, size_t bufferSize, uint32 *_count)
6086 {
6087 	struct fs_mount *mount = descriptor->u.mount;
6088 
6089 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6090 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6091 			bufferSize, _count);
6092 	}
6093 
6094 	return EOPNOTSUPP;
6095 }
6096 
6097 
6098 static status_t
6099 query_rewind(struct file_descriptor *descriptor)
6100 {
6101 	struct fs_mount *mount = descriptor->u.mount;
6102 
6103 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6104 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6105 
6106 	return EOPNOTSUPP;
6107 }
6108 
6109 
6110 //	#pragma mark - General File System functions
6111 
6112 
6113 static dev_t
6114 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
6115 	const char *args, bool kernel)
6116 {
6117 	struct fs_mount *mount;
6118 	status_t status = 0;
6119 
6120 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6121 
6122 	// The path is always safe, we just have to make sure that fsName is
6123 	// at least minimally valid - we can't make any assumptions about args,
6124 	// though. A NULL fsName is OK if a device was given and the FS is not
6125 	// virtual; we'll get the name from the DDM later.
6126 	if (fsName == NULL) {
6127 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
6128 			return B_BAD_VALUE;
6129 	} else if (fsName[0] == '\0')
6130 		return B_BAD_VALUE;
6131 
6132 	RecursiveLocker mountOpLocker(sMountOpLock);
6133 
6134 	// Helper to delete a newly created file device on failure.
6135 	// Not exactly beautiful, but helps to keep the code below cleaner.
6136 	struct FileDeviceDeleter {
6137 		FileDeviceDeleter() : id(-1) {}
6138 		~FileDeviceDeleter()
6139 		{
6140 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
6141 		}
6142 
6143 		partition_id id;
6144 	} fileDeviceDeleter;
6145 
6146 	// If the file system is not a "virtual" one, the device argument should
6147 	// point to a real file/device (if given at all).
6148 	// get the partition
6149 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
6150 	KPartition *partition = NULL;
6151 	KPath normalizedDevice;
6152 	bool newlyCreatedFileDevice = false;
6153 
6154 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
6155 		// normalize the device path
6156 		status = normalizedDevice.SetTo(device, true);
6157 		if (status != B_OK)
6158 			return status;
6159 
6160 		// get a corresponding partition from the DDM
6161 		partition = ddm->RegisterPartition(normalizedDevice.Path());
6162 
6163 		if (!partition) {
6164 			// Partition not found: This either means the user supplied
6165 			// an invalid path, or the path refers to an image file. We try
6166 			// to let the DDM create a file device for the path.
6167 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
6168 				&newlyCreatedFileDevice);
6169 			if (deviceID >= 0) {
6170 				partition = ddm->RegisterPartition(deviceID);
6171 				if (newlyCreatedFileDevice)
6172 					fileDeviceDeleter.id = deviceID;
6173 			}
6174 		}
6175 
6176 		if (!partition) {
6177 			TRACE(("fs_mount(): Partition `%s' not found.\n",
6178 				normalizedDevice.Path()));
6179 			return B_ENTRY_NOT_FOUND;
6180 		}
6181 
6182 		device = normalizedDevice.Path();
6183 			// correct path to file device
6184 	}
6185 	PartitionRegistrar partitionRegistrar(partition, true);
6186 
6187 	// Write lock the partition's device. For the time being, we keep the lock
6188 	// until we're done mounting -- not nice, but it ensures that no-one is
6189 	// interfering.
6190 	// TODO: Find a better solution.
6191 	KDiskDevice *diskDevice = NULL;
6192 	if (partition) {
6193 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6194 		if (!diskDevice) {
6195 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
6196 			return B_ERROR;
6197 		}
6198 	}
6199 
6200 	DeviceWriteLocker writeLocker(diskDevice, true);
6201 		// this takes over the write lock acquired before
6202 
6203 	if (partition) {
6204 		// make sure that the partition is not busy
6205 		if (partition->IsBusy()) {
6206 			TRACE(("fs_mount(): Partition is busy.\n"));
6207 			return B_BUSY;
6208 		}
6209 
6210 		// if no FS name had been supplied, we get it from the partition
6211 		if (!fsName) {
6212 			KDiskSystem *diskSystem = partition->DiskSystem();
6213 			if (!diskSystem) {
6214 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
6215 					"recognize it.\n"));
6216 				return B_BAD_VALUE;
6217 			}
6218 
6219 			if (!diskSystem->IsFileSystem()) {
6220 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
6221 					"partitioning system.\n"));
6222 				return B_BAD_VALUE;
6223 			}
6224 
6225 			// The disk system name will not change, and the KDiskSystem
6226 			// object will not go away while the disk device is locked (and
6227 			// the partition has a reference to it), so this is safe.
6228 			fsName = diskSystem->Name();
6229 		}
6230 	}
6231 
6232 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
6233 	if (mount == NULL)
6234 		return B_NO_MEMORY;
6235 
6236 	mount->volume = (fs_volume*)malloc(sizeof(fs_volume));
6237 	if (mount->volume == NULL) {
6238 		free(mount);
6239 		return B_NO_MEMORY;
6240 	}
6241 
6242 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
6243 
6244 	mount->fs_name = get_file_system_name(fsName);
6245 	if (mount->fs_name == NULL) {
6246 		status = B_NO_MEMORY;
6247 		goto err1;
6248 	}
6249 
6250 	mount->device_name = strdup(device);
6251 		// "device" can be NULL
6252 
6253 	mount->fs = get_file_system(fsName);
6254 	if (mount->fs == NULL) {
6255 		status = ENODEV;
6256 		goto err3;
6257 	}
6258 
6259 	status = recursive_lock_init(&mount->rlock, "mount rlock");
6260 	if (status < B_OK)
6261 		goto err4;
6262 
6263 	// initialize structure
6264 	mount->id = sNextMountID++;
6265 	mount->partition = NULL;
6266 	mount->root_vnode = NULL;
6267 	mount->covers_vnode = NULL;
6268 	mount->unmounting = false;
6269 	mount->owns_file_device = false;
6270 
6271 	mount->volume->id = mount->id;
6272 	mount->volume->layer = 0;
6273 	mount->volume->private_volume = NULL;
6274 	mount->volume->ops = NULL;
6275 	mount->volume->sub_volume = NULL;
6276 	mount->volume->super_volume = NULL;
6277 
6278 	// insert mount struct into list before we call FS's mount() function
6279 	// so that vnodes can be created for this mount
6280 	mutex_lock(&sMountMutex);
6281 	hash_insert(sMountsTable, mount);
6282 	mutex_unlock(&sMountMutex);
6283 
6284 	ino_t rootID;
6285 
6286 	if (!sRoot) {
6287 		// we haven't mounted anything yet
6288 		if (strcmp(path, "/") != 0) {
6289 			status = B_ERROR;
6290 			goto err5;
6291 		}
6292 
6293 		status = mount->fs->mount(mount->volume, device, flags, args, &rootID);
6294 		if (status < 0) {
6295 			// ToDo: why should we hide the error code from the file system here?
6296 			//status = ERR_VFS_GENERAL;
6297 			goto err5;
6298 		}
6299 	} else {
6300 		struct vnode *coveredVnode;
6301 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
6302 		if (status < B_OK)
6303 			goto err5;
6304 
6305 		// make sure covered_vnode is a DIR
6306 		struct stat coveredNodeStat;
6307 		status = FS_CALL(coveredVnode, read_stat, &coveredNodeStat);
6308 		if (status < B_OK)
6309 			goto err5;
6310 
6311 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
6312 			status = B_NOT_A_DIRECTORY;
6313 			goto err5;
6314 		}
6315 
6316 		if (coveredVnode->mount->root_vnode == coveredVnode) {
6317 			// this is already a mount point
6318 			status = B_BUSY;
6319 			goto err5;
6320 		}
6321 
6322 		mount->covers_vnode = coveredVnode;
6323 
6324 		// mount it
6325 		status = mount->fs->mount(mount->volume, device, flags, args, &rootID);
6326 		if (status < B_OK)
6327 			goto err6;
6328 	}
6329 
6330 	// the root node is supposed to be owned by the file system - it must
6331 	// exist at this point
6332 	mount->root_vnode = lookup_vnode(mount->id, rootID);
6333 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
6334 		panic("fs_mount: file system does not own its root node!\n");
6335 		status = B_ERROR;
6336 		goto err7;
6337 	}
6338 
6339 	// No race here, since fs_mount() is the only function changing
6340 	// covers_vnode (and holds sMountOpLock at that time).
6341 	mutex_lock(&sVnodeCoveredByMutex);
6342 	if (mount->covers_vnode)
6343 		mount->covers_vnode->covered_by = mount->root_vnode;
6344 	mutex_unlock(&sVnodeCoveredByMutex);
6345 
6346 	if (!sRoot) {
6347 		sRoot = mount->root_vnode;
6348 		benaphore_lock(&sIOContextRootLock);
6349 		get_current_io_context(true)->root = sRoot;
6350 		benaphore_unlock(&sIOContextRootLock);
6351 		inc_vnode_ref_count(sRoot);
6352 	}
6353 
6354 	// supply the partition (if any) with the mount cookie and mark it mounted
6355 	if (partition) {
6356 		partition->SetMountCookie(mount->volume->private_volume);
6357 		partition->SetVolumeID(mount->id);
6358 
6359 		// keep a partition reference as long as the partition is mounted
6360 		partitionRegistrar.Detach();
6361 		mount->partition = partition;
6362 		mount->owns_file_device = newlyCreatedFileDevice;
6363 		fileDeviceDeleter.id = -1;
6364 	}
6365 
6366 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
6367 		mount->covers_vnode ? mount->covers_vnode->id : -1);
6368 
6369 	return mount->id;
6370 
6371 err7:
6372 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
6373 err6:
6374 	if (mount->covers_vnode)
6375 		put_vnode(mount->covers_vnode);
6376 err5:
6377 	mutex_lock(&sMountMutex);
6378 	hash_remove(sMountsTable, mount);
6379 	mutex_unlock(&sMountMutex);
6380 
6381 	recursive_lock_destroy(&mount->rlock);
6382 err4:
6383 	put_file_system(mount->fs);
6384 	free(mount->device_name);
6385 err3:
6386 	free(mount->fs_name);
6387 err1:
6388 	free(mount->volume);
6389 	free(mount);
6390 
6391 	return status;
6392 }
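
// Illustrative sketch (not compiled): how the file-device fallback above is
// typically exercised -- mounting a regular image file, for which the DDM
// creates a file device on the fly. The paths and FS name are made up.
#if 0
static void
mount_image_example(void)
{
	dev_t volume = _kern_mount("/mnt/image", "/boot/home/test.image",
		"bfs", 0, NULL, 0);
	if (volume < B_OK)
		dprintf("mounting image failed: %s\n", strerror(volume));
}
#endif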
6393 
6394 
6395 static status_t
6396 fs_unmount(char *path, dev_t mountID, uint32 flags, bool kernel)
6397 {
6398 	struct vnode *vnode = NULL;
6399 	struct fs_mount *mount;
6400 	status_t err;
6401 
6402 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d)\n", path, mountID,
6403 		kernel));
6404 
6405 	if (path != NULL) {
6406 		err = path_to_vnode(path, true, &vnode, NULL, kernel);
6407 		if (err != B_OK)
6408 			return B_ENTRY_NOT_FOUND;
6409 	}
6410 
6411 	RecursiveLocker mountOpLocker(sMountOpLock);
6412 
6413 	mount = find_mount(path != NULL ? vnode->device : mountID);
6414 	if (mount == NULL) {
6415 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
6416 			vnode);
6417 	}
6418 
6419 	if (path != NULL) {
6420 		put_vnode(vnode);
6421 
6422 		if (mount->root_vnode != vnode) {
6423 			// not a mount point
6424 			return B_BAD_VALUE;
6425 		}
6426 	}
6427 
6428 	// if the volume is associated with a partition, lock the device of the
6429 	// partition as long as we are unmounting
6430 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6431 	KPartition *partition = mount->partition;
6432 	KDiskDevice *diskDevice = NULL;
6433 	if (partition) {
6434 		if (partition->Device() == NULL) {
6435 			dprintf("fs_unmount(): There is no device!\n");
6436 			return B_ERROR;
6437 		}
6438 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6439 		if (!diskDevice) {
6440 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
6441 			return B_ERROR;
6442 		}
6443 	}
6444 	DeviceWriteLocker writeLocker(diskDevice, true);
6445 
6446 	// make sure that the partition is not busy
6447 	if (partition) {
6448 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
6449 			TRACE(("fs_unmount(): Partition is busy.\n"));
6450 			return B_BUSY;
6451 		}
6452 	}
6453 
6454 	// grab the vnode master mutex to keep someone from creating
6455 	// a vnode while we're figuring out if we can continue
6456 	mutex_lock(&sVnodeMutex);
6457 
6458 	bool disconnectedDescriptors = false;
6459 
6460 	while (true) {
6461 		bool busy = false;
6462 
6463 		// cycle through the list of vnodes associated with this mount and
6464 		// make sure all of them are not busy or have refs on them
6465 		vnode = NULL;
6466 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
6467 				vnode)) != NULL) {
6468 			// The root vnode ref_count needs to be 1 here (the mount has a
6469 			// reference).
6470 			if (vnode->busy
6471 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
6472 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
6473 				// there are still vnodes in use on this mount, so we cannot
6474 				// unmount yet
6475 				busy = true;
6476 				break;
6477 			}
6478 		}
6479 
6480 		if (!busy)
6481 			break;
6482 
6483 		if ((flags & B_FORCE_UNMOUNT) == 0) {
6484 			mutex_unlock(&sVnodeMutex);
6485 			put_vnode(mount->root_vnode);
6486 
6487 			return B_BUSY;
6488 		}
6489 
6490 		if (disconnectedDescriptors) {
6491 			// wait a bit until the last access is finished, and then try again
6492 			mutex_unlock(&sVnodeMutex);
6493 			snooze(100000);
6494 			// TODO: if there is some kind of bug that prevents the ref counts
6495 			//	from getting back to zero, this will fall into an endless loop...
6496 			mutex_lock(&sVnodeMutex);
6497 			continue;
6498 		}
6499 
6500 		// the file system is still busy - but we're forced to unmount it,
6501 		// so let's disconnect all open file descriptors
6502 
6503 		mount->unmounting = true;
6504 			// prevent new vnodes from being created
6505 
6506 		mutex_unlock(&sVnodeMutex);
6507 
6508 		disconnect_mount_or_vnode_fds(mount, NULL);
6509 		disconnectedDescriptors = true;
6510 
6511 		mutex_lock(&sVnodeMutex);
6512 	}
6513 
6514 	// we can safely continue, mark all of the vnodes busy and this mount
6515 	// structure in unmounting state
6516 	mount->unmounting = true;
6517 
6518 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
6519 		vnode->busy = true;
6520 
6521 		if (vnode->ref_count == 0) {
6522 			// this vnode has been unused before
6523 			list_remove_item(&sUnusedVnodeList, vnode);
6524 			sUnusedVnodes--;
6525 		}
6526 	}
6527 
6528 	// The ref_count of the root node is 1 at this point; see above for why this is
6529 	mount->root_vnode->ref_count--;
6530 
6531 	mutex_unlock(&sVnodeMutex);
6532 
6533 	mutex_lock(&sVnodeCoveredByMutex);
6534 	mount->covers_vnode->covered_by = NULL;
6535 	mutex_unlock(&sVnodeCoveredByMutex);
6536 	put_vnode(mount->covers_vnode);
6537 
6538 	// Free all vnodes associated with this mount.
6539 	// They will be removed from the mount list by free_vnode(), so
6540 	// we don't have to do that ourselves here.
6541 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes))
6542 			!= NULL) {
6543 		free_vnode(vnode, false);
6544 	}
6545 
6546 	// remove the mount structure from the hash table
6547 	mutex_lock(&sMountMutex);
6548 	hash_remove(sMountsTable, mount);
6549 	mutex_unlock(&sMountMutex);
6550 
6551 	mountOpLocker.Unlock();
6552 
6553 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
6554 	notify_unmount(mount->id);
6555 
6556 	// release the file system
6557 	put_file_system(mount->fs);
6558 
6559 	// dereference the partition and mark it unmounted
6560 	if (partition) {
6561 		partition->SetVolumeID(-1);
6562 		partition->SetMountCookie(NULL);
6563 
6564 		if (mount->owns_file_device)
6565 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
6566 		partition->Unregister();
6567 	}
6568 
6569 	free(mount->device_name);
6570 	free(mount->fs_name);
6571 	free(mount->volume);
6572 	free(mount);
6573 
6574 	return B_OK;
6575 }
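
// Illustrative sketch (not compiled): a forced unmount. With B_FORCE_UNMOUNT
// set, the loop above disconnects all open file descriptors instead of
// failing with B_BUSY, and then retries until the vnode references have
// drained. The path is made up.
#if 0
static status_t
force_unmount_example(void)
{
	return _kern_unmount("/mnt/image", B_FORCE_UNMOUNT);
}
#endif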
6576 
6577 
6578 static status_t
6579 fs_sync(dev_t device)
6580 {
6581 	struct fs_mount *mount;
6582 	status_t status = get_mount(device, &mount);
6583 	if (status < B_OK)
6584 		return status;
6585 
6586 	// First, synchronize all file caches
6587 
6588 	struct vnode *previousVnode = NULL;
6589 	while (true) {
6590 		// synchronize access to vnode list
6591 		recursive_lock_lock(&mount->rlock);
6592 
6593 		struct vnode *vnode = previousVnode;
6594 		do {
6595 			// TODO: we could track writes (and writable mapped vnodes)
6596 			//	and have a simple flag that we could test for here
6597 			vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode);
6598 		} while (vnode != NULL && vnode->cache == NULL);
6599 
6600 		ino_t id = -1;
6601 		if (vnode != NULL)
6602 			id = vnode->id;
6603 
6604 		recursive_lock_unlock(&mount->rlock);
6605 
6606 		if (vnode == NULL)
6607 			break;
6608 
6609 		// acquire a reference to the vnode
6610 
6611 		if (get_vnode(mount->id, id, &vnode, true, false) == B_OK) {
6612 			if (previousVnode != NULL)
6613 				put_vnode(previousVnode);
6614 
6615 			if (vnode->cache != NULL)
6616 				vm_cache_write_modified(vnode->cache, false);
6617 
6618 			// the next vnode might change until we lock the vnode list again,
6619 			// but this vnode won't go away since we keep a reference to it.
6620 			previousVnode = vnode;
6621 		} else {
6622 			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n",
6623 				mount->id, id);
6624 			break;
6625 		}
6626 	}
6627 
6628 	if (previousVnode != NULL)
6629 		put_vnode(previousVnode);
6630 
6631 	// And then, let the file systems do their synchronizing work
6632 
6633 	mutex_lock(&sMountMutex);
6634 
6635 	if (HAS_FS_MOUNT_CALL(mount, sync))
6636 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
6637 
6638 	mutex_unlock(&sMountMutex);
6639 
6640 	put_mount(mount);
6641 	return status;
6642 }
6643 
6644 
6645 static status_t
6646 fs_read_info(dev_t device, struct fs_info *info)
6647 {
6648 	struct fs_mount *mount;
6649 	status_t status = get_mount(device, &mount);
6650 	if (status < B_OK)
6651 		return status;
6652 
6653 	memset(info, 0, sizeof(struct fs_info));
6654 
6655 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
6656 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
6657 
6658 	// fill in info the file system doesn't (have to) know about
6659 	if (status == B_OK) {
6660 		info->dev = mount->id;
6661 		info->root = mount->root_vnode->id;
6662 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
6663 		if (mount->device_name != NULL) {
6664 			strlcpy(info->device_name, mount->device_name,
6665 				sizeof(info->device_name));
6666 		}
6667 	}
6668 
6669 	// if the call is not supported by the file system, there are still
6670 	// the parts that we filled out ourselves
6671 
6672 	put_mount(mount);
6673 	return status;
6674 }
6675 
6676 
6677 static status_t
6678 fs_write_info(dev_t device, const struct fs_info *info, int mask)
6679 {
6680 	struct fs_mount *mount;
6681 	status_t status = get_mount(device, &mount);
6682 	if (status < B_OK)
6683 		return status;
6684 
6685 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
6686 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
6687 	else
6688 		status = EROFS;
6689 
6690 	put_mount(mount);
6691 	return status;
6692 }
6693 
6694 
6695 static dev_t
6696 fs_next_device(int32 *_cookie)
6697 {
6698 	struct fs_mount *mount = NULL;
6699 	dev_t device = *_cookie;
6700 
6701 	mutex_lock(&sMountMutex);
6702 
6703 	// Since device IDs are assigned sequentially, this algorithm
6704 	// works well enough. It makes sure that the device list
6705 	// returned is sorted, and that no device is skipped when an
6706 	// already visited device has been unmounted.
6707 
6708 	while (device < sNextMountID) {
6709 		mount = find_mount(device++);
6710 		if (mount != NULL && mount->volume->private_volume != NULL)
6711 			break;
6712 	}
6713 
6714 	*_cookie = device;
6715 
6716 	if (mount != NULL)
6717 		device = mount->id;
6718 	else
6719 		device = B_BAD_VALUE;
6720 
6721 	mutex_unlock(&sMountMutex);
6722 
6723 	return device;
6724 }
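
// Illustrative sketch (not compiled): enumerating all mounted volumes with
// the cookie protocol above. Userland goes through next_dev(); the kernel
// counterpart is _kern_next_device() further below.
#if 0
static void
list_mounted_volumes(void)
{
	int32 cookie = 0;
	dev_t device;
	while ((device = fs_next_device(&cookie)) >= 0)
		dprintf("mounted volume: %ld\n", device);
			// fs_next_device() returns B_BAD_VALUE once the list is exhausted
}
#endif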
6725 
6726 
6727 static status_t
6728 get_cwd(char *buffer, size_t size, bool kernel)
6729 {
6730 	// Get current working directory from io context
6731 	struct io_context *context = get_current_io_context(kernel);
6732 	status_t status;
6733 
6734 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
6735 
6736 	mutex_lock(&context->io_mutex);
6737 
6738 	struct vnode* vnode = context->cwd;
6739 	if (vnode)
6740 		inc_vnode_ref_count(vnode);
6741 
6742 	mutex_unlock(&context->io_mutex);
6743 
6744 	if (vnode) {
6745 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
6746 		put_vnode(vnode);
6747 	} else
6748 		status = B_ERROR;
6749 
6750 	return status;
6751 }
6752 
6753 
6754 static status_t
6755 set_cwd(int fd, char *path, bool kernel)
6756 {
6757 	struct io_context *context;
6758 	struct vnode *vnode = NULL;
6759 	struct vnode *oldDirectory;
6760 	struct stat stat;
6761 	status_t status;
6762 
6763 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
6764 
6765 	// Get vnode for passed path, and bail if it failed
6766 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6767 	if (status < 0)
6768 		return status;
6769 
6770 	status = FS_CALL(vnode, read_stat, &stat);
6771 	if (status < 0)
6772 		goto err;
6773 
6774 	if (!S_ISDIR(stat.st_mode)) {
6775 		// nope, can't cwd to here
6776 		status = B_NOT_A_DIRECTORY;
6777 		goto err;
6778 	}
6779 
6780 	// Get current io context and lock
6781 	context = get_current_io_context(kernel);
6782 	mutex_lock(&context->io_mutex);
6783 
6784 	// save the old current working directory first
6785 	oldDirectory = context->cwd;
6786 	context->cwd = vnode;
6787 
6788 	mutex_unlock(&context->io_mutex);
6789 
6790 	if (oldDirectory)
6791 		put_vnode(oldDirectory);
6792 
6793 	return B_NO_ERROR;
6794 
6795 err:
6796 	put_vnode(vnode);
6797 	return status;
6798 }
6799 
6800 
6801 //	#pragma mark - kernel mirrored syscalls
6802 
6803 
6804 dev_t
6805 _kern_mount(const char *path, const char *device, const char *fsName,
6806 	uint32 flags, const char *args, size_t argsLength)
6807 {
6808 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6809 	if (pathBuffer.InitCheck() != B_OK)
6810 		return B_NO_MEMORY;
6811 
6812 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
6813 }
6814 
6815 
6816 status_t
6817 _kern_unmount(const char *path, uint32 flags)
6818 {
6819 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6820 	if (pathBuffer.InitCheck() != B_OK)
6821 		return B_NO_MEMORY;
6822 
6823 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
6824 }
6825 
6826 
6827 status_t
6828 _kern_read_fs_info(dev_t device, struct fs_info *info)
6829 {
6830 	if (info == NULL)
6831 		return B_BAD_VALUE;
6832 
6833 	return fs_read_info(device, info);
6834 }
6835 
6836 
6837 status_t
6838 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
6839 {
6840 	if (info == NULL)
6841 		return B_BAD_VALUE;
6842 
6843 	return fs_write_info(device, info, mask);
6844 }
6845 
6846 
6847 status_t
6848 _kern_sync(void)
6849 {
6850 	// Note: _kern_sync() is also called from _user_sync()
6851 	int32 cookie = 0;
6852 	dev_t device;
6853 	while ((device = next_dev(&cookie)) >= 0) {
6854 		status_t status = fs_sync(device);
6855 		if (status != B_OK && status != B_BAD_VALUE)
6856 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
6857 	}
6858 
6859 	return B_OK;
6860 }
6861 
6862 
6863 dev_t
6864 _kern_next_device(int32 *_cookie)
6865 {
6866 	return fs_next_device(_cookie);
6867 }
6868 
6869 
6870 status_t
6871 _kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
6872 	size_t infoSize)
6873 {
6874 	if (infoSize != sizeof(fd_info))
6875 		return B_BAD_VALUE;
6876 
6877 	struct io_context *context = NULL;
6878 	sem_id contextMutex = -1;
6879 	struct team *team = NULL;
6880 
6881 	cpu_status state = disable_interrupts();
6882 	GRAB_TEAM_LOCK();
6883 
6884 	team = team_get_team_struct_locked(teamID);
6885 	if (team) {
6886 		context = (io_context *)team->io_context;
6887 		contextMutex = context->io_mutex.sem;
6888 	}
6889 
6890 	RELEASE_TEAM_LOCK();
6891 	restore_interrupts(state);
6892 
6893 	// We now have a context - since we couldn't lock it while having
6894 	// safe access to the team structure, we need to lock its mutex
6895 	// manually now
6896 
6897 	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
6898 		// team doesn't exist or seems to be gone
6899 		return B_BAD_TEAM_ID;
6900 	}
6901 
6902 	// the team cannot be deleted completely while we're owning its
6903 	// io_context mutex, so we can safely play with it now
6904 
6905 	context->io_mutex.holder = thread_get_current_thread_id();
6906 
6907 	uint32 slot = *_cookie;
6908 
6909 	struct file_descriptor *descriptor;
6910 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
6911 		slot++;
6912 
6913 	if (slot >= context->table_size) {
6914 		mutex_unlock(&context->io_mutex);
6915 		return B_ENTRY_NOT_FOUND;
6916 	}
6917 
6918 	info->number = slot;
6919 	info->open_mode = descriptor->open_mode;
6920 
6921 	struct vnode *vnode = fd_vnode(descriptor);
6922 	if (vnode != NULL) {
6923 		info->device = vnode->device;
6924 		info->node = vnode->id;
6925 	} else if (descriptor->u.mount != NULL) {
6926 		info->device = descriptor->u.mount->id;
6927 		info->node = -1;
6928 	}
6929 
6930 	mutex_unlock(&context->io_mutex);
6931 
6932 	*_cookie = slot + 1;
6933 	return B_OK;
6934 }
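
// Illustrative sketch (not compiled): walking a team's open descriptors with
// the slot cookie protocol above. The format specifiers follow this file's
// dprintf() conventions.
#if 0
static void
dump_team_fds(team_id team)
{
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(team, &cookie, &info, sizeof(info)) == B_OK) {
		dprintf("fd %ld: device %ld, node %Ld, open mode %#lx\n", info.number,
			info.device, info.node, info.open_mode);
	}
		// terminates with B_ENTRY_NOT_FOUND when no slot is left
}
#endif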
6935 
6936 
6937 int
6938 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
6939 {
6940 	if (openMode & O_CREAT)
6941 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
6942 
6943 	return file_open_entry_ref(device, inode, name, openMode, true);
6944 }
6945 
6946 
6947 /*!	\brief Opens a node specified by a FD + path pair.
6948 
6949 	At least one of \a fd and \a path must be specified.
6950 	If only \a fd is given, the function opens the node identified by this
6951 	FD. If only a path is given, this path is opened. If both are given and
6952 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6953 	of the directory (!) identified by \a fd.
6954 
6955 	\param fd The FD. May be < 0.
6956 	\param path The absolute or relative path. May be \c NULL.
6957 	\param openMode The open mode.
6958 	\return A FD referring to the newly opened node, or an error code,
6959 			if an error occurs.
6960 */
6961 int
6962 _kern_open(int fd, const char *path, int openMode, int perms)
6963 {
6964 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6965 	if (pathBuffer.InitCheck() != B_OK)
6966 		return B_NO_MEMORY;
6967 
6968 	if (openMode & O_CREAT)
6969 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
6970 
6971 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
6972 }
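
// Illustrative sketch (not compiled): the three addressing modes of the
// FD + path pair described above. The paths are made up; error handling is
// omitted.
#if 0
static void
open_modes_example(int dirFD)
{
	int fd1 = _kern_open(-1, "/boot/home/Desktop/file", O_RDONLY, 0);
		// absolute path: any FD passed alongside would be ignored
	int fd2 = _kern_open(dirFD, "file", O_RDONLY, 0);
		// relative path: reckoned off of the directory dirFD
	int fd3 = _kern_open(dirFD, NULL, O_RDONLY, 0);
		// no path: opens the node identified by dirFD itself
}
#endif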
6973 
6974 
6975 /*!	\brief Opens a directory specified by entry_ref or node_ref.
6976 
6977 	The supplied name may be \c NULL, in which case directory identified
6978 	by \a device and \a inode will be opened. Otherwise \a device and
6979 	\a inode identify the parent directory of the directory to be opened
6980 	and \a name its entry name.
6981 
6982 	\param device If \a name is specified the ID of the device the parent
6983 		   directory of the directory to be opened resides on, otherwise
6984 		   the device of the directory itself.
6985 	\param inode If \a name is specified the node ID of the parent
6986 		   directory of the directory to be opened, otherwise node ID of the
6987 		   directory itself.
6988 	\param name The entry name of the directory to be opened. If \c NULL,
6989 		   the \a device + \a inode pair identify the node to be opened.
6990 	\return The FD of the newly opened directory or an error code, if
6991 			something went wrong.
6992 */
6993 int
6994 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
6995 {
6996 	return dir_open_entry_ref(device, inode, name, true);
6997 }
6998 
6999 
7000 /*!	\brief Opens a directory specified by a FD + path pair.
7001 
7002 	At least one of \a fd and \a path must be specified.
7003 	If only \a fd is given, the function opens the directory identified by this
7004 	FD. If only a path is given, this path is opened. If both are given and
7005 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7006 	of the directory (!) identified by \a fd.
7007 
7008 	\param fd The FD. May be < 0.
7009 	\param path The absolute or relative path. May be \c NULL.
7010 	\return A FD referring to the newly opened directory, or an error code,
7011 			if an error occurs.
7012 */
7013 int
7014 _kern_open_dir(int fd, const char *path)
7015 {
7016 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7017 	if (pathBuffer.InitCheck() != B_OK)
7018 		return B_NO_MEMORY;
7019 
7020 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7021 }
7022 
7023 
7024 status_t
7025 _kern_fcntl(int fd, int op, uint32 argument)
7026 {
7027 	return common_fcntl(fd, op, argument, true);
7028 }
7029 
7030 
7031 status_t
7032 _kern_fsync(int fd)
7033 {
7034 	return common_sync(fd, true);
7035 }
7036 
7037 
7038 status_t
7039 _kern_lock_node(int fd)
7040 {
7041 	return common_lock_node(fd, true);
7042 }
7043 
7044 
7045 status_t
7046 _kern_unlock_node(int fd)
7047 {
7048 	return common_unlock_node(fd, true);
7049 }
7050 
7051 
7052 status_t
7053 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
7054 {
7055 	return dir_create_entry_ref(device, inode, name, perms, true);
7056 }
7057 
7058 
7059 /*!	\brief Creates a directory specified by a FD + path pair.
7060 
7061 	\a path must always be specified (it contains the name of the new directory
7062 	at least). If only a path is given, this path identifies the location at
7063 	which the directory shall be created. If both \a fd and \a path are given and
7064 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7065 	of the directory (!) identified by \a fd.
7066 
7067 	\param fd The FD. May be < 0.
7068 	\param path The absolute or relative path. Must not be \c NULL.
7069 	\param perms The access permissions the new directory shall have.
7070 	\return \c B_OK, if the directory has been created successfully, another
7071 			error code otherwise.
7072 */
7073 status_t
7074 _kern_create_dir(int fd, const char *path, int perms)
7075 {
7076 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7077 	if (pathBuffer.InitCheck() != B_OK)
7078 		return B_NO_MEMORY;
7079 
7080 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7081 }
7082 
7083 
7084 status_t
7085 _kern_remove_dir(int fd, const char *path)
7086 {
7087 	if (path) {
7088 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7089 		if (pathBuffer.InitCheck() != B_OK)
7090 			return B_NO_MEMORY;
7091 
7092 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7093 	}
7094 
7095 	return dir_remove(fd, NULL, true);
7096 }
7097 
7098 
7099 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7100 
7101 	At least one of \a fd and \a path must be specified.
7102 	If only \a fd is given, the symlink to be read is the node
7103 	identified by this FD. If only a path is given, this path identifies the
7104 	symlink to be read. If both are given and the path is absolute, \a fd is
7105 	ignored; a relative path is reckoned off of the directory (!) identified
7106 	by \a fd.
7107 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7108 	will still be updated to reflect the required buffer size.
7109 
7110 	\param fd The FD. May be < 0.
7111 	\param path The absolute or relative path. May be \c NULL.
7112 	\param buffer The buffer into which the contents of the symlink shall be
7113 		   written.
7114 	\param _bufferSize A pointer to the size of the supplied buffer.
7115 	\return The length of the link on success or an appropriate error code
7116 */
7117 status_t
7118 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
7119 {
7120 	status_t status;
7121 
7122 	if (path) {
7123 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7124 		if (pathBuffer.InitCheck() != B_OK)
7125 			return B_NO_MEMORY;
7126 
7127 		return common_read_link(fd, pathBuffer.LockBuffer(),
7128 			buffer, _bufferSize, true);
7129 	}
7130 
7131 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
7132 }
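
// Illustrative sketch (not compiled) of the size-reporting contract noted in
// the documentation above: on B_BUFFER_OVERFLOW, *_bufferSize has been
// updated, so the caller knows how large a buffer a retry would need.
#if 0
static status_t
read_link_example(const char *path)
{
	char buffer[B_PATH_NAME_LENGTH];
	size_t size = sizeof(buffer);
	status_t status = _kern_read_link(-1, path, buffer, &size);
	if (status == B_BUFFER_OVERFLOW) {
		// "size" now holds the size the link contents actually need
		dprintf("buffer too small, %lu bytes needed\n", size);
	}
	return status;
}
#endif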
7133 
7134 
7135 /*!	\brief Creates a symlink specified by a FD + path pair.
7136 
7137 	\a path must always be specified (it contains the name of the new symlink
7138 	at least). If only a path is given, this path identifies the location at
7139 	which the symlink shall be created. If both \a fd and \a path are given and
7140 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7141 	of the directory (!) identified by \a fd.
7142 
7143 	\param fd The FD. May be < 0.
7144 	\param path The absolute or relative path. Must not be \c NULL.
7145 	\param toPath The path the new symlink shall point to.
7146 	\param mode The access permissions the new symlink shall have.
7146 	\return \c B_OK, if the symlink has been created successfully, another
7147 			error code otherwise.
7148 */
7149 status_t
7150 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
7151 {
7152 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7153 	if (pathBuffer.InitCheck() != B_OK)
7154 		return B_NO_MEMORY;
7155 
7156 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
7157 		toPath, mode, true);
7158 }
7159 
7160 
7161 status_t
7162 _kern_create_link(const char *path, const char *toPath)
7163 {
7164 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7165 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
7166 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7167 		return B_NO_MEMORY;
7168 
7169 	return common_create_link(pathBuffer.LockBuffer(),
7170 		toPathBuffer.LockBuffer(), true);
7171 }
7172 
7173 
7174 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
7175 
7176 	\a path must always be specified (it contains at least the name of the entry
7177 	to be deleted). If only a path is given, this path identifies the entry
7178 	directly. If both \a fd and \a path are given and the path is absolute,
7179 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7180 	identified by \a fd.
7181 
7182 	\param fd The FD. May be < 0.
7183 	\param path The absolute or relative path. Must not be \c NULL.
7184 	\return \c B_OK, if the entry has been removed successfully, another
7185 			error code otherwise.
7186 */
7187 status_t
7188 _kern_unlink(int fd, const char *path)
7189 {
7190 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7191 	if (pathBuffer.InitCheck() != B_OK)
7192 		return B_NO_MEMORY;
7193 
7194 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
7195 }
7196 
7197 
7198 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
7199 		   by another FD + path pair.
7200 
7201 	\a oldPath and \a newPath must always be specified (they contain at least
7202 	the name of the entry). If only a path is given, this path identifies the
7203 	entry directly. If both a FD and a path are given and the path is absolute,
7204 	the FD is ignored; a relative path is reckoned off of the directory (!)
7205 	identified by the respective FD.
7206 
7207 	\param oldFD The FD of the old location. May be < 0.
7208 	\param oldPath The absolute or relative path of the old location. Must not
7209 		   be \c NULL.
7210 	\param newFD The FD of the new location. May be < 0.
7211 	\param newPath The absolute or relative path of the new location. Must not
7212 		   be \c NULL.
7213 	\return \c B_OK, if the entry has been moved successfully, another
7214 			error code otherwise.
7215 */
7216 status_t
7217 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
7218 {
7219 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
7220 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
7221 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7222 		return B_NO_MEMORY;
7223 
7224 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
7225 		newFD, newPathBuffer.LockBuffer(), true);
7226 }
7227 
7228 
7229 status_t
7230 _kern_access(const char *path, int mode)
7231 {
7232 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7233 	if (pathBuffer.InitCheck() != B_OK)
7234 		return B_NO_MEMORY;
7235 
7236 	return common_access(pathBuffer.LockBuffer(), mode, true);
7237 }
7238 
7239 
7240 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
7241 
7242 	If only \a fd is given, the stat operation associated with the type
7243 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
7244 	given, this path identifies the entry for whose node to retrieve the
7245 	stat data. If both \a fd and \a path are given and the path is absolute,
7246 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7247 	identified by \a fd and specifies the entry whose stat data shall be
7248 	retrieved.
7249 
7250 	\param fd The FD. May be < 0.
7251 	\param path The absolute or relative path. Must not be \c NULL.
7252 	\param traverseLeafLink If \a path is given, \c true specifies that the
7253 		   function shall not stick to symlinks, but traverse them.
7254 	\param stat The buffer the stat data shall be written into.
7255 	\param statSize The size of the supplied stat buffer.
7256 	\return \c B_OK, if the stat data have been read successfully, another
7257 			error code otherwise.
7258 */
7259 status_t
7260 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
7261 	struct stat *stat, size_t statSize)
7262 {
7263 	struct stat completeStat;
7264 	struct stat *originalStat = NULL;
7265 	status_t status;
7266 
7267 	if (statSize > sizeof(struct stat))
7268 		return B_BAD_VALUE;
7269 
7270 	// this supports different stat extensions
7271 	if (statSize < sizeof(struct stat)) {
7272 		originalStat = stat;
7273 		stat = &completeStat;
7274 	}
7275 
7276 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
7277 
7278 	if (status == B_OK && originalStat != NULL)
7279 		memcpy(originalStat, stat, statSize);
7280 
7281 	return status;
7282 }
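
// Illustrative sketch (not compiled): why the statSize indirection above
// matters. A caller compiled against an older, smaller stat layout -- the
// struct below is hypothetical -- still gets correct data, because the
// kernel stats into a complete struct stat and copies only statSize bytes
// back.
#if 0
struct old_stat {
	dev_t	st_dev;
	ino_t	st_ino;
	mode_t	st_mode;
};	// assumed to be a prefix of the current struct stat

static status_t
read_old_stat(int fd, struct old_stat *stat)
{
	return _kern_read_stat(fd, NULL, false, (struct stat *)stat,
		sizeof(struct old_stat));
}
#endif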
7283 
7284 
7285 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
7286 
7287 	If only \a fd is given, the stat operation associated with the type
7288 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
7289 	given, this path identifies the entry for whose node to write the
7290 	stat data. If both \a fd and \a path are given and the path is absolute,
7291 	\a fd is ignored; a relative path is reckoned off of the directory (!)
7292 	identified by \a fd and specifies the entry whose stat data shall be
7293 	written.
7294 
7295 	\param fd The FD. May be < 0.
7296 	\param path The absolute or relative path. Must not be \c NULL.
7297 	\param traverseLeafLink If \a path is given, \c true specifies that the
7298 		   function shall not stick to symlinks, but traverse them.
7299 	\param stat The buffer containing the stat data to be written.
7300 	\param statSize The size of the supplied stat buffer.
7301 	\param statMask A mask specifying which parts of the stat data shall be
7302 		   written.
7303 	\return \c B_OK, if the stat data have been written successfully,
7304 			another error code otherwise.
7305 */
7306 status_t
7307 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
7308 	const struct stat *stat, size_t statSize, int statMask)
7309 {
7310 	struct stat completeStat;
7311 
7312 	if (statSize > sizeof(struct stat))
7313 		return B_BAD_VALUE;
7314 
7315 	// this supports different stat extensions
7316 	if (statSize < sizeof(struct stat)) {
7317 		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
7318 		memcpy(&completeStat, stat, statSize);
7319 		stat = &completeStat;
7320 	}
7321 
7322 	status_t status;
7323 
7324 	if (path) {
7325 		// path given: write the stat of the node referred to by (fd, path)
7326 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7327 		if (pathBuffer.InitCheck() != B_OK)
7328 			return B_NO_MEMORY;
7329 
7330 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
7331 			traverseLeafLink, stat, statMask, true);
7332 	} else {
7333 		// no path given: get the FD and use the FD operation
7334 		struct file_descriptor *descriptor
7335 			= get_fd(get_current_io_context(true), fd);
7336 		if (descriptor == NULL)
7337 			return B_FILE_ERROR;
7338 
7339 		if (descriptor->ops->fd_write_stat)
7340 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
7341 		else
7342 			status = EOPNOTSUPP;
7343 
7344 		put_fd(descriptor);
7345 	}
7346 
7347 	return status;
7348 }
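
// Illustrative sketch (not compiled): the statMask protocol above lets a
// caller update a single field -- here the modification time -- while the
// rest of the stat buffer is ignored by the FS. This assumes the B_STAT_*
// mask constants from NodeMonitor.h.
#if 0
static status_t
touch_example(const char *path, time_t now)
{
	struct stat stat;
	stat.st_mtime = now;
	return _kern_write_stat(-1, path, true, &stat, sizeof(stat),
		B_STAT_MODIFICATION_TIME);
}
#endif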
7349 
7350 
7351 int
7352 _kern_open_attr_dir(int fd, const char *path)
7353 {
7354 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7355 	if (pathBuffer.InitCheck() != B_OK)
7356 		return B_NO_MEMORY;
7357 
7358 	if (path != NULL)
7359 		pathBuffer.SetTo(path);
7360 
7361 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
7362 }
7363 
7364 
7365 int
7366 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
7367 {
7368 	return attr_create(fd, name, type, openMode, true);
7369 }
7370 
7371 
7372 int
7373 _kern_open_attr(int fd, const char *name, int openMode)
7374 {
7375 	return attr_open(fd, name, openMode, true);
7376 }
7377 
7378 
7379 status_t
7380 _kern_remove_attr(int fd, const char *name)
7381 {
7382 	return attr_remove(fd, name, true);
7383 }
7384 
7385 
7386 status_t
7387 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
7388 {
7389 	return attr_rename(fromFile, fromName, toFile, toName, true);
7390 }
7391 
7392 
7393 int
7394 _kern_open_index_dir(dev_t device)
7395 {
7396 	return index_dir_open(device, true);
7397 }
7398 
7399 
7400 status_t
7401 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
7402 {
7403 	return index_create(device, name, type, flags, true);
7404 }
7405 
7406 
7407 status_t
7408 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
7409 {
7410 	return index_name_read_stat(device, name, stat, true);
7411 }
7412 
7413 
7414 status_t
7415 _kern_remove_index(dev_t device, const char *name)
7416 {
7417 	return index_remove(device, name, true);
7418 }
7419 
7420 
7421 status_t
7422 _kern_getcwd(char *buffer, size_t size)
7423 {
7424 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
7425 
7426 	// Call vfs to get current working directory
7427 	return get_cwd(buffer, size, true);
7428 }
7429 
7430 
7431 status_t
7432 _kern_setcwd(int fd, const char *path)
7433 {
7434 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7435 	if (pathBuffer.InitCheck() != B_OK)
7436 		return B_NO_MEMORY;
7437 
7438 	if (path != NULL)
7439 		pathBuffer.SetTo(path);
7440 
7441 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
7442 }
7443 
7444 
7445 //	#pragma mark - userland syscalls
7446 
7447 
7448 dev_t
7449 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
7450 	uint32 flags, const char *userArgs, size_t argsLength)
7451 {
7452 	char fileSystem[B_OS_NAME_LENGTH];
7453 	KPath path, device;
7454 	char *args = NULL;
7455 	status_t status;
7456 
7457 	if (!IS_USER_ADDRESS(userPath)
7458 		|| !IS_USER_ADDRESS(userFileSystem)
7459 		|| !IS_USER_ADDRESS(userDevice))
7460 		return B_BAD_ADDRESS;
7461 
7462 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
7463 		return B_NO_MEMORY;
7464 
7465 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
7466 		return B_BAD_ADDRESS;
7467 
7468 	if (userFileSystem != NULL
7469 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
7470 		return B_BAD_ADDRESS;
7471 
7472 	if (userDevice != NULL
7473 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
7474 		return B_BAD_ADDRESS;
7475 
7476 	if (userArgs != NULL && argsLength > 0) {
7477 		// this is a safety restriction
7478 		if (argsLength >= 65536)
7479 			return B_NAME_TOO_LONG;
7480 
7481 		args = (char *)malloc(argsLength + 1);
7482 		if (args == NULL)
7483 			return B_NO_MEMORY;
7484 
7485 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
7486 			free(args);
7487 			return B_BAD_ADDRESS;
7488 		}
7489 	}
7490 	path.UnlockBuffer();
7491 	device.UnlockBuffer();
7492 
7493 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
7494 		userFileSystem ? fileSystem : NULL, flags, args, false);
7495 
7496 	free(args);
7497 	return status;
7498 }
7499 
7500 
7501 status_t
7502 _user_unmount(const char *userPath, uint32 flags)
7503 {
7504 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7505 	if (pathBuffer.InitCheck() != B_OK)
7506 		return B_NO_MEMORY;
7507 
7508 	char *path = pathBuffer.LockBuffer();
7509 
7510 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7511 		return B_BAD_ADDRESS;
7512 
7513 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
7514 }
7515 
7516 
7517 status_t
7518 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
7519 {
7520 	struct fs_info info;
7521 	status_t status;
7522 
7523 	if (userInfo == NULL)
7524 		return B_BAD_VALUE;
7525 
7526 	if (!IS_USER_ADDRESS(userInfo))
7527 		return B_BAD_ADDRESS;
7528 
7529 	status = fs_read_info(device, &info);
7530 	if (status != B_OK)
7531 		return status;
7532 
7533 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
7534 		return B_BAD_ADDRESS;
7535 
7536 	return B_OK;
7537 }
7538 
7539 
7540 status_t
7541 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
7542 {
7543 	struct fs_info info;
7544 
7545 	if (userInfo == NULL)
7546 		return B_BAD_VALUE;
7547 
7548 	if (!IS_USER_ADDRESS(userInfo)
7549 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
7550 		return B_BAD_ADDRESS;
7551 
7552 	return fs_write_info(device, &info, mask);
7553 }
7554 
7555 
7556 dev_t
7557 _user_next_device(int32 *_userCookie)
7558 {
7559 	int32 cookie;
7560 	dev_t device;
7561 
7562 	if (!IS_USER_ADDRESS(_userCookie)
7563 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
7564 		return B_BAD_ADDRESS;
7565 
7566 	device = fs_next_device(&cookie);
7567 
7568 	if (device >= B_OK) {
7569 		// update user cookie
7570 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
7571 			return B_BAD_ADDRESS;
7572 	}
7573 
7574 	return device;
7575 }
7576 
7577 
7578 status_t
7579 _user_sync(void)
7580 {
7581 	return _kern_sync();
7582 }
7583 
7584 
7585 status_t
7586 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
7587 	size_t infoSize)
7588 {
7589 	struct fd_info info;
7590 	uint32 cookie;
7591 
7592 	// only root can do this (or should root's group be enough?)
7593 	if (geteuid() != 0)
7594 		return B_NOT_ALLOWED;
7595 
7596 	if (infoSize != sizeof(fd_info))
7597 		return B_BAD_VALUE;
7598 
7599 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
7600 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
7601 		return B_BAD_ADDRESS;
7602 
7603 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
7604 	if (status < B_OK)
7605 		return status;
7606 
7607 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
7608 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
7609 		return B_BAD_ADDRESS;
7610 
7611 	return status;
7612 }
7613 
7614 
7615 status_t
7616 _user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
7617 	char *userPath, size_t pathLength)
7618 {
7619 	if (!IS_USER_ADDRESS(userPath))
7620 		return B_BAD_ADDRESS;
7621 
7622 	KPath path(B_PATH_NAME_LENGTH + 1);
7623 	if (path.InitCheck() != B_OK)
7624 		return B_NO_MEMORY;
7625 
7626 	// copy the leaf name onto the stack
7627 	char stackLeaf[B_FILE_NAME_LENGTH];
7628 	if (leaf) {
7629 		if (!IS_USER_ADDRESS(leaf))
7630 			return B_BAD_ADDRESS;
7631 
7632 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
7633 		if (length < 0)
7634 			return length;
7635 		if (length >= B_FILE_NAME_LENGTH)
7636 			return B_NAME_TOO_LONG;
7637 
7638 		leaf = stackLeaf;
7639 	}
7640 
7641 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
7642 		path.LockBuffer(), path.BufferSize());
7643 	if (status < B_OK)
7644 		return status;
7645 
7646 	path.UnlockBuffer();
7647 
7648 	int length = user_strlcpy(userPath, path.Path(), pathLength);
7649 	if (length < 0)
7650 		return length;
7651 	if (length >= (int)pathLength)
7652 		return B_BUFFER_OVERFLOW;
7653 
7654 	return B_OK;
7655 }
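
/*	This is the kernel half of entry_ref -> path conversion as used by the
	Storage Kit. A userland equivalent (illustrative, via the public
	BEntry/BPath API):

		#include <stdio.h>
		#include <Entry.h>
		#include <Path.h>

		entry_ref ref;
		// ... ref obtained from a node monitor message, a query, etc.
		BEntry entry(&ref);
		BPath path;
		if (entry.GetPath(&path) == B_OK)
			puts(path.Path());	// absolute path for (device, inode, leaf)
*/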
7656 
7657 
7658 status_t
7659 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
7660 {
7661 	if (userPath == NULL || buffer == NULL)
7662 		return B_BAD_VALUE;
7663 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
7664 		return B_BAD_ADDRESS;
7665 
7666 	// copy path from userland
7667 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7668 	if (pathBuffer.InitCheck() != B_OK)
7669 		return B_NO_MEMORY;
7670 	char* path = pathBuffer.LockBuffer();
7671 
7672 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7673 		return B_BAD_ADDRESS;
7674 
7675 	// buffer for the leaf part
7676 	KPath leafBuffer(B_PATH_NAME_LENGTH + 1);
7677 	if (leafBuffer.InitCheck() != B_OK)
7678 		return B_NO_MEMORY;
7679 	char* leaf = leafBuffer.LockBuffer();
7680 
7681 	VNodePutter dirPutter;
7682 	struct vnode* dir = NULL;
7683 	status_t error;
7684 
7685 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
7686 		// get dir vnode + leaf name
7687 		struct vnode* nextDir;
7688 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, false);
7689 		if (error != B_OK)
7690 			return error;
7691 
7692 		dir = nextDir;
7693 		strcpy(path, leaf);
7694 		dirPutter.SetTo(dir);
7695 
7696 		// get file vnode
7697 		inc_vnode_ref_count(dir);
7698 		struct vnode* fileVnode;
7699 		error = vnode_path_to_vnode(dir, path, false, 0, false, &fileVnode,
7700 			NULL);
7701 		if (error != B_OK)
7702 			return error;
7703 		VNodePutter fileVnodePutter(fileVnode);
7704 
7705 		if (!traverseLink || !S_ISLNK(fileVnode->type)) {
7706 			// we're done -- construct the path
7707 			bool hasLeaf = true;
7708 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
7709 				// special cases "." and ".." -- get the dir, forget the leaf
7710 				inc_vnode_ref_count(dir);
7711 				error = vnode_path_to_vnode(dir, leaf, false, 0, false,
7712 					&nextDir, NULL);
7713 				if (error != B_OK)
7714 					return error;
7715 				dir = nextDir;
7716 				dirPutter.SetTo(dir);
7717 				hasLeaf = false;
7718 			}
7719 
7720 			// get the directory path
7721 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, false);
7722 			if (error != B_OK)
7723 				return error;
7724 
7725 			// append the leaf name
7726 			if (hasLeaf) {
7727 				// insert a directory separator if this is not the file system
7728 				// root
7729 				if ((strcmp(path, "/") != 0
7730 					&& strlcat(path, "/", pathBuffer.BufferSize())
7731 						>= pathBuffer.BufferSize())
7732 					|| strlcat(path, leaf, pathBuffer.BufferSize())
7733 						>= pathBuffer.BufferSize()) {
7734 					return B_NAME_TOO_LONG;
7735 				}
7736 			}
7737 
7738 			// copy back to userland
7739 			int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
7740 			if (len < 0)
7741 				return len;
7742 			if (len >= B_PATH_NAME_LENGTH)
7743 				return B_BUFFER_OVERFLOW;
7744 
7745 			return B_OK;
7746 		}
7747 
7748 		// read link
7750 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
7751 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
7752 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
7753 			if (error != B_OK)
7754 				return error;
7755 			path[bufferSize] = '\0';
7756 		} else
7757 			return B_BAD_VALUE;
7758 	}
7759 
7760 	return B_LINK_LIMIT;
7761 }
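
/*	Worked example of the loop above (with traverseLink == true): for
	"/boot/home/../config" the first iteration yields the dir vnode of
	"/boot/home/.." and leaf = "config"; since "config" is no symlink, the
	dir vnode is resolved back to its canonical path "/boot" and the leaf is
	re-appended, producing "/boot/config". Had the leaf been a symlink, its
	contents would have replaced `path` and the loop would have retried, at
	most B_MAX_SYMLINKS times before giving up with B_LINK_LIMIT.
*/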
7762 
7763 
7764 int
7765 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
7766 	int openMode, int perms)
7767 {
7768 	char name[B_FILE_NAME_LENGTH];
7769 
7770 	if (userName == NULL || device < 0 || inode < 0)
7771 		return B_BAD_VALUE;
7772 	if (!IS_USER_ADDRESS(userName)
7773 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7774 		return B_BAD_ADDRESS;
7775 
7776 	if (openMode & O_CREAT)
7777 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
7778 
7779 	return file_open_entry_ref(device, inode, name, openMode, false);
7780 }
7781 
7782 
7783 int
7784 _user_open(int fd, const char *userPath, int openMode, int perms)
7785 {
7786 	KPath path(B_PATH_NAME_LENGTH + 1);
7787 	if (path.InitCheck() != B_OK)
7788 		return B_NO_MEMORY;
7789 
7790 	char *buffer = path.LockBuffer();
7791 
7792 	if (!IS_USER_ADDRESS(userPath)
7793 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7794 		return B_BAD_ADDRESS;
7795 
7796 	if (openMode & O_CREAT)
7797 		return file_create(fd, buffer, openMode, perms, false);
7798 
7799 	return file_open(fd, buffer, openMode, false);
7800 }
7801 
7802 
7803 int
7804 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
7805 {
7806 	if (userName != NULL) {
7807 		char name[B_FILE_NAME_LENGTH];
7808 
7809 		if (!IS_USER_ADDRESS(userName)
7810 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7811 			return B_BAD_ADDRESS;
7812 
7813 		return dir_open_entry_ref(device, inode, name, false);
7814 	}
7815 	return dir_open_entry_ref(device, inode, NULL, false);
7816 }
7817 
7818 
7819 int
7820 _user_open_dir(int fd, const char *userPath)
7821 {
7822 	KPath path(B_PATH_NAME_LENGTH + 1);
7823 	if (path.InitCheck() != B_OK)
7824 		return B_NO_MEMORY;
7825 
7826 	char *buffer = path.LockBuffer();
7827 
7828 	if (!IS_USER_ADDRESS(userPath)
7829 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7830 		return B_BAD_ADDRESS;
7831 
7832 	return dir_open(fd, buffer, false);
7833 }
7834 
7835 
7836 /*!	\brief Opens a directory's parent directory and returns the entry name
7837 		   of the former.
7838 
7839 	Aside from the fact that it returns the directory's entry name, this
7840 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. If
7841 	\a userName is \c NULL, the two are in fact equivalent.
7842 
7843 	If a name buffer is supplied and the name does not fit the buffer, the
7844 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
7845 
7846 	\param fd A FD referring to a directory.
7847 	\param userName Buffer the directory's entry name shall be written into.
7848 		   May be \c NULL.
7849 	\param nameLength Size of the name buffer.
7850 	\return The file descriptor of the opened parent directory, if everything
7851 			went fine; an error code otherwise.
7852 */
7853 int
7854 _user_open_parent_dir(int fd, char *userName, size_t nameLength)
7855 {
7856 	bool kernel = false;
7857 
7858 	if (userName && !IS_USER_ADDRESS(userName))
7859 		return B_BAD_ADDRESS;
7860 
7861 	// open the parent dir
7862 	int parentFD = dir_open(fd, "..", kernel);
7863 	if (parentFD < 0)
7864 		return parentFD;
7865 	FDCloser fdCloser(parentFD, kernel);
7866 
7867 	if (userName) {
7868 		// get the vnodes
7869 		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
7870 		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
7871 		VNodePutter parentVNodePutter(parentVNode);
7872 		VNodePutter dirVNodePutter(dirVNode);
7873 		if (!parentVNode || !dirVNode)
7874 			return B_FILE_ERROR;
7875 
7876 		// get the vnode name
7877 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
7878 		struct dirent *buffer = (struct dirent*)_buffer;
7879 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
7880 			sizeof(_buffer), get_current_io_context(false));
7881 		if (status != B_OK)
7882 			return status;
7883 
7884 		// copy the name to the userland buffer
7885 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
7886 		if (len < 0)
7887 			return len;
7888 		if (len >= (int)nameLength)
7889 			return B_BUFFER_OVERFLOW;
7890 	}
7891 
7892 	return fdCloser.Detach();
7893 }
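
/*	Userland reaches this through the private _kern_open_parent_dir() stub
	(the Storage Kit uses it for parent lookups). Hedged sketch:

		char name[B_FILE_NAME_LENGTH];
		int parentFD = _kern_open_parent_dir(fd, name, sizeof(name));
		if (parentFD >= 0) {
			// `name` now holds the entry name of the directory that
			// `fd` refers to
			close(parentFD);
		}
*/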
7894 
7895 
7896 status_t
7897 _user_fcntl(int fd, int op, uint32 argument)
7898 {
7899 	status_t status = common_fcntl(fd, op, argument, false);
7900 	if (op == F_SETLKW)
7901 		syscall_restart_handle_post(status);
7902 
7903 	return status;
7904 }
7905 
7906 
7907 status_t
7908 _user_fsync(int fd)
7909 {
7910 	return common_sync(fd, false);
7911 }
7912 
7913 
7914 status_t
7915 _user_flock(int fd, int op)
7916 {
7917 	struct file_descriptor *descriptor;
7918 	struct vnode *vnode;
7919 	struct flock flock;
7920 	status_t status;
7921 
7922 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, op));
7923 
7924 	descriptor = get_fd_and_vnode(fd, &vnode, false);
7925 	if (descriptor == NULL)
7926 		return B_FILE_ERROR;
7927 
7928 	if (descriptor->type != FDTYPE_FILE) {
7929 		put_fd(descriptor);
7930 		return B_BAD_VALUE;
7931 	}
7932 
7933 	flock.l_start = 0;
7934 	flock.l_len = OFF_MAX;
7935 	flock.l_whence = SEEK_SET;
7936 	flock.l_type = (op & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
7937 
7938 	if ((op & LOCK_UN) != 0)
7939 		status = release_advisory_lock(descriptor->u.vnode, &flock);
7940 	else {
7941 		status = acquire_advisory_lock(descriptor->u.vnode,
7942 			thread_get_current_thread()->team->session_id, &flock,
7943 			(op & LOCK_NB) == 0);
7944 	}
7945 
7946 	syscall_restart_handle_post(status);
7947 
7948 	put_fd(descriptor);
7949 	return status;
7950 }
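
/*	This expresses BSD-style whole-file locks in terms of the POSIX advisory
	locking machinery: the lock always spans [0, OFF_MAX], LOCK_SH maps to
	F_RDLCK, everything else to F_WRLCK, and LOCK_NB merely disables waiting.
	Standard userland usage:

		#include <sys/file.h>

		if (flock(fd, LOCK_EX | LOCK_NB) == 0) {
			// ... we have exclusive access ...
			flock(fd, LOCK_UN);
		}
*/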
7951 
7952 
7953 status_t
7954 _user_lock_node(int fd)
7955 {
7956 	return common_lock_node(fd, false);
7957 }
7958 
7959 
7960 status_t
7961 _user_unlock_node(int fd)
7962 {
7963 	return common_unlock_node(fd, false);
7964 }
7965 
7966 
7967 status_t
7968 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
7969 {
7970 	char name[B_FILE_NAME_LENGTH];
7971 	status_t status;
7972 
7973 	if (!IS_USER_ADDRESS(userName))
7974 		return B_BAD_ADDRESS;
7975 
7976 	status = user_strlcpy(name, userName, sizeof(name));
7977 	if (status < 0)
7978 		return status;
7979 
7980 	return dir_create_entry_ref(device, inode, name, perms, false);
7981 }
7982 
7983 
7984 status_t
7985 _user_create_dir(int fd, const char *userPath, int perms)
7986 {
7987 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7988 	if (pathBuffer.InitCheck() != B_OK)
7989 		return B_NO_MEMORY;
7990 
7991 	char *path = pathBuffer.LockBuffer();
7992 
7993 	if (!IS_USER_ADDRESS(userPath)
7994 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7995 		return B_BAD_ADDRESS;
7996 
7997 	return dir_create(fd, path, perms, false);
7998 }
7999 
8000 
8001 status_t
8002 _user_remove_dir(int fd, const char *userPath)
8003 {
8004 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8005 	if (pathBuffer.InitCheck() != B_OK)
8006 		return B_NO_MEMORY;
8007 
8008 	char *path = pathBuffer.LockBuffer();
8009 
8010 	if (userPath != NULL) {
8011 		if (!IS_USER_ADDRESS(userPath)
8012 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8013 			return B_BAD_ADDRESS;
8014 	}
8015 
8016 	return dir_remove(fd, userPath ? path : NULL, false);
8017 }
8018 
8019 
8020 status_t
8021 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
8022 {
8023 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8024 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8025 		return B_NO_MEMORY;
8026 
8027 	size_t bufferSize;
8028 
8029 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8030 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
8031 		return B_BAD_ADDRESS;
8032 
8033 	char *path = pathBuffer.LockBuffer();
8034 	char *buffer = linkBuffer.LockBuffer();
8035 
8036 	if (userPath) {
8037 		if (!IS_USER_ADDRESS(userPath)
8038 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8039 			return B_BAD_ADDRESS;
8040 	}
8041 	// clamp in both cases, so that the FS cannot write beyond our buffer
8042 	if (bufferSize > B_PATH_NAME_LENGTH)
8043 		bufferSize = B_PATH_NAME_LENGTH;
8044 
8045 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8046 		&bufferSize, false);
8047 
8048 	// we also update the bufferSize in case of errors
8049 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8050 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
8051 		return B_BAD_ADDRESS;
8052 
8053 	if (status < B_OK)
8054 		return status;
8055 
8056 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8057 		return B_BAD_ADDRESS;
8058 
8059 	return B_OK;
8060 }
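
/*	This backs the POSIX readlink() wrapper; note that the actual link
	length is reported back even on B_BUFFER_OVERFLOW. Userland sketch:

		#include <unistd.h>

		char buffer[256];
		ssize_t length = readlink("/tmp/link", buffer, sizeof(buffer) - 1);
		if (length >= 0)
			buffer[length] = '\0';	// readlink() does not NUL-terminate
*/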
8061 
8062 
8063 status_t
8064 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
8065 	int mode)
8066 {
8067 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8068 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8069 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8070 		return B_NO_MEMORY;
8071 
8072 	char *path = pathBuffer.LockBuffer();
8073 	char *toPath = toPathBuffer.LockBuffer();
8074 
8075 	if (!IS_USER_ADDRESS(userPath)
8076 		|| !IS_USER_ADDRESS(userToPath)
8077 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8078 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8079 		return B_BAD_ADDRESS;
8080 
8081 	return common_create_symlink(fd, path, toPath, mode, false);
8082 }
8083 
8084 
8085 status_t
8086 _user_create_link(const char *userPath, const char *userToPath)
8087 {
8088 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8089 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8090 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8091 		return B_NO_MEMORY;
8092 
8093 	char *path = pathBuffer.LockBuffer();
8094 	char *toPath = toPathBuffer.LockBuffer();
8095 
8096 	if (!IS_USER_ADDRESS(userPath)
8097 		|| !IS_USER_ADDRESS(userToPath)
8098 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8099 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8100 		return B_BAD_ADDRESS;
8101 
8102 	status_t status = check_path(toPath);
8103 	if (status < B_OK)
8104 		return status;
8105 
8106 	return common_create_link(path, toPath, false);
8107 }
8108 
8109 
8110 status_t
8111 _user_unlink(int fd, const char *userPath)
8112 {
8113 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8114 	if (pathBuffer.InitCheck() != B_OK)
8115 		return B_NO_MEMORY;
8116 
8117 	char *path = pathBuffer.LockBuffer();
8118 
8119 	if (!IS_USER_ADDRESS(userPath)
8120 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8121 		return B_BAD_ADDRESS;
8122 
8123 	return common_unlink(fd, path, false);
8124 }
8125 
8126 
8127 status_t
8128 _user_rename(int oldFD, const char *userOldPath, int newFD,
8129 	const char *userNewPath)
8130 {
8131 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8132 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8133 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8134 		return B_NO_MEMORY;
8135 
8136 	char *oldPath = oldPathBuffer.LockBuffer();
8137 	char *newPath = newPathBuffer.LockBuffer();
8138 
8139 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8140 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8141 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8142 		return B_BAD_ADDRESS;
8143 
8144 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8145 }
8146 
8147 
8148 status_t
8149 _user_create_fifo(const char *userPath, mode_t perms)
8150 {
8151 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8152 	if (pathBuffer.InitCheck() != B_OK)
8153 		return B_NO_MEMORY;
8154 
8155 	char *path = pathBuffer.LockBuffer();
8156 
8157 	if (!IS_USER_ADDRESS(userPath)
8158 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
8159 		return B_BAD_ADDRESS;
8160 	}
8161 
8162 	// split the path into a directory vnode and a filename
8163 	char filename[B_FILE_NAME_LENGTH];
8164 	struct vnode *dir;
8165 	status_t status = path_to_dir_vnode(path, &dir, filename, false);
8166 	if (status != B_OK)
8167 		return status;
8168 
8169 	VNodePutter _(dir);
8170 
8171 	// the underlying FS needs to support creating FIFOs
8172 	if (!HAS_FS_CALL(dir, create_special_node))
8173 		return B_UNSUPPORTED;
8174 
8175 	// create the entry -- the FIFO sub node is set up automatically
8176 	fs_vnode superVnode;
8177 	ino_t nodeID;
8178 	status = FS_CALL(dir, create_special_node, filename, NULL,
8179 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
8180 
8181 	// create_special_node() acquired a reference for us that we don't need.
8182 	if (status == B_OK)
8183 		put_vnode(dir->mount->volume, nodeID);
8184 
8185 	return status;
8186 }
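
/*	This is the kernel side of mkfifo(). Only file systems implementing the
	create_special_node() hook can host FIFOs; everything else fails with
	B_UNSUPPORTED above. Userland usage is the standard POSIX call:

		#include <sys/stat.h>

		if (mkfifo("/tmp/my_fifo", 0600) == 0) {
			// the FIFO can now be open()ed for reading and writing
		}
*/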
8187 
8188 
8189 status_t
8190 _user_create_pipe(int *userFDs)
8191 {
8192 	// rootfs should support creating FIFOs, but let's be sure
8193 	if (!HAS_FS_CALL(sRoot, create_special_node))
8194 		return B_UNSUPPORTED;
8195 
8196 	// create the node	-- the FIFO sub node is set up automatically
8197 	fs_vnode superVnode;
8198 	ino_t nodeID;
8199 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
8200 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
8201 	if (status != B_OK)
8202 		return status;
8203 
8204 	// We've got one reference to the node and need another one.
8205 	struct vnode* vnode;
8206 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
8207 	if (status != B_OK) {
8208 		// that should not happen
8209 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
8210 			sRoot->mount->id, nodeID);
8211 		return status;
8212 	}
8213 
8214 	// Everything looks good so far. Open two FDs, one for reading and one
8215 	// for writing.
8216 	int fds[2];
8217 	fds[0] = open_vnode(vnode, O_RDONLY, false);
8218 	fds[1] = open_vnode(vnode, O_WRONLY, false);
8219 
8220 	FDCloser closer0(fds[0], false);
8221 	FDCloser closer1(fds[1], false);
8222 
8223 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
8224 
8225 	// copy FDs to userland
8226 	if (status == B_OK) {
8227 		if (!IS_USER_ADDRESS(userFDs)
8228 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
8229 			status = B_BAD_ADDRESS;
8230 		}
8231 	}
8232 
8233 	// keep FDs, if everything went fine
8234 	if (status == B_OK) {
8235 		closer0.Detach();
8236 		closer1.Detach();
8237 	}
8238 
8239 	return status;
8240 }
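
/*	This backs POSIX pipe(): both FDs refer to the same FIFO node on the
	root file system, one end opened O_RDONLY, the other O_WRONLY.
	Illustrative userland use:

		#include <unistd.h>

		int fds[2];
		if (pipe(fds) == 0) {
			char c;
			write(fds[1], "x", 1);
			read(fds[0], &c, 1);
		}
*/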
8241 
8242 
8243 status_t
8244 _user_access(const char *userPath, int mode)
8245 {
8246 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8247 	if (pathBuffer.InitCheck() != B_OK)
8248 		return B_NO_MEMORY;
8249 
8250 	char *path = pathBuffer.LockBuffer();
8251 
8252 	if (!IS_USER_ADDRESS(userPath)
8253 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8254 		return B_BAD_ADDRESS;
8255 
8256 	return common_access(path, mode, false);
8257 }
8258 
8259 
8260 status_t
8261 _user_read_stat(int fd, const char *userPath, bool traverseLink,
8262 	struct stat *userStat, size_t statSize)
8263 {
8264 	struct stat stat;
8265 	status_t status;
8266 
8267 	if (statSize > sizeof(struct stat))
8268 		return B_BAD_VALUE;
8269 
8270 	if (!IS_USER_ADDRESS(userStat))
8271 		return B_BAD_ADDRESS;
8272 
8273 	if (userPath) {
8274 		// path given: get the stat of the node referred to by (fd, path)
8275 		if (!IS_USER_ADDRESS(userPath))
8276 			return B_BAD_ADDRESS;
8277 
8278 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8279 		if (pathBuffer.InitCheck() != B_OK)
8280 			return B_NO_MEMORY;
8281 
8282 		char *path = pathBuffer.LockBuffer();
8283 
8284 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
8285 		if (length < B_OK)
8286 			return length;
8287 		if (length >= B_PATH_NAME_LENGTH)
8288 			return B_NAME_TOO_LONG;
8289 
8290 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
8291 	} else {
8292 		// no path given: get the FD and use the FD operation
8293 		struct file_descriptor *descriptor
8294 			= get_fd(get_current_io_context(false), fd);
8295 		if (descriptor == NULL)
8296 			return B_FILE_ERROR;
8297 
8298 		if (descriptor->ops->fd_read_stat)
8299 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
8300 		else
8301 			status = EOPNOTSUPP;
8302 
8303 		put_fd(descriptor);
8304 	}
8305 
8306 	if (status < B_OK)
8307 		return status;
8308 
8309 	return user_memcpy(userStat, &stat, statSize);
8310 }
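
/*	One syscall serves the whole stat() family: fstat() presumably passes
	(fd, NULL), while stat()/lstat() pass a path (with fd as base directory
	or -1) and differ only in traverseLink. The statSize argument allows
	older binaries built against a smaller struct stat to keep working.
	Hedged sketch of the userland side:

		struct stat st;
		_kern_read_stat(-1, "/boot", true, &st, sizeof(st));	// stat()
		_kern_read_stat(fd, NULL, false, &st, sizeof(st));		// fstat()
*/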
8311 
8312 
8313 status_t
8314 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
8315 	const struct stat *userStat, size_t statSize, int statMask)
8316 {
8317 	if (statSize > sizeof(struct stat))
8318 		return B_BAD_VALUE;
8319 
8320 	struct stat stat;
8321 
8322 	if (!IS_USER_ADDRESS(userStat)
8323 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
8324 		return B_BAD_ADDRESS;
8325 
8326 	// clear additional stat fields
8327 	if (statSize < sizeof(struct stat))
8328 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
8329 
8330 	status_t status;
8331 
8332 	if (userPath) {
8333 		// path given: write the stat of the node referred to by (fd, path)
8334 		if (!IS_USER_ADDRESS(userPath))
8335 			return B_BAD_ADDRESS;
8336 
8337 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8338 		if (pathBuffer.InitCheck() != B_OK)
8339 			return B_NO_MEMORY;
8340 
8341 		char *path = pathBuffer.LockBuffer();
8342 
8343 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
8344 		if (length < B_OK)
8345 			return length;
8346 		if (length >= B_PATH_NAME_LENGTH)
8347 			return B_NAME_TOO_LONG;
8348 
8349 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
8350 			statMask, false);
8351 	} else {
8352 		// no path given: get the FD and use the FD operation
8353 		struct file_descriptor *descriptor
8354 			= get_fd(get_current_io_context(false), fd);
8355 		if (descriptor == NULL)
8356 			return B_FILE_ERROR;
8357 
8358 		if (descriptor->ops->fd_write_stat)
8359 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
8360 		else
8361 			status = EOPNOTSUPP;
8362 
8363 		put_fd(descriptor);
8364 	}
8365 
8366 	return status;
8367 }
8368 
8369 
8370 int
8371 _user_open_attr_dir(int fd, const char *userPath)
8372 {
8373 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8374 	if (pathBuffer.InitCheck() != B_OK)
8375 		return B_NO_MEMORY;
8376 
8377 	char *path = pathBuffer.LockBuffer();
8378 
8379 	if (userPath != NULL) {
8380 		if (!IS_USER_ADDRESS(userPath)
8381 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8382 			return B_BAD_ADDRESS;
8383 	}
8384 
8385 	return attr_dir_open(fd, userPath ? path : NULL, false);
8386 }
8387 
8388 
8389 int
8390 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
8391 {
8392 	char name[B_FILE_NAME_LENGTH];
8393 
8394 	if (!IS_USER_ADDRESS(userName)
8395 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8396 		return B_BAD_ADDRESS;
8397 
8398 	return attr_create(fd, name, type, openMode, false);
8399 }
8400 
8401 
8402 int
8403 _user_open_attr(int fd, const char *userName, int openMode)
8404 {
8405 	char name[B_FILE_NAME_LENGTH];
8406 
8407 	if (!IS_USER_ADDRESS(userName)
8408 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8409 		return B_BAD_ADDRESS;
8410 
8411 	return attr_open(fd, name, openMode, false);
8412 }
8413 
8414 
8415 status_t
8416 _user_remove_attr(int fd, const char *userName)
8417 {
8418 	char name[B_FILE_NAME_LENGTH];
8419 
8420 	if (!IS_USER_ADDRESS(userName)
8421 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8422 		return B_BAD_ADDRESS;
8423 
8424 	return attr_remove(fd, name, false);
8425 }
8426 
8427 
8428 status_t
8429 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
8430 {
8431 	if (!IS_USER_ADDRESS(userFromName)
8432 		|| !IS_USER_ADDRESS(userToName))
8433 		return B_BAD_ADDRESS;
8434 
8435 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
8436 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
8437 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
8438 		return B_NO_MEMORY;
8439 
8440 	char *fromName = fromNameBuffer.LockBuffer();
8441 	char *toName = toNameBuffer.LockBuffer();
8442 
8443 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
8444 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
8445 		return B_BAD_ADDRESS;
8446 
8447 	return attr_rename(fromFile, fromName, toFile, toName, false);
8448 }
8449 
8450 
8451 int
8452 _user_open_index_dir(dev_t device)
8453 {
8454 	return index_dir_open(device, false);
8455 }
8456 
8457 
8458 status_t
8459 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
8460 {
8461 	char name[B_FILE_NAME_LENGTH];
8462 
8463 	if (!IS_USER_ADDRESS(userName)
8464 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8465 		return B_BAD_ADDRESS;
8466 
8467 	return index_create(device, name, type, flags, false);
8468 }
8469 
8470 
8471 status_t
8472 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
8473 {
8474 	char name[B_FILE_NAME_LENGTH];
8475 	struct stat stat;
8476 	status_t status;
8477 
8478 	if (!IS_USER_ADDRESS(userName)
8479 		|| !IS_USER_ADDRESS(userStat)
8480 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8481 		return B_BAD_ADDRESS;
8482 
8483 	status = index_name_read_stat(device, name, &stat, false);
8484 	if (status == B_OK) {
8485 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
8486 			return B_BAD_ADDRESS;
8487 	}
8488 
8489 	return status;
8490 }
8491 
8492 
8493 status_t
8494 _user_remove_index(dev_t device, const char *userName)
8495 {
8496 	char name[B_FILE_NAME_LENGTH];
8497 
8498 	if (!IS_USER_ADDRESS(userName)
8499 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8500 		return B_BAD_ADDRESS;
8501 
8502 	return index_remove(device, name, false);
8503 }
8504 
8505 
8506 status_t
8507 _user_getcwd(char *userBuffer, size_t size)
8508 {
8509 	if (!IS_USER_ADDRESS(userBuffer))
8510 		return B_BAD_ADDRESS;
8511 
8512 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8513 	if (pathBuffer.InitCheck() != B_OK)
8514 		return B_NO_MEMORY;
8515 
8516 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
8517 
8518 	if (size > B_PATH_NAME_LENGTH)
8519 		size = B_PATH_NAME_LENGTH;
8520 
8521 	char *path = pathBuffer.LockBuffer();
8522 
8523 	status_t status = get_cwd(path, size, false);
8524 	if (status < B_OK)
8525 		return status;
8526 
8527 	// Copy back the result
8528 	if (user_strlcpy(userBuffer, path, size) < B_OK)
8529 		return B_BAD_ADDRESS;
8530 
8531 	return status;
8532 }
8533 
8534 
8535 status_t
8536 _user_setcwd(int fd, const char *userPath)
8537 {
8538 	TRACE(("user_setcwd: path = %p\n", userPath));
8539 
8540 	KPath pathBuffer(B_PATH_NAME_LENGTH);
8541 	if (pathBuffer.InitCheck() != B_OK)
8542 		return B_NO_MEMORY;
8543 
8544 	char *path = pathBuffer.LockBuffer();
8545 
8546 	if (userPath != NULL) {
8547 		if (!IS_USER_ADDRESS(userPath)
8548 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8549 			return B_BAD_ADDRESS;
8550 	}
8551 
8552 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
8553 }
8554 
8555 
8556 status_t
8557 _user_change_root(const char *userPath)
8558 {
8559 	// only root is allowed to chroot()
8560 	if (geteuid() != 0)
8561 		return EPERM;
8562 
8563 	// alloc path buffer
8564 	KPath pathBuffer(B_PATH_NAME_LENGTH);
8565 	if (pathBuffer.InitCheck() != B_OK)
8566 		return B_NO_MEMORY;
8567 
8568 	// copy userland path to kernel
8569 	char *path = pathBuffer.LockBuffer();
8570 	if (userPath != NULL) {
8571 		if (!IS_USER_ADDRESS(userPath)
8572 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8573 			return B_BAD_ADDRESS;
8574 	}
8575 
8576 	// get the vnode
8577 	struct vnode* vnode;
8578 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
8579 	if (status != B_OK)
8580 		return status;
8581 
8582 	// set the new root
8583 	struct io_context* context = get_current_io_context(false);
8584 	benaphore_lock(&sIOContextRootLock);
8585 	struct vnode* oldRoot = context->root;
8586 	context->root = vnode;
8587 	benaphore_unlock(&sIOContextRootLock);
8588 
8589 	put_vnode(oldRoot);
8590 
8591 	return B_OK;
8592 }
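
/*	The sIOContextRootLock above is what makes the root swap atomic with
	respect to concurrent path resolution: code that reads context->root is
	expected to take the same lock, and the old root's reference is only
	released once the new root is visible.
*/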
8593 
8594 
8595 int
8596 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
8597 	uint32 flags, port_id port, int32 token)
8598 {
8599 	char *query;
8600 
8601 	if (device < 0 || userQuery == NULL || queryLength == 0)
8602 		return B_BAD_VALUE;
8603 
8604 	// this is a safety restriction
8605 	if (queryLength >= 65536)
8606 		return B_NAME_TOO_LONG;
8607 
8608 	query = (char *)malloc(queryLength + 1);
8609 	if (query == NULL)
8610 		return B_NO_MEMORY;
8611 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
8612 		free(query);
8613 		return B_BAD_ADDRESS;
8614 	}
8615 
8616 	int fd = query_open(device, query, flags, port, token, false);
8617 
8618 	free(query);
8619 	return fd;
8620 }
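
/*	Userland reaches this through the query API in fs_query.h; the port and
	token parameters serve live queries (fs_open_live_query()). Illustrative
	sketch (classic BeOS API):

		#include <dirent.h>
		#include <stdio.h>
		#include <fs_info.h>
		#include <fs_query.h>

		DIR *query = fs_open_query(dev_for_path("/boot"),
			"name == \"*.cpp\"", 0);
		if (query != NULL) {
			struct dirent *entry;
			while ((entry = fs_read_query(query)) != NULL)
				puts(entry->d_name);	// each entry matches the predicate
			fs_close_query(query);
		}
*/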
8621