xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 1acbe440b8dd798953bec31d18ee589aa3f71b73)
1 /*
2  * Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /* Virtual File System and File System Interface Layer */
10 
11 #include <OS.h>
12 #include <StorageDefs.h>
13 #include <fs_info.h>
14 #include <fs_interface.h>
15 #include <fs_volume.h>
16 
17 #include <block_cache.h>
18 #include <fd.h>
19 #include <file_cache.h>
20 #include <khash.h>
21 #include <KPath.h>
22 #include <lock.h>
23 #include <syscalls.h>
24 #include <vfs.h>
25 #include <vm.h>
26 #include <vm_cache.h>
27 #include <vm_low_memory.h>
28 
29 #include <boot/kernel_args.h>
30 #include <disk_device_manager/KDiskDevice.h>
31 #include <disk_device_manager/KDiskDeviceManager.h>
32 #include <disk_device_manager/KDiskDeviceUtils.h>
33 #include <disk_device_manager/KDiskSystem.h>
34 #include <fs/node_monitor.h>
35 #include <util/kernel_cpp.h>
36 
37 #include <string.h>
38 #include <stdio.h>
39 #include <ctype.h>
40 #include <unistd.h>
41 #include <sys/stat.h>
42 #include <sys/resource.h>
43 #include <fcntl.h>
44 #include <limits.h>
45 #include <stddef.h>
46 
47 //#define TRACE_VFS
48 #ifdef TRACE_VFS
49 #	define TRACE(x) dprintf x
50 #	define FUNCTION(x) dprintf x
51 #else
52 #	define TRACE(x) ;
53 #	define FUNCTION(x) ;
54 #endif
55 
56 #define ADD_DEBUGGER_COMMANDS
57 
58 const static uint32 kMaxUnusedVnodes = 8192;
59 	// This is the maximum number of unused vnodes that the system
60 	// will keep around (weak limit, if there is enough memory left,
61 	// they won't get flushed even when hitting that limit).
62 	// It may be chosen with respect to the available memory or enhanced
63 	// by some timestamp/frequency heurism.
64 
65 struct vnode {
66 	struct vnode	*next;
67 	vm_cache_ref	*cache;
68 	mount_id		device;
69 	list_link		mount_link;
70 	list_link		unused_link;
71 	vnode_id		id;
72 	fs_vnode		private_node;
73 	struct fs_mount	*mount;
74 	struct vnode	*covered_by;
75 	int32			ref_count;
76 	uint8			remove : 1;
77 	uint8			busy : 1;
78 	uint8			unpublished : 1;
79 	struct advisory_locking	*advisory_locking;
80 	struct file_descriptor *mandatory_locked_by;
81 };
82 
83 struct vnode_hash_key {
84 	mount_id	device;
85 	vnode_id	vnode;
86 };
87 
88 #define FS_CALL(vnode, op) (vnode->mount->fs->op)
89 #define FS_MOUNT_CALL(mount, op) (mount->fs->op)
90 
91 /**	\brief Structure to manage a mounted file system
92 
93 	Note: The root_vnode and covers_vnode fields (what others?) are
94 	initialized in fs_mount() and not changed afterwards. That is as soon
95 	as the mount is mounted and it is made sure it won't be unmounted
96 	(e.g. by holding a reference to a vnode of that mount) (read) access
97 	to those fields is always safe, even without additional locking. Morever
98 	while mounted the mount holds a reference to the covers_vnode, and thus
99 	making the access path vnode->mount->covers_vnode->mount->... safe if a
100 	reference to vnode is held (note that for the root mount covers_vnode
101 	is NULL, though).
102  */
103 struct fs_mount {
104 	struct fs_mount	*next;
105 	file_system_module_info *fs;
106 	mount_id		id;
107 	void			*cookie;
108 	char			*device_name;
109 	char			*fs_name;
110 	recursive_lock	rlock;	// guards the vnodes list
111 	struct vnode	*root_vnode;
112 	struct vnode	*covers_vnode;
113 	KPartition		*partition;
114 	struct list		vnodes;
115 	bool			unmounting;
116 	bool			owns_file_device;
117 };
118 
119 struct advisory_locking {
120 	sem_id			lock;
121 	sem_id			wait_sem;
122 	struct list		locks;
123 };
124 
125 struct advisory_lock {
126 	list_link		link;
127 	team_id			team;
128 	off_t			offset;
129 	off_t			length;
130 	bool			shared;
131 };
132 
133 static mutex sFileSystemsMutex;
134 
135 /**	\brief Guards sMountsTable.
136  *
137  *	The holder is allowed to read/write access the sMountsTable.
138  *	Manipulation of the fs_mount structures themselves
139  *	(and their destruction) requires different locks though.
140  */
141 static mutex sMountMutex;
142 
143 /**	\brief Guards mount/unmount operations.
144  *
145  *	The fs_mount() and fs_unmount() hold the lock during their whole operation.
146  *	That is locking the lock ensures that no FS is mounted/unmounted. In
147  *	particular this means that
148  *	- sMountsTable will not be modified,
149  *	- the fields immutable after initialization of the fs_mount structures in
150  *	  sMountsTable will not be modified,
151  *	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
152  *
153  *	The thread trying to lock the lock must not hold sVnodeMutex or
154  *	sMountMutex.
155  */
156 static recursive_lock sMountOpLock;
157 
158 /**	\brief Guards the vnode::covered_by field of any vnode
159  *
160  *	The holder is allowed to read access the vnode::covered_by field of any
161  *	vnode. Additionally holding sMountOpLock allows for write access.
162  *
163  *	The thread trying to lock the must not hold sVnodeMutex.
164  */
165 static mutex sVnodeCoveredByMutex;
166 
167 /**	\brief Guards sVnodeTable.
168  *
169  *	The holder is allowed to read/write access sVnodeTable and to
170  *	to any unbusy vnode in that table, save
171  *	to the immutable fields (device, id, private_node, mount) to which
172  *	only read-only access is allowed, and to the field covered_by, which is
173  *	guarded by sMountOpLock and sVnodeCoveredByMutex.
174  *
175  *	The thread trying to lock the mutex must not hold sMountMutex.
176  *	You must not have this mutex held when calling create_sem(), as this
177  *	might call vfs_free_unused_vnodes().
178  */
179 static mutex sVnodeMutex;
180 
181 #define VNODE_HASH_TABLE_SIZE 1024
182 static hash_table *sVnodeTable;
183 static list sUnusedVnodeList;
184 static uint32 sUnusedVnodes = 0;
185 static struct vnode *sRoot;
186 
187 #define MOUNTS_HASH_TABLE_SIZE 16
188 static hash_table *sMountsTable;
189 static mount_id sNextMountID = 1;
190 
191 mode_t __gUmask = 022;
192 
193 /* function declarations */
194 
195 // file descriptor operation prototypes
196 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
197 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
198 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
199 static void file_free_fd(struct file_descriptor *);
200 static status_t file_close(struct file_descriptor *);
201 static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
202 	struct select_sync *sync);
203 static status_t file_deselect(struct file_descriptor *, uint8 event,
204 	struct select_sync *sync);
205 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
206 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
207 static status_t dir_rewind(struct file_descriptor *);
208 static void dir_free_fd(struct file_descriptor *);
209 static status_t dir_close(struct file_descriptor *);
210 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
211 static status_t attr_dir_rewind(struct file_descriptor *);
212 static void attr_dir_free_fd(struct file_descriptor *);
213 static status_t attr_dir_close(struct file_descriptor *);
214 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
215 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
216 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
217 static void attr_free_fd(struct file_descriptor *);
218 static status_t attr_close(struct file_descriptor *);
219 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
220 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
221 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
222 static status_t index_dir_rewind(struct file_descriptor *);
223 static void index_dir_free_fd(struct file_descriptor *);
224 static status_t index_dir_close(struct file_descriptor *);
225 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
226 static status_t query_rewind(struct file_descriptor *);
227 static void query_free_fd(struct file_descriptor *);
228 static status_t query_close(struct file_descriptor *);
229 
230 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
231 static status_t common_read_stat(struct file_descriptor *, struct stat *);
232 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
233 
234 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
235 	bool traverseLeafLink, int count, struct vnode **_vnode, vnode_id *_parentID, int *_type);
236 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
237 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
238 	struct vnode **_vnode, vnode_id *_parentID, bool kernel);
239 static void inc_vnode_ref_count(struct vnode *vnode);
240 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
241 static inline void put_vnode(struct vnode *vnode);
242 
243 static struct fd_ops sFileOps = {
244 	file_read,
245 	file_write,
246 	file_seek,
247 	common_ioctl,
248 	file_select,
249 	file_deselect,
250 	NULL,		// read_dir()
251 	NULL,		// rewind_dir()
252 	common_read_stat,
253 	common_write_stat,
254 	file_close,
255 	file_free_fd
256 };
257 
258 static struct fd_ops sDirectoryOps = {
259 	NULL,		// read()
260 	NULL,		// write()
261 	NULL,		// seek()
262 	common_ioctl,
263 	NULL,		// select()
264 	NULL,		// deselect()
265 	dir_read,
266 	dir_rewind,
267 	common_read_stat,
268 	common_write_stat,
269 	dir_close,
270 	dir_free_fd
271 };
272 
273 static struct fd_ops sAttributeDirectoryOps = {
274 	NULL,		// read()
275 	NULL,		// write()
276 	NULL,		// seek()
277 	common_ioctl,
278 	NULL,		// select()
279 	NULL,		// deselect()
280 	attr_dir_read,
281 	attr_dir_rewind,
282 	common_read_stat,
283 	common_write_stat,
284 	attr_dir_close,
285 	attr_dir_free_fd
286 };
287 
288 static struct fd_ops sAttributeOps = {
289 	attr_read,
290 	attr_write,
291 	attr_seek,
292 	common_ioctl,
293 	NULL,		// select()
294 	NULL,		// deselect()
295 	NULL,		// read_dir()
296 	NULL,		// rewind_dir()
297 	attr_read_stat,
298 	attr_write_stat,
299 	attr_close,
300 	attr_free_fd
301 };
302 
303 static struct fd_ops sIndexDirectoryOps = {
304 	NULL,		// read()
305 	NULL,		// write()
306 	NULL,		// seek()
307 	NULL,		// ioctl()
308 	NULL,		// select()
309 	NULL,		// deselect()
310 	index_dir_read,
311 	index_dir_rewind,
312 	NULL,		// read_stat()
313 	NULL,		// write_stat()
314 	index_dir_close,
315 	index_dir_free_fd
316 };
317 
318 #if 0
319 static struct fd_ops sIndexOps = {
320 	NULL,		// read()
321 	NULL,		// write()
322 	NULL,		// seek()
323 	NULL,		// ioctl()
324 	NULL,		// select()
325 	NULL,		// deselect()
326 	NULL,		// dir_read()
327 	NULL,		// dir_rewind()
328 	index_read_stat,	// read_stat()
329 	NULL,		// write_stat()
330 	NULL,		// dir_close()
331 	NULL		// free_fd()
332 };
333 #endif
334 
335 static struct fd_ops sQueryOps = {
336 	NULL,		// read()
337 	NULL,		// write()
338 	NULL,		// seek()
339 	NULL,		// ioctl()
340 	NULL,		// select()
341 	NULL,		// deselect()
342 	query_read,
343 	query_rewind,
344 	NULL,		// read_stat()
345 	NULL,		// write_stat()
346 	query_close,
347 	query_free_fd
348 };
349 
350 
351 // VNodePutter
352 class VNodePutter {
353 public:
354 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
355 
356 	~VNodePutter()
357 	{
358 		Put();
359 	}
360 
361 	void SetTo(struct vnode *vnode)
362 	{
363 		Put();
364 		fVNode = vnode;
365 	}
366 
367 	void Put()
368 	{
369 		if (fVNode) {
370 			put_vnode(fVNode);
371 			fVNode = NULL;
372 		}
373 	}
374 
375 	struct vnode *Detach()
376 	{
377 		struct vnode *vnode = fVNode;
378 		fVNode = NULL;
379 		return vnode;
380 	}
381 
382 private:
383 	struct vnode *fVNode;
384 };
385 
386 
387 class FDCloser {
388 public:
389 	FDCloser() : fFD(-1), fKernel(true) {}
390 
391 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
392 
393 	~FDCloser()
394 	{
395 		Close();
396 	}
397 
398 	void SetTo(int fd, bool kernel)
399 	{
400 		Close();
401 		fFD = fd;
402 		fKernel = kernel;
403 	}
404 
405 	void Close()
406 	{
407 		if (fFD >= 0) {
408 			if (fKernel)
409 				_kern_close(fFD);
410 			else
411 				_user_close(fFD);
412 			fFD = -1;
413 		}
414 	}
415 
416 	int Detach()
417 	{
418 		int fd = fFD;
419 		fFD = -1;
420 		return fd;
421 	}
422 
423 private:
424 	int		fFD;
425 	bool	fKernel;
426 };
427 
428 
429 static int
430 mount_compare(void *_m, const void *_key)
431 {
432 	struct fs_mount *mount = (fs_mount *)_m;
433 	const mount_id *id = (mount_id *)_key;
434 
435 	if (mount->id == *id)
436 		return 0;
437 
438 	return -1;
439 }
440 
441 
442 static uint32
443 mount_hash(void *_m, const void *_key, uint32 range)
444 {
445 	struct fs_mount *mount = (fs_mount *)_m;
446 	const mount_id *id = (mount_id *)_key;
447 
448 	if (mount)
449 		return mount->id % range;
450 
451 	return (uint32)*id % range;
452 }
453 
454 
455 /** Finds the mounted device (the fs_mount structure) with the given ID.
456  *	Note, you must hold the gMountMutex lock when you call this function.
457  */
458 
459 static struct fs_mount *
460 find_mount(mount_id id)
461 {
462 	ASSERT_LOCKED_MUTEX(&sMountMutex);
463 
464 	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
465 }
466 
467 
468 static status_t
469 get_mount(mount_id id, struct fs_mount **_mount)
470 {
471 	struct fs_mount *mount;
472 	status_t status;
473 
474 	mutex_lock(&sMountMutex);
475 
476 	mount = find_mount(id);
477 	if (mount) {
478 		// ToDo: the volume is locked (against removal) by locking
479 		//	its root node - investigate if that's a good idea
480 		if (mount->root_vnode)
481 			inc_vnode_ref_count(mount->root_vnode);
482 		else {
483 			// might have been called during a mount operation in which
484 			// case the root node may still be NULL
485 			mount = NULL;
486 		}
487 	} else
488 		status = B_BAD_VALUE;
489 
490 	mutex_unlock(&sMountMutex);
491 
492 	if (mount == NULL)
493 		return B_BUSY;
494 
495 	*_mount = mount;
496 	return B_OK;
497 }
498 
499 
500 static void
501 put_mount(struct fs_mount *mount)
502 {
503 	if (mount)
504 		put_vnode(mount->root_vnode);
505 }
506 
507 
508 static status_t
509 put_file_system(file_system_module_info *fs)
510 {
511 	return put_module(fs->info.name);
512 }
513 
514 
515 /**	Tries to open the specified file system module.
516  *	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
517  *	Returns a pointer to file system module interface, or NULL if it
518  *	could not open the module.
519  */
520 
521 static file_system_module_info *
522 get_file_system(const char *fsName)
523 {
524 	char name[B_FILE_NAME_LENGTH];
525 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
526 		// construct module name if we didn't get one
527 		// (we currently support only one API)
528 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
529 		fsName = NULL;
530 	}
531 
532 	file_system_module_info *info;
533 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
534 		return NULL;
535 
536 	return info;
537 }
538 
539 
540 /**	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
541  *	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
542  *	The name is allocated for you, and you have to free() it when you're
543  *	done with it.
544  *	Returns NULL if the required memory is no available.
545  */
546 
547 static char *
548 get_file_system_name(const char *fsName)
549 {
550 	const size_t length = strlen("file_systems/");
551 
552 	if (strncmp(fsName, "file_systems/", length)) {
553 		// the name already seems to be the module's file name
554 		return strdup(fsName);
555 	}
556 
557 	fsName += length;
558 	const char *end = strchr(fsName, '/');
559 	if (end == NULL) {
560 		// this doesn't seem to be a valid name, but well...
561 		return strdup(fsName);
562 	}
563 
564 	// cut off the trailing /v1
565 
566 	char *name = (char *)malloc(end + 1 - fsName);
567 	if (name == NULL)
568 		return NULL;
569 
570 	strlcpy(name, fsName, end + 1 - fsName);
571 	return name;
572 }
573 
574 
575 static int
576 vnode_compare(void *_vnode, const void *_key)
577 {
578 	struct vnode *vnode = (struct vnode *)_vnode;
579 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
580 
581 	if (vnode->device == key->device && vnode->id == key->vnode)
582 		return 0;
583 
584 	return -1;
585 }
586 
587 
588 static uint32
589 vnode_hash(void *_vnode, const void *_key, uint32 range)
590 {
591 	struct vnode *vnode = (struct vnode *)_vnode;
592 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
593 
594 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
595 
596 	if (vnode != NULL)
597 		return VHASH(vnode->device, vnode->id) % range;
598 
599 	return VHASH(key->device, key->vnode) % range;
600 
601 #undef VHASH
602 }
603 
604 
605 static void
606 add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
607 {
608 	recursive_lock_lock(&mount->rlock);
609 
610 	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);
611 
612 	recursive_lock_unlock(&mount->rlock);
613 }
614 
615 
616 static void
617 remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
618 {
619 	recursive_lock_lock(&mount->rlock);
620 
621 	list_remove_link(&vnode->mount_link);
622 	vnode->mount_link.next = vnode->mount_link.prev = NULL;
623 
624 	recursive_lock_unlock(&mount->rlock);
625 }
626 
627 
628 static status_t
629 create_new_vnode(struct vnode **_vnode, mount_id mountID, vnode_id vnodeID)
630 {
631 	FUNCTION(("create_new_vnode()\n"));
632 
633 	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
634 	if (vnode == NULL)
635 		return B_NO_MEMORY;
636 
637 	// initialize basic values
638 	memset(vnode, 0, sizeof(struct vnode));
639 	vnode->device = mountID;
640 	vnode->id = vnodeID;
641 
642 	// add the vnode to the mount structure
643 	mutex_lock(&sMountMutex);
644 	vnode->mount = find_mount(mountID);
645 	if (!vnode->mount || vnode->mount->unmounting) {
646 		mutex_unlock(&sMountMutex);
647 		free(vnode);
648 		return B_ENTRY_NOT_FOUND;
649 	}
650 
651 	hash_insert(sVnodeTable, vnode);
652 	add_vnode_to_mount_list(vnode, vnode->mount);
653 
654 	mutex_unlock(&sMountMutex);
655 
656 	vnode->ref_count = 1;
657 	*_vnode = vnode;
658 
659 	return B_OK;
660 }
661 
662 
663 /**	Frees the vnode and all resources it has acquired, and removes
664  *	it from the vnode hash as well as from its mount structure.
665  *	Will also make sure that any cache modifications are written back.
666  */
667 
668 static void
669 free_vnode(struct vnode *vnode, bool reenter)
670 {
671 	ASSERT(vnode->ref_count == 0 && vnode->busy);
672 
673 	// write back any changes in this vnode's cache -- but only
674 	// if the vnode won't be deleted, in which case the changes
675 	// will be discarded
676 
677 	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
678 		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
679 
680 	if (!vnode->unpublished) {
681 		if (vnode->remove)
682 			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
683 		else
684 			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
685 	}
686 
687 	// The file system has removed the resources of the vnode now, so we can
688 	// make it available again (and remove the busy vnode from the hash)
689 	mutex_lock(&sVnodeMutex);
690 	hash_remove(sVnodeTable, vnode);
691 	mutex_unlock(&sVnodeMutex);
692 
693 	// if we have a vm_cache attached, remove it
694 	if (vnode->cache)
695 		vm_cache_release_ref(vnode->cache);
696 
697 	vnode->cache = NULL;
698 
699 	remove_vnode_from_mount_list(vnode, vnode->mount);
700 
701 	free(vnode);
702 }
703 
704 
705 /**	\brief Decrements the reference counter of the given vnode and deletes it,
706  *	if the counter dropped to 0.
707  *
708  *	The caller must, of course, own a reference to the vnode to call this
709  *	function.
710  *	The caller must not hold the sVnodeMutex or the sMountMutex.
711  *
712  *	\param vnode the vnode.
713  *	\param reenter \c true, if this function is called (indirectly) from within
714  *		   a file system.
715  *	\return \c B_OK, if everything went fine, an error code otherwise.
716  */
717 
718 static status_t
719 dec_vnode_ref_count(struct vnode *vnode, bool reenter)
720 {
721 	mutex_lock(&sVnodeMutex);
722 
723 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
724 
725 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
726 
727 	if (oldRefCount == 1) {
728 		if (vnode->busy)
729 			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
730 
731 		bool freeNode = false;
732 
733 		// Just insert the vnode into an unused list if we don't need
734 		// to delete it
735 		if (vnode->remove) {
736 			vnode->busy = true;
737 			freeNode = true;
738 		} else {
739 			list_add_item(&sUnusedVnodeList, vnode);
740 			if (++sUnusedVnodes > kMaxUnusedVnodes
741 				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
742 				// there are too many unused vnodes so we free the oldest one
743 				// ToDo: evaluate this mechanism
744 				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
745 				vnode->busy = true;
746 				freeNode = true;
747 				sUnusedVnodes--;
748 			}
749 		}
750 
751 		mutex_unlock(&sVnodeMutex);
752 
753 		if (freeNode)
754 			free_vnode(vnode, reenter);
755 	} else
756 		mutex_unlock(&sVnodeMutex);
757 
758 	return B_OK;
759 }
760 
761 
762 /**	\brief Increments the reference counter of the given vnode.
763  *
764  *	The caller must either already have a reference to the vnode or hold
765  *	the sVnodeMutex.
766  *
767  *	\param vnode the vnode.
768  */
769 
770 static void
771 inc_vnode_ref_count(struct vnode *vnode)
772 {
773 	atomic_add(&vnode->ref_count, 1);
774 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
775 }
776 
777 
778 /**	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
779  *
780  *	The caller must hold the sVnodeMutex.
781  *
782  *	\param mountID the mount ID.
783  *	\param vnodeID the node ID.
784  *
785  *	\return The vnode structure, if it was found in the hash table, \c NULL
786  *			otherwise.
787  */
788 
789 static struct vnode *
790 lookup_vnode(mount_id mountID, vnode_id vnodeID)
791 {
792 	struct vnode_hash_key key;
793 
794 	key.device = mountID;
795 	key.vnode = vnodeID;
796 
797 	return (vnode *)hash_lookup(sVnodeTable, &key);
798 }
799 
800 
801 /**	\brief Retrieves a vnode for a given mount ID, node ID pair.
802  *
803  *	If the node is not yet in memory, it will be loaded.
804  *
805  *	The caller must not hold the sVnodeMutex or the sMountMutex.
806  *
807  *	\param mountID the mount ID.
808  *	\param vnodeID the node ID.
809  *	\param _vnode Pointer to a vnode* variable into which the pointer to the
810  *		   retrieved vnode structure shall be written.
811  *	\param reenter \c true, if this function is called (indirectly) from within
812  *		   a file system.
813  *	\return \c B_OK, if everything when fine, an error code otherwise.
814  */
815 
816 static status_t
817 get_vnode(mount_id mountID, vnode_id vnodeID, struct vnode **_vnode, int reenter)
818 {
819 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));
820 
821 	mutex_lock(&sVnodeMutex);
822 
823 	int32 tries = 300;
824 		// try for 3 secs
825 restart:
826 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
827 	if (vnode && vnode->busy) {
828 		mutex_unlock(&sVnodeMutex);
829 		if (--tries < 0) {
830 			// vnode doesn't seem to become unbusy
831 			panic("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
832 			return B_BUSY;
833 		}
834 		snooze(10000); // 10 ms
835 		mutex_lock(&sVnodeMutex);
836 		goto restart;
837 	}
838 
839 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
840 
841 	status_t status;
842 
843 	if (vnode) {
844 		if (vnode->ref_count == 0) {
845 			// this vnode has been unused before
846 			list_remove_item(&sUnusedVnodeList, vnode);
847 			sUnusedVnodes--;
848 		}
849 		inc_vnode_ref_count(vnode);
850 	} else {
851 		// we need to create a new vnode and read it in
852 		status = create_new_vnode(&vnode, mountID, vnodeID);
853 		if (status < B_OK)
854 			goto err;
855 
856 		vnode->busy = true;
857 		mutex_unlock(&sVnodeMutex);
858 
859 		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID, &vnode->private_node, reenter);
860 		if (status == B_OK && vnode->private_node == NULL)
861 			status = B_BAD_VALUE;
862 
863 		mutex_lock(&sVnodeMutex);
864 
865 		if (status < B_OK)
866 			goto err1;
867 
868 		vnode->busy = false;
869 	}
870 
871 	mutex_unlock(&sVnodeMutex);
872 
873 	TRACE(("get_vnode: returning %p\n", vnode));
874 
875 	*_vnode = vnode;
876 	return B_OK;
877 
878 err1:
879 	hash_remove(sVnodeTable, vnode);
880 	remove_vnode_from_mount_list(vnode, vnode->mount);
881 err:
882 	mutex_unlock(&sVnodeMutex);
883 	if (vnode)
884 		free(vnode);
885 
886 	return status;
887 }
888 
889 
890 /**	\brief Decrements the reference counter of the given vnode and deletes it,
891  *	if the counter dropped to 0.
892  *
893  *	The caller must, of course, own a reference to the vnode to call this
894  *	function.
895  *	The caller must not hold the sVnodeMutex or the sMountMutex.
896  *
897  *	\param vnode the vnode.
898  */
899 
900 static inline void
901 put_vnode(struct vnode *vnode)
902 {
903 	dec_vnode_ref_count(vnode, false);
904 }
905 
906 
907 static void
908 vnode_low_memory_handler(void */*data*/, int32 level)
909 {
910 	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));
911 
912 	int32 count = 1;
913 	switch (level) {
914 		case B_NO_LOW_MEMORY:
915 			return;
916 		case B_LOW_MEMORY_NOTE:
917 			count = sUnusedVnodes / 100;
918 			break;
919 		case B_LOW_MEMORY_WARNING:
920 			count = sUnusedVnodes / 10;
921 			break;
922 		case B_LOW_MEMORY_CRITICAL:
923 			count = sUnusedVnodes;
924 			break;
925 	}
926 
927 	for (int32 i = 0; i < count; i++) {
928 		mutex_lock(&sVnodeMutex);
929 
930 		struct vnode *vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
931 		if (vnode == NULL) {
932 			mutex_unlock(&sVnodeMutex);
933 			break;
934 		}
935 		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));
936 
937 		vnode->busy = true;
938 		sUnusedVnodes--;
939 
940 		mutex_unlock(&sVnodeMutex);
941 
942 		free_vnode(vnode, false);
943 	}
944 }
945 
946 
947 static inline void
948 put_advisory_locking(struct advisory_locking *locking)
949 {
950 	release_sem(locking->lock);
951 }
952 
953 
954 /**	Returns the advisory_locking object of the \a vnode in case it
955  *	has one, and locks it.
956  *	You have to call put_advisory_locking() when you're done with
957  *	it.
958  *	Note, you must not have the vnode mutex locked when calling
959  *	this function.
960  */
961 
962 static struct advisory_locking *
963 get_advisory_locking(struct vnode *vnode)
964 {
965 	mutex_lock(&sVnodeMutex);
966 
967 	struct advisory_locking *locking = vnode->advisory_locking;
968 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
969 
970 	mutex_unlock(&sVnodeMutex);
971 
972 	if (lock >= B_OK)
973 		lock = acquire_sem(lock);
974 	if (lock < B_OK) {
975 		// This means the locking has been deleted in the mean time
976 		// or had never existed in the first place - otherwise, we
977 		// would get the lock at some point.
978 		return NULL;
979 	}
980 
981 	return locking;
982 }
983 
984 
985 /**	Creates a locked advisory_locking object, and attaches it to the
986  *	given \a vnode.
987  *	Returns B_OK in case of success - also if the vnode got such an
988  *	object from someone else in the mean time, you'll still get this
989  *	one locked then.
990  */
991 
992 static status_t
993 create_advisory_locking(struct vnode *vnode)
994 {
995 	if (vnode == NULL)
996 		return B_FILE_ERROR;
997 
998 	struct advisory_locking *locking = (struct advisory_locking *)malloc(
999 		sizeof(struct advisory_locking));
1000 	if (locking == NULL)
1001 		return B_NO_MEMORY;
1002 
1003 	status_t status;
1004 
1005 	locking->wait_sem = create_sem(0, "advisory lock");
1006 	if (locking->wait_sem < B_OK) {
1007 		status = locking->wait_sem;
1008 		goto err1;
1009 	}
1010 
1011 	locking->lock = create_sem(0, "advisory locking");
1012 	if (locking->lock < B_OK) {
1013 		status = locking->lock;
1014 		goto err2;
1015 	}
1016 
1017 	list_init(&locking->locks);
1018 
1019 	// We need to set the locking structure atomically - someone
1020 	// else might set one at the same time
1021 	do {
1022 		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking, (addr_t)locking,
1023 				NULL) == NULL)
1024 			return B_OK;
1025 	} while (get_advisory_locking(vnode) == NULL);
1026 
1027 	status = B_OK;
1028 		// we delete the one we've just created, but nevertheless, the vnode
1029 		// does have a locking structure now
1030 
1031 	delete_sem(locking->lock);
1032 err2:
1033 	delete_sem(locking->wait_sem);
1034 err1:
1035 	free(locking);
1036 	return status;
1037 }
1038 
1039 
1040 /**	Retrieves the first lock that has been set by the current team.
1041  */
1042 
1043 static status_t
1044 get_advisory_lock(struct vnode *vnode, struct flock *flock)
1045 {
1046 	struct advisory_locking *locking = get_advisory_locking(vnode);
1047 	if (locking == NULL)
1048 		return B_BAD_VALUE;
1049 
1050 	// TODO: this should probably get the flock by its file descriptor!
1051 	team_id team = team_get_current_team_id();
1052 	status_t status = B_BAD_VALUE;
1053 
1054 	struct advisory_lock *lock = NULL;
1055 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1056 		if (lock->team == team) {
1057 			flock->l_start = lock->offset;
1058 			flock->l_len = lock->length;
1059 			status = B_OK;
1060 			break;
1061 		}
1062 	}
1063 
1064 	put_advisory_locking(locking);
1065 	return status;
1066 }
1067 
1068 
1069 /**	Removes the specified lock, or all locks of the calling team
1070  *	if \a flock is NULL.
1071  */
1072 
1073 static status_t
1074 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1075 {
1076 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1077 
1078 	struct advisory_locking *locking = get_advisory_locking(vnode);
1079 	if (locking == NULL)
1080 		return flock != NULL ? B_BAD_VALUE : B_OK;
1081 
1082 	team_id team = team_get_current_team_id();
1083 
1084 	// find matching lock entry
1085 
1086 	status_t status = B_BAD_VALUE;
1087 	struct advisory_lock *lock = NULL;
1088 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1089 		if (lock->team == team && (flock == NULL || (flock != NULL
1090 			&& lock->offset == flock->l_start
1091 			&& lock->length == flock->l_len))) {
1092 			// we found our lock, free it
1093 			list_remove_item(&locking->locks, lock);
1094 			free(lock);
1095 			status = B_OK;
1096 			break;
1097 		}
1098 	}
1099 
1100 	bool removeLocking = list_is_empty(&locking->locks);
1101 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1102 
1103 	put_advisory_locking(locking);
1104 
1105 	if (status < B_OK)
1106 		return status;
1107 
1108 	if (removeLocking) {
1109 		// we can remove the whole advisory locking structure; it's no longer used
1110 		locking = get_advisory_locking(vnode);
1111 		if (locking != NULL) {
1112 			// the locking could have been changed in the mean time
1113 			if (list_is_empty(&locking->locks)) {
1114 				vnode->advisory_locking = NULL;
1115 
1116 				// we've detached the locking from the vnode, so we can safely delete it
1117 				delete_sem(locking->lock);
1118 				delete_sem(locking->wait_sem);
1119 				free(locking);
1120 			} else {
1121 				// the locking is in use again
1122 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1123 			}
1124 		}
1125 	}
1126 
1127 	return B_OK;
1128 }
1129 
1130 
1131 static status_t
1132 acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
1133 {
1134 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1135 		vnode, flock, wait ? "yes" : "no"));
1136 
1137 	bool shared = flock->l_type == F_RDLCK;
1138 	status_t status = B_OK;
1139 
1140 restart:
1141 	// if this vnode has an advisory_locking structure attached,
1142 	// lock that one and search for any colliding file lock
1143 	struct advisory_locking *locking = get_advisory_locking(vnode);
1144 	sem_id waitForLock = -1;
1145 
1146 	if (locking != NULL) {
1147 		// test for collisions
1148 		struct advisory_lock *lock = NULL;
1149 		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1150 			if (lock->offset <= flock->l_start + flock->l_len
1151 				&& lock->offset + lock->length > flock->l_start) {
1152 				// locks do overlap
1153 				if (!shared || !lock->shared) {
1154 					// we need to wait
1155 					waitForLock = locking->wait_sem;
1156 					break;
1157 				}
1158 			}
1159 		}
1160 
1161 		if (waitForLock < B_OK || !wait)
1162 			put_advisory_locking(locking);
1163 	}
1164 
1165 	// wait for the lock if we have to, or else return immediately
1166 
1167 	if (waitForLock >= B_OK) {
1168 		if (!wait)
1169 			status = B_PERMISSION_DENIED;
1170 		else {
1171 			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
1172 			if (status == B_OK) {
1173 				// see if we're still colliding
1174 				goto restart;
1175 			}
1176 		}
1177 	}
1178 
1179 	if (status < B_OK)
1180 		return status;
1181 
1182 	// install new lock
1183 
1184 	locking = get_advisory_locking(vnode);
1185 	if (locking == NULL) {
1186 		// we need to create a new locking object
1187 		status = create_advisory_locking(vnode);
1188 		if (status < B_OK)
1189 			return status;
1190 
1191 		locking = vnode->advisory_locking;
1192 			// we own the locking object, so it can't go away
1193 	}
1194 
1195 	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
1196 	if (lock == NULL) {
1197 		if (waitForLock >= B_OK)
1198 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1199 		release_sem(locking->lock);
1200 		return B_NO_MEMORY;
1201 	}
1202 
1203 	lock->team = team_get_current_team_id();
1204 	// values must already be normalized when getting here
1205 	lock->offset = flock->l_start;
1206 	lock->length = flock->l_len;
1207 	lock->shared = shared;
1208 
1209 	list_add_item(&locking->locks, lock);
1210 	put_advisory_locking(locking);
1211 
1212 	return status;
1213 }
1214 
1215 
1216 static status_t
1217 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1218 {
1219 	switch (flock->l_whence) {
1220 		case SEEK_SET:
1221 			break;
1222 		case SEEK_CUR:
1223 			flock->l_start += descriptor->pos;
1224 			break;
1225 		case SEEK_END:
1226 		{
1227 			struct vnode *vnode = descriptor->u.vnode;
1228 			struct stat stat;
1229 			status_t status;
1230 
1231 			if (FS_CALL(vnode, read_stat) == NULL)
1232 				return EOPNOTSUPP;
1233 
1234 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
1235 			if (status < B_OK)
1236 				return status;
1237 
1238 			flock->l_start += stat.st_size;
1239 			break;
1240 		}
1241 		default:
1242 			return B_BAD_VALUE;
1243 	}
1244 
1245 	if (flock->l_start < 0)
1246 		flock->l_start = 0;
1247 	if (flock->l_len == 0)
1248 		flock->l_len = OFF_MAX;
1249 
1250 	// don't let the offset and length overflow
1251 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1252 		flock->l_len = OFF_MAX - flock->l_start;
1253 
1254 	if (flock->l_len < 0) {
1255 		// a negative length reverses the region
1256 		flock->l_start += flock->l_len;
1257 		flock->l_len = -flock->l_len;
1258 	}
1259 
1260 	return B_OK;
1261 }
1262 
1263 
1264 /**	Disconnects all file descriptors that are associated with the
1265  *	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1266  *	\a mount object.
1267  *
1268  *	Note, after you've called this function, there might still be ongoing
1269  *	accesses - they won't be interrupted if they already happened before.
1270  *	However, any subsequent access will fail.
1271  *
1272  *	This is not a cheap function and should be used with care and rarely.
1273  *	TODO: there is currently no means to stop a blocking read/write!
1274  */
1275 
1276 void
1277 disconnect_mount_or_vnode_fds(struct fs_mount *mount,
1278 	struct vnode *vnodeToDisconnect)
1279 {
1280 	// iterate over all teams and peek into their file descriptors
1281 	int32 nextTeamID = 0;
1282 
1283 	while (true) {
1284 		struct io_context *context = NULL;
1285 		sem_id contextMutex = -1;
1286 		struct team *team = NULL;
1287 		team_id lastTeamID;
1288 
1289 		cpu_status state = disable_interrupts();
1290 		GRAB_TEAM_LOCK();
1291 
1292 		lastTeamID = peek_next_thread_id();
1293 		if (nextTeamID < lastTeamID) {
1294 			// get next valid team
1295 			while (nextTeamID < lastTeamID
1296 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1297 				nextTeamID++;
1298 			}
1299 
1300 			if (team) {
1301 				context = (io_context *)team->io_context;
1302 				contextMutex = context->io_mutex.sem;
1303 				nextTeamID++;
1304 			}
1305 		}
1306 
1307 		RELEASE_TEAM_LOCK();
1308 		restore_interrupts(state);
1309 
1310 		if (context == NULL)
1311 			break;
1312 
1313 		// we now have a context - since we couldn't lock it while having
1314 		// safe access to the team structure, we now need to lock the mutex
1315 		// manually
1316 
1317 		if (acquire_sem(contextMutex) != B_OK) {
1318 			// team seems to be gone, go over to the next team
1319 			continue;
1320 		}
1321 
1322 		// the team cannot be deleted completely while we're owning its
1323 		// io_context mutex, so we can safely play with it now
1324 
1325 		context->io_mutex.holder = thread_get_current_thread_id();
1326 
1327 		if (context->cwd != NULL && context->cwd->mount == mount) {
1328 			put_vnode(context->cwd);
1329 				// Note: We're only accessing the pointer, not the vnode itself
1330 				// in the lines below.
1331 
1332 			if (context->cwd == mount->root_vnode) {
1333 				// redirect the current working directory to the covered vnode
1334 				context->cwd = mount->covers_vnode;
1335 				inc_vnode_ref_count(context->cwd);
1336 			} else
1337 				context->cwd = NULL;
1338 		}
1339 
1340 		for (uint32 i = 0; i < context->table_size; i++) {
1341 			if (struct file_descriptor *descriptor = context->fds[i]) {
1342 				inc_fd_ref_count(descriptor);
1343 
1344 				// if this descriptor points at this mount, we
1345 				// need to disconnect it to be able to unmount
1346 				struct vnode *vnode = fd_vnode(descriptor);
1347 				if (vnodeToDisconnect != NULL) {
1348 					if (vnode == vnodeToDisconnect)
1349 						disconnect_fd(descriptor);
1350 				} else if (vnode != NULL && vnode->mount == mount
1351 					|| vnode == NULL && descriptor->u.mount == mount)
1352 					disconnect_fd(descriptor);
1353 
1354 				put_fd(descriptor);
1355 			}
1356 		}
1357 
1358 		mutex_unlock(&context->io_mutex);
1359 	}
1360 }
1361 
1362 
1363 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1364  *		   by.
1365  *
1366  *	Given an arbitrary vnode, the function checks, whether the node is covered
1367  *	by the root of a volume. If it is the function obtains a reference to the
1368  *	volume root node and returns it.
1369  *
1370  *	\param vnode The vnode in question.
1371  *	\return The volume root vnode the vnode cover is covered by, if it is
1372  *			indeed a mount point, or \c NULL otherwise.
1373  */
1374 
1375 static struct vnode *
1376 resolve_mount_point_to_volume_root(struct vnode *vnode)
1377 {
1378 	if (!vnode)
1379 		return NULL;
1380 
1381 	struct vnode *volumeRoot = NULL;
1382 
1383 	mutex_lock(&sVnodeCoveredByMutex);
1384 	if (vnode->covered_by) {
1385 		volumeRoot = vnode->covered_by;
1386 		inc_vnode_ref_count(volumeRoot);
1387 	}
1388 	mutex_unlock(&sVnodeCoveredByMutex);
1389 
1390 	return volumeRoot;
1391 }
1392 
1393 
1394 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1395  *		   by.
1396  *
1397  *	Given an arbitrary vnode (identified by mount and node ID), the function
1398  *	checks, whether the node is covered by the root of a volume. If it is the
1399  *	function returns the mount and node ID of the volume root node. Otherwise
1400  *	it simply returns the supplied mount and node ID.
1401  *
1402  *	In case of error (e.g. the supplied node could not be found) the variables
1403  *	for storing the resolved mount and node ID remain untouched and an error
1404  *	code is returned.
1405  *
1406  *	\param mountID The mount ID of the vnode in question.
1407  *	\param nodeID The node ID of the vnode in question.
1408  *	\param resolvedMountID Pointer to storage for the resolved mount ID.
1409  *	\param resolvedNodeID Pointer to storage for the resolved node ID.
1410  *	\return
1411  *	- \c B_OK, if everything went fine,
1412  *	- another error code, if something went wrong.
1413  */
1414 
1415 status_t
1416 resolve_mount_point_to_volume_root(mount_id mountID, vnode_id nodeID,
1417 	mount_id *resolvedMountID, vnode_id *resolvedNodeID)
1418 {
1419 	// get the node
1420 	struct vnode *node;
1421 	status_t error = get_vnode(mountID, nodeID, &node, false);
1422 	if (error != B_OK)
1423 		return error;
1424 
1425 	// resolve the node
1426 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1427 	if (resolvedNode) {
1428 		put_vnode(node);
1429 		node = resolvedNode;
1430 	}
1431 
1432 	// set the return values
1433 	*resolvedMountID = node->device;
1434 	*resolvedNodeID = node->id;
1435 
1436 	put_vnode(node);
1437 
1438 	return B_OK;
1439 }
1440 
1441 
1442 /**	\brief Resolves a volume root vnode to the underlying mount point vnode.
1443  *
1444  *	Given an arbitrary vnode, the function checks, whether the node is the
1445  *	root of a volume. If it is (and if it is not "/"), the function obtains
1446  *	a reference to the underlying mount point node and returns it.
1447  *
1448  *	\param vnode The vnode in question (caller must have a reference).
1449  *	\return The mount point vnode the vnode covers, if it is indeed a volume
1450  *			root and not "/", or \c NULL otherwise.
1451  */
1452 
1453 static struct vnode *
1454 resolve_volume_root_to_mount_point(struct vnode *vnode)
1455 {
1456 	if (!vnode)
1457 		return NULL;
1458 
1459 	struct vnode *mountPoint = NULL;
1460 
1461 	struct fs_mount *mount = vnode->mount;
1462 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1463 		mountPoint = mount->covers_vnode;
1464 		inc_vnode_ref_count(mountPoint);
1465 	}
1466 
1467 	return mountPoint;
1468 }
1469 
1470 
1471 /**	\brief Gets the directory path and leaf name for a given path.
1472  *
1473  *	The supplied \a path is transformed to refer to the directory part of
1474  *	the entry identified by the original path, and into the buffer \a filename
1475  *	the leaf name of the original entry is written.
1476  *	Neither the returned path nor the leaf name can be expected to be
1477  *	canonical.
1478  *
1479  *	\param path The path to be analyzed. Must be able to store at least one
1480  *		   additional character.
1481  *	\param filename The buffer into which the leaf name will be written.
1482  *		   Must be of size B_FILE_NAME_LENGTH at least.
1483  *	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1484  *		   name is longer than \c B_FILE_NAME_LENGTH.
1485  */
1486 
1487 static status_t
1488 get_dir_path_and_leaf(char *path, char *filename)
1489 {
1490 	char *p = strrchr(path, '/');
1491 		// '/' are not allowed in file names!
1492 
1493 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1494 
1495 	if (!p) {
1496 		// this path is single segment with no '/' in it
1497 		// ex. "foo"
1498 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1499 			return B_NAME_TOO_LONG;
1500 		strcpy(path, ".");
1501 	} else {
1502 		p++;
1503 		if (*p == '\0') {
1504 			// special case: the path ends in '/'
1505 			strcpy(filename, ".");
1506 		} else {
1507 			// normal leaf: replace the leaf portion of the path with a '.'
1508 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1509 				>= B_FILE_NAME_LENGTH) {
1510 				return B_NAME_TOO_LONG;
1511 			}
1512 		}
1513 		p[0] = '.';
1514 		p[1] = '\0';
1515 	}
1516 	return B_OK;
1517 }
1518 
1519 
1520 static status_t
1521 entry_ref_to_vnode(mount_id mountID, vnode_id directoryID, const char *name, struct vnode **_vnode)
1522 {
1523 	char clonedName[B_FILE_NAME_LENGTH + 1];
1524 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1525 		return B_NAME_TOO_LONG;
1526 
1527 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1528 	struct vnode *directory;
1529 
1530 	status_t status = get_vnode(mountID, directoryID, &directory, false);
1531 	if (status < 0)
1532 		return status;
1533 
1534 	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL, NULL);
1535 }
1536 
1537 
1538 /**	Returns the vnode for the relative path starting at the specified \a vnode.
1539  *	\a path must not be NULL.
1540  *	If it returns successfully, \a path contains the name of the last path
1541  *	component.
1542  *	Note, this reduces the ref_count of the starting \a vnode, no matter if
1543  *	it is successful or not!
1544  */
1545 
1546 static status_t
1547 vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
1548 	int count, struct vnode **_vnode, vnode_id *_parentID, int *_type)
1549 {
1550 	status_t status = 0;
1551 	vnode_id lastParentID = vnode->id;
1552 	int type = 0;
1553 
1554 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
1555 
1556 	if (path == NULL) {
1557 		put_vnode(vnode);
1558 		return B_BAD_VALUE;
1559 	}
1560 
1561 	while (true) {
1562 		struct vnode *nextVnode;
1563 		vnode_id vnodeID;
1564 		char *nextPath;
1565 
1566 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));
1567 
1568 		// done?
1569 		if (path[0] == '\0')
1570 			break;
1571 
1572 		// walk to find the next path component ("path" will point to a single
1573 		// path component), and filter out multiple slashes
1574 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);
1575 
1576 		if (*nextPath == '/') {
1577 			*nextPath = '\0';
1578 			do
1579 				nextPath++;
1580 			while (*nextPath == '/');
1581 		}
1582 
1583 		// See if the '..' is at the root of a mount and move to the covered
1584 		// vnode so we pass the '..' path to the underlying filesystem
1585 		if (!strcmp("..", path)
1586 			&& vnode->mount->root_vnode == vnode
1587 			&& vnode->mount->covers_vnode) {
1588 			nextVnode = vnode->mount->covers_vnode;
1589 			inc_vnode_ref_count(nextVnode);
1590 			put_vnode(vnode);
1591 			vnode = nextVnode;
1592 		}
1593 
1594 		// Check if we have the right to search the current directory vnode.
1595 		// If a file system doesn't have the access() function, we assume that
1596 		// searching a directory is always allowed
1597 		if (FS_CALL(vnode, access))
1598 			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);
1599 
1600 		// Tell the filesystem to get the vnode of this path component (if we got the
1601 		// permission from the call above)
1602 		if (status >= B_OK)
1603 			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);
1604 
1605 		if (status < B_OK) {
1606 			put_vnode(vnode);
1607 			return status;
1608 		}
1609 
1610 		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
1611 		// from inside the filesystem, thus the vnode would have to be in the list and it's
1612 		// ref count incremented at this point
1613 		mutex_lock(&sVnodeMutex);
1614 		nextVnode = lookup_vnode(vnode->device, vnodeID);
1615 		mutex_unlock(&sVnodeMutex);
1616 
1617 		if (!nextVnode) {
1618 			// pretty screwed up here - the file system found the vnode, but the hash
1619 			// lookup failed, so our internal structures are messed up
1620 			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
1621 				vnode->device, vnodeID);
1622 			put_vnode(vnode);
1623 			return B_ENTRY_NOT_FOUND;
1624 		}
1625 
1626 		// If the new node is a symbolic link, resolve it (if we've been told to do it)
1627 		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
1628 			size_t bufferSize;
1629 			char *buffer;
1630 
1631 			TRACE(("traverse link\n"));
1632 
1633 			// it's not exactly nice style using goto in this way, but hey, it works :-/
1634 			if (count + 1 > B_MAX_SYMLINKS) {
1635 				status = B_LINK_LIMIT;
1636 				goto resolve_link_error;
1637 			}
1638 
1639 			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
1640 			if (buffer == NULL) {
1641 				status = B_NO_MEMORY;
1642 				goto resolve_link_error;
1643 			}
1644 
1645 			if (FS_CALL(nextVnode, read_symlink) != NULL) {
1646 				status = FS_CALL(nextVnode, read_symlink)(
1647 					nextVnode->mount->cookie, nextVnode->private_node, buffer,
1648 					&bufferSize);
1649 			} else
1650 				status = B_BAD_VALUE;
1651 
1652 			if (status < B_OK) {
1653 				free(buffer);
1654 
1655 		resolve_link_error:
1656 				put_vnode(vnode);
1657 				put_vnode(nextVnode);
1658 
1659 				return status;
1660 			}
1661 			put_vnode(nextVnode);
1662 
1663 			// Check if we start from the root directory or the current
1664 			// directory ("vnode" still points to that one).
1665 			// Cut off all leading slashes if it's the root directory
1666 			path = buffer;
1667 			if (path[0] == '/') {
1668 				// we don't need the old directory anymore
1669 				put_vnode(vnode);
1670 
1671 				while (*++path == '/')
1672 					;
1673 				vnode = sRoot;
1674 				inc_vnode_ref_count(vnode);
1675 			}
1676 			inc_vnode_ref_count(vnode);
1677 				// balance the next recursion - we will decrement the ref_count
1678 				// of the vnode, no matter if we succeeded or not
1679 
1680 			status = vnode_path_to_vnode(vnode, path, traverseLeafLink, count + 1,
1681 				&nextVnode, &lastParentID, _type);
1682 
1683 			free(buffer);
1684 
1685 			if (status < B_OK) {
1686 				put_vnode(vnode);
1687 				return status;
1688 			}
1689 		} else
1690 			lastParentID = vnode->id;
1691 
1692 		// decrease the ref count on the old dir we just looked up into
1693 		put_vnode(vnode);
1694 
1695 		path = nextPath;
1696 		vnode = nextVnode;
1697 
1698 		// see if we hit a mount point
1699 		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
1700 		if (mountPoint) {
1701 			put_vnode(vnode);
1702 			vnode = mountPoint;
1703 		}
1704 	}
1705 
1706 	*_vnode = vnode;
1707 	if (_type)
1708 		*_type = type;
1709 	if (_parentID)
1710 		*_parentID = lastParentID;
1711 
1712 	return B_OK;
1713 }
1714 
1715 
1716 static status_t
1717 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1718 	vnode_id *_parentID, bool kernel)
1719 {
1720 	struct vnode *start = NULL;
1721 
1722 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1723 
1724 	if (!path)
1725 		return B_BAD_VALUE;
1726 
1727 	// figure out if we need to start at root or at cwd
1728 	if (*path == '/') {
1729 		if (sRoot == NULL) {
1730 			// we're a bit early, aren't we?
1731 			return B_ERROR;
1732 		}
1733 
1734 		while (*++path == '/')
1735 			;
1736 		start = sRoot;
1737 		inc_vnode_ref_count(start);
1738 	} else {
1739 		struct io_context *context = get_current_io_context(kernel);
1740 
1741 		mutex_lock(&context->io_mutex);
1742 		start = context->cwd;
1743 		if (start != NULL)
1744 			inc_vnode_ref_count(start);
1745 		mutex_unlock(&context->io_mutex);
1746 
1747 		if (start == NULL)
1748 			return B_ERROR;
1749 	}
1750 
1751 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1752 }
1753 
1754 
1755 /** Returns the vnode in the next to last segment of the path, and returns
1756  *	the last portion in filename.
1757  *	The path buffer must be able to store at least one additional character.
1758  */
1759 
1760 static status_t
1761 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1762 {
1763 	status_t status = get_dir_path_and_leaf(path, filename);
1764 	if (status != B_OK)
1765 		return status;
1766 
1767 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1768 }
1769 
1770 
1771 /**	\brief Retrieves the directory vnode and the leaf name of an entry referred
1772  *		   to by a FD + path pair.
1773  *
1774  *	\a path must be given in either case. \a fd might be omitted, in which
1775  *	case \a path is either an absolute path or one relative to the current
1776  *	directory. If both a supplied and \a path is relative it is reckoned off
1777  *	of the directory referred to by \a fd. If \a path is absolute \a fd is
1778  *	ignored.
1779  *
1780  *	The caller has the responsibility to call put_vnode() on the returned
1781  *	directory vnode.
1782  *
1783  *	\param fd The FD. May be < 0.
1784  *	\param path The absolute or relative path. Must not be \c NULL. The buffer
1785  *	       is modified by this function. It must have at least room for a
1786  *	       string one character longer than the path it contains.
1787  *	\param _vnode A pointer to a variable the directory vnode shall be written
1788  *		   into.
1789  *	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1790  *		   the leaf name of the specified entry will be written.
1791  *	\param kernel \c true, if invoked from inside the kernel, \c false if
1792  *		   invoked from userland.
1793  *	\return \c B_OK, if everything went fine, another error code otherwise.
1794  */
1795 
1796 static status_t
1797 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1798 	char *filename, bool kernel)
1799 {
1800 	if (!path)
1801 		return B_BAD_VALUE;
1802 	if (fd < 0)
1803 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1804 
1805 	status_t status = get_dir_path_and_leaf(path, filename);
1806 	if (status != B_OK)
1807 		return status;
1808 
1809 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1810 }
1811 
1812 
1813 /** Returns a vnode's name in the d_name field of a supplied dirent buffer.
1814  */
1815 
1816 static status_t
1817 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
1818 	size_t bufferSize)
1819 {
1820 	if (bufferSize < sizeof(struct dirent))
1821 		return B_BAD_VALUE;
1822 
1823 	// See if vnode is the root of a mount and move to the covered
1824 	// vnode so we get the underlying file system
1825 	VNodePutter vnodePutter;
1826 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
1827 		vnode = vnode->mount->covers_vnode;
1828 		inc_vnode_ref_count(vnode);
1829 		vnodePutter.SetTo(vnode);
1830 	}
1831 
1832 	if (FS_CALL(vnode, get_vnode_name)) {
1833 		// The FS supports getting the name of a vnode.
1834 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
1835 			vnode->private_node, buffer->d_name,
1836 			(char*)buffer + bufferSize - buffer->d_name);
1837 	}
1838 
1839 	// The FS doesn't support getting the name of a vnode. So we search the
1840 	// parent directory for the vnode, if the caller let us.
1841 
1842 	if (parent == NULL)
1843 		return EOPNOTSUPP;
1844 
1845 	fs_cookie cookie;
1846 
1847 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
1848 		parent->private_node, &cookie);
1849 	if (status >= B_OK) {
1850 		while (true) {
1851 			uint32 num = 1;
1852 			status = dir_read(parent, cookie, buffer, bufferSize, &num);
1853 			if (status < B_OK)
1854 				break;
1855 			if (num == 0) {
1856 				status = B_ENTRY_NOT_FOUND;
1857 				break;
1858 			}
1859 
1860 			if (vnode->id == buffer->d_ino) {
1861 				// found correct entry!
1862 				break;
1863 			}
1864 		}
1865 
1866 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node,
1867 			cookie);
1868 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie,
1869 			vnode->private_node, cookie);
1870 	}
1871 	return status;
1872 }
1873 
1874 
1875 static status_t
1876 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
1877 	size_t nameSize)
1878 {
1879 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1880 	struct dirent *dirent = (struct dirent *)buffer;
1881 
1882 	status_t status = get_vnode_name(vnode, parent, buffer, sizeof(buffer));
1883 	if (status != B_OK)
1884 		return status;
1885 
1886 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
1887 		return B_BUFFER_OVERFLOW;
1888 
1889 	return B_OK;
1890 }
1891 
1892 
1893 /**	Gets the full path to a given directory vnode.
1894  *	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
1895  *	file system doesn't support this call, it will fall back to iterating
1896  *	through the parent directory to get the name of the child.
1897  *
1898  *	To protect against circular loops, it supports a maximum tree depth
1899  *	of 256 levels.
1900  *
1901  *	Note that the path may not be correct the time this function returns!
1902  *	It doesn't use any locking to prevent returning the correct path, as
1903  *	paths aren't safe anyway: the path to a file can change at any time.
1904  *
1905  *	It might be a good idea, though, to check if the returned path exists
1906  *	in the calling function (it's not done here because of efficiency)
1907  */
1908 
1909 static status_t
1910 dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
1911 {
1912 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
1913 
1914 	if (vnode == NULL || buffer == NULL)
1915 		return B_BAD_VALUE;
1916 
1917 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
1918 	KPath pathBuffer;
1919 	if (pathBuffer.InitCheck() != B_OK)
1920 		return B_NO_MEMORY;
1921 
1922 	char *path = pathBuffer.LockBuffer();
1923 	int32 insert = pathBuffer.BufferSize();
1924 	int32 maxLevel = 256;
1925 	int32 length;
1926 	status_t status;
1927 
1928 	// we don't use get_vnode() here because this call is more
1929 	// efficient and does all we need from get_vnode()
1930 	inc_vnode_ref_count(vnode);
1931 
1932 	// resolve a volume root to its mount point
1933 	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
1934 	if (mountPoint) {
1935 		put_vnode(vnode);
1936 		vnode = mountPoint;
1937 	}
1938 
1939 	path[--insert] = '\0';
1940 
1941 	while (true) {
1942 		// the name buffer is also used for fs_read_dir()
1943 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1944 		char *name = &((struct dirent *)nameBuffer)->d_name[0];
1945 		struct vnode *parentVnode;
1946 		vnode_id parentID;
1947 		int type;
1948 
1949 		// lookup the parent vnode
1950 		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
1951 			&parentID, &type);
1952 		if (status < B_OK)
1953 			goto out;
1954 
1955 		mutex_lock(&sVnodeMutex);
1956 		parentVnode = lookup_vnode(vnode->device, parentID);
1957 		mutex_unlock(&sVnodeMutex);
1958 
1959 		if (parentVnode == NULL) {
1960 			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
1961 				vnode->device, parentID);
1962 			status = B_ENTRY_NOT_FOUND;
1963 			goto out;
1964 		}
1965 
1966 		// get the node's name
1967 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
1968 			sizeof(nameBuffer));
1969 
1970 		// resolve a volume root to its mount point
1971 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
1972 		if (mountPoint) {
1973 			put_vnode(parentVnode);
1974 			parentVnode = mountPoint;
1975 			parentID = parentVnode->id;
1976 		}
1977 
1978 		bool hitRoot = (parentVnode == vnode);
1979 
1980 		// release the current vnode, we only need its parent from now on
1981 		put_vnode(vnode);
1982 		vnode = parentVnode;
1983 
1984 		if (status < B_OK)
1985 			goto out;
1986 
1987 		if (hitRoot) {
1988 			// we have reached "/", which means we have constructed the full
1989 			// path
1990 			break;
1991 		}
1992 
1993 		// ToDo: add an explicit check for loops in about 10 levels to do
1994 		// real loop detection
1995 
1996 		// don't go deeper as 'maxLevel' to prevent circular loops
1997 		if (maxLevel-- < 0) {
1998 			status = ELOOP;
1999 			goto out;
2000 		}
2001 
2002 		// add the name in front of the current path
2003 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2004 		length = strlen(name);
2005 		insert -= length;
2006 		if (insert <= 0) {
2007 			status = ENOBUFS;
2008 			goto out;
2009 		}
2010 		memcpy(path + insert, name, length);
2011 		path[--insert] = '/';
2012 	}
2013 
2014 	// the root dir will result in an empty path: fix it
2015 	if (path[insert] == '\0')
2016 		path[--insert] = '/';
2017 
2018 	TRACE(("  path is: %s\n", path + insert));
2019 
2020 	// copy the path to the output buffer
2021 	length = pathBuffer.BufferSize() - insert;
2022 	if (length <= (int)bufferSize)
2023 		memcpy(buffer, path + insert, length);
2024 	else
2025 		status = ENOBUFS;
2026 
2027 out:
2028 	put_vnode(vnode);
2029 	return status;
2030 }
2031 
2032 
2033 /**	Checks the length of every path component, and adds a '.'
2034  *	if the path ends in a slash.
2035  *	The given path buffer must be able to store at least one
2036  *	additional character.
2037  */
2038 
2039 static status_t
2040 check_path(char *to)
2041 {
2042 	int32 length = 0;
2043 
2044 	// check length of every path component
2045 
2046 	while (*to) {
2047 		char *begin;
2048 		if (*to == '/')
2049 			to++, length++;
2050 
2051 		begin = to;
2052 		while (*to != '/' && *to)
2053 			to++, length++;
2054 
2055 		if (to - begin > B_FILE_NAME_LENGTH)
2056 			return B_NAME_TOO_LONG;
2057 	}
2058 
2059 	if (length == 0)
2060 		return B_ENTRY_NOT_FOUND;
2061 
2062 	// complete path if there is a slash at the end
2063 
2064 	if (*(to - 1) == '/') {
2065 		if (length > B_PATH_NAME_LENGTH - 2)
2066 			return B_NAME_TOO_LONG;
2067 
2068 		to[0] = '.';
2069 		to[1] = '\0';
2070 	}
2071 
2072 	return B_OK;
2073 }
2074 
2075 
2076 static struct file_descriptor *
2077 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2078 {
2079 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2080 	if (descriptor == NULL)
2081 		return NULL;
2082 
2083 	if (fd_vnode(descriptor) == NULL) {
2084 		put_fd(descriptor);
2085 		return NULL;
2086 	}
2087 
2088 	// ToDo: when we can close a file descriptor at any point, investigate
2089 	//	if this is still valid to do (accessing the vnode without ref_count
2090 	//	or locking)
2091 	*_vnode = descriptor->u.vnode;
2092 	return descriptor;
2093 }
2094 
2095 
2096 static struct vnode *
2097 get_vnode_from_fd(int fd, bool kernel)
2098 {
2099 	struct file_descriptor *descriptor;
2100 	struct vnode *vnode;
2101 
2102 	descriptor = get_fd(get_current_io_context(kernel), fd);
2103 	if (descriptor == NULL)
2104 		return NULL;
2105 
2106 	vnode = fd_vnode(descriptor);
2107 	if (vnode != NULL)
2108 		inc_vnode_ref_count(vnode);
2109 
2110 	put_fd(descriptor);
2111 	return vnode;
2112 }
2113 
2114 
2115 /**	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2116  *	only the path will be considered. In this case, the \a path must not be
2117  *	NULL.
2118  *	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2119  *	and should be NULL for files.
2120  */
2121 
2122 static status_t
2123 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2124 	struct vnode **_vnode, vnode_id *_parentID, bool kernel)
2125 {
2126 	if (fd < 0 && !path)
2127 		return B_BAD_VALUE;
2128 
2129 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2130 		// no FD or absolute path
2131 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2132 	}
2133 
2134 	// FD only, or FD + relative path
2135 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2136 	if (!vnode)
2137 		return B_FILE_ERROR;
2138 
2139 	if (path != NULL) {
2140 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2141 			_vnode, _parentID, NULL);
2142 	}
2143 
2144 	// there is no relative path to take into account
2145 
2146 	*_vnode = vnode;
2147 	if (_parentID)
2148 		*_parentID = -1;
2149 
2150 	return B_OK;
2151 }
2152 
2153 
2154 static int
2155 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2156 	fs_cookie cookie, int openMode, bool kernel)
2157 {
2158 	struct file_descriptor *descriptor;
2159 	int fd;
2160 
2161 	// if the vnode is locked, we don't allow creating a new file descriptor for it
2162 	if (vnode && vnode->mandatory_locked_by != NULL)
2163 		return B_BUSY;
2164 
2165 	descriptor = alloc_fd();
2166 	if (!descriptor)
2167 		return B_NO_MEMORY;
2168 
2169 	if (vnode)
2170 		descriptor->u.vnode = vnode;
2171 	else
2172 		descriptor->u.mount = mount;
2173 	descriptor->cookie = cookie;
2174 
2175 	switch (type) {
2176 		// vnode types
2177 		case FDTYPE_FILE:
2178 			descriptor->ops = &sFileOps;
2179 			break;
2180 		case FDTYPE_DIR:
2181 			descriptor->ops = &sDirectoryOps;
2182 			break;
2183 		case FDTYPE_ATTR:
2184 			descriptor->ops = &sAttributeOps;
2185 			break;
2186 		case FDTYPE_ATTR_DIR:
2187 			descriptor->ops = &sAttributeDirectoryOps;
2188 			break;
2189 
2190 		// mount types
2191 		case FDTYPE_INDEX_DIR:
2192 			descriptor->ops = &sIndexDirectoryOps;
2193 			break;
2194 		case FDTYPE_QUERY:
2195 			descriptor->ops = &sQueryOps;
2196 			break;
2197 
2198 		default:
2199 			panic("get_new_fd() called with unknown type %d\n", type);
2200 			break;
2201 	}
2202 	descriptor->type = type;
2203 	descriptor->open_mode = openMode;
2204 
2205 	fd = new_fd(get_current_io_context(kernel), descriptor);
2206 	if (fd < 0) {
2207 		free(descriptor);
2208 		return B_NO_MORE_FDS;
2209 	}
2210 
2211 	return fd;
2212 }
2213 
2214 #ifdef ADD_DEBUGGER_COMMANDS
2215 
2216 
2217 static void
2218 _dump_advisory_locking(advisory_locking *locking)
2219 {
2220 	if (locking == NULL)
2221 		return;
2222 
2223 	kprintf("   lock:        %ld", locking->lock);
2224 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2225 
2226 	struct advisory_lock *lock = NULL;
2227 	int32 index = 0;
2228 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
2229 		kprintf("   [%2ld] team:   %ld\n", index, lock->team);
2230 		kprintf("        offset: %Ld\n", lock->offset);
2231 		kprintf("        length: %Ld\n", lock->length);
2232 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2233 	}
2234 }
2235 
2236 
2237 static void
2238 _dump_mount(struct fs_mount *mount)
2239 {
2240 	kprintf("MOUNT: %p\n", mount);
2241 	kprintf(" id:            %ld\n", mount->id);
2242 	kprintf(" device_name:   %s\n", mount->device_name);
2243 	kprintf(" fs_name:       %s\n", mount->fs_name);
2244 	kprintf(" cookie:        %p\n", mount->cookie);
2245 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2246 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2247 	kprintf(" partition:     %p\n", mount->partition);
2248 	kprintf(" lock:          %ld\n", mount->rlock.sem);
2249 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2250 		mount->owns_file_device ? " owns_file_device" : "");
2251 }
2252 
2253 
2254 static void
2255 _dump_vnode(struct vnode *vnode)
2256 {
2257 	kprintf("VNODE: %p\n", vnode);
2258 	kprintf(" device:        %ld\n", vnode->device);
2259 	kprintf(" id:            %Ld\n", vnode->id);
2260 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
2261 	kprintf(" private_node:  %p\n", vnode->private_node);
2262 	kprintf(" mount:         %p\n", vnode->mount);
2263 	kprintf(" covered_by:    %p\n", vnode->covered_by);
2264 	kprintf(" cache_ref:     %p\n", vnode->cache);
2265 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
2266 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2267 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
2268 
2269 	_dump_advisory_locking(vnode->advisory_locking);
2270 }
2271 
2272 
2273 static int
2274 dump_mount(int argc, char **argv)
2275 {
2276 	if (argc != 2) {
2277 		kprintf("usage: mount [id/address]\n");
2278 		return 0;
2279 	}
2280 
2281 	struct fs_mount *mount = NULL;
2282 
2283 	// if the argument looks like a hex number, treat it as such
2284 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2285 		mount = (fs_mount *)strtoul(argv[1], NULL, 16);
2286 		if (IS_USER_ADDRESS(mount)) {
2287 			kprintf("invalid fs_mount address\n");
2288 			return 0;
2289 		}
2290 	} else {
2291 		mount_id id = atoll(argv[1]);
2292 		mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2293 		if (mount == NULL) {
2294 			kprintf("fs_mount not found\n");
2295 			return 0;
2296 		}
2297 	}
2298 
2299 	_dump_mount(mount);
2300 	return 0;
2301 }
2302 
2303 
2304 static int
2305 dump_mounts(int argc, char **argv)
2306 {
2307 	struct hash_iterator iterator;
2308 	struct fs_mount *mount;
2309 
2310 	kprintf("address     id root       covers     fs_name\n");
2311 
2312 	hash_open(sMountsTable, &iterator);
2313 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2314 		kprintf("%p%4ld %p %p %s\n", mount, mount->id, mount->root_vnode,
2315 			mount->covers_vnode, mount->fs_name);
2316 	}
2317 
2318 	hash_close(sMountsTable, &iterator, false);
2319 	return 0;
2320 }
2321 
2322 
2323 static int
2324 dump_vnode(int argc, char **argv)
2325 {
2326 	if (argc < 2) {
2327 		kprintf("usage: vnode [id/device id/address]\n");
2328 		return 0;
2329 	}
2330 
2331 	struct vnode *vnode = NULL;
2332 
2333 	// if the argument looks like a hex number, treat it as such
2334 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2335 		vnode = (struct vnode *)strtoul(argv[1], NULL, 16);
2336 		if (IS_USER_ADDRESS(vnode)) {
2337 			kprintf("invalid vnode address\n");
2338 			return 0;
2339 		}
2340 		_dump_vnode(vnode);
2341 		return 0;
2342 	}
2343 
2344 	struct hash_iterator iterator;
2345 	mount_id device = -1;
2346 	vnode_id id;
2347 	if (argc > 2) {
2348 		device = atoi(argv[1]);
2349 		id = atoll(argv[2]);
2350 	} else
2351 		id = atoll(argv[1]);
2352 
2353 	hash_open(sVnodeTable, &iterator);
2354 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2355 		if (vnode->id != id || device != -1 && vnode->device != device)
2356 			continue;
2357 
2358 		_dump_vnode(vnode);
2359 	}
2360 
2361 	hash_close(sVnodeTable, &iterator, false);
2362 	return 0;
2363 }
2364 
2365 
2366 static int
2367 dump_vnodes(int argc, char **argv)
2368 {
2369 	// restrict dumped nodes to a certain device if requested
2370 	mount_id device = -1;
2371 	if (argc > 1)
2372 		device = atoi(argv[1]);
2373 
2374 	struct hash_iterator iterator;
2375 	struct vnode *vnode;
2376 
2377 	kprintf("address    dev     inode  ref cache      locking    flags\n");
2378 
2379 	hash_open(sVnodeTable, &iterator);
2380 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2381 		if (device != -1 && vnode->device != device)
2382 			continue;
2383 
2384 		kprintf("%p%4ld%10Ld%5ld %p %p %s%s%s\n", vnode, vnode->device, vnode->id,
2385 			vnode->ref_count, vnode->cache, vnode->advisory_locking,
2386 			vnode->remove ? "r" : "-", vnode->busy ? "b" : "-",
2387 			vnode->unpublished ? "u" : "-");
2388 	}
2389 
2390 	hash_close(sVnodeTable, &iterator, false);
2391 	return 0;
2392 }
2393 
2394 
2395 static int
2396 dump_vnode_caches(int argc, char **argv)
2397 {
2398 	struct hash_iterator iterator;
2399 	struct vnode *vnode;
2400 
2401 	// restrict dumped nodes to a certain device if requested
2402 	mount_id device = -1;
2403 	if (argc > 1)
2404 		device = atoi(argv[1]);
2405 
2406 	kprintf("address    dev     inode cache          size   pages\n");
2407 
2408 	hash_open(sVnodeTable, &iterator);
2409 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2410 		if (vnode->cache == NULL)
2411 			continue;
2412 		if (device != -1 && vnode->device != device)
2413 			continue;
2414 
2415 		// count pages in cache
2416 		size_t numPages = 0;
2417 		for (struct vm_page *page = vnode->cache->cache->page_list;
2418 				page != NULL; page = page->cache_next) {
2419 			numPages++;
2420 		}
2421 
2422 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id, vnode->cache,
2423 			(vnode->cache->cache->virtual_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE, numPages);
2424 	}
2425 
2426 	hash_close(sVnodeTable, &iterator, false);
2427 	return 0;
2428 }
2429 
2430 
2431 int
2432 dump_io_context(int argc, char **argv)
2433 {
2434 	if (argc > 2) {
2435 		kprintf("usage: io_context [team id/address]\n");
2436 		return 0;
2437 	}
2438 
2439 	struct io_context *context = NULL;
2440 
2441 	if (argc > 1) {
2442 		uint32 num = strtoul(argv[1], NULL, 0);
2443 		if (IS_KERNEL_ADDRESS(num))
2444 			context = (struct io_context *)num;
2445 		else {
2446 			struct team *team = team_get_team_struct_locked(num);
2447 			if (team == NULL) {
2448 				kprintf("could not find team with ID %ld\n", num);
2449 				return 0;
2450 			}
2451 			context = (struct io_context *)team->io_context;
2452 		}
2453 	} else
2454 		context = get_current_io_context(true);
2455 
2456 	kprintf("I/O CONTEXT: %p\n", context);
2457 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2458 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2459 	kprintf(" max fds:\t%lu\n", context->table_size);
2460 
2461 	if (context->num_used_fds)
2462 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2463 
2464 	for (uint32 i = 0; i < context->table_size; i++) {
2465 		struct file_descriptor *fd = context->fds[i];
2466 		if (fd == NULL)
2467 			continue;
2468 
2469 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2470 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2471 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2472 			fd->u.vnode);
2473 	}
2474 
2475 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2476 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2477 
2478 	return 0;
2479 }
2480 
2481 
2482 int
2483 dump_vnode_usage(int argc, char **argv)
2484 {
2485 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes, kMaxUnusedVnodes);
2486 
2487 	struct hash_iterator iterator;
2488 	hash_open(sVnodeTable, &iterator);
2489 
2490 	uint32 count = 0;
2491 	struct vnode *vnode;
2492 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2493 		count++;
2494 	}
2495 
2496 	hash_close(sVnodeTable, &iterator, false);
2497 
2498 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2499 	return 0;
2500 }
2501 
2502 #endif	// ADD_DEBUGGER_COMMANDS
2503 
2504 
2505 //	#pragma mark - public VFS API
2506 
2507 
2508 extern "C" status_t
2509 new_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2510 {
2511 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2512 		mountID, vnodeID, privateNode));
2513 
2514 	if (privateNode == NULL)
2515 		return B_BAD_VALUE;
2516 
2517 	mutex_lock(&sVnodeMutex);
2518 
2519 	// file system integrity check:
2520 	// test if the vnode already exists and bail out if this is the case!
2521 
2522 	// ToDo: the R5 implementation obviously checks for a different cookie
2523 	//	and doesn't panic if they are equal
2524 
2525 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2526 	if (vnode != NULL)
2527 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2528 
2529 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2530 	if (status == B_OK) {
2531 		vnode->private_node = privateNode;
2532 		vnode->busy = true;
2533 		vnode->unpublished = true;
2534 	}
2535 
2536 	TRACE(("returns: %s\n", strerror(status)));
2537 
2538 	mutex_unlock(&sVnodeMutex);
2539 	return status;
2540 }
2541 
2542 
2543 extern "C" status_t
2544 publish_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2545 {
2546 	FUNCTION(("publish_vnode()\n"));
2547 
2548 	mutex_lock(&sVnodeMutex);
2549 
2550 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2551 	status_t status = B_OK;
2552 
2553 	if (vnode != NULL && vnode->busy && vnode->unpublished
2554 		&& vnode->private_node == privateNode) {
2555 		vnode->busy = false;
2556 		vnode->unpublished = false;
2557 	} else if (vnode == NULL && privateNode != NULL) {
2558 		status = create_new_vnode(&vnode, mountID, vnodeID);
2559 		if (status == B_OK)
2560 			vnode->private_node = privateNode;
2561 	} else
2562 		status = B_BAD_VALUE;
2563 
2564 	TRACE(("returns: %s\n", strerror(status)));
2565 
2566 	mutex_unlock(&sVnodeMutex);
2567 	return status;
2568 }
2569 
2570 
2571 extern "C" status_t
2572 get_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode *_fsNode)
2573 {
2574 	struct vnode *vnode;
2575 
2576 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2577 	if (status < B_OK)
2578 		return status;
2579 
2580 	*_fsNode = vnode->private_node;
2581 	return B_OK;
2582 }
2583 
2584 
2585 extern "C" status_t
2586 put_vnode(mount_id mountID, vnode_id vnodeID)
2587 {
2588 	struct vnode *vnode;
2589 
2590 	mutex_lock(&sVnodeMutex);
2591 	vnode = lookup_vnode(mountID, vnodeID);
2592 	mutex_unlock(&sVnodeMutex);
2593 
2594 	if (vnode)
2595 		dec_vnode_ref_count(vnode, true);
2596 
2597 	return B_OK;
2598 }
2599 
2600 
2601 extern "C" status_t
2602 remove_vnode(mount_id mountID, vnode_id vnodeID)
2603 {
2604 	struct vnode *vnode;
2605 	bool remove = false;
2606 
2607 	mutex_lock(&sVnodeMutex);
2608 
2609 	vnode = lookup_vnode(mountID, vnodeID);
2610 	if (vnode != NULL) {
2611 		if (vnode->covered_by != NULL) {
2612 			// this vnode is in use
2613 			mutex_unlock(&sVnodeMutex);
2614 			return B_BUSY;
2615 		}
2616 
2617 		vnode->remove = true;
2618 		if (vnode->unpublished) {
2619 			// prepare the vnode for deletion
2620 			vnode->busy = true;
2621 			remove = true;
2622 		}
2623 	}
2624 
2625 	mutex_unlock(&sVnodeMutex);
2626 
2627 	if (remove) {
2628 		// if the vnode hasn't been published yet, we delete it here
2629 		atomic_add(&vnode->ref_count, -1);
2630 		free_vnode(vnode, true);
2631 	}
2632 
2633 	return B_OK;
2634 }
2635 
2636 
2637 extern "C" status_t
2638 unremove_vnode(mount_id mountID, vnode_id vnodeID)
2639 {
2640 	struct vnode *vnode;
2641 
2642 	mutex_lock(&sVnodeMutex);
2643 
2644 	vnode = lookup_vnode(mountID, vnodeID);
2645 	if (vnode)
2646 		vnode->remove = false;
2647 
2648 	mutex_unlock(&sVnodeMutex);
2649 	return B_OK;
2650 }
2651 
2652 
2653 extern "C" status_t
2654 get_vnode_removed(mount_id mountID, vnode_id vnodeID, bool* removed)
2655 {
2656 	mutex_lock(&sVnodeMutex);
2657 
2658 	status_t result;
2659 
2660 	if (struct vnode* vnode = lookup_vnode(mountID, vnodeID)) {
2661 		if (removed)
2662 			*removed = vnode->remove;
2663 		result = B_OK;
2664 	} else
2665 		result = B_BAD_VALUE;
2666 
2667 	mutex_unlock(&sVnodeMutex);
2668 	return result;
2669 }
2670 
2671 
2672 //	#pragma mark - private VFS API
2673 //	Functions the VFS exports for other parts of the kernel
2674 
2675 
2676 /** Acquires another reference to the vnode that has to be released
2677  *	by calling vfs_put_vnode().
2678  */
2679 
2680 void
2681 vfs_acquire_vnode(void *_vnode)
2682 {
2683 	inc_vnode_ref_count((struct vnode *)_vnode);
2684 }
2685 
2686 
2687 /** This is currently called from file_cache_create() only.
2688  *	It's probably a temporary solution as long as devfs requires that
2689  *	fs_read_pages()/fs_write_pages() are called with the standard
2690  *	open cookie and not with a device cookie.
2691  *	If that's done differently, remove this call; it has no other
2692  *	purpose.
2693  */
2694 
2695 extern "C" status_t
2696 vfs_get_cookie_from_fd(int fd, void **_cookie)
2697 {
2698 	struct file_descriptor *descriptor;
2699 
2700 	descriptor = get_fd(get_current_io_context(true), fd);
2701 	if (descriptor == NULL)
2702 		return B_FILE_ERROR;
2703 
2704 	*_cookie = descriptor->cookie;
2705 	return B_OK;
2706 }
2707 
2708 
2709 extern "C" int
2710 vfs_get_vnode_from_fd(int fd, bool kernel, void **vnode)
2711 {
2712 	*vnode = get_vnode_from_fd(fd, kernel);
2713 
2714 	if (*vnode == NULL)
2715 		return B_FILE_ERROR;
2716 
2717 	return B_NO_ERROR;
2718 }
2719 
2720 
2721 extern "C" status_t
2722 vfs_get_vnode_from_path(const char *path, bool kernel, void **_vnode)
2723 {
2724 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n", path, kernel));
2725 
2726 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2727 	if (pathBuffer.InitCheck() != B_OK)
2728 		return B_NO_MEMORY;
2729 
2730 	char *buffer = pathBuffer.LockBuffer();
2731 	strlcpy(buffer, path, pathBuffer.BufferSize());
2732 
2733 	struct vnode *vnode;
2734 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2735 	if (status < B_OK)
2736 		return status;
2737 
2738 	*_vnode = vnode;
2739 	return B_OK;
2740 }
2741 
2742 
2743 extern "C" status_t
2744 vfs_get_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2745 {
2746 	struct vnode *vnode;
2747 
2748 	status_t status = get_vnode(mountID, vnodeID, &vnode, false);
2749 	if (status < B_OK)
2750 		return status;
2751 
2752 	*_vnode = vnode;
2753 	return B_OK;
2754 }
2755 
2756 
2757 extern "C" status_t
2758 vfs_entry_ref_to_vnode(mount_id mountID, vnode_id directoryID,
2759 	const char *name, void **_vnode)
2760 {
2761 	return entry_ref_to_vnode(mountID, directoryID, name, (struct vnode **)_vnode);
2762 }
2763 
2764 
2765 extern "C" void
2766 vfs_vnode_to_node_ref(void *_vnode, mount_id *_mountID, vnode_id *_vnodeID)
2767 {
2768 	struct vnode *vnode = (struct vnode *)_vnode;
2769 
2770 	*_mountID = vnode->device;
2771 	*_vnodeID = vnode->id;
2772 }
2773 
2774 
2775 /**	Looks up a vnode with the given mount and vnode ID.
2776  *	Must only be used with "in-use" vnodes as it doesn't grab a reference
2777  *	to the node.
2778  *	It's currently only be used by file_cache_create().
2779  */
2780 
2781 extern "C" status_t
2782 vfs_lookup_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2783 {
2784 	mutex_lock(&sVnodeMutex);
2785 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2786 	mutex_unlock(&sVnodeMutex);
2787 
2788 	if (vnode == NULL)
2789 		return B_ERROR;
2790 
2791 	*_vnode = vnode;
2792 	return B_OK;
2793 }
2794 
2795 
2796 extern "C" status_t
2797 vfs_get_fs_node_from_path(mount_id mountID, const char *path, bool kernel, void **_node)
2798 {
2799 	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
2800 		mountID, path, kernel));
2801 
2802 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2803 	if (pathBuffer.InitCheck() != B_OK)
2804 		return B_NO_MEMORY;
2805 
2806 	fs_mount *mount;
2807 	status_t status = get_mount(mountID, &mount);
2808 	if (status < B_OK)
2809 		return status;
2810 
2811 	char *buffer = pathBuffer.LockBuffer();
2812 	strlcpy(buffer, path, pathBuffer.BufferSize());
2813 
2814 	struct vnode *vnode = mount->root_vnode;
2815 
2816 	if (buffer[0] == '/')
2817 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
2818 	else {
2819 		inc_vnode_ref_count(vnode);
2820 			// vnode_path_to_vnode() releases a reference to the starting vnode
2821 		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
2822 	}
2823 
2824 	put_mount(mount);
2825 
2826 	if (status < B_OK)
2827 		return status;
2828 
2829 	if (vnode->device != mountID) {
2830 		// wrong mount ID - must not gain access on foreign file system nodes
2831 		put_vnode(vnode);
2832 		return B_BAD_VALUE;
2833 	}
2834 
2835 	*_node = vnode->private_node;
2836 	return B_OK;
2837 }
2838 
2839 
2840 /**	Finds the full path to the file that contains the module \a moduleName,
2841  *	puts it into \a pathBuffer, and returns B_OK for success.
2842  *	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
2843  *	\c B_ENTRY_NOT_FOUNT if no file could be found.
2844  *	\a pathBuffer is clobbered in any case and must not be relied on if this
2845  *	functions returns unsuccessfully.
2846  */
2847 
2848 status_t
2849 vfs_get_module_path(const char *basePath, const char *moduleName, char *pathBuffer,
2850 	size_t bufferSize)
2851 {
2852 	struct vnode *dir, *file;
2853 	status_t status;
2854 	size_t length;
2855 	char *path;
2856 
2857 	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
2858 		return B_BUFFER_OVERFLOW;
2859 
2860 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
2861 	if (status < B_OK)
2862 		return status;
2863 
2864 	// the path buffer had been clobbered by the above call
2865 	length = strlcpy(pathBuffer, basePath, bufferSize);
2866 	if (pathBuffer[length - 1] != '/')
2867 		pathBuffer[length++] = '/';
2868 
2869 	path = pathBuffer + length;
2870 	bufferSize -= length;
2871 
2872 	while (moduleName) {
2873 		int type;
2874 
2875 		char *nextPath = strchr(moduleName, '/');
2876 		if (nextPath == NULL)
2877 			length = strlen(moduleName);
2878 		else {
2879 			length = nextPath - moduleName;
2880 			nextPath++;
2881 		}
2882 
2883 		if (length + 1 >= bufferSize) {
2884 			status = B_BUFFER_OVERFLOW;
2885 			goto err;
2886 		}
2887 
2888 		memcpy(path, moduleName, length);
2889 		path[length] = '\0';
2890 		moduleName = nextPath;
2891 
2892 		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
2893 		if (status < B_OK) {
2894 			// vnode_path_to_vnode() has already released the reference to dir
2895 			return status;
2896 		}
2897 
2898 		if (S_ISDIR(type)) {
2899 			// goto the next directory
2900 			path[length] = '/';
2901 			path[length + 1] = '\0';
2902 			path += length + 1;
2903 			bufferSize -= length + 1;
2904 
2905 			dir = file;
2906 		} else if (S_ISREG(type)) {
2907 			// it's a file so it should be what we've searched for
2908 			put_vnode(file);
2909 
2910 			return B_OK;
2911 		} else {
2912 			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
2913 			status = B_ERROR;
2914 			dir = file;
2915 			goto err;
2916 		}
2917 	}
2918 
2919 	// if we got here, the moduleName just pointed to a directory, not to
2920 	// a real module - what should we do in this case?
2921 	status = B_ENTRY_NOT_FOUND;
2922 
2923 err:
2924 	put_vnode(dir);
2925 	return status;
2926 }
2927 
2928 
2929 /**	\brief Normalizes a given path.
2930  *
2931  *	The path must refer to an existing or non-existing entry in an existing
2932  *	directory, that is chopping off the leaf component the remaining path must
2933  *	refer to an existing directory.
2934  *
2935  *	The returned will be canonical in that it will be absolute, will not
2936  *	contain any "." or ".." components or duplicate occurrences of '/'s,
2937  *	and none of the directory components will by symbolic links.
2938  *
2939  *	Any two paths referring to the same entry, will result in the same
2940  *	normalized path (well, that is pretty much the definition of `normalized',
2941  *	isn't it :-).
2942  *
2943  *	\param path The path to be normalized.
2944  *	\param buffer The buffer into which the normalized path will be written.
2945  *	\param bufferSize The size of \a buffer.
2946  *	\param kernel \c true, if the IO context of the kernel shall be used,
2947  *		   otherwise that of the team this thread belongs to. Only relevant,
2948  *		   if the path is relative (to get the CWD).
2949  *	\return \c B_OK if everything went fine, another error code otherwise.
2950  */
2951 
2952 status_t
2953 vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
2954 	bool kernel)
2955 {
2956 	if (!path || !buffer || bufferSize < 1)
2957 		return B_BAD_VALUE;
2958 
2959 	TRACE(("vfs_normalize_path(`%s')\n", path));
2960 
2961 	// copy the supplied path to the stack, so it can be modified
2962 	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
2963 	if (mutablePathBuffer.InitCheck() != B_OK)
2964 		return B_NO_MEMORY;
2965 
2966 	char *mutablePath = mutablePathBuffer.LockBuffer();
2967 	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
2968 		return B_NAME_TOO_LONG;
2969 
2970 	// get the dir vnode and the leaf name
2971 	struct vnode *dirNode;
2972 	char leaf[B_FILE_NAME_LENGTH];
2973 	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
2974 	if (error != B_OK) {
2975 		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
2976 		return error;
2977 	}
2978 
2979 	// if the leaf is "." or "..", we directly get the correct directory
2980 	// vnode and ignore the leaf later
2981 	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
2982 	if (isDir)
2983 		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
2984 	if (error != B_OK) {
2985 		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
2986 			strerror(error)));
2987 		return error;
2988 	}
2989 
2990 	// get the directory path
2991 	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
2992 	put_vnode(dirNode);
2993 	if (error < B_OK) {
2994 		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
2995 		return error;
2996 	}
2997 
2998 	// append the leaf name
2999 	if (!isDir) {
3000 		// insert a directory separator only if this is not the file system root
3001 		if ((strcmp(buffer, "/") != 0
3002 			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
3003 			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
3004 			return B_NAME_TOO_LONG;
3005 		}
3006 	}
3007 
3008 	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
3009 	return B_OK;
3010 }
3011 
3012 
3013 extern "C" void
3014 vfs_put_vnode(void *_vnode)
3015 {
3016 	put_vnode((struct vnode *)_vnode);
3017 }
3018 
3019 
3020 extern "C" status_t
3021 vfs_get_cwd(mount_id *_mountID, vnode_id *_vnodeID)
3022 {
3023 	// Get current working directory from io context
3024 	struct io_context *context = get_current_io_context(false);
3025 	status_t status = B_OK;
3026 
3027 	mutex_lock(&context->io_mutex);
3028 
3029 	if (context->cwd != NULL) {
3030 		*_mountID = context->cwd->device;
3031 		*_vnodeID = context->cwd->id;
3032 	} else
3033 		status = B_ERROR;
3034 
3035 	mutex_unlock(&context->io_mutex);
3036 	return status;
3037 }
3038 
3039 
3040 extern "C" status_t
3041 vfs_disconnect_vnode(mount_id mountID, vnode_id vnodeID)
3042 {
3043 	struct vnode *vnode;
3044 
3045 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
3046 	if (status < B_OK)
3047 		return status;
3048 
3049 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3050 	return B_OK;
3051 }
3052 
3053 
3054 extern "C" void
3055 vfs_free_unused_vnodes(int32 level)
3056 {
3057 	vnode_low_memory_handler(NULL, level);
3058 }
3059 
3060 
3061 extern "C" bool
3062 vfs_can_page(void *_vnode, void *cookie)
3063 {
3064 	struct vnode *vnode = (struct vnode *)_vnode;
3065 
3066 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
3067 
3068 	if (FS_CALL(vnode, can_page))
3069 		return FS_CALL(vnode, can_page)(vnode->mount->cookie, vnode->private_node, cookie);
3070 
3071 	return false;
3072 }
3073 
3074 
3075 extern "C" status_t
3076 vfs_read_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
3077 	size_t *_numBytes, bool fsReenter)
3078 {
3079 	struct vnode *vnode = (struct vnode *)_vnode;
3080 
3081 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3082 
3083 	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
3084 		cookie, pos, vecs, count, _numBytes, fsReenter);
3085 }
3086 
3087 
3088 extern "C" status_t
3089 vfs_write_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
3090 	size_t *_numBytes, bool fsReenter)
3091 {
3092 	struct vnode *vnode = (struct vnode *)_vnode;
3093 
3094 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3095 
3096 	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
3097 		cookie, pos, vecs, count, _numBytes, fsReenter);
3098 }
3099 
3100 
3101 /** Gets the vnode's vm_cache object. If it didn't have one, it will be
3102  *	created if \a allocate is \c true.
3103  *	In case it's successful, it will also grab a reference to the cache
3104  *	it returns (and therefore, one from the \a vnode in question as well).
3105  */
3106 
3107 extern "C" status_t
3108 vfs_get_vnode_cache(void *_vnode, vm_cache_ref **_cache, bool allocate)
3109 {
3110 	struct vnode *vnode = (struct vnode *)_vnode;
3111 
3112 	if (vnode->cache != NULL) {
3113 		vm_cache_acquire_ref(vnode->cache);
3114 		*_cache = vnode->cache;
3115 		return B_OK;
3116 	}
3117 
3118 	mutex_lock(&sVnodeMutex);
3119 
3120 	status_t status = B_OK;
3121 
3122 	// The cache could have been created in the meantime
3123 	if (vnode->cache == NULL) {
3124 		if (allocate) {
3125 			// TODO: actually the vnode need to be busy already here, or
3126 			//	else this won't work...
3127 			bool wasBusy = vnode->busy;
3128 			vnode->busy = true;
3129 			mutex_unlock(&sVnodeMutex);
3130 
3131 			status = vm_create_vnode_cache(vnode, &vnode->cache);
3132 
3133 			mutex_lock(&sVnodeMutex);
3134 			vnode->busy = wasBusy;
3135 		} else
3136 			status = B_BAD_VALUE;
3137 	} else
3138 		vm_cache_acquire_ref(vnode->cache);
3139 
3140 	if (status == B_OK)
3141 		*_cache = vnode->cache;
3142 
3143 	mutex_unlock(&sVnodeMutex);
3144 	return status;
3145 }
3146 
3147 
3148 status_t
3149 vfs_get_file_map(void *_vnode, off_t offset, size_t size, file_io_vec *vecs, size_t *_count)
3150 {
3151 	struct vnode *vnode = (struct vnode *)_vnode;
3152 
3153 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
3154 
3155 	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie, vnode->private_node, offset, size, vecs, _count);
3156 }
3157 
3158 
3159 status_t
3160 vfs_stat_vnode(void *_vnode, struct stat *stat)
3161 {
3162 	struct vnode *vnode = (struct vnode *)_vnode;
3163 
3164 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3165 		vnode->private_node, stat);
3166 
3167 	// fill in the st_dev and st_ino fields
3168 	if (status == B_OK) {
3169 		stat->st_dev = vnode->device;
3170 		stat->st_ino = vnode->id;
3171 	}
3172 
3173 	return status;
3174 }
3175 
3176 
3177 status_t
3178 vfs_get_vnode_name(void *_vnode, char *name, size_t nameSize)
3179 {
3180 	return get_vnode_name((struct vnode *)_vnode, NULL, name, nameSize);
3181 }
3182 
3183 
3184 /**	If the given descriptor locked its vnode, that lock will be released.
3185  */
3186 
3187 void
3188 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3189 {
3190 	struct vnode *vnode = fd_vnode(descriptor);
3191 
3192 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3193 		vnode->mandatory_locked_by = NULL;
3194 }
3195 
3196 
3197 /**	Closes all file descriptors of the specified I/O context that
3198  *	don't have the O_CLOEXEC flag set.
3199  */
3200 
3201 void
3202 vfs_exec_io_context(void *_context)
3203 {
3204 	struct io_context *context = (struct io_context *)_context;
3205 	uint32 i;
3206 
3207 	for (i = 0; i < context->table_size; i++) {
3208 		mutex_lock(&context->io_mutex);
3209 
3210 		struct file_descriptor *descriptor = context->fds[i];
3211 		bool remove = false;
3212 
3213 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
3214 			context->fds[i] = NULL;
3215 			context->num_used_fds--;
3216 
3217 			remove = true;
3218 		}
3219 
3220 		mutex_unlock(&context->io_mutex);
3221 
3222 		if (remove) {
3223 			close_fd(descriptor);
3224 			put_fd(descriptor);
3225 		}
3226 	}
3227 }
3228 
3229 
3230 /** Sets up a new io_control structure, and inherits the properties
3231  *	of the parent io_control if it is given.
3232  */
3233 
3234 void *
3235 vfs_new_io_context(void *_parentContext)
3236 {
3237 	size_t tableSize;
3238 	struct io_context *context;
3239 	struct io_context *parentContext;
3240 
3241 	context = (io_context *)malloc(sizeof(struct io_context));
3242 	if (context == NULL)
3243 		return NULL;
3244 
3245 	memset(context, 0, sizeof(struct io_context));
3246 
3247 	parentContext = (struct io_context *)_parentContext;
3248 	if (parentContext)
3249 		tableSize = parentContext->table_size;
3250 	else
3251 		tableSize = DEFAULT_FD_TABLE_SIZE;
3252 
3253 	// allocate space for FDs and their close-on-exec flag
3254 	context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize
3255 		+ (tableSize + 7) / 8);
3256 	if (context->fds == NULL) {
3257 		free(context);
3258 		return NULL;
3259 	}
3260 
3261 	memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize
3262 		+ (tableSize + 7) / 8);
3263 	context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);
3264 
3265 	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
3266 		free(context->fds);
3267 		free(context);
3268 		return NULL;
3269 	}
3270 
3271 	// Copy all parent files which don't have the O_CLOEXEC flag set
3272 
3273 	if (parentContext) {
3274 		size_t i;
3275 
3276 		mutex_lock(&parentContext->io_mutex);
3277 
3278 		context->cwd = parentContext->cwd;
3279 		if (context->cwd)
3280 			inc_vnode_ref_count(context->cwd);
3281 
3282 		for (i = 0; i < tableSize; i++) {
3283 			struct file_descriptor *descriptor = parentContext->fds[i];
3284 
3285 			if (descriptor != NULL && !fd_close_on_exec(parentContext, i)) {
3286 				context->fds[i] = descriptor;
3287 				context->num_used_fds++;
3288 				atomic_add(&descriptor->ref_count, 1);
3289 				atomic_add(&descriptor->open_count, 1);
3290 			}
3291 		}
3292 
3293 		mutex_unlock(&parentContext->io_mutex);
3294 	} else {
3295 		context->cwd = sRoot;
3296 
3297 		if (context->cwd)
3298 			inc_vnode_ref_count(context->cwd);
3299 	}
3300 
3301 	context->table_size = tableSize;
3302 
3303 	list_init(&context->node_monitors);
3304 	context->max_monitors = DEFAULT_NODE_MONITORS;
3305 
3306 	return context;
3307 }
3308 
3309 
3310 status_t
3311 vfs_free_io_context(void *_ioContext)
3312 {
3313 	struct io_context *context = (struct io_context *)_ioContext;
3314 	uint32 i;
3315 
3316 	if (context->cwd)
3317 		dec_vnode_ref_count(context->cwd, false);
3318 
3319 	mutex_lock(&context->io_mutex);
3320 
3321 	for (i = 0; i < context->table_size; i++) {
3322 		if (struct file_descriptor *descriptor = context->fds[i]) {
3323 			close_fd(descriptor);
3324 			put_fd(descriptor);
3325 		}
3326 	}
3327 
3328 	mutex_destroy(&context->io_mutex);
3329 
3330 	remove_node_monitors(context);
3331 	free(context->fds);
3332 	free(context);
3333 
3334 	return B_OK;
3335 }
3336 
3337 
3338 static status_t
3339 vfs_resize_fd_table(struct io_context *context, const int newSize)
3340 {
3341 	struct file_descriptor **fds;
3342 	int	status = B_OK;
3343 
3344 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3345 		return EINVAL;
3346 
3347 	mutex_lock(&context->io_mutex);
3348 
3349 	int oldSize = context->table_size;
3350 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
3351 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
3352 
3353 	if (newSize < oldSize) {
3354 		// shrink the fd table
3355 
3356 		// Make sure none of the fds being dropped are in use
3357 		for (int i = oldSize; i-- > newSize;) {
3358 			if (context->fds[i]) {
3359 				status = EBUSY;
3360 				goto out;
3361 			}
3362 		}
3363 
3364 		fds = (struct file_descriptor **)malloc(
3365 			sizeof(struct file_descriptor *) * newSize
3366 			+ newCloseOnExitBitmapSize);
3367 		if (fds == NULL) {
3368 			status = ENOMEM;
3369 			goto out;
3370 		}
3371 
3372 		memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize);
3373 
3374 		// copy close-on-exit bitmap
3375 		memcpy(fds + newSize, context->fds + oldSize, newCloseOnExitBitmapSize);
3376 	} else {
3377 		// enlarge the fd table
3378 
3379 		fds = (struct file_descriptor **)malloc(
3380 			sizeof(struct file_descriptor *) * newSize
3381 			+ newCloseOnExitBitmapSize);
3382 		if (fds == NULL) {
3383 			status = ENOMEM;
3384 			goto out;
3385 		}
3386 
3387 		// copy the fd array, and zero the additional slots
3388 		memcpy(fds, context->fds, sizeof(void *) * oldSize);
3389 		memset(fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
3390 
3391 		// copy close-on-exit bitmap, and zero out additional bytes
3392 		memcpy(fds + newSize, context->fds + oldSize, oldCloseOnExitBitmapSize);
3393 		memset((uint8*)(fds + newSize) + oldCloseOnExitBitmapSize, 0,
3394 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
3395 	}
3396 
3397 	free(context->fds);
3398 	context->fds = fds;
3399 	context->fds_close_on_exec = (uint8 *)(context->fds + newSize);
3400 	context->table_size = newSize;
3401 
3402 out:
3403 	mutex_unlock(&context->io_mutex);
3404 	return status;
3405 }
3406 
3407 
3408 static status_t
3409 vfs_resize_monitor_table(struct io_context *context, const int newSize)
3410 {
3411 	void *fds;
3412 	int	status = B_OK;
3413 
3414 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3415 		return EINVAL;
3416 
3417 	mutex_lock(&context->io_mutex);
3418 
3419 	if ((size_t)newSize < context->num_monitors) {
3420 		status = EBUSY;
3421 		goto out;
3422 	}
3423 	context->max_monitors = newSize;
3424 
3425 out:
3426 	mutex_unlock(&context->io_mutex);
3427 	return status;
3428 }
3429 
3430 
3431 int
3432 vfs_getrlimit(int resource, struct rlimit * rlp)
3433 {
3434 	if (!rlp)
3435 		return B_BAD_ADDRESS;
3436 
3437 	switch (resource) {
3438 		case RLIMIT_NOFILE:
3439 		{
3440 			struct io_context *ioctx = get_current_io_context(false);
3441 
3442 			mutex_lock(&ioctx->io_mutex);
3443 
3444 			rlp->rlim_cur = ioctx->table_size;
3445 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3446 
3447 			mutex_unlock(&ioctx->io_mutex);
3448 
3449 			return 0;
3450 		}
3451 
3452 		case RLIMIT_NOVMON:
3453 		{
3454 			struct io_context *ioctx = get_current_io_context(false);
3455 
3456 			mutex_lock(&ioctx->io_mutex);
3457 
3458 			rlp->rlim_cur = ioctx->max_monitors;
3459 			rlp->rlim_max = MAX_NODE_MONITORS;
3460 
3461 			mutex_unlock(&ioctx->io_mutex);
3462 
3463 			return 0;
3464 		}
3465 
3466 		default:
3467 			return EINVAL;
3468 	}
3469 }
3470 
3471 
3472 int
3473 vfs_setrlimit(int resource, const struct rlimit * rlp)
3474 {
3475 	if (!rlp)
3476 		return B_BAD_ADDRESS;
3477 
3478 	switch (resource) {
3479 		case RLIMIT_NOFILE:
3480 			/* TODO: check getuid() */
3481 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3482 			    rlp->rlim_max != MAX_FD_TABLE_SIZE)
3483 				return EPERM;
3484 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3485 
3486 		case RLIMIT_NOVMON:
3487 			/* TODO: check getuid() */
3488 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3489 			    rlp->rlim_max != MAX_NODE_MONITORS)
3490 				return EPERM;
3491 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
3492 
3493 		default:
3494 			return EINVAL;
3495 	}
3496 }
3497 
3498 
3499 status_t
3500 vfs_init(kernel_args *args)
3501 {
3502 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
3503 		&vnode_compare, &vnode_hash);
3504 	if (sVnodeTable == NULL)
3505 		panic("vfs_init: error creating vnode hash table\n");
3506 
3507 	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));
3508 
3509 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
3510 		&mount_compare, &mount_hash);
3511 	if (sMountsTable == NULL)
3512 		panic("vfs_init: error creating mounts hash table\n");
3513 
3514 	node_monitor_init();
3515 
3516 	sRoot = NULL;
3517 
3518 	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
3519 		panic("vfs_init: error allocating file systems lock\n");
3520 
3521 	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
3522 		panic("vfs_init: error allocating mount op lock\n");
3523 
3524 	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
3525 		panic("vfs_init: error allocating mount lock\n");
3526 
3527 	if (mutex_init(&sVnodeCoveredByMutex, "vfs_vnode_covered_by_lock") < 0)
3528 		panic("vfs_init: error allocating vnode::covered_by lock\n");
3529 
3530 	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
3531 		panic("vfs_init: error allocating vnode lock\n");
3532 
3533 	if (block_cache_init() != B_OK)
3534 		return B_ERROR;
3535 
3536 #ifdef ADD_DEBUGGER_COMMANDS
3537 	// add some debugger commands
3538 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
3539 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
3540 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
3541 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
3542 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
3543 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
3544 	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
3545 #endif
3546 
3547 	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);
3548 
3549 	return file_cache_init();
3550 }
3551 
3552 
3553 //	#pragma mark -
3554 //	The filetype-dependent implementations (fd_ops + open/create/rename/remove, ...)
3555 
3556 
3557 /** Calls fs_open() on the given vnode and returns a new
3558  *	file descriptor for it
3559  */
3560 
3561 static int
3562 create_vnode(struct vnode *directory, const char *name, int openMode, int perms, bool kernel)
3563 {
3564 	struct vnode *vnode;
3565 	fs_cookie cookie;
3566 	vnode_id newID;
3567 	int status;
3568 
3569 	if (FS_CALL(directory, create) == NULL)
3570 		return EROFS;
3571 
3572 	status = FS_CALL(directory, create)(directory->mount->cookie, directory->private_node, name, openMode, perms, &cookie, &newID);
3573 	if (status < B_OK)
3574 		return status;
3575 
3576 	mutex_lock(&sVnodeMutex);
3577 	vnode = lookup_vnode(directory->device, newID);
3578 	mutex_unlock(&sVnodeMutex);
3579 
3580 	if (vnode == NULL) {
3581 		dprintf("vfs: fs_create() returned success but there is no vnode!");
3582 		return EINVAL;
3583 	}
3584 
3585 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
3586 		return status;
3587 
3588 	// something went wrong, clean up
3589 
3590 	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3591 	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3592 	put_vnode(vnode);
3593 
3594 	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);
3595 
3596 	return status;
3597 }
3598 
3599 
3600 /** Calls fs_open() on the given vnode and returns a new
3601  *	file descriptor for it
3602  */
3603 
3604 static int
3605 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3606 {
3607 	fs_cookie cookie;
3608 	int status;
3609 
3610 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3611 	if (status < 0)
3612 		return status;
3613 
3614 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3615 	if (status < 0) {
3616 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3617 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3618 	}
3619 	return status;
3620 }
3621 
3622 
3623 /** Calls fs open_dir() on the given vnode and returns a new
3624  *	file descriptor for it
3625  */
3626 
3627 static int
3628 open_dir_vnode(struct vnode *vnode, bool kernel)
3629 {
3630 	fs_cookie cookie;
3631 	int status;
3632 
3633 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3634 	if (status < B_OK)
3635 		return status;
3636 
3637 	// file is opened, create a fd
3638 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3639 	if (status >= 0)
3640 		return status;
3641 
3642 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3643 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3644 
3645 	return status;
3646 }
3647 
3648 
3649 /** Calls fs open_attr_dir() on the given vnode and returns a new
3650  *	file descriptor for it.
3651  *	Used by attr_dir_open(), and attr_dir_open_fd().
3652  */
3653 
3654 static int
3655 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3656 {
3657 	fs_cookie cookie;
3658 	int status;
3659 
3660 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3661 		return EOPNOTSUPP;
3662 
3663 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3664 	if (status < 0)
3665 		return status;
3666 
3667 	// file is opened, create a fd
3668 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3669 	if (status >= 0)
3670 		return status;
3671 
3672 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3673 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3674 
3675 	return status;
3676 }
3677 
3678 
3679 static int
3680 file_create_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, int perms, bool kernel)
3681 {
3682 	struct vnode *directory;
3683 	int status;
3684 
3685 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
3686 
3687 	// get directory to put the new file in
3688 	status = get_vnode(mountID, directoryID, &directory, false);
3689 	if (status < B_OK)
3690 		return status;
3691 
3692 	status = create_vnode(directory, name, openMode, perms, kernel);
3693 	put_vnode(directory);
3694 
3695 	return status;
3696 }
3697 
3698 
3699 static int
3700 file_create(int fd, char *path, int openMode, int perms, bool kernel)
3701 {
3702 	char name[B_FILE_NAME_LENGTH];
3703 	struct vnode *directory;
3704 	int status;
3705 
3706 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
3707 
3708 	// get directory to put the new file in
3709 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3710 	if (status < 0)
3711 		return status;
3712 
3713 	status = create_vnode(directory, name, openMode, perms, kernel);
3714 
3715 	put_vnode(directory);
3716 	return status;
3717 }
3718 
3719 
3720 static int
3721 file_open_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, bool kernel)
3722 {
3723 	struct vnode *vnode;
3724 	int status;
3725 
3726 	if (name == NULL || *name == '\0')
3727 		return B_BAD_VALUE;
3728 
3729 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
3730 		mountID, directoryID, name, openMode));
3731 
3732 	// get the vnode matching the entry_ref
3733 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
3734 	if (status < B_OK)
3735 		return status;
3736 
3737 	status = open_vnode(vnode, openMode, kernel);
3738 	if (status < B_OK)
3739 		put_vnode(vnode);
3740 
3741 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
3742 	return status;
3743 }
3744 
3745 
3746 static int
3747 file_open(int fd, char *path, int openMode, bool kernel)
3748 {
3749 	int status = B_OK;
3750 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
3751 
3752 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
3753 		fd, path, openMode, kernel));
3754 
3755 	// get the vnode matching the vnode + path combination
3756 	struct vnode *vnode = NULL;
3757 	vnode_id parentID;
3758 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
3759 	if (status != B_OK)
3760 		return status;
3761 
3762 	// open the vnode
3763 	status = open_vnode(vnode, openMode, kernel);
3764 	// put only on error -- otherwise our reference was transferred to the FD
3765 	if (status < B_OK)
3766 		put_vnode(vnode);
3767 
3768 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
3769 		vnode->device, parentID, vnode->id, NULL);
3770 
3771 	return status;
3772 }
3773 
3774 
3775 static status_t
3776 file_close(struct file_descriptor *descriptor)
3777 {
3778 	struct vnode *vnode = descriptor->u.vnode;
3779 	status_t status = B_OK;
3780 
3781 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
3782 
3783 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
3784 	if (FS_CALL(vnode, close))
3785 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3786 
3787 	if (status == B_OK) {
3788 		// remove all outstanding locks for this team
3789 		release_advisory_lock(vnode, NULL);
3790 	}
3791 	return status;
3792 }
3793 
3794 
3795 static void
3796 file_free_fd(struct file_descriptor *descriptor)
3797 {
3798 	struct vnode *vnode = descriptor->u.vnode;
3799 
3800 	if (vnode != NULL) {
3801 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3802 		put_vnode(vnode);
3803 	}
3804 }
3805 
3806 
3807 static status_t
3808 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
3809 {
3810 	struct vnode *vnode = descriptor->u.vnode;
3811 
3812 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
3813 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3814 }
3815 
3816 
3817 static status_t
3818 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
3819 {
3820 	struct vnode *vnode = descriptor->u.vnode;
3821 
3822 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
3823 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3824 }
3825 
3826 
3827 static off_t
3828 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
3829 {
3830 	off_t offset;
3831 
3832 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
3833 	// ToDo: seek should fail for pipes and FIFOs...
3834 
3835 	switch (seekType) {
3836 		case SEEK_SET:
3837 			offset = 0;
3838 			break;
3839 		case SEEK_CUR:
3840 			offset = descriptor->pos;
3841 			break;
3842 		case SEEK_END:
3843 		{
3844 			struct vnode *vnode = descriptor->u.vnode;
3845 			struct stat stat;
3846 			status_t status;
3847 
3848 			if (FS_CALL(vnode, read_stat) == NULL)
3849 				return EOPNOTSUPP;
3850 
3851 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
3852 			if (status < B_OK)
3853 				return status;
3854 
3855 			offset = stat.st_size;
3856 			break;
3857 		}
3858 		default:
3859 			return B_BAD_VALUE;
3860 	}
3861 
3862 	// assumes off_t is 64 bits wide
3863 	if (offset > 0 && LONGLONG_MAX - offset < pos)
3864 		return EOVERFLOW;
3865 
3866 	pos += offset;
3867 	if (pos < 0)
3868 		return B_BAD_VALUE;
3869 
3870 	return descriptor->pos = pos;
3871 }
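
// The overflow guard above depends on off_t being a signed 64-bit type:
// for example, with offset == LONGLONG_MAX and pos == 1 the naive addition
// would wrap negative, so the combination is rejected with EOVERFLOW before
// the add. As a standalone sketch (a hypothetical helper, not part of this
// file):
//
//	static inline bool
//	seek_would_overflow(off_t offset, off_t pos)
//	{
//		return offset > 0 && LONGLONG_MAX - offset < pos;
//	}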
3872 
3873 
3874 static status_t
3875 file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
3876 	struct select_sync *sync)
3877 {
3878 	FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
3879 
3880 	struct vnode *vnode = descriptor->u.vnode;
3881 
3882 	// If the FS has no select() hook, notify select() now.
3883 	if (FS_CALL(vnode, select) == NULL)
3884 		return notify_select_event((selectsync*)sync, ref, event);
3885 
3886 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
3887 		descriptor->cookie, event, ref, (selectsync*)sync);
3888 }
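
// Falling back to notify_select_event() when the FS has no select() hook
// reports the event as ready immediately. For regular files that is the
// conservative POSIX answer (they never block for select purposes), and it
// keeps callers from waiting forever on descriptors the FS cannot monitor.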
3889 
3890 
3891 static status_t
3892 file_deselect(struct file_descriptor *descriptor, uint8 event,
3893 	struct select_sync *sync)
3894 {
3895 	struct vnode *vnode = descriptor->u.vnode;
3896 
3897 	if (FS_CALL(vnode, deselect) == NULL)
3898 		return B_OK;
3899 
3900 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
3901 		descriptor->cookie, event, (selectsync*)sync);
3902 }
3903 
3904 
3905 static status_t
3906 dir_create_entry_ref(mount_id mountID, vnode_id parentID, const char *name, int perms, bool kernel)
3907 {
3908 	struct vnode *vnode;
3909 	vnode_id newID;
3910 	status_t status;
3911 
3912 	if (name == NULL || *name == '\0')
3913 		return B_BAD_VALUE;
3914 
3915 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
3916 
3917 	status = get_vnode(mountID, parentID, &vnode, kernel);
3918 	if (status < B_OK)
3919 		return status;
3920 
3921 	if (FS_CALL(vnode, create_dir))
3922 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
3923 	else
3924 		status = EROFS;
3925 
3926 	put_vnode(vnode);
3927 	return status;
3928 }
3929 
3930 
3931 static status_t
3932 dir_create(int fd, char *path, int perms, bool kernel)
3933 {
3934 	char filename[B_FILE_NAME_LENGTH];
3935 	struct vnode *vnode;
3936 	vnode_id newID;
3937 	status_t status;
3938 
3939 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
3940 
3941 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3942 	if (status < 0)
3943 		return status;
3944 
3945 	if (FS_CALL(vnode, create_dir))
3946 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
3947 	else
3948 		status = EROFS;
3949 
3950 	put_vnode(vnode);
3951 	return status;
3952 }
3953 
3954 
3955 static int
3956 dir_open_entry_ref(mount_id mountID, vnode_id parentID, const char *name, bool kernel)
3957 {
3958 	struct vnode *vnode;
3959 	int status;
3960 
3961 	FUNCTION(("dir_open_entry_ref()\n"));
3962 
3963 	if (name && *name == '\0')
3964 		return B_BAD_VALUE;
3965 
3966 	// get the vnode matching the entry_ref/node_ref
3967 	if (name)
3968 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
3969 	else
3970 		status = get_vnode(mountID, parentID, &vnode, false);
3971 	if (status < B_OK)
3972 		return status;
3973 
3974 	status = open_dir_vnode(vnode, kernel);
3975 	if (status < B_OK) {
3976 		put_vnode(vnode);
		return status;
	}
3977 
3978 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
3979 	return status;
3980 }
3981 
3982 
3983 static int
3984 dir_open(int fd, char *path, bool kernel)
3985 {
3986 	int status = B_OK;
3987 
3988 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
3989 
3990 	// get the vnode matching the vnode + path combination
3991 	struct vnode *vnode = NULL;
3992 	vnode_id parentID;
3993 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
3994 	if (status != B_OK)
3995 		return status;
3996 
3997 	// open the dir
3998 	status = open_dir_vnode(vnode, kernel);
3999 	if (status < B_OK) {
4000 		put_vnode(vnode);
		return status;
	}
4001 
4002 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
4003 	return status;
4004 }
4005 
4006 
4007 static status_t
4008 dir_close(struct file_descriptor *descriptor)
4009 {
4010 	struct vnode *vnode = descriptor->u.vnode;
4011 
4012 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
4013 
4014 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
4015 	if (FS_CALL(vnode, close_dir))
4016 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4017 
4018 	return B_OK;
4019 }
4020 
4021 
4022 static void
4023 dir_free_fd(struct file_descriptor *descriptor)
4024 {
4025 	struct vnode *vnode = descriptor->u.vnode;
4026 
4027 	if (vnode != NULL) {
4028 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4029 		put_vnode(vnode);
4030 	}
4031 }
4032 
4033 
4034 static status_t
4035 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4036 {
4037 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
4038 }
4039 
4040 
4041 static void
4042 fix_dirent(struct vnode *parent, struct dirent *entry)
4043 {
4044 	// set d_pdev and d_pino
4045 	entry->d_pdev = parent->device;
4046 	entry->d_pino = parent->id;
4047 
4048 	// If this is the ".." entry and the directory is the root of a FS,
4049 	// we need to replace d_dev and d_ino with the actual values.
4050 	if (strcmp(entry->d_name, "..") == 0
4051 		&& parent->mount->root_vnode == parent
4052 		&& parent->mount->covers_vnode) {
4053 		inc_vnode_ref_count(parent);
4054 			// vnode_path_to_vnode() puts the node
4055 
4056 		struct vnode *vnode;
4057 		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
4058 			NULL, NULL);
4059 
4060 		if (status == B_OK) {
4061 			entry->d_dev = vnode->device;
4062 			entry->d_ino = vnode->id;
4063 		}
4064 	} else {
4065 		// resolve mount points
4066 		struct vnode *vnode = NULL;
4067 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, false);
4068 		if (status != B_OK)
4069 			return;
4070 
4071 		mutex_lock(&sVnodeCoveredByMutex);
4072 		if (vnode->covered_by) {
4073 			entry->d_dev = vnode->covered_by->device;
4074 			entry->d_ino = vnode->covered_by->id;
4075 		}
4076 		mutex_unlock(&sVnodeCoveredByMutex);
4077 
4078 		put_vnode(vnode);
4079 	}
4080 }
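
// To illustrate fix_dirent() (paths and values are made up): if a volume is
// mounted at /mnt, reading ".." from /mnt must not yield the mounted FS
// root's own (d_dev, d_ino) but those of /mnt's parent directory on the
// underlying volume -- that is what the vnode_path_to_vnode(parent, "..",
// ...) lookup resolves. In the other direction, an entry that is itself
// covered by a mount has its (d_dev, d_ino) replaced by the IDs of the
// covering file system's root, so readdir() and stat() agree.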
4081 
4082 
4083 static status_t
4084 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4085 {
4086 	if (!FS_CALL(vnode, read_dir))
4087 		return EOPNOTSUPP;
4088 
4089 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie, vnode->private_node, cookie, buffer, bufferSize, _count);
4090 	if (error != B_OK)
4091 		return error;
4092 
4093 	// we need to adjust the read dirents
4094 	if (*_count > 0) {
4095 		// XXX: Currently reading only one dirent is supported. Make this a loop!
4096 		fix_dirent(vnode, buffer);
4097 	}
4098 
4099 	return error;
4100 }
4101 
4102 
4103 static status_t
4104 dir_rewind(struct file_descriptor *descriptor)
4105 {
4106 	struct vnode *vnode = descriptor->u.vnode;
4107 
4108 	if (FS_CALL(vnode, rewind_dir))
4109 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4110 
4111 	return EOPNOTSUPP;
4112 }
4113 
4114 
4115 static status_t
4116 dir_remove(int fd, char *path, bool kernel)
4117 {
4118 	char name[B_FILE_NAME_LENGTH];
4119 	struct vnode *directory;
4120 	status_t status;
4121 
4122 	if (path != NULL) {
4123 		// we need to make sure our path name doesn't end in "/", ".", or ".."
4124 		char *lastSlash = strrchr(path, '/');
4125 		if (lastSlash != NULL) {
4126 			char *leaf = lastSlash + 1;
4127 			if (!strcmp(leaf, ".."))
4128 				return B_NOT_ALLOWED;
4129 
4130 			// omit multiple slashes
4131 			while (lastSlash > path && lastSlash[-1] == '/') {
4132 				lastSlash--;
4133 			}
4134 
4135 			if (!leaf[0]
4136 				|| !strcmp(leaf, ".")) {
4137 				// "name/" -> "name", or "name/." -> "name"
4138 				lastSlash[0] = '\0';
4139 			}
4140 		} else if (!strcmp(path, ".."))
4141 			return B_NOT_ALLOWED;
4142 	}
4143 
4144 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4145 	if (status < B_OK)
4146 		return status;
4147 
4148 	if (FS_CALL(directory, remove_dir)) {
4149 		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
4150 			directory->private_node, name);
4151 	} else
4152 		status = EROFS;
4153 
4154 	put_vnode(directory);
4155 	return status;
4156 }
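
// Examples of the leaf normalization above (illustrative): "foo/bar/" and
// "foo/bar/." are both rewritten to "foo/bar" before the directory vnode
// lookup, while "foo/.." and a plain ".." are rejected with B_NOT_ALLOWED,
// since removing a directory through its own ".." entry is not meaningful.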
4157 
4158 
4159 static status_t
4160 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
4161 {
4162 	struct vnode *vnode = descriptor->u.vnode;
4163 
4164 	if (FS_CALL(vnode, ioctl)) {
4165 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4166 			descriptor->cookie, op, buffer, length);
4167 	}
4168 
4169 	return EOPNOTSUPP;
4170 }
4171 
4172 
4173 static status_t
4174 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4175 {
4176 	struct file_descriptor *descriptor;
4177 	struct vnode *vnode;
4178 	struct flock flock;
4179 	status_t status;
4180 
4181 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4182 		fd, op, argument, kernel ? "kernel" : "user"));
4183 
4184 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4185 	if (descriptor == NULL)
4186 		return B_FILE_ERROR;
4187 
4188 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4189 		if (descriptor->type != FDTYPE_FILE) {
			// don't leak our descriptor reference on the early returns
			put_fd(descriptor);
4190 			return B_BAD_VALUE;
		}
4191 		if (user_memcpy(&flock, (struct flock *)argument, sizeof(struct flock)) < B_OK) {
			put_fd(descriptor);
4192 			return B_BAD_ADDRESS;
		}
4193 	}
4194 
4195 	switch (op) {
4196 		case F_SETFD:
4197 		{
4198 			struct io_context *context = get_current_io_context(kernel);
4199 			// Set file descriptor flags
4200 
4201 			// FD_CLOEXEC is the only flag available at this time
4202 			mutex_lock(&context->io_mutex);
4203 			fd_set_close_on_exec(context, fd, argument == FD_CLOEXEC);
4204 			mutex_unlock(&context->io_mutex);
4205 
4206 			status = B_OK;
4207 			break;
4208 		}
4209 
4210 		case F_GETFD:
4211 		{
4212 			struct io_context *context = get_current_io_context(kernel);
4213 
4214 			// Get file descriptor flags
4215 			mutex_lock(&context->io_mutex);
4216 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4217 			mutex_unlock(&context->io_mutex);
4218 			break;
4219 		}
4220 
4221 		case F_SETFL:
4222 			// Set file descriptor open mode
4223 			if (FS_CALL(vnode, set_flags)) {
4224 				// we only accept changes to O_APPEND and O_NONBLOCK
4225 				argument &= O_APPEND | O_NONBLOCK;
4226 
4227 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4228 					vnode->private_node, descriptor->cookie, (int)argument);
4229 				if (status == B_OK) {
4230 					// update this descriptor's open_mode field
4231 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK))
4232 						| argument;
4233 				}
4234 			} else
4235 				status = EOPNOTSUPP;
4236 			break;
4237 
4238 		case F_GETFL:
4239 			// Get file descriptor open mode
4240 			status = descriptor->open_mode;
4241 			break;
4242 
4243 		case F_DUPFD:
4244 		{
4245 			struct io_context *context = get_current_io_context(kernel);
4246 
4247 			status = new_fd_etc(context, descriptor, (int)argument);
4248 			if (status >= 0) {
4249 				mutex_lock(&context->io_mutex);
4250 				fd_set_close_on_exec(context, fd, false);
4251 				mutex_unlock(&context->io_mutex);
4252 
4253 				atomic_add(&descriptor->ref_count, 1);
4254 			}
4255 			break;
4256 		}
4257 
4258 		case F_GETLK:
4259 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4260 			if (status == B_OK) {
4261 				// copy back flock structure
4262 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
4263 			}
4264 			break;
4265 
4266 		case F_SETLK:
4267 		case F_SETLKW:
4268 			status = normalize_flock(descriptor, &flock);
4269 			if (status < B_OK)
4270 				break;
4271 
4272 			if (flock.l_type == F_UNLCK)
4273 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4274 			else {
4275 				// the open mode must match the lock type
4276 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK)
4277 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK))
4278 					status = B_FILE_ERROR;
4279 				else
4280 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
4281 			}
4282 			break;
4283 
4284 		// ToDo: add support for more ops?
4285 
4286 		default:
4287 			status = B_BAD_VALUE;
4288 	}
4289 
4290 	put_fd(descriptor);
4291 	return status;
4292 }
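
// A minimal userland sketch of the F_SETLK path handled above (using the
// standard POSIX fcntl() wrapper; illustrative, not code from this file):
//
//	struct flock lock;
//	lock.l_type = F_WRLCK;		// must match the FD's open mode
//	lock.l_whence = SEEK_SET;	// resolved by normalize_flock()
//	lock.l_start = 0;
//	lock.l_len = 0;			// 0 extends the lock to the end of file
//	if (fcntl(fd, F_SETLK, &lock) < 0)
//		;	// lock held elsewhere, or open mode and lock type mismatch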
4293 
4294 
4295 static status_t
4296 common_sync(int fd, bool kernel)
4297 {
4298 	struct file_descriptor *descriptor;
4299 	struct vnode *vnode;
4300 	status_t status;
4301 
4302 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
4303 
4304 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4305 	if (descriptor == NULL)
4306 		return B_FILE_ERROR;
4307 
4308 	if (FS_CALL(vnode, fsync) != NULL)
4309 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4310 	else
4311 		status = EOPNOTSUPP;
4312 
4313 	put_fd(descriptor);
4314 	return status;
4315 }
4316 
4317 
4318 static status_t
4319 common_lock_node(int fd, bool kernel)
4320 {
4321 	struct file_descriptor *descriptor;
4322 	struct vnode *vnode;
4323 
4324 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4325 	if (descriptor == NULL)
4326 		return B_FILE_ERROR;
4327 
4328 	status_t status = B_OK;
4329 
4330 	// We need to set the locking atomically - someone
4331 	// else might set one at the same time
4332 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
4333 			(addr_t)descriptor, NULL) != NULL)
4334 		status = B_BUSY;
4335 
4336 	put_fd(descriptor);
4337 	return status;
4338 }
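
// atomic_test_and_set(value, newValue, testAgainst) stores newValue only if
// *value still equals testAgainst, and always returns the previous contents.
// The call above therefore succeeds exactly when no descriptor held the
// mandatory lock before (previous value NULL); any non-NULL return means
// another descriptor won the race, hence B_BUSY.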
4339 
4340 
4341 static status_t
4342 common_unlock_node(int fd, bool kernel)
4343 {
4344 	struct file_descriptor *descriptor;
4345 	struct vnode *vnode;
4346 
4347 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4348 	if (descriptor == NULL)
4349 		return B_FILE_ERROR;
4350 
4351 	status_t status = B_OK;
4352 
4353 	// We need to clear the lock atomically - someone
4354 	// else might modify it at the same time
4355 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
4356 			NULL, (addr_t)descriptor) != (int32)descriptor)
4357 		status = B_BAD_VALUE;
4358 
4359 	put_fd(descriptor);
4360 	return status;
4361 }
4362 
4363 
4364 static status_t
4365 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4366 	bool kernel)
4367 {
4368 	struct vnode *vnode;
4369 	status_t status;
4370 
4371 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4372 	if (status < B_OK)
4373 		return status;
4374 
4375 	if (FS_CALL(vnode, read_symlink) != NULL) {
4376 		status = FS_CALL(vnode, read_symlink)(vnode->mount->cookie,
4377 			vnode->private_node, buffer, _bufferSize);
4378 	} else
4379 		status = B_BAD_VALUE;
4380 
4381 	put_vnode(vnode);
4382 	return status;
4383 }
4384 
4385 
4386 static status_t
4387 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4388 	bool kernel)
4389 {
4390 	// path validity checks have to be in the calling function!
4391 	char name[B_FILE_NAME_LENGTH];
4392 	struct vnode *vnode;
4393 	status_t status;
4394 
4395 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4396 
4397 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4398 	if (status < B_OK)
4399 		return status;
4400 
4401 	if (FS_CALL(vnode, create_symlink) != NULL)
4402 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4403 	else
4404 		status = EROFS;
4405 
4406 	put_vnode(vnode);
4407 
4408 	return status;
4409 }
4410 
4411 
4412 static status_t
4413 common_create_link(char *path, char *toPath, bool kernel)
4414 {
4415 	// path validity checks have to be in the calling function!
4416 	char name[B_FILE_NAME_LENGTH];
4417 	struct vnode *directory, *vnode;
4418 	status_t status;
4419 
4420 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4421 
4422 	status = path_to_dir_vnode(path, &directory, name, kernel);
4423 	if (status < B_OK)
4424 		return status;
4425 
4426 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4427 	if (status < B_OK)
4428 		goto err;
4429 
4430 	if (directory->mount != vnode->mount) {
4431 		status = B_CROSS_DEVICE_LINK;
4432 		goto err1;
4433 	}
4434 
4435 	if (FS_CALL(vnode, link) != NULL)
4436 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4437 	else
4438 		status = EROFS;
4439 
4440 err1:
4441 	put_vnode(vnode);
4442 err:
4443 	put_vnode(directory);
4444 
4445 	return status;
4446 }
4447 
4448 
4449 static status_t
4450 common_unlink(int fd, char *path, bool kernel)
4451 {
4452 	char filename[B_FILE_NAME_LENGTH];
4453 	struct vnode *vnode;
4454 	status_t status;
4455 
4456 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4457 
4458 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4459 	if (status < 0)
4460 		return status;
4461 
4462 	if (FS_CALL(vnode, unlink) != NULL)
4463 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4464 	else
4465 		status = EROFS;
4466 
4467 	put_vnode(vnode);
4468 
4469 	return status;
4470 }
4471 
4472 
4473 static status_t
4474 common_access(char *path, int mode, bool kernel)
4475 {
4476 	struct vnode *vnode;
4477 	status_t status;
4478 
4479 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4480 	if (status < B_OK)
4481 		return status;
4482 
4483 	if (FS_CALL(vnode, access) != NULL)
4484 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4485 	else
4486 		status = B_OK;
4487 
4488 	put_vnode(vnode);
4489 
4490 	return status;
4491 }
4492 
4493 
4494 static status_t
4495 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4496 {
4497 	struct vnode *fromVnode, *toVnode;
4498 	char fromName[B_FILE_NAME_LENGTH];
4499 	char toName[B_FILE_NAME_LENGTH];
4500 	status_t status;
4501 
4502 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4503 
4504 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4505 	if (status < 0)
4506 		return status;
4507 
4508 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4509 	if (status < 0)
4510 		goto err;
4511 
4512 	if (fromVnode->device != toVnode->device) {
4513 		status = B_CROSS_DEVICE_LINK;
4514 		goto err1;
4515 	}
4516 
4517 	if (FS_CALL(fromVnode, rename) != NULL)
4518 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4519 	else
4520 		status = EROFS;
4521 
4522 err1:
4523 	put_vnode(toVnode);
4524 err:
4525 	put_vnode(fromVnode);
4526 
4527 	return status;
4528 }
4529 
4530 
4531 static status_t
4532 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4533 {
4534 	struct vnode *vnode = descriptor->u.vnode;
4535 
4536 	FUNCTION(("common_read_stat: stat %p\n", stat));
4537 
4538 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4539 		vnode->private_node, stat);
4540 
4541 	// fill in the st_dev and st_ino fields
4542 	if (status == B_OK) {
4543 		stat->st_dev = vnode->device;
4544 		stat->st_ino = vnode->id;
4545 	}
4546 
4547 	return status;
4548 }
4549 
4550 
4551 static status_t
4552 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4553 {
4554 	struct vnode *vnode = descriptor->u.vnode;
4555 
4556 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4557 	if (!FS_CALL(vnode, write_stat))
4558 		return EROFS;
4559 
4560 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4561 }
4562 
4563 
4564 static status_t
4565 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4566 	struct stat *stat, bool kernel)
4567 {
4568 	struct vnode *vnode;
4569 	status_t status;
4570 
4571 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4572 
4573 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4574 	if (status < 0)
4575 		return status;
4576 
4577 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4578 
4579 	// fill in the st_dev and st_ino fields
4580 	if (status == B_OK) {
4581 		stat->st_dev = vnode->device;
4582 		stat->st_ino = vnode->id;
4583 	}
4584 
4585 	put_vnode(vnode);
4586 	return status;
4587 }
4588 
4589 
4590 static status_t
4591 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4592 	const struct stat *stat, int statMask, bool kernel)
4593 {
4594 	struct vnode *vnode;
4595 	status_t status;
4596 
4597 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4598 
4599 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4600 	if (status < 0)
4601 		return status;
4602 
4603 	if (FS_CALL(vnode, write_stat))
4604 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4605 	else
4606 		status = EROFS;
4607 
4608 	put_vnode(vnode);
4609 
4610 	return status;
4611 }
4612 
4613 
4614 static int
4615 attr_dir_open(int fd, char *path, bool kernel)
4616 {
4617 	struct vnode *vnode;
4618 	int status;
4619 
4620 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4621 
4622 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4623 	if (status < B_OK)
4624 		return status;
4625 
4626 	status = open_attr_dir_vnode(vnode, kernel);
4627 	if (status < 0)
4628 		put_vnode(vnode);
4629 
4630 	return status;
4631 }
4632 
4633 
4634 static status_t
4635 attr_dir_close(struct file_descriptor *descriptor)
4636 {
4637 	struct vnode *vnode = descriptor->u.vnode;
4638 
4639 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4640 
4641 	if (FS_CALL(vnode, close_attr_dir))
4642 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4643 
4644 	return B_OK;
4645 }
4646 
4647 
4648 static void
4649 attr_dir_free_fd(struct file_descriptor *descriptor)
4650 {
4651 	struct vnode *vnode = descriptor->u.vnode;
4652 
4653 	if (vnode != NULL) {
4654 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4655 		put_vnode(vnode);
4656 	}
4657 }
4658 
4659 
4660 static status_t
4661 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4662 {
4663 	struct vnode *vnode = descriptor->u.vnode;
4664 
4665 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
4666 
4667 	if (FS_CALL(vnode, read_attr_dir))
4668 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
4669 
4670 	return EOPNOTSUPP;
4671 }
4672 
4673 
4674 static status_t
4675 attr_dir_rewind(struct file_descriptor *descriptor)
4676 {
4677 	struct vnode *vnode = descriptor->u.vnode;
4678 
4679 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
4680 
4681 	if (FS_CALL(vnode, rewind_attr_dir))
4682 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4683 
4684 	return EOPNOTSUPP;
4685 }
4686 
4687 
4688 static int
4689 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
4690 {
4691 	struct vnode *vnode;
4692 	fs_cookie cookie;
4693 	int status;
4694 
4695 	if (name == NULL || *name == '\0')
4696 		return B_BAD_VALUE;
4697 
4698 	vnode = get_vnode_from_fd(fd, kernel);
4699 	if (vnode == NULL)
4700 		return B_FILE_ERROR;
4701 
4702 	if (FS_CALL(vnode, create_attr) == NULL) {
4703 		status = EROFS;
4704 		goto err;
4705 	}
4706 
4707 	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
4708 	if (status < B_OK)
4709 		goto err;
4710 
4711 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4712 		return status;
4713 
4714 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4715 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4716 
4717 	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4718 
4719 err:
4720 	put_vnode(vnode);
4721 
4722 	return status;
4723 }
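
// Note the unwind order in attr_create() above: if get_new_fd() fails, the
// attribute cookie is closed and freed first, and then the attribute that
// was just created is removed again, so a failed call leaves no stray
// attribute behind.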
4724 
4725 
4726 static int
4727 attr_open(int fd, const char *name, int openMode, bool kernel)
4728 {
4729 	struct vnode *vnode;
4730 	fs_cookie cookie;
4731 	int status;
4732 
4733 	if (name == NULL || *name == '\0')
4734 		return B_BAD_VALUE;
4735 
4736 	vnode = get_vnode_from_fd(fd, kernel);
4737 	if (vnode == NULL)
4738 		return B_FILE_ERROR;
4739 
4740 	if (FS_CALL(vnode, open_attr) == NULL) {
4741 		status = EOPNOTSUPP;
4742 		goto err;
4743 	}
4744 
4745 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
4746 	if (status < B_OK)
4747 		goto err;
4748 
4749 	// now we only need a file descriptor for this attribute and we're done
4750 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4751 		return status;
4752 
4753 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4754 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4755 
4756 err:
4757 	put_vnode(vnode);
4758 
4759 	return status;
4760 }
4761 
4762 
4763 static status_t
4764 attr_close(struct file_descriptor *descriptor)
4765 {
4766 	struct vnode *vnode = descriptor->u.vnode;
4767 
4768 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
4769 
4770 	if (FS_CALL(vnode, close_attr))
4771 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4772 
4773 	return B_OK;
4774 }
4775 
4776 
4777 static void
4778 attr_free_fd(struct file_descriptor *descriptor)
4779 {
4780 	struct vnode *vnode = descriptor->u.vnode;
4781 
4782 	if (vnode != NULL) {
4783 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4784 		put_vnode(vnode);
4785 	}
4786 }
4787 
4788 
4789 static status_t
4790 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4791 {
4792 	struct vnode *vnode = descriptor->u.vnode;
4793 
4794 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4795 	if (!FS_CALL(vnode, read_attr))
4796 		return EOPNOTSUPP;
4797 
4798 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4799 }
4800 
4801 
4802 static status_t
4803 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4804 {
4805 	struct vnode *vnode = descriptor->u.vnode;
4806 
4807 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4808 	if (!FS_CALL(vnode, write_attr))
4809 		return EOPNOTSUPP;
4810 
4811 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4812 }
4813 
4814 
4815 static off_t
4816 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4817 {
4818 	off_t offset;
4819 
4820 	switch (seekType) {
4821 		case SEEK_SET:
4822 			offset = 0;
4823 			break;
4824 		case SEEK_CUR:
4825 			offset = descriptor->pos;
4826 			break;
4827 		case SEEK_END:
4828 		{
4829 			struct vnode *vnode = descriptor->u.vnode;
4830 			struct stat stat;
4831 			status_t status;
4832 
4833 			if (FS_CALL(vnode, read_attr_stat) == NULL)
4834 				return EOPNOTSUPP;
4835 
4836 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
4837 			if (status < B_OK)
4838 				return status;
4839 
4840 			offset = stat.st_size;
4841 			break;
4842 		}
4843 		default:
4844 			return B_BAD_VALUE;
4845 	}
4846 
4847 	// assumes off_t is 64 bits wide
4848 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4849 		return EOVERFLOW;
4850 
4851 	pos += offset;
4852 	if (pos < 0)
4853 		return B_BAD_VALUE;
4854 
4855 	return descriptor->pos = pos;
4856 }
4857 
4858 
4859 static status_t
4860 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4861 {
4862 	struct vnode *vnode = descriptor->u.vnode;
4863 
4864 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
4865 
4866 	if (!FS_CALL(vnode, read_attr_stat))
4867 		return EOPNOTSUPP;
4868 
4869 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4870 }
4871 
4872 
4873 static status_t
4874 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4875 {
4876 	struct vnode *vnode = descriptor->u.vnode;
4877 
4878 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
4879 
4880 	if (!FS_CALL(vnode, write_attr_stat))
4881 		return EROFS;
4882 
4883 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
4884 }
4885 
4886 
4887 static status_t
4888 attr_remove(int fd, const char *name, bool kernel)
4889 {
4890 	struct file_descriptor *descriptor;
4891 	struct vnode *vnode;
4892 	status_t status;
4893 
4894 	if (name == NULL || *name == '\0')
4895 		return B_BAD_VALUE;
4896 
4897 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
4898 
4899 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4900 	if (descriptor == NULL)
4901 		return B_FILE_ERROR;
4902 
4903 	if (FS_CALL(vnode, remove_attr))
4904 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4905 	else
4906 		status = EROFS;
4907 
4908 	put_fd(descriptor);
4909 
4910 	return status;
4911 }
4912 
4913 
4914 static status_t
4915 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
4916 {
4917 	struct file_descriptor *fromDescriptor, *toDescriptor;
4918 	struct vnode *fromVnode, *toVnode;
4919 	status_t status;
4920 
4921 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
4922 		return B_BAD_VALUE;
4923 
4924 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
4925 
4926 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
4927 	if (fromDescriptor == NULL)
4928 		return B_FILE_ERROR;
4929 
4930 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
4931 	if (toDescriptor == NULL) {
4932 		status = B_FILE_ERROR;
4933 		goto err;
4934 	}
4935 
4936 	// are the files on the same volume?
4937 	if (fromVnode->device != toVnode->device) {
4938 		status = B_CROSS_DEVICE_LINK;
4939 		goto err1;
4940 	}
4941 
4942 	if (FS_CALL(fromVnode, rename_attr))
4943 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4944 	else
4945 		status = EROFS;
4946 
4947 err1:
4948 	put_fd(toDescriptor);
4949 err:
4950 	put_fd(fromDescriptor);
4951 
4952 	return status;
4953 }
4954 
4955 
4956 static status_t
4957 index_dir_open(mount_id mountID, bool kernel)
4958 {
4959 	struct fs_mount *mount;
4960 	fs_cookie cookie;
4961 
4962 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
4963 
4964 	status_t status = get_mount(mountID, &mount);
4965 	if (status < B_OK)
4966 		return status;
4967 
4968 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
4969 		status = EOPNOTSUPP;
4970 		goto out;
4971 	}
4972 
4973 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
4974 	if (status < B_OK)
4975 		goto out;
4976 
4977 	// get fd for the index directory
4978 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
4979 	if (status >= 0)
4980 		goto out;
4981 
4982 	// something went wrong
4983 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
4984 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
4985 
4986 out:
4987 	put_mount(mount);
4988 	return status;
4989 }
4990 
4991 
4992 static status_t
4993 index_dir_close(struct file_descriptor *descriptor)
4994 {
4995 	struct fs_mount *mount = descriptor->u.mount;
4996 
4997 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
4998 
4999 	if (FS_MOUNT_CALL(mount, close_index_dir))
5000 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
5001 
5002 	return B_OK;
5003 }
5004 
5005 
5006 static void
5007 index_dir_free_fd(struct file_descriptor *descriptor)
5008 {
5009 	struct fs_mount *mount = descriptor->u.mount;
5010 
5011 	if (mount != NULL) {
5012 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
5013 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5014 		//put_vnode(vnode);
5015 	}
5016 }
5017 
5018 
5019 static status_t
5020 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5021 {
5022 	struct fs_mount *mount = descriptor->u.mount;
5023 
5024 	if (FS_MOUNT_CALL(mount, read_index_dir))
5025 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5026 
5027 	return EOPNOTSUPP;
5028 }
5029 
5030 
5031 static status_t
5032 index_dir_rewind(struct file_descriptor *descriptor)
5033 {
5034 	struct fs_mount *mount = descriptor->u.mount;
5035 
5036 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
5037 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
5038 
5039 	return EOPNOTSUPP;
5040 }
5041 
5042 
5043 static status_t
5044 index_create(mount_id mountID, const char *name, uint32 type, uint32 flags, bool kernel)
5045 {
5046 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5047 
5048 	struct fs_mount *mount;
5049 	status_t status = get_mount(mountID, &mount);
5050 	if (status < B_OK)
5051 		return status;
5052 
5053 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
5054 		status = EROFS;
5055 		goto out;
5056 	}
5057 
5058 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
5059 
5060 out:
5061 	put_mount(mount);
5062 	return status;
5063 }
5064 
5065 
5066 #if 0
5067 static status_t
5068 index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5069 {
5070 	struct vnode *vnode = descriptor->u.vnode;
5071 
5072 	// ToDo: currently unused!
5073 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
5074 	if (!FS_CALL(vnode, read_index_stat))
5075 		return EOPNOTSUPP;
5076 
5077 	return EOPNOTSUPP;
5078 	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
5079 }
5080 
5081 
5082 static void
5083 index_free_fd(struct file_descriptor *descriptor)
5084 {
5085 	struct vnode *vnode = descriptor->u.vnode;
5086 
5087 	if (vnode != NULL) {
5088 		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5089 		put_vnode(vnode);
5090 	}
5091 }
5092 #endif
5093 
5094 
5095 static status_t
5096 index_name_read_stat(mount_id mountID, const char *name, struct stat *stat, bool kernel)
5097 {
5098 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5099 
5100 	struct fs_mount *mount;
5101 	status_t status = get_mount(mountID, &mount);
5102 	if (status < B_OK)
5103 		return status;
5104 
5105 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
5106 		status = EOPNOTSUPP;
5107 		goto out;
5108 	}
5109 
5110 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
5111 
5112 out:
5113 	put_mount(mount);
5114 	return status;
5115 }
5116 
5117 
5118 static status_t
5119 index_remove(mount_id mountID, const char *name, bool kernel)
5120 {
5121 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5122 
5123 	struct fs_mount *mount;
5124 	status_t status = get_mount(mountID, &mount);
5125 	if (status < B_OK)
5126 		return status;
5127 
5128 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
5129 		status = EROFS;
5130 		goto out;
5131 	}
5132 
5133 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
5134 
5135 out:
5136 	put_mount(mount);
5137 	return status;
5138 }
5139 
5140 
5141 /**	ToDo: the query FS API is still pretty much the same as in R5.
5142  *		It would be nice if these functions got some more kernel
5143  *		support.
5144  *		For example, query parsing should be moved into the kernel.
5145  */
5146 
5147 static int
5148 query_open(dev_t device, const char *query, uint32 flags,
5149 	port_id port, int32 token, bool kernel)
5150 {
5151 	struct fs_mount *mount;
5152 	fs_cookie cookie;
5153 
5154 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
5155 
5156 	status_t status = get_mount(device, &mount);
5157 	if (status < B_OK)
5158 		return status;
5159 
5160 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
5161 		status = EOPNOTSUPP;
5162 		goto out;
5163 	}
5164 
5165 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
5166 	if (status < B_OK)
5167 		goto out;
5168 
5169 	// get fd for the query
5170 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
5171 	if (status >= 0)
5172 		goto out;
5173 
5174 	// something went wrong
5175 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
5176 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
5177 
5178 out:
5179 	put_mount(mount);
5180 	return status;
5181 }
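
// For orientation, a rough sketch of how a live query reaches this point
// from userland (BeOS-style BQuery API; illustrative, not code from this
// file):
//
//	BQuery query;
//	query.SetVolume(&volume);
//	query.SetPredicate("name = \"*.cpp\"");
//	query.SetTarget(messenger);	// supplies the port/token used above
//	query.Fetch();			// ends up here via open_query()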
5182 
5183 
5184 static status_t
5185 query_close(struct file_descriptor *descriptor)
5186 {
5187 	struct fs_mount *mount = descriptor->u.mount;
5188 
5189 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5190 
5191 	if (FS_MOUNT_CALL(mount, close_query))
5192 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5193 
5194 	return B_OK;
5195 }
5196 
5197 
5198 static void
5199 query_free_fd(struct file_descriptor *descriptor)
5200 {
5201 	struct fs_mount *mount = descriptor->u.mount;
5202 
5203 	if (mount != NULL) {
5204 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5205 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5206 		//put_vnode(vnode);
5207 	}
5208 }
5209 
5210 
5211 static status_t
5212 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5213 {
5214 	struct fs_mount *mount = descriptor->u.mount;
5215 
5216 	if (FS_MOUNT_CALL(mount, read_query))
5217 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5218 
5219 	return EOPNOTSUPP;
5220 }
5221 
5222 
5223 static status_t
5224 query_rewind(struct file_descriptor *descriptor)
5225 {
5226 	struct fs_mount *mount = descriptor->u.mount;
5227 
5228 	if (FS_MOUNT_CALL(mount, rewind_query))
5229 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5230 
5231 	return EOPNOTSUPP;
5232 }
5233 
5234 
5235 //	#pragma mark -
5236 //	General File System functions
5237 
5238 
5239 static dev_t
5240 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5241 	const char *args, bool kernel)
5242 {
5243 	struct fs_mount *mount;
5244 	status_t status = 0;
5245 
5246 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5247 
5248 	// The path is always safe, we just have to make sure that fsName is at
5249 	// least minimally valid - we can't make any assumptions about args, though.
5250 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5251 	// We'll get it from the DDM later.
5252 	if (fsName == NULL) {
5253 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
5254 			return B_BAD_VALUE;
5255 	} else if (fsName[0] == '\0')
5256 		return B_BAD_VALUE;
5257 
5258 	RecursiveLocker mountOpLocker(sMountOpLock);
5259 
5260 	// Helper to delete a newly created file device on failure.
5261 	// Not exactly beautiful, but helps to keep the code below cleaner.
5262 	struct FileDeviceDeleter {
5263 		FileDeviceDeleter() : id(-1) {}
5264 		~FileDeviceDeleter()
5265 		{
5266 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5267 		}
5268 
5269 		partition_id id;
5270 	} fileDeviceDeleter;
5271 
5272 	// If the file system is not a "virtual" one, the device argument should
5273 	// point to a real file/device (if given at all).
5274 	// get the partition
5275 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5276 	KPartition *partition = NULL;
5277 	KPath normalizedDevice;
5278 	bool newlyCreatedFileDevice = false;
5279 
5280 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5281 		// normalize the device path
5282 		status = normalizedDevice.SetTo(device, true);
5283 		if (status != B_OK)
5284 			return status;
5285 
5286 		// get a corresponding partition from the DDM
5287 		partition = ddm->RegisterPartition(normalizedDevice.Path(), true);
5288 
5289 		if (!partition) {
5290 			// Partition not found: This either means the user supplied
5291 			// an invalid path, or the path refers to an image file. We try
5292 			// to let the DDM create a file device for the path.
5293 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5294 				&newlyCreatedFileDevice, false);
5295 			if (deviceID >= 0) {
5296 				partition = ddm->RegisterPartition(deviceID, true);
5297 				if (newlyCreatedFileDevice)
5298 					fileDeviceDeleter.id = deviceID;
5299 			}
5300 		}
5301 
5302 		if (!partition) {
5303 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5304 				normalizedDevice.Path()));
5305 			return B_ENTRY_NOT_FOUND;
5306 		}
5307 
5308 		device = normalizedDevice.Path();
5309 			// correct path to file device
5310 	}
5311 	PartitionRegistrar partitionRegistrar(partition, true);
5312 
5313 	// Write lock the partition's device. For the time being, we keep the lock
5314 	// until we're done mounting -- not nice, but it ensures that no one
5315 	// is interfering.
5316 	// TODO: Find a better solution.
5317 	KDiskDevice *diskDevice = NULL;
5318 	if (partition) {
5319 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5320 		if (!diskDevice) {
5321 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5322 			return B_ERROR;
5323 		}
5324 	}
5325 
5326 	DeviceWriteLocker writeLocker(diskDevice, true);
5327 		// this takes over the write lock acquired before
5328 
5329 	if (partition) {
5330 		// make sure that the partition is not busy
5331 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5332 			TRACE(("fs_mount(): Partition is busy.\n"));
5333 			return B_BUSY;
5334 		}
5335 
5336 		// if no FS name had been supplied, we get it from the partition
5337 		if (!fsName) {
5338 			KDiskSystem *diskSystem = partition->DiskSystem();
5339 			if (!diskSystem) {
5340 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5341 					"recognize it.\n"));
5342 				return B_BAD_VALUE;
5343 			}
5344 
5345 			if (!diskSystem->IsFileSystem()) {
5346 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5347 					"partitioning system.\n"));
5348 				return B_BAD_VALUE;
5349 			}
5350 
5351 			// The disk system name will not change, and the KDiskSystem
5352 			// object will not go away while the disk device is locked (and
5353 			// the partition has a reference to it), so this is safe.
5354 			fsName = diskSystem->Name();
5355 		}
5356 	}
5357 
5358 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5359 	if (mount == NULL)
5360 		return B_NO_MEMORY;
5361 
5362 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5363 
5364 	mount->fs_name = get_file_system_name(fsName);
5365 	if (mount->fs_name == NULL) {
5366 		status = B_NO_MEMORY;
5367 		goto err1;
5368 	}
5369 
5370 	mount->device_name = strdup(device);
5371 		// "device" can be NULL
5372 
5373 	mount->fs = get_file_system(fsName);
5374 	if (mount->fs == NULL) {
5375 		status = ENODEV;
5376 		goto err3;
5377 	}
5378 
5379 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5380 	if (status < B_OK)
5381 		goto err4;
5382 
5383 	// initialize structure
5384 	mount->id = sNextMountID++;
5385 	mount->partition = NULL;
5386 	mount->root_vnode = NULL;
5387 	mount->covers_vnode = NULL;
5388 	mount->cookie = NULL;
5389 	mount->unmounting = false;
5390 	mount->owns_file_device = false;
5391 
5392 	// insert mount struct into list before we call FS's mount() function
5393 	// so that vnodes can be created for this mount
5394 	mutex_lock(&sMountMutex);
5395 	hash_insert(sMountsTable, mount);
5396 	mutex_unlock(&sMountMutex);
5397 
5398 	vnode_id rootID;
5399 
5400 	if (!sRoot) {
5401 		// we haven't mounted anything yet
5402 		if (strcmp(path, "/") != 0) {
5403 			status = B_ERROR;
5404 			goto err5;
5405 		}
5406 
5407 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5408 		if (status < 0) {
5409 			// ToDo: why should we hide the error code from the file system here?
5410 			//status = ERR_VFS_GENERAL;
5411 			goto err5;
5412 		}
5413 	} else {
5414 		struct vnode *coveredVnode;
5415 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5416 		if (status < B_OK)
5417 			goto err5;
5418 
5419 		// make sure coveredVnode is a directory
5420 		struct stat coveredNodeStat;
5421 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5422 			coveredVnode->private_node, &coveredNodeStat);
5423 		if (status < B_OK)
5424 			goto err5;
5425 
5426 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5427 			status = B_NOT_A_DIRECTORY;
5428 			goto err5;
5429 		}
5430 
5431 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5432 			// this is already a mount point
5433 			status = B_BUSY;
5434 			goto err5;
5435 		}
5436 
5437 		mount->covers_vnode = coveredVnode;
5438 
5439 		// mount it
5440 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5441 		if (status < B_OK)
5442 			goto err6;
5443 	}
5444 
5445 	// the root node is supposed to be owned by the file system - it must
5446 	// exist at this point
5447 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5448 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5449 		panic("fs_mount: file system does not own its root node!\n");
5450 		status = B_ERROR;
5451 		goto err7;
5452 	}
5453 
5454 	// No race here, since fs_mount() is the only function changing
5455 	// covers_vnode (and holds sMountOpLock at that time).
5456 	mutex_lock(&sVnodeCoveredByMutex);
5457 	if (mount->covers_vnode)
5458 		mount->covers_vnode->covered_by = mount->root_vnode;
5459 	mutex_unlock(&sVnodeCoveredByMutex);
5460 
5461 	if (!sRoot)
5462 		sRoot = mount->root_vnode;
5463 
5464 	// supply the partition (if any) with the mount cookie and mark it mounted
5465 	if (partition) {
5466 		partition->SetMountCookie(mount->cookie);
5467 		partition->SetVolumeID(mount->id);
5468 
5469 		// keep a partition reference as long as the partition is mounted
5470 		partitionRegistrar.Detach();
5471 		mount->partition = partition;
5472 		mount->owns_file_device = newlyCreatedFileDevice;
5473 		fileDeviceDeleter.id = -1;
5474 	}
5475 
5476 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
5477 		mount->covers_vnode ? mount->covers_vnode->id : -1);
5478 
5479 	return mount->id;
5480 
5481 err7:
5482 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5483 err6:
5484 	if (mount->covers_vnode)
5485 		put_vnode(mount->covers_vnode);
5486 err5:
5487 	mutex_lock(&sMountMutex);
5488 	hash_remove(sMountsTable, mount);
5489 	mutex_unlock(&sMountMutex);
5490 
5491 	recursive_lock_destroy(&mount->rlock);
5492 err4:
5493 	put_file_system(mount->fs);
5494 	free(mount->device_name);
5495 err3:
5496 	free(mount->fs_name);
5497 err1:
5498 	free(mount);
5499 
5500 	return status;
5501 }
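
// Example (illustrative only): mounting a file system image. The path does
// not name a real device, so the DDM creates a temporary file device for
// it, which fileDeviceDeleter removes again if anything above fails:
//
//	dev_t volume = fs_mount("/mnt/image", "/boot/home/test.image",
//		"bfs", 0, NULL, true);
//	if (volume < B_OK)
//		;	// mount failed; the file device was already cleaned up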
5502 
5503 
5504 static status_t
5505 fs_unmount(char *path, uint32 flags, bool kernel)
5506 {
5507 	struct fs_mount *mount;
5508 	struct vnode *vnode;
5509 	status_t err;
5510 
5511 	FUNCTION(("vfs_unmount: entry. path = '%s', kernel %d\n", path, kernel));
5512 
5513 	err = path_to_vnode(path, true, &vnode, NULL, kernel);
5514 	if (err < 0)
5515 		return B_ENTRY_NOT_FOUND;
5516 
5517 	RecursiveLocker mountOpLocker(sMountOpLock);
5518 
5519 	mount = find_mount(vnode->device);
5520 	if (!mount)
5521 		panic("vfs_unmount: find_mount() failed on root vnode @%p of mount\n", vnode);
5522 
5523 	if (mount->root_vnode != vnode) {
5524 		// not mountpoint
5525 		put_vnode(vnode);
5526 		return B_BAD_VALUE;
5527 	}
5528 
5529 	// if the volume is associated with a partition, lock the device of the
5530 	// partition as long as we are unmounting
5531 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
5532 	KPartition *partition = mount->partition;
5533 	KDiskDevice *diskDevice = NULL;
5534 	if (partition) {
5535 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5536 		if (!diskDevice) {
5537 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
5538 			return B_ERROR;
5539 		}
5540 	}
5541 	DeviceWriteLocker writeLocker(diskDevice, true);
5542 
5543 	// make sure that the partition is not busy
5544 	if (partition) {
5545 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5546 			TRACE(("fs_unmount(): Partition is busy.\n"));
5547 			return B_BUSY;
5548 		}
5549 	}
5550 
5551 	// grab the vnode master mutex to keep someone from creating
5552 	// a vnode while we're figuring out if we can continue
5553 	mutex_lock(&sVnodeMutex);
5554 
5555 	bool disconnectedDescriptors = false;
5556 
5557 	while (true) {
5558 		bool busy = false;
5559 
5560 		// cycle through the list of vnodes associated with this mount and
5561 		// make sure none of them is busy or still has references
5562 		vnode = NULL;
5563 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5564 			// The root vnode ref_count needs to be 2 here: one for the file
5565 			// system, one from the path_to_vnode() call above
5566 			if (vnode->busy
5567 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
5568 					|| (vnode->ref_count != 2 && mount->root_vnode == vnode))) {
5569 				// there are still vnodes in use on this mount, so we cannot
5570 				// unmount yet
5571 				busy = true;
5572 				break;
5573 			}
5574 		}
5575 
5576 		if (!busy)
5577 			break;
5578 
5579 		if ((flags & B_FORCE_UNMOUNT) == 0) {
5580 			mutex_unlock(&sVnodeMutex);
5581 			put_vnode(mount->root_vnode);
5582 
5583 			return B_BUSY;
5584 		}
5585 
5586 		if (disconnectedDescriptors) {
5587 			// wait a bit until the last access is finished, and then try again
5588 			mutex_unlock(&sVnodeMutex);
5589 			snooze(100000);
5590 			// TODO: if there is some kind of bug that prevents the ref counts
5591 			//	from getting back to zero, this will fall into an endless loop...
5592 			mutex_lock(&sVnodeMutex);
5593 			continue;
5594 		}
5595 
5596 		// the file system is still busy - but we're forced to unmount it,
5597 		// so let's disconnect all open file descriptors
5598 
5599 		mount->unmounting = true;
5600 			// prevent new vnodes from being created
5601 
5602 		mutex_unlock(&sVnodeMutex);
5603 
5604 		disconnect_mount_or_vnode_fds(mount, NULL);
5605 		disconnectedDescriptors = true;
5606 
5607 		mutex_lock(&sVnodeMutex);
5608 	}
5609 
5610 	// we can safely continue; mark all of the vnodes busy and put this
5611 	// mount structure into unmounting state
5612 	mount->unmounting = true;
5613 
5614 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5615 		vnode->busy = true;
5616 
5617 		if (vnode->ref_count == 0) {
5618 			// this vnode has been unused before
5619 			list_remove_item(&sUnusedVnodeList, vnode);
5620 			sUnusedVnodes--;
5621 		}
5622 	}
5623 
5624 	// The ref_count of the root node is 2 at this point, see above why this is
5625 	mount->root_vnode->ref_count -= 2;
5626 
5627 	mutex_unlock(&sVnodeMutex);
5628 
5629 	mutex_lock(&sVnodeCoveredByMutex);
5630 	mount->covers_vnode->covered_by = NULL;
5631 	mutex_unlock(&sVnodeCoveredByMutex);
5632 	put_vnode(mount->covers_vnode);
5633 
5634 	// Free all vnodes associated with this mount.
5635 	// They will be removed from the mount list by free_vnode(), so
5636 	// we don't have to do that ourselves.
5637 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes)) != NULL) {
5638 		free_vnode(vnode, false);
5639 	}
5640 
5641 	// remove the mount structure from the hash table
5642 	mutex_lock(&sMountMutex);
5643 	hash_remove(sMountsTable, mount);
5644 	mutex_unlock(&sMountMutex);
5645 
5646 	mountOpLocker.Unlock();
5647 
5648 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5649 	notify_unmount(mount->id);
5650 
5651 	// release the file system
5652 	put_file_system(mount->fs);
5653 
5654 	// dereference the partition and mark it unmounted
5655 	if (partition) {
5656 		partition->SetVolumeID(-1);
5657 		partition->SetMountCookie(NULL);
5658 
5659 		if (mount->owns_file_device)
5660 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
5661 		partition->Unregister();
5662 	}
5663 
5664 	free(mount->device_name);
5665 	free(mount->fs_name);
5666 	free(mount);
5667 
5668 	return B_OK;
5669 }
5670 
5671 
5672 static status_t
5673 fs_sync(dev_t device)
5674 {
5675 	struct fs_mount *mount;
5676 	status_t status = get_mount(device, &mount);
5677 	if (status < B_OK)
5678 		return status;
5679 
5680 	mutex_lock(&sMountMutex);
5681 
5682 	if (FS_MOUNT_CALL(mount, sync))
5683 		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);
5684 
5685 	mutex_unlock(&sMountMutex);
5686 
5687 	struct vnode *previousVnode = NULL;
5688 	while (true) {
5689 		// synchronize access to vnode list
5690 		recursive_lock_lock(&mount->rlock);
5691 
5692 		struct vnode *vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
5693 			previousVnode);
5694 
5695 		vnode_id id = -1;
5696 		if (vnode != NULL)
5697 			id = vnode->id;
5698 
5699 		recursive_lock_unlock(&mount->rlock);
5700 
5701 		if (vnode == NULL)
5702 			break;
5703 
5704 		// acquire a reference to the vnode
5705 
5706 		if (get_vnode(mount->id, id, &vnode, true) == B_OK) {
5707 			if (previousVnode != NULL)
5708 				put_vnode(previousVnode);
5709 
5710 			if (FS_CALL(vnode, fsync) != NULL)
5711 				FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
5712 
5713 			// the next vnode might change until we lock the vnode list again,
5714 			// but this vnode won't go away since we keep a reference to it.
5715 			previousVnode = vnode;
5716 		} else {
5717 			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n", mount->id, id);
5718 			break;
5719 		}
5720 	}
5721 
5722 	if (previousVnode != NULL)
5723 		put_vnode(previousVnode);
5724 
5725 	put_mount(mount);
5726 	return status;
5727 }
5728 
5729 
5730 static status_t
5731 fs_read_info(dev_t device, struct fs_info *info)
5732 {
5733 	struct fs_mount *mount;
5734 	status_t status = get_mount(device, &mount);
5735 	if (status < B_OK)
5736 		return status;
5737 
5738 	memset(info, 0, sizeof(struct fs_info));
5739 
5740 	if (FS_MOUNT_CALL(mount, read_fs_info))
5741 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
5742 
5743 	// fill in info the file system doesn't (have to) know about
5744 	if (status == B_OK) {
5745 		info->dev = mount->id;
5746 		info->root = mount->root_vnode->id;
5747 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
5748 		if (mount->device_name != NULL) {
5749 			strlcpy(info->device_name, mount->device_name,
5750 				sizeof(info->device_name));
5751 		}
5752 	}
5753 
5754 	// if the call is not supported by the file system, there are still
5755 	// the parts that we filled out ourselves
5756 
5757 	put_mount(mount);
5758 	return status;
5759 }
5760 
5761 
5762 static status_t
5763 fs_write_info(dev_t device, const struct fs_info *info, int mask)
5764 {
5765 	struct fs_mount *mount;
5766 	status_t status = get_mount(device, &mount);
5767 	if (status < B_OK)
5768 		return status;
5769 
5770 	if (FS_MOUNT_CALL(mount, write_fs_info))
5771 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
5772 	else
5773 		status = EROFS;
5774 
5775 	put_mount(mount);
5776 	return status;
5777 }
5778 
5779 
5780 static dev_t
5781 fs_next_device(int32 *_cookie)
5782 {
5783 	struct fs_mount *mount = NULL;
5784 	dev_t device = *_cookie;
5785 
5786 	mutex_lock(&sMountMutex);
5787 
5788 	// Since device IDs are assigned sequentially, this algorithm
5789 	// does work good enough. It makes sure that the device list
5790 	// returned is sorted, and that no device is skipped when an
5791 	// already visited device got unmounted.
5792 
5793 	while (device < sNextMountID) {
5794 		mount = find_mount(device++);
5795 		if (mount != NULL && mount->cookie != NULL)
5796 			break;
5797 	}
5798 
5799 	*_cookie = device;
5800 
5801 	if (mount != NULL)
5802 		device = mount->id;
5803 	else
5804 		device = B_BAD_VALUE;
5805 
5806 	mutex_unlock(&sMountMutex);
5807 
5808 	return device;
5809 }
5810 
5811 
5812 static status_t
5813 get_cwd(char *buffer, size_t size, bool kernel)
5814 {
5815 	// Get current working directory from io context
5816 	struct io_context *context = get_current_io_context(kernel);
5817 	status_t status;
5818 
5819 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
5820 
5821 	mutex_lock(&context->io_mutex);
5822 
5823 	if (context->cwd)
5824 		status = dir_vnode_to_path(context->cwd, buffer, size);
5825 	else
5826 		status = B_ERROR;
5827 
5828 	mutex_unlock(&context->io_mutex);
5829 	return status;
5830 }
5831 
5832 
5833 static status_t
5834 set_cwd(int fd, char *path, bool kernel)
5835 {
5836 	struct io_context *context;
5837 	struct vnode *vnode = NULL;
5838 	struct vnode *oldDirectory;
5839 	struct stat stat;
5840 	status_t status;
5841 
5842 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
5843 
5844 	// Get vnode for passed path, and bail if it failed
5845 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5846 	if (status < 0)
5847 		return status;
5848 
5849 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
5850 	if (status < 0)
5851 		goto err;
5852 
5853 	if (!S_ISDIR(stat.st_mode)) {
5854 		// nope, can't cwd to here
5855 		status = B_NOT_A_DIRECTORY;
5856 		goto err;
5857 	}
5858 
5859 	// Get current io context and lock
5860 	context = get_current_io_context(kernel);
5861 	mutex_lock(&context->io_mutex);
5862 
5863 	// save the old current working directory first
5864 	oldDirectory = context->cwd;
5865 	context->cwd = vnode;
5866 
5867 	mutex_unlock(&context->io_mutex);
5868 
5869 	if (oldDirectory)
5870 		put_vnode(oldDirectory);
5871 
5872 	return B_NO_ERROR;
5873 
5874 err:
5875 	put_vnode(vnode);
5876 	return status;
5877 }
5878 
5879 
5880 //	#pragma mark -
5881 //	Calls from within the kernel
5882 
5883 
5884 dev_t
5885 _kern_mount(const char *path, const char *device, const char *fsName,
5886 	uint32 flags, const char *args, size_t argsLength)
5887 {
5888 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5889 	if (pathBuffer.InitCheck() != B_OK)
5890 		return B_NO_MEMORY;
5891 
5892 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
5893 }
5894 
5895 
5896 status_t
5897 _kern_unmount(const char *path, uint32 flags)
5898 {
5899 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5900 	if (pathBuffer.InitCheck() != B_OK)
5901 		return B_NO_MEMORY;
5902 
5903 	return fs_unmount(pathBuffer.LockBuffer(), flags, true);
5904 }
5905 
5906 
5907 status_t
5908 _kern_read_fs_info(dev_t device, struct fs_info *info)
5909 {
5910 	if (info == NULL)
5911 		return B_BAD_VALUE;
5912 
5913 	return fs_read_info(device, info);
5914 }
5915 
5916 
5917 status_t
5918 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
5919 {
5920 	if (info == NULL)
5921 		return B_BAD_VALUE;
5922 
5923 	return fs_write_info(device, info, mask);
5924 }
5925 
5926 
5927 status_t
5928 _kern_sync(void)
5929 {
5930 	// Note: _kern_sync() is also called from _user_sync()
5931 	int32 cookie = 0;
5932 	dev_t device;
5933 	while ((device = next_dev(&cookie)) >= 0) {
5934 		status_t status = fs_sync(device);
5935 		if (status != B_OK && status != B_BAD_VALUE)
5936 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
5937 	}
5938 
5939 	return B_OK;
5940 }
5941 
5942 
5943 dev_t
5944 _kern_next_device(int32 *_cookie)
5945 {
5946 	return fs_next_device(_cookie);
5947 }
5948 
5949 
5950 status_t
5951 _kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
5952 	size_t infoSize)
5953 {
5954 	if (infoSize != sizeof(fd_info))
5955 		return B_BAD_VALUE;
5956 
5957 	struct io_context *context = NULL;
5958 	sem_id contextMutex = -1;
5959 	struct team *team = NULL;
5960 
5961 	cpu_status state = disable_interrupts();
5962 	GRAB_TEAM_LOCK();
5963 
5964 	team = team_get_team_struct_locked(teamID);
5965 	if (team) {
5966 		context = (io_context *)team->io_context;
5967 		contextMutex = context->io_mutex.sem;
5968 	}
5969 
5970 	RELEASE_TEAM_LOCK();
5971 	restore_interrupts(state);
5972 
5973 	// we now have a context - since we couldn't lock it while having
5974 	// safe access to the team structure, we now need to lock the mutex
5975 	// manually
5976 
5977 	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
5978 		// team doesn't exit or seems to be gone
5979 		return B_BAD_TEAM_ID;
5980 	}
5981 
5982 	// the team cannot be deleted completely while we're owning its
5983 	// io_context mutex, so we can safely play with it now
5984 
5985 	context->io_mutex.holder = thread_get_current_thread_id();
5986 
5987 	uint32 slot = *_cookie;
5988 
5989 	struct file_descriptor *descriptor;
5990 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
5991 		slot++;
5992 
5993 	if (slot >= context->table_size) {
5994 		mutex_unlock(&context->io_mutex);
5995 		return B_ENTRY_NOT_FOUND;
5996 	}
5997 
5998 	info->number = slot;
5999 	info->open_mode = descriptor->open_mode;
6000 
6001 	struct vnode *vnode = fd_vnode(descriptor);
6002 	if (vnode != NULL) {
6003 		info->device = vnode->device;
6004 		info->node = vnode->id;
6005 	} else if (descriptor->u.mount != NULL) {
6006 		info->device = descriptor->u.mount->id;
6007 		info->node = -1;
6008 	}
6009 
6010 	mutex_unlock(&context->io_mutex);
6011 
6012 	*_cookie = slot + 1;
6013 	return B_OK;
6014 }
6015 
6016 
6017 int
6018 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
6019 {
6020 	if (openMode & O_CREAT)
6021 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
6022 
6023 	return file_open_entry_ref(device, inode, name, openMode, true);
6024 }
6025 
6026 
6027 /**	\brief Opens a node specified by a FD + path pair.
6028  *
6029  *	At least one of \a fd and \a path must be specified.
6030  *	If only \a fd is given, the function opens the node identified by this
6031  *	FD. If only a path is given, this path is opened. If both are given and
6032  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6033  *	of the directory (!) identified by \a fd.
6034  *
6035  *	\param fd The FD. May be < 0.
6036  *	\param path The absolute or relative path. May be \c NULL.
6037  *	\param openMode The open mode.
6038  *	\return A FD referring to the newly opened node, or an error code,
6039  *			if an error occurs.
6040  */
6041 
6042 int
6043 _kern_open(int fd, const char *path, int openMode, int perms)
6044 {
6045 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6046 	if (pathBuffer.InitCheck() != B_OK)
6047 		return B_NO_MEMORY;
6048 
6049 	if (openMode & O_CREAT)
6050 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
6051 
6052 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
6053 }
6054 
6055 
6056 /**	\brief Opens a directory specified by entry_ref or node_ref.
6057  *
6058  *	The supplied name may be \c NULL, in which case directory identified
6059  *	by \a device and \a inode will be opened. Otherwise \a device and
6060  *	\a inode identify the parent directory of the directory to be opened
6061  *	and \a name its entry name.
6062  *
6063  *	\param device If \a name is specified the ID of the device the parent
6064  *		   directory of the directory to be opened resides on, otherwise
6065  *		   the device of the directory itself.
6066  *	\param inode If \a name is specified the node ID of the parent
6067  *		   directory of the directory to be opened, otherwise node ID of the
6068  *		   directory itself.
6069  *	\param name The entry name of the directory to be opened. If \c NULL,
6070  *		   the \a device + \a inode pair identify the node to be opened.
6071  *	\return The FD of the newly opened directory or an error code, if
6072  *			something went wrong.
6073  */
6074 
6075 int
6076 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
6077 {
6078 	return dir_open_entry_ref(device, inode, name, true);
6079 }
6080 
6081 
6082 /**	\brief Opens a directory specified by a FD + path pair.
6083  *
6084  *	At least one of \a fd and \a path must be specified.
6085  *	If only \a fd is given, the function opens the directory identified by this
6086  *	FD. If only a path is given, this path is opened. If both are given and
6087  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6088  *	of the directory (!) identified by \a fd.
6089  *
6090  *	\param fd The FD. May be < 0.
6091  *	\param path The absolute or relative path. May be \c NULL.
6092  *	\return A FD referring to the newly opened directory, or an error code,
6093  *			if an error occurs.
6094  */
6095 
6096 int
6097 _kern_open_dir(int fd, const char *path)
6098 {
6099 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6100 	if (pathBuffer.InitCheck() != B_OK)
6101 		return B_NO_MEMORY;
6102 
6103 	return dir_open(fd, pathBuffer.LockBuffer(), true);
6104 }
6105 
6106 
6107 status_t
6108 _kern_fcntl(int fd, int op, uint32 argument)
6109 {
6110 	return common_fcntl(fd, op, argument, true);
6111 }
6112 
6113 
6114 status_t
6115 _kern_fsync(int fd)
6116 {
6117 	return common_sync(fd, true);
6118 }
6119 
6120 
6121 status_t
6122 _kern_lock_node(int fd)
6123 {
6124 	return common_lock_node(fd, true);
6125 }
6126 
6127 
6128 status_t
6129 _kern_unlock_node(int fd)
6130 {
6131 	return common_unlock_node(fd, true);
6132 }
6133 
6134 
6135 status_t
6136 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
6137 {
6138 	return dir_create_entry_ref(device, inode, name, perms, true);
6139 }
6140 
6141 
6142 /**	\brief Creates a directory specified by a FD + path pair.
6143  *
6144  *	\a path must always be specified (it contains the name of the new directory
6145  *	at least). If only a path is given, this path identifies the location at
6146  *	which the directory shall be created. If both \a fd and \a path are given and
6147  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6148  *	of the directory (!) identified by \a fd.
6149  *
6150  *	\param fd The FD. May be < 0.
6151  *	\param path The absolute or relative path. Must not be \c NULL.
6152  *	\param perms The access permissions the new directory shall have.
6153  *	\return \c B_OK, if the directory has been created successfully, another
6154  *			error code otherwise.
6155  */
6156 
6157 status_t
6158 _kern_create_dir(int fd, const char *path, int perms)
6159 {
6160 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6161 	if (pathBuffer.InitCheck() != B_OK)
6162 		return B_NO_MEMORY;
6163 
6164 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6165 }
6166 
6167 
6168 status_t
6169 _kern_remove_dir(int fd, const char *path)
6170 {
6171 	if (path) {
6172 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6173 		if (pathBuffer.InitCheck() != B_OK)
6174 			return B_NO_MEMORY;
6175 
6176 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6177 	}
6178 
6179 	return dir_remove(fd, NULL, true);
6180 }
6181 
6182 
6183 /**	\brief Reads the contents of a symlink referred to by a FD + path pair.
6184  *
6185  *	At least one of \a fd and \a path must be specified.
6186  *	If only \a fd is given, the function the symlink to be read is the node
6187  *	identified by this FD. If only a path is given, this path identifies the
6188  *	symlink to be read. If both are given and the path is absolute, \a fd is
6189  *	ignored; a relative path is reckoned off of the directory (!) identified
6190  *	by \a fd.
6191  *	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6192  *	will still be updated to reflect the required buffer size.
6193  *
6194  *	\param fd The FD. May be < 0.
6195  *	\param path The absolute or relative path. May be \c NULL.
6196  *	\param buffer The buffer into which the contents of the symlink shall be
6197  *		   written.
6198  *	\param _bufferSize A pointer to the size of the supplied buffer.
6199  *	\return The length of the link on success or an appropriate error code
6200  */
6201 
6202 status_t
6203 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6204 {
6205 	status_t status;
6206 
6207 	if (path) {
6208 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6209 		if (pathBuffer.InitCheck() != B_OK)
6210 			return B_NO_MEMORY;
6211 
6212 		return common_read_link(fd, pathBuffer.LockBuffer(),
6213 			buffer, _bufferSize, true);
6214 	}
6215 
6216 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6217 }
6218 
6219 
6220 /**	\brief Creates a symlink specified by a FD + path pair.
6221  *
6222  *	\a path must always be specified (it contains the name of the new symlink
6223  *	at least). If only a path is given, this path identifies the location at
6224  *	which the symlink shall be created. If both \a fd and \a path are given and
6225  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6226  *	of the directory (!) identified by \a fd.
6227  *
6228  *	\param fd The FD. May be < 0.
6229  *	\param toPath The absolute or relative path. Must not be \c NULL.
6230  *	\param mode The access permissions the new symlink shall have.
6231  *	\return \c B_OK, if the symlink has been created successfully, another
6232  *			error code otherwise.
6233  */
6234 
6235 status_t
6236 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6237 {
6238 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6239 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6240 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6241 		return B_NO_MEMORY;
6242 
6243 	char *toBuffer = toPathBuffer.LockBuffer();
6244 
6245 	status_t status = check_path(toBuffer);
6246 	if (status < B_OK)
6247 		return status;
6248 
6249 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6250 		toBuffer, mode, true);
6251 }
6252 
6253 
6254 status_t
6255 _kern_create_link(const char *path, const char *toPath)
6256 {
6257 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6258 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6259 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6260 		return B_NO_MEMORY;
6261 
6262 	return common_create_link(pathBuffer.LockBuffer(),
6263 		toPathBuffer.LockBuffer(), true);
6264 }
6265 
6266 
6267 /**	\brief Removes an entry specified by a FD + path pair from its directory.
6268  *
6269  *	\a path must always be specified (it contains at least the name of the entry
6270  *	to be deleted). If only a path is given, this path identifies the entry
6271  *	directly. If both \a fd and \a path are given and the path is absolute,
6272  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6273  *	identified by \a fd.
6274  *
6275  *	\param fd The FD. May be < 0.
6276  *	\param path The absolute or relative path. Must not be \c NULL.
6277  *	\return \c B_OK, if the entry has been removed successfully, another
6278  *			error code otherwise.
6279  */
6280 
6281 status_t
6282 _kern_unlink(int fd, const char *path)
6283 {
6284 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6285 	if (pathBuffer.InitCheck() != B_OK)
6286 		return B_NO_MEMORY;
6287 
6288 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6289 }
6290 
6291 
6292 /**	\brief Moves an entry specified by a FD + path pair to a an entry specified
6293  *		   by another FD + path pair.
6294  *
6295  *	\a oldPath and \a newPath must always be specified (they contain at least
6296  *	the name of the entry). If only a path is given, this path identifies the
6297  *	entry directly. If both a FD and a path are given and the path is absolute,
6298  *	the FD is ignored; a relative path is reckoned off of the directory (!)
6299  *	identified by the respective FD.
6300  *
6301  *	\param oldFD The FD of the old location. May be < 0.
6302  *	\param oldPath The absolute or relative path of the old location. Must not
6303  *		   be \c NULL.
6304  *	\param newFD The FD of the new location. May be < 0.
6305  *	\param newPath The absolute or relative path of the new location. Must not
6306  *		   be \c NULL.
6307  *	\return \c B_OK, if the entry has been moved successfully, another
6308  *			error code otherwise.
6309  */
6310 
6311 status_t
6312 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6313 {
6314 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6315 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6316 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6317 		return B_NO_MEMORY;
6318 
6319 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6320 		newFD, newPathBuffer.LockBuffer(), true);
6321 }
6322 
6323 
6324 status_t
6325 _kern_access(const char *path, int mode)
6326 {
6327 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6328 	if (pathBuffer.InitCheck() != B_OK)
6329 		return B_NO_MEMORY;
6330 
6331 	return common_access(pathBuffer.LockBuffer(), mode, true);
6332 }
6333 
6334 
6335 /**	\brief Reads stat data of an entity specified by a FD + path pair.
6336  *
6337  *	If only \a fd is given, the stat operation associated with the type
6338  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6339  *	given, this path identifies the entry for whose node to retrieve the
6340  *	stat data. If both \a fd and \a path are given and the path is absolute,
6341  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6342  *	identified by \a fd and specifies the entry whose stat data shall be
6343  *	retrieved.
6344  *
6345  *	\param fd The FD. May be < 0.
6346  *	\param path The absolute or relative path. Must not be \c NULL.
6347  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6348  *		   function shall not stick to symlinks, but traverse them.
6349  *	\param stat The buffer the stat data shall be written into.
6350  *	\param statSize The size of the supplied stat buffer.
6351  *	\return \c B_OK, if the the stat data have been read successfully, another
6352  *			error code otherwise.
6353  */
6354 
6355 status_t
6356 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
6357 	struct stat *stat, size_t statSize)
6358 {
6359 	struct stat completeStat;
6360 	struct stat *originalStat = NULL;
6361 	status_t status;
6362 
6363 	if (statSize > sizeof(struct stat))
6364 		return B_BAD_VALUE;
6365 
6366 	// this supports different stat extensions
6367 	if (statSize < sizeof(struct stat)) {
6368 		originalStat = stat;
6369 		stat = &completeStat;
6370 	}
6371 
6372 	if (path) {
6373 		// path given: get the stat of the node referred to by (fd, path)
6374 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6375 		if (pathBuffer.InitCheck() != B_OK)
6376 			return B_NO_MEMORY;
6377 
6378 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
6379 			traverseLeafLink, stat, true);
6380 	} else {
6381 		// no path given: get the FD and use the FD operation
6382 		struct file_descriptor *descriptor
6383 			= get_fd(get_current_io_context(true), fd);
6384 		if (descriptor == NULL)
6385 			return B_FILE_ERROR;
6386 
6387 		if (descriptor->ops->fd_read_stat)
6388 			status = descriptor->ops->fd_read_stat(descriptor, stat);
6389 		else
6390 			status = EOPNOTSUPP;
6391 
6392 		put_fd(descriptor);
6393 	}
6394 
6395 	if (status == B_OK && originalStat != NULL)
6396 		memcpy(originalStat, stat, statSize);
6397 
6398 	return status;
6399 }
6400 
6401 
6402 /**	\brief Writes stat data of an entity specified by a FD + path pair.
6403  *
6404  *	If only \a fd is given, the stat operation associated with the type
6405  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6406  *	given, this path identifies the entry for whose node to write the
6407  *	stat data. If both \a fd and \a path are given and the path is absolute,
6408  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6409  *	identified by \a fd and specifies the entry whose stat data shall be
6410  *	written.
6411  *
6412  *	\param fd The FD. May be < 0.
6413  *	\param path The absolute or relative path. Must not be \c NULL.
6414  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6415  *		   function shall not stick to symlinks, but traverse them.
6416  *	\param stat The buffer containing the stat data to be written.
6417  *	\param statSize The size of the supplied stat buffer.
6418  *	\param statMask A mask specifying which parts of the stat data shall be
6419  *		   written.
6420  *	\return \c B_OK, if the the stat data have been written successfully,
6421  *			another error code otherwise.
6422  */
6423 
6424 status_t
6425 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
6426 	const struct stat *stat, size_t statSize, int statMask)
6427 {
6428 	struct stat completeStat;
6429 
6430 	if (statSize > sizeof(struct stat))
6431 		return B_BAD_VALUE;
6432 
6433 	// this supports different stat extensions
6434 	if (statSize < sizeof(struct stat)) {
6435 		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
6436 		memcpy(&completeStat, stat, statSize);
6437 		stat = &completeStat;
6438 	}
6439 
6440 	status_t status;
6441 
6442 	if (path) {
6443 		// path given: write the stat of the node referred to by (fd, path)
6444 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6445 		if (pathBuffer.InitCheck() != B_OK)
6446 			return B_NO_MEMORY;
6447 
6448 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
6449 			traverseLeafLink, stat, statMask, true);
6450 	} else {
6451 		// no path given: get the FD and use the FD operation
6452 		struct file_descriptor *descriptor
6453 			= get_fd(get_current_io_context(true), fd);
6454 		if (descriptor == NULL)
6455 			return B_FILE_ERROR;
6456 
6457 		if (descriptor->ops->fd_write_stat)
6458 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
6459 		else
6460 			status = EOPNOTSUPP;
6461 
6462 		put_fd(descriptor);
6463 	}
6464 
6465 	return status;
6466 }
6467 
6468 
6469 int
6470 _kern_open_attr_dir(int fd, const char *path)
6471 {
6472 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6473 	if (pathBuffer.InitCheck() != B_OK)
6474 		return B_NO_MEMORY;
6475 
6476 	if (path != NULL)
6477 		pathBuffer.SetTo(path);
6478 
6479 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6480 }
6481 
6482 
6483 int
6484 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
6485 {
6486 	return attr_create(fd, name, type, openMode, true);
6487 }
6488 
6489 
6490 int
6491 _kern_open_attr(int fd, const char *name, int openMode)
6492 {
6493 	return attr_open(fd, name, openMode, true);
6494 }
6495 
6496 
6497 status_t
6498 _kern_remove_attr(int fd, const char *name)
6499 {
6500 	return attr_remove(fd, name, true);
6501 }
6502 
6503 
6504 status_t
6505 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
6506 {
6507 	return attr_rename(fromFile, fromName, toFile, toName, true);
6508 }
6509 
6510 
6511 int
6512 _kern_open_index_dir(dev_t device)
6513 {
6514 	return index_dir_open(device, true);
6515 }
6516 
6517 
6518 status_t
6519 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
6520 {
6521 	return index_create(device, name, type, flags, true);
6522 }
6523 
6524 
6525 status_t
6526 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
6527 {
6528 	return index_name_read_stat(device, name, stat, true);
6529 }
6530 
6531 
6532 status_t
6533 _kern_remove_index(dev_t device, const char *name)
6534 {
6535 	return index_remove(device, name, true);
6536 }
6537 
6538 
6539 status_t
6540 _kern_getcwd(char *buffer, size_t size)
6541 {
6542 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
6543 
6544 	// Call vfs to get current working directory
6545 	return get_cwd(buffer, size, true);
6546 }
6547 
6548 
6549 status_t
6550 _kern_setcwd(int fd, const char *path)
6551 {
6552 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6553 	if (pathBuffer.InitCheck() != B_OK)
6554 		return B_NO_MEMORY;
6555 
6556 	if (path != NULL)
6557 		pathBuffer.SetTo(path);
6558 
6559 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
6560 }
6561 
6562 
6563 //	#pragma mark -
6564 //	Calls from userland (with extra address checks)
6565 
6566 
6567 dev_t
6568 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
6569 	uint32 flags, const char *userArgs, size_t argsLength)
6570 {
6571 	char fileSystem[B_OS_NAME_LENGTH];
6572 	KPath path, device;
6573 	char *args = NULL;
6574 	status_t status;
6575 
6576 	if (!IS_USER_ADDRESS(userPath)
6577 		|| !IS_USER_ADDRESS(userFileSystem)
6578 		|| !IS_USER_ADDRESS(userDevice))
6579 		return B_BAD_ADDRESS;
6580 
6581 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
6582 		return B_NO_MEMORY;
6583 
6584 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
6585 		return B_BAD_ADDRESS;
6586 
6587 	if (userFileSystem != NULL
6588 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
6589 		return B_BAD_ADDRESS;
6590 
6591 	if (userDevice != NULL
6592 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
6593 		return B_BAD_ADDRESS;
6594 
6595 	if (userArgs != NULL && argsLength > 0) {
6596 		// this is a safety restriction
6597 		if (argsLength >= 65536)
6598 			return B_NAME_TOO_LONG;
6599 
6600 		args = (char *)malloc(argsLength + 1);
6601 		if (args == NULL)
6602 			return B_NO_MEMORY;
6603 
6604 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
6605 			free(args);
6606 			return B_BAD_ADDRESS;
6607 		}
6608 	}
6609 	path.UnlockBuffer();
6610 	device.UnlockBuffer();
6611 
6612 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
6613 		userFileSystem ? fileSystem : NULL, flags, args, false);
6614 
6615 	free(args);
6616 	return status;
6617 }
6618 
6619 
6620 status_t
6621 _user_unmount(const char *userPath, uint32 flags)
6622 {
6623 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6624 	if (pathBuffer.InitCheck() != B_OK)
6625 		return B_NO_MEMORY;
6626 
6627 	char *path = pathBuffer.LockBuffer();
6628 
6629 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6630 		return B_BAD_ADDRESS;
6631 
6632 	return fs_unmount(path, flags, false);
6633 }
6634 
6635 
6636 status_t
6637 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6638 {
6639 	struct fs_info info;
6640 	status_t status;
6641 
6642 	if (userInfo == NULL)
6643 		return B_BAD_VALUE;
6644 
6645 	if (!IS_USER_ADDRESS(userInfo))
6646 		return B_BAD_ADDRESS;
6647 
6648 	status = fs_read_info(device, &info);
6649 	if (status != B_OK)
6650 		return status;
6651 
6652 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6653 		return B_BAD_ADDRESS;
6654 
6655 	return B_OK;
6656 }
6657 
6658 
6659 status_t
6660 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6661 {
6662 	struct fs_info info;
6663 
6664 	if (userInfo == NULL)
6665 		return B_BAD_VALUE;
6666 
6667 	if (!IS_USER_ADDRESS(userInfo)
6668 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
6669 		return B_BAD_ADDRESS;
6670 
6671 	return fs_write_info(device, &info, mask);
6672 }
6673 
6674 
6675 dev_t
6676 _user_next_device(int32 *_userCookie)
6677 {
6678 	int32 cookie;
6679 	dev_t device;
6680 
6681 	if (!IS_USER_ADDRESS(_userCookie)
6682 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
6683 		return B_BAD_ADDRESS;
6684 
6685 	device = fs_next_device(&cookie);
6686 
6687 	if (device >= B_OK) {
6688 		// update user cookie
6689 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
6690 			return B_BAD_ADDRESS;
6691 	}
6692 
6693 	return device;
6694 }
6695 
6696 
6697 status_t
6698 _user_sync(void)
6699 {
6700 	return _kern_sync();
6701 }
6702 
6703 
6704 status_t
6705 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
6706 	size_t infoSize)
6707 {
6708 	struct fd_info info;
6709 	uint32 cookie;
6710 
6711 	// only root can do this (or should root's group be enough?)
6712 	if (geteuid() != 0)
6713 		return B_NOT_ALLOWED;
6714 
6715 	if (infoSize != sizeof(fd_info))
6716 		return B_BAD_VALUE;
6717 
6718 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
6719 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
6720 		return B_BAD_ADDRESS;
6721 
6722 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
6723 	if (status < B_OK)
6724 		return status;
6725 
6726 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
6727 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
6728 		return B_BAD_ADDRESS;
6729 
6730 	return status;
6731 }
6732 
6733 
6734 status_t
6735 _user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
6736 	char *userPath, size_t pathLength)
6737 {
6738 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6739 	if (pathBuffer.InitCheck() != B_OK)
6740 		return B_NO_MEMORY;
6741 
6742 	struct vnode *vnode;
6743 	status_t status;
6744 
6745 	if (!IS_USER_ADDRESS(userPath))
6746 		return B_BAD_ADDRESS;
6747 
6748 	// copy the leaf name onto the stack
6749 	char stackLeaf[B_FILE_NAME_LENGTH];
6750 	if (leaf) {
6751 		if (!IS_USER_ADDRESS(leaf))
6752 			return B_BAD_ADDRESS;
6753 
6754 		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
6755 		if (len < 0)
6756 			return len;
6757 		if (len >= B_FILE_NAME_LENGTH)
6758 			return B_NAME_TOO_LONG;
6759 		leaf = stackLeaf;
6760 
6761 		// filter invalid leaf names
6762 		if (leaf[0] == '\0' || strchr(leaf, '/'))
6763 			return B_BAD_VALUE;
6764 	}
6765 
6766 	// get the vnode matching the dir's node_ref
6767 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
6768 		// special cases "." and "..": we can directly get the vnode of the
6769 		// referenced directory
6770 		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
6771 		leaf = NULL;
6772 	} else
6773 		status = get_vnode(device, inode, &vnode, false);
6774 	if (status < B_OK)
6775 		return status;
6776 
6777 	char *path = pathBuffer.LockBuffer();
6778 
6779 	// get the directory path
6780 	status = dir_vnode_to_path(vnode, path, pathBuffer.BufferSize());
6781 	put_vnode(vnode);
6782 		// we don't need the vnode anymore
6783 	if (status < B_OK)
6784 		return status;
6785 
6786 	// append the leaf name
6787 	if (leaf) {
6788 		// insert a directory separator if this is not the file system root
6789 		if ((strcmp(path, "/") && strlcat(path, "/", pathBuffer.BufferSize())
6790 				>= pathBuffer.BufferSize())
6791 			|| strlcat(path, leaf, pathBuffer.BufferSize()) >= pathBuffer.BufferSize()) {
6792 			return B_NAME_TOO_LONG;
6793 		}
6794 	}
6795 
6796 	int len = user_strlcpy(userPath, path, pathLength);
6797 	if (len < 0)
6798 		return len;
6799 	if (len >= (int)pathLength)
6800 		return B_BUFFER_OVERFLOW;
6801 
6802 	return B_OK;
6803 }
6804 
6805 
6806 int
6807 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
6808 	int openMode, int perms)
6809 {
6810 	char name[B_FILE_NAME_LENGTH];
6811 
6812 	if (userName == NULL || device < 0 || inode < 0)
6813 		return B_BAD_VALUE;
6814 	if (!IS_USER_ADDRESS(userName)
6815 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6816 		return B_BAD_ADDRESS;
6817 
6818 	if (openMode & O_CREAT)
6819 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
6820 
6821 	return file_open_entry_ref(device, inode, name, openMode, false);
6822 }
6823 
6824 
6825 int
6826 _user_open(int fd, const char *userPath, int openMode, int perms)
6827 {
6828 	KPath path(B_PATH_NAME_LENGTH + 1);
6829 	if (path.InitCheck() != B_OK)
6830 		return B_NO_MEMORY;
6831 
6832 	char *buffer = path.LockBuffer();
6833 
6834 	if (!IS_USER_ADDRESS(userPath)
6835 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6836 		return B_BAD_ADDRESS;
6837 
6838 	if (openMode & O_CREAT)
6839 		return file_create(fd, buffer, openMode, perms, false);
6840 
6841 	return file_open(fd, buffer, openMode, false);
6842 }
6843 
6844 
6845 int
6846 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
6847 {
6848 	if (userName != NULL) {
6849 		char name[B_FILE_NAME_LENGTH];
6850 
6851 		if (!IS_USER_ADDRESS(userName)
6852 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6853 			return B_BAD_ADDRESS;
6854 
6855 		return dir_open_entry_ref(device, inode, name, false);
6856 	}
6857 	return dir_open_entry_ref(device, inode, NULL, false);
6858 }
6859 
6860 
6861 int
6862 _user_open_dir(int fd, const char *userPath)
6863 {
6864 	KPath path(B_PATH_NAME_LENGTH + 1);
6865 	if (path.InitCheck() != B_OK)
6866 		return B_NO_MEMORY;
6867 
6868 	char *buffer = path.LockBuffer();
6869 
6870 	if (!IS_USER_ADDRESS(userPath)
6871 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6872 		return B_BAD_ADDRESS;
6873 
6874 	return dir_open(fd, buffer, false);
6875 }
6876 
6877 
6878 /**	\brief Opens a directory's parent directory and returns the entry name
6879  *		   of the former.
6880  *
6881  *	Aside from that is returns the directory's entry name, this method is
6882  *	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
6883  *	equivalent, if \a userName is \c NULL.
6884  *
6885  *	If a name buffer is supplied and the name does not fit the buffer, the
6886  *	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
6887  *
6888  *	\param fd A FD referring to a directory.
6889  *	\param userName Buffer the directory's entry name shall be written into.
6890  *		   May be \c NULL.
6891  *	\param nameLength Size of the name buffer.
6892  *	\return The file descriptor of the opened parent directory, if everything
6893  *			went fine, an error code otherwise.
6894  */
6895 
6896 int
6897 _user_open_parent_dir(int fd, char *userName, size_t nameLength)
6898 {
6899 	bool kernel = false;
6900 
6901 	if (userName && !IS_USER_ADDRESS(userName))
6902 		return B_BAD_ADDRESS;
6903 
6904 	// open the parent dir
6905 	int parentFD = dir_open(fd, "..", kernel);
6906 	if (parentFD < 0)
6907 		return parentFD;
6908 	FDCloser fdCloser(parentFD, kernel);
6909 
6910 	if (userName) {
6911 		// get the vnodes
6912 		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
6913 		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
6914 		VNodePutter parentVNodePutter(parentVNode);
6915 		VNodePutter dirVNodePutter(dirVNode);
6916 		if (!parentVNode || !dirVNode)
6917 			return B_FILE_ERROR;
6918 
6919 		// get the vnode name
6920 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
6921 		struct dirent *buffer = (struct dirent*)_buffer;
6922 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
6923 			sizeof(_buffer));
6924 		if (status != B_OK)
6925 			return status;
6926 
6927 		// copy the name to the userland buffer
6928 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
6929 		if (len < 0)
6930 			return len;
6931 		if (len >= (int)nameLength)
6932 			return B_BUFFER_OVERFLOW;
6933 	}
6934 
6935 	return fdCloser.Detach();
6936 }
6937 
6938 
6939 status_t
6940 _user_fcntl(int fd, int op, uint32 argument)
6941 {
6942 	return common_fcntl(fd, op, argument, false);
6943 }
6944 
6945 
6946 status_t
6947 _user_fsync(int fd)
6948 {
6949 	return common_sync(fd, false);
6950 }
6951 
6952 
6953 status_t
6954 _user_lock_node(int fd)
6955 {
6956 	return common_lock_node(fd, false);
6957 }
6958 
6959 
6960 status_t
6961 _user_unlock_node(int fd)
6962 {
6963 	return common_unlock_node(fd, false);
6964 }
6965 
6966 
6967 status_t
6968 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
6969 {
6970 	char name[B_FILE_NAME_LENGTH];
6971 	status_t status;
6972 
6973 	if (!IS_USER_ADDRESS(userName))
6974 		return B_BAD_ADDRESS;
6975 
6976 	status = user_strlcpy(name, userName, sizeof(name));
6977 	if (status < 0)
6978 		return status;
6979 
6980 	return dir_create_entry_ref(device, inode, name, perms, false);
6981 }
6982 
6983 
6984 status_t
6985 _user_create_dir(int fd, const char *userPath, int perms)
6986 {
6987 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6988 	if (pathBuffer.InitCheck() != B_OK)
6989 		return B_NO_MEMORY;
6990 
6991 	char *path = pathBuffer.LockBuffer();
6992 
6993 	if (!IS_USER_ADDRESS(userPath)
6994 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6995 		return B_BAD_ADDRESS;
6996 
6997 	return dir_create(fd, path, perms, false);
6998 }
6999 
7000 
7001 status_t
7002 _user_remove_dir(int fd, const char *userPath)
7003 {
7004 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7005 	if (pathBuffer.InitCheck() != B_OK)
7006 		return B_NO_MEMORY;
7007 
7008 	char *path = pathBuffer.LockBuffer();
7009 
7010 	if (userPath != NULL) {
7011 		if (!IS_USER_ADDRESS(userPath)
7012 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7013 			return B_BAD_ADDRESS;
7014 	}
7015 
7016 	return dir_remove(fd, userPath ? path : NULL, false);
7017 }
7018 
7019 
7020 status_t
7021 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
7022 {
7023 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
7024 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
7025 		return B_NO_MEMORY;
7026 
7027 	size_t bufferSize;
7028 
7029 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
7030 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
7031 		return B_BAD_ADDRESS;
7032 
7033 	char *path = pathBuffer.LockBuffer();
7034 	char *buffer = linkBuffer.LockBuffer();
7035 
7036 	if (userPath) {
7037 		if (!IS_USER_ADDRESS(userPath)
7038 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7039 			return B_BAD_ADDRESS;
7040 
7041 		if (bufferSize > B_PATH_NAME_LENGTH)
7042 			bufferSize = B_PATH_NAME_LENGTH;
7043 	}
7044 
7045 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
7046 		&bufferSize, false);
7047 
7048 	// we also update the bufferSize in case of errors
7049 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
7050 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
7051 		return B_BAD_ADDRESS;
7052 
7053 	if (status < B_OK)
7054 		return status;
7055 
7056 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
7057 		return B_BAD_ADDRESS;
7058 
7059 	return B_OK;
7060 }
7061 
7062 
7063 status_t
7064 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
7065 	int mode)
7066 {
7067 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7068 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7069 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7070 		return B_NO_MEMORY;
7071 
7072 	char *path = pathBuffer.LockBuffer();
7073 	char *toPath = toPathBuffer.LockBuffer();
7074 
7075 	if (!IS_USER_ADDRESS(userPath)
7076 		|| !IS_USER_ADDRESS(userToPath)
7077 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7078 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7079 		return B_BAD_ADDRESS;
7080 
7081 	status_t status = check_path(toPath);
7082 	if (status < B_OK)
7083 		return status;
7084 
7085 	return common_create_symlink(fd, path, toPath, mode, false);
7086 }
7087 
7088 
7089 status_t
7090 _user_create_link(const char *userPath, const char *userToPath)
7091 {
7092 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7093 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7094 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7095 		return B_NO_MEMORY;
7096 
7097 	char *path = pathBuffer.LockBuffer();
7098 	char *toPath = toPathBuffer.LockBuffer();
7099 
7100 	if (!IS_USER_ADDRESS(userPath)
7101 		|| !IS_USER_ADDRESS(userToPath)
7102 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7103 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7104 		return B_BAD_ADDRESS;
7105 
7106 	status_t status = check_path(toPath);
7107 	if (status < B_OK)
7108 		return status;
7109 
7110 	return common_create_link(path, toPath, false);
7111 }
7112 
7113 
7114 status_t
7115 _user_unlink(int fd, const char *userPath)
7116 {
7117 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7118 	if (pathBuffer.InitCheck() != B_OK)
7119 		return B_NO_MEMORY;
7120 
7121 	char *path = pathBuffer.LockBuffer();
7122 
7123 	if (!IS_USER_ADDRESS(userPath)
7124 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7125 		return B_BAD_ADDRESS;
7126 
7127 	return common_unlink(fd, path, false);
7128 }
7129 
7130 
7131 status_t
7132 _user_rename(int oldFD, const char *userOldPath, int newFD,
7133 	const char *userNewPath)
7134 {
7135 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7136 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7137 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7138 		return B_NO_MEMORY;
7139 
7140 	char *oldPath = oldPathBuffer.LockBuffer();
7141 	char *newPath = newPathBuffer.LockBuffer();
7142 
7143 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7144 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7145 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7146 		return B_BAD_ADDRESS;
7147 
7148 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7149 }
7150 
7151 
7152 status_t
7153 _user_access(const char *userPath, int mode)
7154 {
7155 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7156 	if (pathBuffer.InitCheck() != B_OK)
7157 		return B_NO_MEMORY;
7158 
7159 	char *path = pathBuffer.LockBuffer();
7160 
7161 	if (!IS_USER_ADDRESS(userPath)
7162 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7163 		return B_BAD_ADDRESS;
7164 
7165 	return common_access(path, mode, false);
7166 }
7167 
7168 
7169 status_t
7170 _user_read_stat(int fd, const char *userPath, bool traverseLink,
7171 	struct stat *userStat, size_t statSize)
7172 {
7173 	struct stat stat;
7174 	status_t status;
7175 
7176 	if (statSize > sizeof(struct stat))
7177 		return B_BAD_VALUE;
7178 
7179 	if (!IS_USER_ADDRESS(userStat))
7180 		return B_BAD_ADDRESS;
7181 
7182 	if (userPath) {
7183 		// path given: get the stat of the node referred to by (fd, path)
7184 		if (!IS_USER_ADDRESS(userPath))
7185 			return B_BAD_ADDRESS;
7186 
7187 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7188 		if (pathBuffer.InitCheck() != B_OK)
7189 			return B_NO_MEMORY;
7190 
7191 		char *path = pathBuffer.LockBuffer();
7192 
7193 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7194 		if (length < B_OK)
7195 			return length;
7196 		if (length >= B_PATH_NAME_LENGTH)
7197 			return B_NAME_TOO_LONG;
7198 
7199 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
7200 	} else {
7201 		// no path given: get the FD and use the FD operation
7202 		struct file_descriptor *descriptor
7203 			= get_fd(get_current_io_context(false), fd);
7204 		if (descriptor == NULL)
7205 			return B_FILE_ERROR;
7206 
7207 		if (descriptor->ops->fd_read_stat)
7208 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
7209 		else
7210 			status = EOPNOTSUPP;
7211 
7212 		put_fd(descriptor);
7213 	}
7214 
7215 	if (status < B_OK)
7216 		return status;
7217 
7218 	return user_memcpy(userStat, &stat, statSize);
7219 }
7220 
7221 
7222 status_t
7223 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
7224 	const struct stat *userStat, size_t statSize, int statMask)
7225 {
7226 	if (statSize > sizeof(struct stat))
7227 		return B_BAD_VALUE;
7228 
7229 	struct stat stat;
7230 
7231 	if (!IS_USER_ADDRESS(userStat)
7232 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
7233 		return B_BAD_ADDRESS;
7234 
7235 	// clear additional stat fields
7236 	if (statSize < sizeof(struct stat))
7237 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
7238 
7239 	status_t status;
7240 
7241 	if (userPath) {
7242 		// path given: write the stat of the node referred to by (fd, path)
7243 		if (!IS_USER_ADDRESS(userPath))
7244 			return B_BAD_ADDRESS;
7245 
7246 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7247 		if (pathBuffer.InitCheck() != B_OK)
7248 			return B_NO_MEMORY;
7249 
7250 		char *path = pathBuffer.LockBuffer();
7251 
7252 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7253 		if (length < B_OK)
7254 			return length;
7255 		if (length >= B_PATH_NAME_LENGTH)
7256 			return B_NAME_TOO_LONG;
7257 
7258 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
7259 			statMask, false);
7260 	} else {
7261 		// no path given: get the FD and use the FD operation
7262 		struct file_descriptor *descriptor
7263 			= get_fd(get_current_io_context(false), fd);
7264 		if (descriptor == NULL)
7265 			return B_FILE_ERROR;
7266 
7267 		if (descriptor->ops->fd_write_stat)
7268 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
7269 		else
7270 			status = EOPNOTSUPP;
7271 
7272 		put_fd(descriptor);
7273 	}
7274 
7275 	return status;
7276 }
7277 
7278 
7279 int
7280 _user_open_attr_dir(int fd, const char *userPath)
7281 {
7282 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7283 	if (pathBuffer.InitCheck() != B_OK)
7284 		return B_NO_MEMORY;
7285 
7286 	char *path = pathBuffer.LockBuffer();
7287 
7288 	if (userPath != NULL) {
7289 		if (!IS_USER_ADDRESS(userPath)
7290 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7291 			return B_BAD_ADDRESS;
7292 	}
7293 
7294 	return attr_dir_open(fd, userPath ? path : NULL, false);
7295 }
7296 
7297 
7298 int
7299 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7300 {
7301 	char name[B_FILE_NAME_LENGTH];
7302 
7303 	if (!IS_USER_ADDRESS(userName)
7304 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7305 		return B_BAD_ADDRESS;
7306 
7307 	return attr_create(fd, name, type, openMode, false);
7308 }
7309 
7310 
7311 int
7312 _user_open_attr(int fd, const char *userName, int openMode)
7313 {
7314 	char name[B_FILE_NAME_LENGTH];
7315 
7316 	if (!IS_USER_ADDRESS(userName)
7317 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7318 		return B_BAD_ADDRESS;
7319 
7320 	return attr_open(fd, name, openMode, false);
7321 }
7322 
7323 
7324 status_t
7325 _user_remove_attr(int fd, const char *userName)
7326 {
7327 	char name[B_FILE_NAME_LENGTH];
7328 
7329 	if (!IS_USER_ADDRESS(userName)
7330 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7331 		return B_BAD_ADDRESS;
7332 
7333 	return attr_remove(fd, name, false);
7334 }
7335 
7336 
7337 status_t
7338 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
7339 {
7340 	if (!IS_USER_ADDRESS(userFromName)
7341 		|| !IS_USER_ADDRESS(userToName))
7342 		return B_BAD_ADDRESS;
7343 
7344 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
7345 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
7346 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
7347 		return B_NO_MEMORY;
7348 
7349 	char *fromName = fromNameBuffer.LockBuffer();
7350 	char *toName = toNameBuffer.LockBuffer();
7351 
7352 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
7353 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
7354 		return B_BAD_ADDRESS;
7355 
7356 	return attr_rename(fromFile, fromName, toFile, toName, false);
7357 }
7358 
7359 
7360 int
7361 _user_open_index_dir(dev_t device)
7362 {
7363 	return index_dir_open(device, false);
7364 }
7365 
7366 
7367 status_t
7368 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7369 {
7370 	char name[B_FILE_NAME_LENGTH];
7371 
7372 	if (!IS_USER_ADDRESS(userName)
7373 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7374 		return B_BAD_ADDRESS;
7375 
7376 	return index_create(device, name, type, flags, false);
7377 }
7378 
7379 
7380 status_t
7381 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
7382 {
7383 	char name[B_FILE_NAME_LENGTH];
7384 	struct stat stat;
7385 	status_t status;
7386 
7387 	if (!IS_USER_ADDRESS(userName)
7388 		|| !IS_USER_ADDRESS(userStat)
7389 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7390 		return B_BAD_ADDRESS;
7391 
7392 	status = index_name_read_stat(device, name, &stat, false);
7393 	if (status == B_OK) {
7394 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
7395 			return B_BAD_ADDRESS;
7396 	}
7397 
7398 	return status;
7399 }
7400 
7401 
7402 status_t
7403 _user_remove_index(dev_t device, const char *userName)
7404 {
7405 	char name[B_FILE_NAME_LENGTH];
7406 
7407 	if (!IS_USER_ADDRESS(userName)
7408 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7409 		return B_BAD_ADDRESS;
7410 
7411 	return index_remove(device, name, false);
7412 }
7413 
7414 
7415 status_t
7416 _user_getcwd(char *userBuffer, size_t size)
7417 {
7418 	if (!IS_USER_ADDRESS(userBuffer))
7419 		return B_BAD_ADDRESS;
7420 
7421 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7422 	if (pathBuffer.InitCheck() != B_OK)
7423 		return B_NO_MEMORY;
7424 
7425 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
7426 
7427 	if (size > B_PATH_NAME_LENGTH)
7428 		size = B_PATH_NAME_LENGTH;
7429 
7430 	char *path = pathBuffer.LockBuffer();
7431 
7432 	status_t status = get_cwd(path, size, false);
7433 	if (status < B_OK)
7434 		return status;
7435 
7436 	// Copy back the result
7437 	if (user_strlcpy(userBuffer, path, size) < B_OK)
7438 		return B_BAD_ADDRESS;
7439 
7440 	return status;
7441 }
7442 
7443 
7444 status_t
7445 _user_setcwd(int fd, const char *userPath)
7446 {
7447 	TRACE(("user_setcwd: path = %p\n", userPath));
7448 
7449 	KPath pathBuffer(B_PATH_NAME_LENGTH);
7450 	if (pathBuffer.InitCheck() != B_OK)
7451 		return B_NO_MEMORY;
7452 
7453 	char *path = pathBuffer.LockBuffer();
7454 
7455 	if (userPath != NULL) {
7456 		if (!IS_USER_ADDRESS(userPath)
7457 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7458 			return B_BAD_ADDRESS;
7459 	}
7460 
7461 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
7462 }
7463 
7464 
7465 int
7466 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
7467 	uint32 flags, port_id port, int32 token)
7468 {
7469 	char *query;
7470 
7471 	if (device < 0 || userQuery == NULL || queryLength == 0)
7472 		return B_BAD_VALUE;
7473 
7474 	// this is a safety restriction
7475 	if (queryLength >= 65536)
7476 		return B_NAME_TOO_LONG;
7477 
7478 	query = (char *)malloc(queryLength + 1);
7479 	if (query == NULL)
7480 		return B_NO_MEMORY;
7481 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
7482 		free(query);
7483 		return B_BAD_ADDRESS;
7484 	}
7485 
7486 	int fd = query_open(device, query, flags, port, token, false);
7487 
7488 	free(query);
7489 	return fd;
7490 }
7491