xref: /haiku/src/system/kernel/fs/vfs.cpp (revision b9a5b9a6ee494261f2882bfc0ee9fde92282bef6)
1 /*
2  * Copyright 2002-2007, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /* Virtual File System and File System Interface Layer */
10 
11 #include <OS.h>
12 #include <StorageDefs.h>
13 #include <fs_info.h>
14 #include <fs_interface.h>
15 #include <fs_volume.h>
16 
17 #include <block_cache.h>
18 #include <fd.h>
19 #include <file_cache.h>
20 #include <khash.h>
21 #include <KPath.h>
22 #include <lock.h>
23 #include <syscalls.h>
24 #include <vfs.h>
25 #include <vm.h>
26 #include <vm_cache.h>
27 #include <vm_low_memory.h>
28 
29 #include <boot/kernel_args.h>
30 #include <disk_device_manager/KDiskDevice.h>
31 #include <disk_device_manager/KDiskDeviceManager.h>
32 #include <disk_device_manager/KDiskDeviceUtils.h>
33 #include <disk_device_manager/KDiskSystem.h>
34 #include <fs/node_monitor.h>
35 #include <util/kernel_cpp.h>
36 
37 #include <string.h>
38 #include <stdio.h>
39 #include <ctype.h>
40 #include <unistd.h>
41 #include <sys/stat.h>
42 #include <sys/resource.h>
43 #include <fcntl.h>
44 #include <limits.h>
45 #include <stddef.h>
46 
47 //#define TRACE_VFS
48 #ifdef TRACE_VFS
49 #	define TRACE(x) dprintf x
50 #	define FUNCTION(x) dprintf x
51 #else
52 #	define TRACE(x) ;
53 #	define FUNCTION(x) ;
54 #endif
55 
56 #define ADD_DEBUGGER_COMMANDS
57 
58 const static uint32 kMaxUnusedVnodes = 8192;
59 	// This is the maximum number of unused vnodes that the system
60 	// will keep around (weak limit, if there is enough memory left,
61 	// they won't get flushed even when hitting that limit).
62 	// It may be chosen with respect to the available memory or enhanced
63 	// by some timestamp/frequency heurism.
64 
struct vnode {
	struct vnode	*next;			// chain link used by the hash table -- TODO confirm (khash internal)
	vm_cache_ref	*cache;			// attached file cache, if any; released in free_vnode()
	mount_id		device;			// ID of the volume this node lives on (immutable)
	list_link		mount_link;		// link in fs_mount::vnodes
	list_link		unused_link;	// link in sUnusedVnodeList while ref_count == 0
	vnode_id		id;				// node ID, unique within its volume (immutable)
	fs_vnode		private_node;	// the file system's private node handle (immutable)
	struct fs_mount	*mount;			// the mount this node belongs to (immutable)
	struct vnode	*covered_by;	// vnode of a volume mounted on top of this one, if any
	int32			ref_count;		// reference count, modified via atomic_add()
	uint8			remove : 1;		// remove the node's resources once the last ref is gone
	uint8			busy : 1;		// node is being constructed/destructed -- do not touch
	uint8			unpublished : 1;	// node was created but has not been published yet
	struct advisory_locking	*advisory_locking;	// advisory file locks, if any
	struct file_descriptor *mandatory_locked_by;	// descriptor holding a mandatory lock
};
82 
// Lookup key for sVnodeTable: a (volume ID, node ID) pair.
struct vnode_hash_key {
	mount_id	device;
	vnode_id	vnode;
};
87 
// Shorthands for invoking a file system hook through a vnode's mount
// (resp. through the mount directly).
#define FS_CALL(vnode, op) (vnode->mount->fs->op)
#define FS_MOUNT_CALL(mount, op) (mount->fs->op)
90 
91 /**	\brief Structure to manage a mounted file system
92 
93 	Note: The root_vnode and covers_vnode fields (what others?) are
94 	initialized in fs_mount() and not changed afterwards. That is as soon
95 	as the mount is mounted and it is made sure it won't be unmounted
96 	(e.g. by holding a reference to a vnode of that mount) (read) access
97 	to those fields is always safe, even without additional locking. Morever
98 	while mounted the mount holds a reference to the covers_vnode, and thus
99 	making the access path vnode->mount->covers_vnode->mount->... safe if a
100 	reference to vnode is held (note that for the root mount covers_vnode
101 	is NULL, though).
102  */
struct fs_mount {
	struct fs_mount	*next;			// chain link used by the hash table -- TODO confirm (khash internal)
	file_system_module_info *fs;	// the file system module backing this mount
	mount_id		id;				// unique ID of this mount
	void			*cookie;		// the file system's private volume handle
	char			*device_name;	// name of the device mounted from, if any
	char			*fs_name;		// file system name (cf. get_file_system_name())
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;	// root of the volume; set in fs_mount(), then immutable
	struct vnode	*covers_vnode;	// vnode this volume is mounted on (NULL for the root mount)
	KPartition		*partition;		// underlying disk partition, if any
	struct list		vnodes;			// all vnodes of this mount, guarded by rlock
	bool			unmounting;		// set while the volume is being unmounted
	bool			owns_file_device;	// presumably: mount created the file device itself -- verify
};
118 
// Per-vnode bookkeeping for advisory file locks.
struct advisory_locking {
	sem_id			lock;		// serializes access to this structure (acquired/released
								// via get_/put_advisory_locking())
	sem_id			wait_sem;	// threads blocked on a colliding lock wait here
	struct list		locks;		// list of advisory_lock entries
};
124 
// A single advisory lock entry within advisory_locking::locks.
struct advisory_lock {
	list_link		link;		// link in advisory_locking::locks
	team_id			team;		// team that owns the lock
	off_t			offset;		// start of the locked range
	off_t			length;		// length of the locked range
	bool			shared;		// shared (read, F_RDLCK) rather than exclusive lock
};
132 
133 static mutex sFileSystemsMutex;
134 
135 /**	\brief Guards sMountsTable.
136  *
137  *	The holder is allowed to read/write access the sMountsTable.
138  *	Manipulation of the fs_mount structures themselves
139  *	(and their destruction) requires different locks though.
140  */
141 static mutex sMountMutex;
142 
143 /**	\brief Guards mount/unmount operations.
144  *
145  *	The fs_mount() and fs_unmount() hold the lock during their whole operation.
146  *	That is locking the lock ensures that no FS is mounted/unmounted. In
147  *	particular this means that
148  *	- sMountsTable will not be modified,
149  *	- the fields immutable after initialization of the fs_mount structures in
150  *	  sMountsTable will not be modified,
151  *	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
152  *
153  *	The thread trying to lock the lock must not hold sVnodeMutex or
154  *	sMountMutex.
155  */
156 static recursive_lock sMountOpLock;
157 
158 /**	\brief Guards the vnode::covered_by field of any vnode
159  *
160  *	The holder is allowed to read access the vnode::covered_by field of any
161  *	vnode. Additionally holding sMountOpLock allows for write access.
162  *
163  *	The thread trying to lock the must not hold sVnodeMutex.
164  */
165 static mutex sVnodeCoveredByMutex;
166 
167 /**	\brief Guards sVnodeTable.
168  *
169  *	The holder is allowed to read/write access sVnodeTable and to
170  *	to any unbusy vnode in that table, save
171  *	to the immutable fields (device, id, private_node, mount) to which
172  *	only read-only access is allowed, and to the field covered_by, which is
173  *	guarded by sMountOpLock and sVnodeCoveredByMutex.
174  *
175  *	The thread trying to lock the mutex must not hold sMountMutex.
176  *	You must not have this mutex held when calling create_sem(), as this
177  *	might call vfs_free_unused_vnodes().
178  */
179 static mutex sVnodeMutex;
180 
181 #define VNODE_HASH_TABLE_SIZE 1024
182 static hash_table *sVnodeTable;
183 static list sUnusedVnodeList;
184 static uint32 sUnusedVnodes = 0;
185 static struct vnode *sRoot;
186 
187 #define MOUNTS_HASH_TABLE_SIZE 16
188 static hash_table *sMountsTable;
189 static mount_id sNextMountID = 1;
190 
191 mode_t __gUmask = 022;
192 
193 /* function declarations */
194 
195 // file descriptor operation prototypes
196 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
197 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
198 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
199 static void file_free_fd(struct file_descriptor *);
200 static status_t file_close(struct file_descriptor *);
201 static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
202 	struct select_sync *sync);
203 static status_t file_deselect(struct file_descriptor *, uint8 event,
204 	struct select_sync *sync);
205 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
206 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
207 static status_t dir_rewind(struct file_descriptor *);
208 static void dir_free_fd(struct file_descriptor *);
209 static status_t dir_close(struct file_descriptor *);
210 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
211 static status_t attr_dir_rewind(struct file_descriptor *);
212 static void attr_dir_free_fd(struct file_descriptor *);
213 static status_t attr_dir_close(struct file_descriptor *);
214 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
215 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
216 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
217 static void attr_free_fd(struct file_descriptor *);
218 static status_t attr_close(struct file_descriptor *);
219 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
220 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
221 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
222 static status_t index_dir_rewind(struct file_descriptor *);
223 static void index_dir_free_fd(struct file_descriptor *);
224 static status_t index_dir_close(struct file_descriptor *);
225 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
226 static status_t query_rewind(struct file_descriptor *);
227 static void query_free_fd(struct file_descriptor *);
228 static status_t query_close(struct file_descriptor *);
229 
230 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
231 static status_t common_read_stat(struct file_descriptor *, struct stat *);
232 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
233 
234 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
235 	bool traverseLeafLink, int count, struct vnode **_vnode, vnode_id *_parentID, int *_type);
236 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
237 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
238 	struct vnode **_vnode, vnode_id *_parentID, bool kernel);
239 static void inc_vnode_ref_count(struct vnode *vnode);
240 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
241 static inline void put_vnode(struct vnode *vnode);
242 
// Descriptor operations for regular file FDs.
static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};
257 
// Descriptor operations for directory FDs.
static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};
272 
// Descriptor operations for attribute directory FDs.
static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};
287 
// Descriptor operations for (single) attribute FDs.
static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};
302 
// Descriptor operations for index directory FDs.
static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};
317 
318 #if 0
319 static struct fd_ops sIndexOps = {
320 	NULL,		// read()
321 	NULL,		// write()
322 	NULL,		// seek()
323 	NULL,		// ioctl()
324 	NULL,		// select()
325 	NULL,		// deselect()
326 	NULL,		// dir_read()
327 	NULL,		// dir_rewind()
328 	index_read_stat,	// read_stat()
329 	NULL,		// write_stat()
330 	NULL,		// dir_close()
331 	NULL		// free_fd()
332 };
333 #endif
334 
// Descriptor operations for query FDs.
static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};
349 
350 
351 // VNodePutter
352 class VNodePutter {
353 public:
354 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
355 
356 	~VNodePutter()
357 	{
358 		Put();
359 	}
360 
361 	void SetTo(struct vnode *vnode)
362 	{
363 		Put();
364 		fVNode = vnode;
365 	}
366 
367 	void Put()
368 	{
369 		if (fVNode) {
370 			put_vnode(fVNode);
371 			fVNode = NULL;
372 		}
373 	}
374 
375 	struct vnode *Detach()
376 	{
377 		struct vnode *vnode = fVNode;
378 		fVNode = NULL;
379 		return vnode;
380 	}
381 
382 private:
383 	struct vnode *fVNode;
384 };
385 
386 
387 class FDCloser {
388 public:
389 	FDCloser() : fFD(-1), fKernel(true) {}
390 
391 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
392 
393 	~FDCloser()
394 	{
395 		Close();
396 	}
397 
398 	void SetTo(int fd, bool kernel)
399 	{
400 		Close();
401 		fFD = fd;
402 		fKernel = kernel;
403 	}
404 
405 	void Close()
406 	{
407 		if (fFD >= 0) {
408 			if (fKernel)
409 				_kern_close(fFD);
410 			else
411 				_user_close(fFD);
412 			fFD = -1;
413 		}
414 	}
415 
416 	int Detach()
417 	{
418 		int fd = fFD;
419 		fFD = -1;
420 		return fd;
421 	}
422 
423 private:
424 	int		fFD;
425 	bool	fKernel;
426 };
427 
428 
429 static int
430 mount_compare(void *_m, const void *_key)
431 {
432 	struct fs_mount *mount = (fs_mount *)_m;
433 	const mount_id *id = (mount_id *)_key;
434 
435 	if (mount->id == *id)
436 		return 0;
437 
438 	return -1;
439 }
440 
441 
442 static uint32
443 mount_hash(void *_m, const void *_key, uint32 range)
444 {
445 	struct fs_mount *mount = (fs_mount *)_m;
446 	const mount_id *id = (mount_id *)_key;
447 
448 	if (mount)
449 		return mount->id % range;
450 
451 	return (uint32)*id % range;
452 }
453 
454 
/** Finds the mounted device (the fs_mount structure) with the given ID.
 *	Note, you must hold the sMountMutex lock when you call this function.
 *	Returns NULL if no mount with that ID exists.
 */

static struct fs_mount *
find_mount(mount_id id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}
466 
467 
468 static status_t
469 get_mount(mount_id id, struct fs_mount **_mount)
470 {
471 	struct fs_mount *mount;
472 	status_t status;
473 
474 	mutex_lock(&sMountMutex);
475 
476 	mount = find_mount(id);
477 	if (mount) {
478 		// ToDo: the volume is locked (against removal) by locking
479 		//	its root node - investigate if that's a good idea
480 		if (mount->root_vnode)
481 			inc_vnode_ref_count(mount->root_vnode);
482 		else {
483 			// might have been called during a mount operation in which
484 			// case the root node may still be NULL
485 			mount = NULL;
486 		}
487 	} else
488 		status = B_BAD_VALUE;
489 
490 	mutex_unlock(&sMountMutex);
491 
492 	if (mount == NULL)
493 		return B_BUSY;
494 
495 	*_mount = mount;
496 	return B_OK;
497 }
498 
499 
500 static void
501 put_mount(struct fs_mount *mount)
502 {
503 	if (mount)
504 		put_vnode(mount->root_vnode);
505 }
506 
507 
/**	Releases the module reference obtained via get_file_system(). */

static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}
513 
514 
515 /**	Tries to open the specified file system module.
516  *	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
517  *	Returns a pointer to file system module interface, or NULL if it
518  *	could not open the module.
519  */
520 
521 static file_system_module_info *
522 get_file_system(const char *fsName)
523 {
524 	char name[B_FILE_NAME_LENGTH];
525 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
526 		// construct module name if we didn't get one
527 		// (we currently support only one API)
528 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
529 		fsName = NULL;
530 	}
531 
532 	file_system_module_info *info;
533 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
534 		return NULL;
535 
536 	return info;
537 }
538 
539 
// Makes a heap-allocated copy of the first \a length characters of
// \a string plus a null terminator. Returns NULL if out of memory.
static char *
copy_string_portion(const char *string, size_t length)
{
	char *copy = (char *)malloc(length + 1);
	if (copy == NULL)
		return NULL;

	memcpy(copy, string, length);
	copy[length] = '\0';
	return copy;
}


/**	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
 *	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
 *	The name is allocated for you, and you have to free() it when you're
 *	done with it.
 *	Returns NULL if the required memory is not available.
 */

static char *
get_file_system_name(const char *fsName)
{
	const size_t prefixLength = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", prefixLength) != 0) {
		// no module path prefix; assume it's already the short name
		return copy_string_portion(fsName, strlen(fsName));
	}

	const char *shortName = fsName + prefixLength;
	const char *slash = strchr(shortName, '/');
	if (slash == NULL) {
		// this doesn't seem to be a valid name, but well...
		return copy_string_portion(shortName, strlen(shortName));
	}

	// cut off the trailing /v1
	return copy_string_portion(shortName, slash - shortName);
}
573 
574 
575 static int
576 vnode_compare(void *_vnode, const void *_key)
577 {
578 	struct vnode *vnode = (struct vnode *)_vnode;
579 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
580 
581 	if (vnode->device == key->device && vnode->id == key->vnode)
582 		return 0;
583 
584 	return -1;
585 }
586 
587 
588 static uint32
589 vnode_hash(void *_vnode, const void *_key, uint32 range)
590 {
591 	struct vnode *vnode = (struct vnode *)_vnode;
592 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
593 
594 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
595 
596 	if (vnode != NULL)
597 		return VHASH(vnode->device, vnode->id) % range;
598 
599 	return VHASH(key->device, key->vnode) % range;
600 
601 #undef VHASH
602 }
603 
604 
/**	Adds \a vnode to its mount's vnode list (fs_mount::vnodes), which is
 *	guarded by the mount's rlock.
 */

static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}
614 
615 
/**	Removes \a vnode from its mount's vnode list and clears the link
 *	pointers. The list is guarded by the mount's rlock.
 */

static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}
626 
627 
/**	Allocates a fresh vnode for the given (mount, node) ID pair, inserts it
 *	into sVnodeTable, and adds it to its mount's vnode list.
 *	The node starts out with a reference count of 1; all fields other than
 *	device/id/mount are zero-initialized.
 *	NOTE(review): the caller apparently has to hold sVnodeMutex, since
 *	sVnodeTable is modified here (get_vnode() does so) -- confirm.
 *	Fails with B_ENTRY_NOT_FOUND if the mount no longer exists or is being
 *	unmounted.
 */

static status_t
create_new_vnode(struct vnode **_vnode, mount_id mountID, vnode_id vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		// the mount is gone (or going) -- we cannot create nodes for it
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}
661 
662 
/**	Frees the vnode and all resources it has acquired, and removes
 *	it from the vnode hash as well as from its mount structure.
 *	Will also make sure that any cache modifications are written back,
 *	unless the node is marked for removal (in which case they are
 *	discarded).
 *	The caller must not hold sVnodeMutex; the node must be busy with a
 *	reference count of zero (asserted below).
 */

static void
free_vnode(struct vnode *vnode, bool reenter)
{
	ASSERT(vnode->ref_count == 0 && vnode->busy);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

	// let the file system dispose of its private node -- remove_vnode()
	// deletes the on-disk entity, put_vnode() just releases it
	if (!vnode->unpublished) {
		if (vnode->remove)
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
		else
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
	}

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}
703 
704 
705 /**	\brief Decrements the reference counter of the given vnode and deletes it,
706  *	if the counter dropped to 0.
707  *
708  *	The caller must, of course, own a reference to the vnode to call this
709  *	function.
710  *	The caller must not hold the sVnodeMutex or the sMountMutex.
711  *
712  *	\param vnode the vnode.
713  *	\param reenter \c true, if this function is called (indirectly) from within
714  *		   a file system.
715  *	\return \c B_OK, if everything went fine, an error code otherwise.
716  */
717 
static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	mutex_lock(&sVnodeMutex);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		// we just released the last reference
		if (vnode->busy)
			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			// marked for deletion -- destroy it right away
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes
				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				// Note: this reassigns "vnode" -- the node freed below is the
				// head of the unused list, not necessarily the one this
				// function was called for.
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		mutex_unlock(&sVnodeMutex);

		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}
760 
761 
762 /**	\brief Increments the reference counter of the given vnode.
763  *
764  *	The caller must either already have a reference to the vnode or hold
765  *	the sVnodeMutex.
766  *
767  *	\param vnode the vnode.
768  */
769 
static void
inc_vnode_ref_count(struct vnode *vnode)
{
	atomic_add(&vnode->ref_count, 1);
	// note: the TRACE output reads ref_count without synchronization, so the
	// printed value is only approximate
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}
776 
777 
778 /**	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
779  *
780  *	The caller must hold the sVnodeMutex.
781  *
782  *	\param mountID the mount ID.
783  *	\param vnodeID the node ID.
784  *
785  *	\return The vnode structure, if it was found in the hash table, \c NULL
786  *			otherwise.
787  */
788 
static struct vnode *
lookup_vnode(mount_id mountID, vnode_id vnodeID)
{
	// build the hash key from the (volume, node) ID pair
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode *)hash_lookup(sVnodeTable, &key);
}
799 
800 
801 /**	\brief Retrieves a vnode for a given mount ID, node ID pair.
802  *
803  *	If the node is not yet in memory, it will be loaded.
804  *
805  *	The caller must not hold the sVnodeMutex or the sMountMutex.
806  *
807  *	\param mountID the mount ID.
808  *	\param vnodeID the node ID.
809  *	\param _vnode Pointer to a vnode* variable into which the pointer to the
810  *		   retrieved vnode structure shall be written.
811  *	\param reenter \c true, if this function is called (indirectly) from within
812  *		   a file system.
813  *	\return \c B_OK, if everything when fine, an error code otherwise.
814  */
815 
static status_t
get_vnode(mount_id mountID, vnode_id vnodeID, struct vnode **_vnode, int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

	int32 tries = 300;
		// try for 3 secs
restart:
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		// someone else is currently constructing or destructing this node;
		// drop the mutex and poll until it becomes usable
		mutex_unlock(&sVnodeMutex);
		if (--tries < 0) {
			// vnode doesn't seem to become unbusy
			panic("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
			return B_BUSY;
		}
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		// mark the node busy while the FS reads it in; the mutex is
		// released so other threads see (and wait for) the busy node
		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID, &vnode->private_node, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	// undo create_new_vnode()'s insertions before freeing the node
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}
888 
889 
890 /**	\brief Decrements the reference counter of the given vnode and deletes it,
891  *	if the counter dropped to 0.
892  *
893  *	The caller must, of course, own a reference to the vnode to call this
894  *	function.
895  *	The caller must not hold the sVnodeMutex or the sMountMutex.
896  *
897  *	\param vnode the vnode.
898  */
899 
static inline void
put_vnode(struct vnode *vnode)
{
	// convenience wrapper: release a reference from outside a file system
	dec_vnode_ref_count(vnode, false);
}
905 
906 
/**	Low memory handler: frees a portion of the unused vnodes, scaled by how
 *	severe the memory shortage is (1% on a note, 10% on a warning, all of
 *	them when critical).
 */

static void
vnode_low_memory_handler(void */*data*/, int32 level)
{
	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));

	int32 count = 1;
	switch (level) {
		case B_NO_LOW_MEMORY:
			return;
		case B_LOW_MEMORY_NOTE:
			count = sUnusedVnodes / 100;
			break;
		case B_LOW_MEMORY_WARNING:
			count = sUnusedVnodes / 10;
			break;
		case B_LOW_MEMORY_CRITICAL:
			count = sUnusedVnodes;
			break;
	}

	for (int32 i = 0; i < count; i++) {
		mutex_lock(&sVnodeMutex);

		// always evict the oldest unused node first
		struct vnode *vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}
		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));

		// mark it busy so nobody picks it up while we free it unlocked
		vnode->busy = true;
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		free_vnode(vnode, false);
	}
}
945 
946 
/**	Unlocks an advisory_locking structure that was returned locked by
 *	get_advisory_locking() or create_advisory_locking(), by releasing its
 *	lock semaphore.
 */

static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}
952 
953 
954 /**	Returns the advisory_locking object of the \a vnode in case it
955  *	has one, and locks it.
956  *	You have to call put_advisory_locking() when you're done with
957  *	it.
958  *	Note, you must not have the vnode mutex locked when calling
959  *	this function.
960  */
961 
static struct vnode *
get_advisory_locking(struct vnode *vnode)
{
	mutex_lock(&sVnodeMutex);

	// Grab the lock semaphore's ID while holding the mutex; the structure
	// itself may be deleted as soon as we let go of the mutex, but acquiring
	// a deleted semaphore simply fails below.
	struct advisory_locking *locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	mutex_unlock(&sVnodeMutex);

	if (lock >= B_OK)
		lock = acquire_sem(lock);
	if (lock < B_OK) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
983 
984 
985 /**	Creates a locked advisory_locking object, and attaches it to the
986  *	given \a vnode.
987  *	Returns B_OK in case of success - also if the vnode got such an
988  *	object from someone else in the mean time, you'll still get this
989  *	one locked then.
990  */
991 
static status_t
create_advisory_locking(struct vnode *vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	struct advisory_locking *locking = (struct advisory_locking *)malloc(
		sizeof(struct advisory_locking));
	if (locking == NULL)
		return B_NO_MEMORY;

	status_t status;

	// semaphore that threads waiting for a colliding lock will block on
	locking->wait_sem = create_sem(0, "advisory lock");
	if (locking->wait_sem < B_OK) {
		status = locking->wait_sem;
		goto err1;
	}

	// created with count 0, i.e. the structure starts out locked by us
	locking->lock = create_sem(0, "advisory locking");
	if (locking->lock < B_OK) {
		status = locking->lock;
		goto err2;
	}

	list_init(&locking->locks);

	// We need to set the locking structure atomically - someone
	// else might set one at the same time
	do {
		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking, (addr_t)locking,
				NULL) == NULL)
			return B_OK;
			// success: our structure is attached to the vnode and still locked
	} while (get_advisory_locking(vnode) == NULL);
		// somebody else won the race -- retry until we manage to lock their
		// structure (it might get deleted again in the mean time)

	status = B_OK;
		// we delete the one we've just created, but nevertheless, the vnode
		// does have a locking structure now

	// Note: deliberate fall-through -- the error cleanup labels below double
	// as the tear-down path for our now superfluous structure.
	delete_sem(locking->lock);
err2:
	delete_sem(locking->wait_sem);
err1:
	free(locking);
	return status;
}
1038 
1039 
1040 /**	Retrieves the first lock that has been set by the current team.
1041  */
1042 
1043 static status_t
1044 get_advisory_lock(struct vnode *vnode, struct flock *flock)
1045 {
1046 	struct advisory_locking *locking = get_advisory_locking(vnode);
1047 	if (locking == NULL)
1048 		return B_BAD_VALUE;
1049 
1050 	// TODO: this should probably get the flock by its file descriptor!
1051 	team_id team = team_get_current_team_id();
1052 	status_t status = B_BAD_VALUE;
1053 
1054 	struct advisory_lock *lock = NULL;
1055 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1056 		if (lock->team == team) {
1057 			flock->l_start = lock->offset;
1058 			flock->l_len = lock->length;
1059 			status = B_OK;
1060 			break;
1061 		}
1062 	}
1063 
1064 	put_advisory_locking(locking);
1065 	return status;
1066 }
1067 
1068 
1069 /**	Removes the specified lock, or all locks of the calling team
1070  *	if \a flock is NULL.
1071  */
1072 
1073 static status_t
1074 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1075 {
1076 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1077 
1078 	struct advisory_locking *locking = get_advisory_locking(vnode);
1079 	if (locking == NULL)
1080 		return flock != NULL ? B_BAD_VALUE : B_OK;
1081 
1082 	team_id team = team_get_current_team_id();
1083 
1084 	// find matching lock entry
1085 
1086 	status_t status = B_BAD_VALUE;
1087 	struct advisory_lock *lock = NULL;
1088 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1089 		if (lock->team == team && (flock == NULL || (flock != NULL
1090 			&& lock->offset == flock->l_start
1091 			&& lock->length == flock->l_len))) {
1092 			// we found our lock, free it
1093 			list_remove_item(&locking->locks, lock);
1094 			free(lock);
1095 			status = B_OK;
1096 			break;
1097 		}
1098 	}
1099 
1100 	bool removeLocking = list_is_empty(&locking->locks);
1101 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1102 
1103 	put_advisory_locking(locking);
1104 
1105 	if (status < B_OK)
1106 		return status;
1107 
1108 	if (removeLocking) {
1109 		// we can remove the whole advisory locking structure; it's no longer used
1110 		locking = get_advisory_locking(vnode);
1111 		if (locking != NULL) {
1112 			// the locking could have been changed in the mean time
1113 			if (list_is_empty(&locking->locks)) {
1114 				vnode->advisory_locking = NULL;
1115 
1116 				// we've detached the locking from the vnode, so we can safely delete it
1117 				delete_sem(locking->lock);
1118 				delete_sem(locking->wait_sem);
1119 				free(locking);
1120 			} else {
1121 				// the locking is in use again
1122 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1123 			}
1124 		}
1125 	}
1126 
1127 	return B_OK;
1128 }
1129 
1130 
/**	Installs an advisory lock on \a vnode as described by \a flock.
 *	If \a wait is \c true, the function blocks (interruptibly) until the
 *	colliding lock is released and then retries; otherwise a collision
 *	fails with \c B_PERMISSION_DENIED.
 *	\a flock must already have been normalized (cf. normalize_flock()).
 */

static status_t
acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding file lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	sem_id waitForLock = -1;

	if (locking != NULL) {
		// test for collisions
		struct advisory_lock *lock = NULL;
		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
			// NOTE(review): "<=" treats a lock starting exactly at the end
			// of the requested range as colliding - possibly intended to be
			// "<"; confirm against the intended interval semantics
			if (lock->offset <= flock->l_start + flock->l_len
				&& lock->offset + lock->length > flock->l_start) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		// if we are going to wait, we keep holding locking->lock here:
		// switch_sem_etc() below releases it atomically with the wait
		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = B_PERMISSION_DENIED;
		else {
			// atomically release locking->lock and block on wait_sem; the
			// wait_sem is released by release_advisory_lock() whenever a
			// lock goes away
			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install new lock

	locking = get_advisory_locking(vnode);
	if (locking == NULL) {
		// we need to create a new locking object
		status = create_advisory_locking(vnode);
		if (status < B_OK)
			return status;

		locking = vnode->advisory_locking;
			// we own the locking object, so it can't go away
	}

	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
	if (lock == NULL) {
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	// values must already be normalized when getting here
	lock->offset = flock->l_start;
	lock->length = flock->l_len;
	lock->shared = shared;

	list_add_item(&locking->locks, lock);
	put_advisory_locking(locking);

	return status;
}
1214 
1215 
1216 static status_t
1217 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1218 {
1219 	switch (flock->l_whence) {
1220 		case SEEK_SET:
1221 			break;
1222 		case SEEK_CUR:
1223 			flock->l_start += descriptor->pos;
1224 			break;
1225 		case SEEK_END:
1226 		{
1227 			struct vnode *vnode = descriptor->u.vnode;
1228 			struct stat stat;
1229 			status_t status;
1230 
1231 			if (FS_CALL(vnode, read_stat) == NULL)
1232 				return EOPNOTSUPP;
1233 
1234 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
1235 			if (status < B_OK)
1236 				return status;
1237 
1238 			flock->l_start += stat.st_size;
1239 			break;
1240 		}
1241 		default:
1242 			return B_BAD_VALUE;
1243 	}
1244 
1245 	if (flock->l_start < 0)
1246 		flock->l_start = 0;
1247 	if (flock->l_len == 0)
1248 		flock->l_len = OFF_MAX;
1249 
1250 	// don't let the offset and length overflow
1251 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1252 		flock->l_len = OFF_MAX - flock->l_start;
1253 
1254 	if (flock->l_len < 0) {
1255 		// a negative length reverses the region
1256 		flock->l_start += flock->l_len;
1257 		flock->l_len = -flock->l_len;
1258 	}
1259 
1260 	return B_OK;
1261 }
1262 
1263 
/**	Disconnects all file descriptors that are associated with the
 *	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
 *	\a mount object.
 *
 *	Note, after you've called this function, there might still be ongoing
 *	accesses - they won't be interrupted if they already happened before.
 *	However, any subsequent access will fail.
 *
 *	This is not a cheap function and should be used with care and rarely.
 *	TODO: there is currently no means to stop a blocking read/write!
 */

void
disconnect_mount_or_vnode_fds(struct fs_mount *mount,
	struct vnode *vnodeToDisconnect)
{
	// iterate over all teams and peek into their file descriptors
	int32 nextTeamID = 0;

	while (true) {
		struct io_context *context = NULL;
		sem_id contextMutex = -1;
		struct team *team = NULL;
		team_id lastTeamID;

		// Hold the team lock only long enough to find the next team and
		// remember its io_context/mutex - we must not acquire the context
		// mutex while holding the team spinlock.
		cpu_status state = disable_interrupts();
		GRAB_TEAM_LOCK();

		lastTeamID = peek_next_thread_id();
			// NOTE(review): peeks the next *thread* ID to bound the team ID
			// scan - presumably team and thread IDs share one ID space;
			// confirm
		if (nextTeamID < lastTeamID) {
			// get next valid team
			while (nextTeamID < lastTeamID
				&& !(team = team_get_team_struct_locked(nextTeamID))) {
				nextTeamID++;
			}

			if (team) {
				context = (io_context *)team->io_context;
				contextMutex = context->io_mutex.sem;
				nextTeamID++;
			}
		}

		RELEASE_TEAM_LOCK();
		restore_interrupts(state);

		// no further team was found - we're done
		if (context == NULL)
			break;

		// we now have a context - since we couldn't lock it while having
		// safe access to the team structure, we now need to lock the mutex
		// manually

		if (acquire_sem(contextMutex) != B_OK) {
			// team seems to be gone, go over to the next team
			continue;
		}

		// the team cannot be deleted completely while we're owning its
		// io_context mutex, so we can safely play with it now

		context->io_mutex.holder = thread_get_current_thread_id();
			// record ourselves as holder, since we only grabbed the
			// mutex's semaphore directly

		if (context->cwd != NULL && context->cwd->mount == mount) {
			put_vnode(context->cwd);
				// Note: We're only accessing the pointer, not the vnode itself
				// in the lines below.

			if (context->cwd == mount->root_vnode) {
				// redirect the current working directory to the covered vnode
				context->cwd = mount->covers_vnode;
				inc_vnode_ref_count(context->cwd);
			} else
				context->cwd = NULL;
		}

		for (uint32 i = 0; i < context->table_size; i++) {
			if (struct file_descriptor *descriptor = context->fds[i]) {
				inc_fd_ref_count(descriptor);

				// if this descriptor points at this mount, we
				// need to disconnect it to be able to unmount
				struct vnode *vnode = fd_vnode(descriptor);
				if (vnodeToDisconnect != NULL) {
					if (vnode == vnodeToDisconnect)
						disconnect_fd(descriptor);
				} else if (vnode != NULL && vnode->mount == mount
					|| vnode == NULL && descriptor->u.mount == mount)
					disconnect_fd(descriptor);
					// ("&&" binds tighter than "||": disconnect vnode FDs on
					// this mount, and mount FDs referring to this mount)

				put_fd(descriptor);
			}
		}

		mutex_unlock(&context->io_mutex);
	}
}
1361 
1362 
1363 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1364  *		   by.
1365  *
1366  *	Given an arbitrary vnode, the function checks, whether the node is covered
1367  *	by the root of a volume. If it is the function obtains a reference to the
1368  *	volume root node and returns it.
1369  *
1370  *	\param vnode The vnode in question.
1371  *	\return The volume root vnode the vnode cover is covered by, if it is
1372  *			indeed a mount point, or \c NULL otherwise.
1373  */
1374 
1375 static struct vnode *
1376 resolve_mount_point_to_volume_root(struct vnode *vnode)
1377 {
1378 	if (!vnode)
1379 		return NULL;
1380 
1381 	struct vnode *volumeRoot = NULL;
1382 
1383 	mutex_lock(&sVnodeCoveredByMutex);
1384 	if (vnode->covered_by) {
1385 		volumeRoot = vnode->covered_by;
1386 		inc_vnode_ref_count(volumeRoot);
1387 	}
1388 	mutex_unlock(&sVnodeCoveredByMutex);
1389 
1390 	return volumeRoot;
1391 }
1392 
1393 
1394 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1395  *		   by.
1396  *
1397  *	Given an arbitrary vnode (identified by mount and node ID), the function
1398  *	checks, whether the node is covered by the root of a volume. If it is the
1399  *	function returns the mount and node ID of the volume root node. Otherwise
1400  *	it simply returns the supplied mount and node ID.
1401  *
1402  *	In case of error (e.g. the supplied node could not be found) the variables
1403  *	for storing the resolved mount and node ID remain untouched and an error
1404  *	code is returned.
1405  *
1406  *	\param mountID The mount ID of the vnode in question.
1407  *	\param nodeID The node ID of the vnode in question.
1408  *	\param resolvedMountID Pointer to storage for the resolved mount ID.
1409  *	\param resolvedNodeID Pointer to storage for the resolved node ID.
1410  *	\return
1411  *	- \c B_OK, if everything went fine,
1412  *	- another error code, if something went wrong.
1413  */
1414 
1415 status_t
1416 resolve_mount_point_to_volume_root(mount_id mountID, vnode_id nodeID,
1417 	mount_id *resolvedMountID, vnode_id *resolvedNodeID)
1418 {
1419 	// get the node
1420 	struct vnode *node;
1421 	status_t error = get_vnode(mountID, nodeID, &node, false);
1422 	if (error != B_OK)
1423 		return error;
1424 
1425 	// resolve the node
1426 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1427 	if (resolvedNode) {
1428 		put_vnode(node);
1429 		node = resolvedNode;
1430 	}
1431 
1432 	// set the return values
1433 	*resolvedMountID = node->device;
1434 	*resolvedNodeID = node->id;
1435 
1436 	put_vnode(node);
1437 
1438 	return B_OK;
1439 }
1440 
1441 
1442 /**	\brief Resolves a volume root vnode to the underlying mount point vnode.
1443  *
1444  *	Given an arbitrary vnode, the function checks, whether the node is the
1445  *	root of a volume. If it is (and if it is not "/"), the function obtains
1446  *	a reference to the underlying mount point node and returns it.
1447  *
1448  *	\param vnode The vnode in question (caller must have a reference).
1449  *	\return The mount point vnode the vnode covers, if it is indeed a volume
1450  *			root and not "/", or \c NULL otherwise.
1451  */
1452 
1453 static struct vnode *
1454 resolve_volume_root_to_mount_point(struct vnode *vnode)
1455 {
1456 	if (!vnode)
1457 		return NULL;
1458 
1459 	struct vnode *mountPoint = NULL;
1460 
1461 	struct fs_mount *mount = vnode->mount;
1462 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1463 		mountPoint = mount->covers_vnode;
1464 		inc_vnode_ref_count(mountPoint);
1465 	}
1466 
1467 	return mountPoint;
1468 }
1469 
1470 
1471 /**	\brief Gets the directory path and leaf name for a given path.
1472  *
1473  *	The supplied \a path is transformed to refer to the directory part of
1474  *	the entry identified by the original path, and into the buffer \a filename
1475  *	the leaf name of the original entry is written.
1476  *	Neither the returned path nor the leaf name can be expected to be
1477  *	canonical.
1478  *
1479  *	\param path The path to be analyzed. Must be able to store at least one
1480  *		   additional character.
1481  *	\param filename The buffer into which the leaf name will be written.
1482  *		   Must be of size B_FILE_NAME_LENGTH at least.
1483  *	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1484  *		   name is longer than \c B_FILE_NAME_LENGTH.
1485  */
1486 
1487 static status_t
1488 get_dir_path_and_leaf(char *path, char *filename)
1489 {
1490 	char *p = strrchr(path, '/');
1491 		// '/' are not allowed in file names!
1492 
1493 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1494 
1495 	if (!p) {
1496 		// this path is single segment with no '/' in it
1497 		// ex. "foo"
1498 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1499 			return B_NAME_TOO_LONG;
1500 		strcpy(path, ".");
1501 	} else {
1502 		p++;
1503 		if (*p == '\0') {
1504 			// special case: the path ends in '/'
1505 			strcpy(filename, ".");
1506 		} else {
1507 			// normal leaf: replace the leaf portion of the path with a '.'
1508 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1509 				>= B_FILE_NAME_LENGTH) {
1510 				return B_NAME_TOO_LONG;
1511 			}
1512 		}
1513 		p[0] = '.';
1514 		p[1] = '\0';
1515 	}
1516 	return B_OK;
1517 }
1518 
1519 
1520 static status_t
1521 entry_ref_to_vnode(mount_id mountID, vnode_id directoryID, const char *name, struct vnode **_vnode)
1522 {
1523 	char clonedName[B_FILE_NAME_LENGTH + 1];
1524 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1525 		return B_NAME_TOO_LONG;
1526 
1527 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1528 	struct vnode *directory;
1529 
1530 	status_t status = get_vnode(mountID, directoryID, &directory, false);
1531 	if (status < 0)
1532 		return status;
1533 
1534 	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL, NULL);
1535 }
1536 
1537 
/**	Returns the vnode for the relative path starting at the specified \a vnode.
 *	\a path must not be NULL.
 *	If it returns successfully, \a path contains the name of the last path
 *	component.
 *	Note, this reduces the ref_count of the starting \a vnode, no matter if
 *	it is successful or not!
 */

static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, vnode_id *_parentID, int *_type)
{
	status_t status = 0;
	vnode_id lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL) {
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	// Loop invariant: "vnode" carries exactly one reference owned by this
	// function; it is released on every exit path.
	while (true) {
		struct vnode *nextVnode;
		vnode_id vnodeID;
		char *nextPath;

		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying filesystem
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the filesystem to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
		// from inside the filesystem, thus the vnode would have to be in the list and it's
		// ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			TRACE(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			// the recursion below is bounded by this symlink counter
			if (count + 1 > B_MAX_SYMLINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			if (FS_CALL(nextVnode, read_symlink) != NULL) {
				status = FS_CALL(nextVnode, read_symlink)(
					nextVnode->mount->cookie, nextVnode->private_node, buffer,
					&bufferSize);
			} else
				status = B_BAD_VALUE;

			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);
			}
			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the ref_count
				// of the vnode, no matter if we succeeded or not

			// recursively resolve the rest of the link target; the extra
			// reference taken above is consumed by the recursive call
			status = vnode_path_to_vnode(vnode, path, traverseLeafLink, count + 1,
				&nextVnode, &lastParentID, _type);

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	// hand the final vnode's reference over to the caller
	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}
1714 
1715 
1716 static status_t
1717 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1718 	vnode_id *_parentID, bool kernel)
1719 {
1720 	struct vnode *start = NULL;
1721 
1722 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1723 
1724 	if (!path)
1725 		return B_BAD_VALUE;
1726 
1727 	// figure out if we need to start at root or at cwd
1728 	if (*path == '/') {
1729 		if (sRoot == NULL) {
1730 			// we're a bit early, aren't we?
1731 			return B_ERROR;
1732 		}
1733 
1734 		while (*++path == '/')
1735 			;
1736 		start = sRoot;
1737 		inc_vnode_ref_count(start);
1738 	} else {
1739 		struct io_context *context = get_current_io_context(kernel);
1740 
1741 		mutex_lock(&context->io_mutex);
1742 		start = context->cwd;
1743 		if (start != NULL)
1744 			inc_vnode_ref_count(start);
1745 		mutex_unlock(&context->io_mutex);
1746 
1747 		if (start == NULL)
1748 			return B_ERROR;
1749 	}
1750 
1751 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1752 }
1753 
1754 
1755 /** Returns the vnode in the next to last segment of the path, and returns
1756  *	the last portion in filename.
1757  *	The path buffer must be able to store at least one additional character.
1758  */
1759 
1760 static status_t
1761 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1762 {
1763 	status_t status = get_dir_path_and_leaf(path, filename);
1764 	if (status != B_OK)
1765 		return status;
1766 
1767 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1768 }
1769 
1770 
1771 /**	\brief Retrieves the directory vnode and the leaf name of an entry referred
1772  *		   to by a FD + path pair.
1773  *
1774  *	\a path must be given in either case. \a fd might be omitted, in which
1775  *	case \a path is either an absolute path or one relative to the current
1776  *	directory. If both a supplied and \a path is relative it is reckoned off
1777  *	of the directory referred to by \a fd. If \a path is absolute \a fd is
1778  *	ignored.
1779  *
1780  *	The caller has the responsibility to call put_vnode() on the returned
1781  *	directory vnode.
1782  *
1783  *	\param fd The FD. May be < 0.
1784  *	\param path The absolute or relative path. Must not be \c NULL. The buffer
1785  *	       is modified by this function. It must have at least room for a
1786  *	       string one character longer than the path it contains.
1787  *	\param _vnode A pointer to a variable the directory vnode shall be written
1788  *		   into.
1789  *	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1790  *		   the leaf name of the specified entry will be written.
1791  *	\param kernel \c true, if invoked from inside the kernel, \c false if
1792  *		   invoked from userland.
1793  *	\return \c B_OK, if everything went fine, another error code otherwise.
1794  */
1795 
1796 static status_t
1797 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1798 	char *filename, bool kernel)
1799 {
1800 	if (!path)
1801 		return B_BAD_VALUE;
1802 	if (fd < 0)
1803 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1804 
1805 	status_t status = get_dir_path_and_leaf(path, filename);
1806 	if (status != B_OK)
1807 		return status;
1808 
1809 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1810 }
1811 
1812 
1813 /** Returns a vnode's name in the d_name field of a supplied dirent buffer.
1814  */
1815 
1816 static status_t
1817 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
1818 	size_t bufferSize)
1819 {
1820 	if (bufferSize < sizeof(struct dirent))
1821 		return B_BAD_VALUE;
1822 
1823 	// See if vnode is the root of a mount and move to the covered
1824 	// vnode so we get the underlying file system
1825 	VNodePutter vnodePutter;
1826 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
1827 		vnode = vnode->mount->covers_vnode;
1828 		inc_vnode_ref_count(vnode);
1829 		vnodePutter.SetTo(vnode);
1830 	}
1831 
1832 	if (FS_CALL(vnode, get_vnode_name)) {
1833 		// The FS supports getting the name of a vnode.
1834 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
1835 			vnode->private_node, buffer->d_name,
1836 			(char*)buffer + bufferSize - buffer->d_name);
1837 	}
1838 
1839 	// The FS doesn't support getting the name of a vnode. So we search the
1840 	// parent directory for the vnode, if the caller let us.
1841 
1842 	if (parent == NULL)
1843 		return EOPNOTSUPP;
1844 
1845 	fs_cookie cookie;
1846 
1847 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
1848 		parent->private_node, &cookie);
1849 	if (status >= B_OK) {
1850 		while (true) {
1851 			uint32 num = 1;
1852 			status = dir_read(parent, cookie, buffer, bufferSize, &num);
1853 			if (status < B_OK)
1854 				break;
1855 			if (num == 0) {
1856 				status = B_ENTRY_NOT_FOUND;
1857 				break;
1858 			}
1859 
1860 			if (vnode->id == buffer->d_ino) {
1861 				// found correct entry!
1862 				break;
1863 			}
1864 		}
1865 
1866 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node,
1867 			cookie);
1868 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie,
1869 			vnode->private_node, cookie);
1870 	}
1871 	return status;
1872 }
1873 
1874 
1875 static status_t
1876 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
1877 	size_t nameSize)
1878 {
1879 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1880 	struct dirent *dirent = (struct dirent *)buffer;
1881 
1882 	status_t status = get_vnode_name(vnode, parent, buffer, sizeof(buffer));
1883 	if (status != B_OK)
1884 		return status;
1885 
1886 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
1887 		return B_BUFFER_OVERFLOW;
1888 
1889 	return B_OK;
1890 }
1891 
1892 
/**	Gets the full path to a given directory vnode.
 *	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
 *	file system doesn't support this call, it will fall back to iterating
 *	through the parent directory to get the name of the child.
 *
 *	To protect against circular loops, it supports a maximum tree depth
 *	of 256 levels.
 *
 *	Note that the path may not be correct the time this function returns!
 *	It doesn't use any locking to prevent returning the correct path, as
 *	paths aren't safe anyway: the path to a file can change at any time.
 *
 *	It might be a good idea, though, to check if the returned path exists
 *	in the calling function (it's not done here because of efficiency)
 */

static status_t
dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
{
	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));

	if (vnode == NULL || buffer == NULL)
		return B_BAD_VALUE;

	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char *path = pathBuffer.LockBuffer();
	int32 insert = pathBuffer.BufferSize();
		// the path is built backwards, from the end of the buffer
	int32 maxLevel = 256;
	int32 length;
	status_t status;

	// we don't use get_vnode() here because this call is more
	// efficient and does all we need from get_vnode()
	inc_vnode_ref_count(vnode);

	// resolve a volume root to its mount point
	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
	if (mountPoint) {
		put_vnode(vnode);
		vnode = mountPoint;
	}

	path[--insert] = '\0';

	// Walk up the tree one parent at a time, prepending each component.
	while (true) {
		// the name buffer is also used for fs_read_dir()
		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		char *name = &((struct dirent *)nameBuffer)->d_name[0];
		struct vnode *parentVnode;
		vnode_id parentID;
		int type;

		// lookup the parent vnode
		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
			&parentID, &type);
		if (status < B_OK)
			goto out;

		// the lookup above should have put the parent into the vnode hash
		mutex_lock(&sVnodeMutex);
		parentVnode = lookup_vnode(vnode->device, parentID);
		mutex_unlock(&sVnodeMutex);

		if (parentVnode == NULL) {
			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, parentID);
			status = B_ENTRY_NOT_FOUND;
			goto out;
		}

		// get the node's name
		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
			sizeof(nameBuffer));

		// resolve a volume root to its mount point
		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
		if (mountPoint) {
			put_vnode(parentVnode);
			parentVnode = mountPoint;
			parentID = parentVnode->id;
		}

		bool hitRoot = (parentVnode == vnode);
			// the root directory is its own parent ("/.." == "/")

		// release the current vnode, we only need its parent from now on
		put_vnode(vnode);
		vnode = parentVnode;

		if (status < B_OK)
			goto out;

		if (hitRoot) {
			// we have reached "/", which means we have constructed the full
			// path
			break;
		}

		// ToDo: add an explicit check for loops in about 10 levels to do
		// real loop detection

		// don't go deeper as 'maxLevel' to prevent circular loops
		if (maxLevel-- < 0) {
			status = ELOOP;
			goto out;
		}

		// add the name in front of the current path
		name[B_FILE_NAME_LENGTH - 1] = '\0';
		length = strlen(name);
		insert -= length;
		if (insert <= 0) {
			status = ENOBUFS;
			goto out;
		}
		memcpy(path + insert, name, length);
		path[--insert] = '/';
	}

	// the root dir will result in an empty path: fix it
	if (path[insert] == '\0')
		path[--insert] = '/';

	TRACE(("  path is: %s\n", path + insert));

	// copy the path to the output buffer
	length = pathBuffer.BufferSize() - insert;
	if (length <= (int)bufferSize)
		memcpy(buffer, path + insert, length);
	else
		status = ENOBUFS;

out:
	put_vnode(vnode);
	return status;
}
2031 
2032 
2033 /**	Checks the length of every path component, and adds a '.'
2034  *	if the path ends in a slash.
2035  *	The given path buffer must be able to store at least one
2036  *	additional character.
2037  */
2038 
2039 static status_t
2040 check_path(char *to)
2041 {
2042 	int32 length = 0;
2043 
2044 	// check length of every path component
2045 
2046 	while (*to) {
2047 		char *begin;
2048 		if (*to == '/')
2049 			to++, length++;
2050 
2051 		begin = to;
2052 		while (*to != '/' && *to)
2053 			to++, length++;
2054 
2055 		if (to - begin > B_FILE_NAME_LENGTH)
2056 			return B_NAME_TOO_LONG;
2057 	}
2058 
2059 	if (length == 0)
2060 		return B_ENTRY_NOT_FOUND;
2061 
2062 	// complete path if there is a slash at the end
2063 
2064 	if (*(to - 1) == '/') {
2065 		if (length > B_PATH_NAME_LENGTH - 2)
2066 			return B_NAME_TOO_LONG;
2067 
2068 		to[0] = '.';
2069 		to[1] = '\0';
2070 	}
2071 
2072 	return B_OK;
2073 }
2074 
2075 
2076 static struct file_descriptor *
2077 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2078 {
2079 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2080 	if (descriptor == NULL)
2081 		return NULL;
2082 
2083 	if (fd_vnode(descriptor) == NULL) {
2084 		put_fd(descriptor);
2085 		return NULL;
2086 	}
2087 
2088 	// ToDo: when we can close a file descriptor at any point, investigate
2089 	//	if this is still valid to do (accessing the vnode without ref_count
2090 	//	or locking)
2091 	*_vnode = descriptor->u.vnode;
2092 	return descriptor;
2093 }
2094 
2095 
2096 static struct vnode *
2097 get_vnode_from_fd(int fd, bool kernel)
2098 {
2099 	struct file_descriptor *descriptor;
2100 	struct vnode *vnode;
2101 
2102 	descriptor = get_fd(get_current_io_context(kernel), fd);
2103 	if (descriptor == NULL)
2104 		return NULL;
2105 
2106 	vnode = fd_vnode(descriptor);
2107 	if (vnode != NULL)
2108 		inc_vnode_ref_count(vnode);
2109 
2110 	put_fd(descriptor);
2111 	return vnode;
2112 }
2113 
2114 
2115 /**	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2116  *	only the path will be considered. In this case, the \a path must not be
2117  *	NULL.
2118  *	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2119  *	and should be NULL for files.
2120  */
2121 
2122 static status_t
2123 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2124 	struct vnode **_vnode, vnode_id *_parentID, bool kernel)
2125 {
2126 	if (fd < 0 && !path)
2127 		return B_BAD_VALUE;
2128 
2129 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2130 		// no FD or absolute path
2131 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2132 	}
2133 
2134 	// FD only, or FD + relative path
2135 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2136 	if (!vnode)
2137 		return B_FILE_ERROR;
2138 
2139 	if (path != NULL) {
2140 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2141 			_vnode, _parentID, NULL);
2142 	}
2143 
2144 	// there is no relative path to take into account
2145 
2146 	*_vnode = vnode;
2147 	if (_parentID)
2148 		*_parentID = -1;
2149 
2150 	return B_OK;
2151 }
2152 
2153 
2154 static int
2155 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2156 	fs_cookie cookie, int openMode, bool kernel)
2157 {
2158 	struct file_descriptor *descriptor;
2159 	int fd;
2160 
2161 	// if the vnode is locked, we don't allow creating a new file descriptor for it
2162 	if (vnode && vnode->mandatory_locked_by != NULL)
2163 		return B_BUSY;
2164 
2165 	descriptor = alloc_fd();
2166 	if (!descriptor)
2167 		return B_NO_MEMORY;
2168 
2169 	if (vnode)
2170 		descriptor->u.vnode = vnode;
2171 	else
2172 		descriptor->u.mount = mount;
2173 	descriptor->cookie = cookie;
2174 
2175 	switch (type) {
2176 		// vnode types
2177 		case FDTYPE_FILE:
2178 			descriptor->ops = &sFileOps;
2179 			break;
2180 		case FDTYPE_DIR:
2181 			descriptor->ops = &sDirectoryOps;
2182 			break;
2183 		case FDTYPE_ATTR:
2184 			descriptor->ops = &sAttributeOps;
2185 			break;
2186 		case FDTYPE_ATTR_DIR:
2187 			descriptor->ops = &sAttributeDirectoryOps;
2188 			break;
2189 
2190 		// mount types
2191 		case FDTYPE_INDEX_DIR:
2192 			descriptor->ops = &sIndexDirectoryOps;
2193 			break;
2194 		case FDTYPE_QUERY:
2195 			descriptor->ops = &sQueryOps;
2196 			break;
2197 
2198 		default:
2199 			panic("get_new_fd() called with unknown type %d\n", type);
2200 			break;
2201 	}
2202 	descriptor->type = type;
2203 	descriptor->open_mode = openMode;
2204 
2205 	fd = new_fd(get_current_io_context(kernel), descriptor);
2206 	if (fd < 0) {
2207 		free(descriptor);
2208 		return B_NO_MORE_FDS;
2209 	}
2210 
2211 	return fd;
2212 }
2213 
2214 #ifdef ADD_DEBUGGER_COMMANDS
2215 
2216 
2217 static void
2218 _dump_advisory_locking(advisory_locking *locking)
2219 {
2220 	if (locking == NULL)
2221 		return;
2222 
2223 	kprintf("   lock:        %ld", locking->lock);
2224 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2225 
2226 	struct advisory_lock *lock = NULL;
2227 	int32 index = 0;
2228 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
2229 		kprintf("   [%2ld] team:   %ld\n", index, lock->team);
2230 		kprintf("        offset: %Ld\n", lock->offset);
2231 		kprintf("        length: %Ld\n", lock->length);
2232 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2233 	}
2234 }
2235 
2236 
static void
_dump_mount(struct fs_mount *mount)
{
	// Prints all fields of the given fs_mount to the kernel debugger
	// output. No locking - must only be called from the kernel debugger.
	kprintf("MOUNT: %p\n", mount);
	kprintf(" id:            %ld\n", mount->id);
	kprintf(" device_name:   %s\n", mount->device_name);
	kprintf(" fs_name:       %s\n", mount->fs_name);
	kprintf(" cookie:        %p\n", mount->cookie);
	kprintf(" root_vnode:    %p\n", mount->root_vnode);
	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
	kprintf(" partition:     %p\n", mount->partition);
	kprintf(" lock:          %ld\n", mount->rlock.sem);
	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
		mount->owns_file_device ? " owns_file_device" : "");
}
2252 
2253 
static void
_dump_vnode(struct vnode *vnode)
{
	// Prints all fields of the given vnode (including its advisory locks)
	// to the kernel debugger output. No locking - debugger context only.
	kprintf("VNODE: %p\n", vnode);
	kprintf(" device:        %ld\n", vnode->device);
	kprintf(" id:            %Ld\n", vnode->id);
	kprintf(" ref_count:     %ld\n", vnode->ref_count);
	kprintf(" private_node:  %p\n", vnode->private_node);
	kprintf(" mount:         %p\n", vnode->mount);
	kprintf(" covered_by:    %p\n", vnode->covered_by);
	kprintf(" cache_ref:     %p\n", vnode->cache);
	// flags: r = marked for removal, b = busy, u = unpublished
	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);

	_dump_advisory_locking(vnode->advisory_locking);
}
2271 
2272 
2273 static int
2274 dump_mount(int argc, char **argv)
2275 {
2276 	if (argc != 2) {
2277 		kprintf("usage: mount [id/address]\n");
2278 		return 0;
2279 	}
2280 
2281 	struct fs_mount *mount = NULL;
2282 
2283 	// if the argument looks like a hex number, treat it as such
2284 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2285 		mount = (fs_mount *)strtoul(argv[1], NULL, 16);
2286 		if (IS_USER_ADDRESS(mount)) {
2287 			kprintf("invalid fs_mount address\n");
2288 			return 0;
2289 		}
2290 	} else {
2291 		mount_id id = atoll(argv[1]);
2292 		mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2293 		if (mount == NULL) {
2294 			kprintf("fs_mount not found\n");
2295 			return 0;
2296 		}
2297 	}
2298 
2299 	_dump_mount(mount);
2300 	return 0;
2301 }
2302 
2303 
2304 static int
2305 dump_mounts(int argc, char **argv)
2306 {
2307 	struct hash_iterator iterator;
2308 	struct fs_mount *mount;
2309 
2310 	kprintf("address     id root       covers     fs_name\n");
2311 
2312 	hash_open(sMountsTable, &iterator);
2313 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2314 		kprintf("%p%4ld %p %p %s\n", mount, mount->id, mount->root_vnode,
2315 			mount->covers_vnode, mount->fs_name);
2316 	}
2317 
2318 	hash_close(sMountsTable, &iterator, false);
2319 	return 0;
2320 }
2321 
2322 
2323 static int
2324 dump_vnode(int argc, char **argv)
2325 {
2326 	if (argc < 2) {
2327 		kprintf("usage: vnode [id/device id/address]\n");
2328 		return 0;
2329 	}
2330 
2331 	struct vnode *vnode = NULL;
2332 
2333 	// if the argument looks like a hex number, treat it as such
2334 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2335 		vnode = (struct vnode *)strtoul(argv[1], NULL, 16);
2336 		if (IS_USER_ADDRESS(vnode)) {
2337 			kprintf("invalid vnode address\n");
2338 			return 0;
2339 		}
2340 		_dump_vnode(vnode);
2341 		return 0;
2342 	}
2343 
2344 	struct hash_iterator iterator;
2345 	mount_id device = -1;
2346 	vnode_id id;
2347 	if (argc > 2) {
2348 		device = atoi(argv[1]);
2349 		id = atoll(argv[2]);
2350 	} else
2351 		id = atoll(argv[1]);
2352 
2353 	hash_open(sVnodeTable, &iterator);
2354 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2355 		if (vnode->id != id || device != -1 && vnode->device != device)
2356 			continue;
2357 
2358 		_dump_vnode(vnode);
2359 	}
2360 
2361 	hash_close(sVnodeTable, &iterator, false);
2362 	return 0;
2363 }
2364 
2365 
2366 static int
2367 dump_vnodes(int argc, char **argv)
2368 {
2369 	// restrict dumped nodes to a certain device if requested
2370 	mount_id device = -1;
2371 	if (argc > 1)
2372 		device = atoi(argv[1]);
2373 
2374 	struct hash_iterator iterator;
2375 	struct vnode *vnode;
2376 
2377 	kprintf("address    dev     inode  ref cache      locking    flags\n");
2378 
2379 	hash_open(sVnodeTable, &iterator);
2380 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2381 		if (device != -1 && vnode->device != device)
2382 			continue;
2383 
2384 		kprintf("%p%4ld%10Ld%5ld %p %p %s%s%s\n", vnode, vnode->device, vnode->id,
2385 			vnode->ref_count, vnode->cache, vnode->advisory_locking,
2386 			vnode->remove ? "r" : "-", vnode->busy ? "b" : "-",
2387 			vnode->unpublished ? "u" : "-");
2388 	}
2389 
2390 	hash_close(sVnodeTable, &iterator, false);
2391 	return 0;
2392 }
2393 
2394 
2395 static int
2396 dump_vnode_caches(int argc, char **argv)
2397 {
2398 	struct hash_iterator iterator;
2399 	struct vnode *vnode;
2400 
2401 	// restrict dumped nodes to a certain device if requested
2402 	mount_id device = -1;
2403 	if (argc > 1)
2404 		device = atoi(argv[1]);
2405 
2406 	kprintf("address    dev     inode cache          size   pages\n");
2407 
2408 	hash_open(sVnodeTable, &iterator);
2409 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2410 		if (vnode->cache == NULL)
2411 			continue;
2412 		if (device != -1 && vnode->device != device)
2413 			continue;
2414 
2415 		// count pages in cache
2416 		size_t numPages = 0;
2417 		for (struct vm_page *page = vnode->cache->cache->page_list;
2418 				page != NULL; page = page->cache_next) {
2419 			numPages++;
2420 		}
2421 
2422 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id, vnode->cache,
2423 			(vnode->cache->cache->virtual_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE, numPages);
2424 	}
2425 
2426 	hash_close(sVnodeTable, &iterator, false);
2427 	return 0;
2428 }
2429 
2430 
2431 int
2432 dump_io_context(int argc, char **argv)
2433 {
2434 	if (argc > 2) {
2435 		kprintf("usage: io_context [team id/address]\n");
2436 		return 0;
2437 	}
2438 
2439 	struct io_context *context = NULL;
2440 
2441 	if (argc > 1) {
2442 		uint32 num = strtoul(argv[1], NULL, 0);
2443 		if (IS_KERNEL_ADDRESS(num))
2444 			context = (struct io_context *)num;
2445 		else {
2446 			struct team *team = team_get_team_struct_locked(num);
2447 			if (team == NULL) {
2448 				kprintf("could not find team with ID %ld\n", num);
2449 				return 0;
2450 			}
2451 			context = (struct io_context *)team->io_context;
2452 		}
2453 	} else
2454 		context = get_current_io_context(true);
2455 
2456 	kprintf("I/O CONTEXT: %p\n", context);
2457 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2458 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2459 	kprintf(" max fds:\t%lu\n", context->table_size);
2460 
2461 	if (context->num_used_fds)
2462 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2463 
2464 	for (uint32 i = 0; i < context->table_size; i++) {
2465 		struct file_descriptor *fd = context->fds[i];
2466 		if (fd == NULL)
2467 			continue;
2468 
2469 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2470 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2471 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2472 			fd->u.vnode);
2473 	}
2474 
2475 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2476 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2477 
2478 	return 0;
2479 }
2480 
2481 
2482 int
2483 dump_vnode_usage(int argc, char **argv)
2484 {
2485 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes, kMaxUnusedVnodes);
2486 
2487 	struct hash_iterator iterator;
2488 	hash_open(sVnodeTable, &iterator);
2489 
2490 	uint32 count = 0;
2491 	struct vnode *vnode;
2492 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2493 		count++;
2494 	}
2495 
2496 	hash_close(sVnodeTable, &iterator, false);
2497 
2498 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2499 	return 0;
2500 }
2501 
2502 #endif	// ADD_DEBUGGER_COMMANDS
2503 
2504 
2505 //	#pragma mark - public VFS API
2506 
2507 
2508 extern "C" status_t
2509 new_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2510 {
2511 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2512 		mountID, vnodeID, privateNode));
2513 
2514 	if (privateNode == NULL)
2515 		return B_BAD_VALUE;
2516 
2517 	mutex_lock(&sVnodeMutex);
2518 
2519 	// file system integrity check:
2520 	// test if the vnode already exists and bail out if this is the case!
2521 
2522 	// ToDo: the R5 implementation obviously checks for a different cookie
2523 	//	and doesn't panic if they are equal
2524 
2525 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2526 	if (vnode != NULL)
2527 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2528 
2529 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2530 	if (status == B_OK) {
2531 		vnode->private_node = privateNode;
2532 		vnode->busy = true;
2533 		vnode->unpublished = true;
2534 	}
2535 
2536 	TRACE(("returns: %s\n", strerror(status)));
2537 
2538 	mutex_unlock(&sVnodeMutex);
2539 	return status;
2540 }
2541 
2542 
2543 extern "C" status_t
2544 publish_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2545 {
2546 	FUNCTION(("publish_vnode()\n"));
2547 
2548 	mutex_lock(&sVnodeMutex);
2549 
2550 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2551 	status_t status = B_OK;
2552 
2553 	if (vnode != NULL && vnode->busy && vnode->unpublished
2554 		&& vnode->private_node == privateNode) {
2555 		vnode->busy = false;
2556 		vnode->unpublished = false;
2557 	} else if (vnode == NULL && privateNode != NULL) {
2558 		status = create_new_vnode(&vnode, mountID, vnodeID);
2559 		if (status == B_OK)
2560 			vnode->private_node = privateNode;
2561 	} else
2562 		status = B_BAD_VALUE;
2563 
2564 	TRACE(("returns: %s\n", strerror(status)));
2565 
2566 	mutex_unlock(&sVnodeMutex);
2567 	return status;
2568 }
2569 
2570 
2571 extern "C" status_t
2572 get_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode *_fsNode)
2573 {
2574 	struct vnode *vnode;
2575 
2576 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2577 	if (status < B_OK)
2578 		return status;
2579 
2580 	*_fsNode = vnode->private_node;
2581 	return B_OK;
2582 }
2583 
2584 
2585 extern "C" status_t
2586 put_vnode(mount_id mountID, vnode_id vnodeID)
2587 {
2588 	struct vnode *vnode;
2589 
2590 	mutex_lock(&sVnodeMutex);
2591 	vnode = lookup_vnode(mountID, vnodeID);
2592 	mutex_unlock(&sVnodeMutex);
2593 
2594 	if (vnode)
2595 		dec_vnode_ref_count(vnode, true);
2596 
2597 	return B_OK;
2598 }
2599 
2600 
extern "C" status_t
remove_vnode(mount_id mountID, vnode_id vnodeID)
{
	// Marks the given vnode to be removed when the last reference to it is
	// released. If the vnode is still unpublished, it is deleted right
	// here instead. Fails with B_BUSY if the vnode is covered by another
	// vnode (i.e. serves as a mount point).
	struct vnode *vnode;
	bool remove = false;

	mutex_lock(&sVnodeMutex);

	vnode = lookup_vnode(mountID, vnodeID);
	if (vnode != NULL) {
		if (vnode->covered_by != NULL) {
			// this vnode is in use
			mutex_unlock(&sVnodeMutex);
			return B_BUSY;
		}

		vnode->remove = true;
		if (vnode->unpublished) {
			// prepare the vnode for deletion
			// (busy keeps everyone else from grabbing it in the meantime)
			vnode->busy = true;
			remove = true;
		}
	}

	mutex_unlock(&sVnodeMutex);

	if (remove) {
		// if the vnode hasn't been published yet, we delete it here
		// NOTE(review): the decrement presumably balances the reference
		// held since creation - confirm against create_new_vnode()
		atomic_add(&vnode->ref_count, -1);
		free_vnode(vnode, true);
	}

	return B_OK;
}
2635 
2636 
2637 extern "C" status_t
2638 unremove_vnode(mount_id mountID, vnode_id vnodeID)
2639 {
2640 	struct vnode *vnode;
2641 
2642 	mutex_lock(&sVnodeMutex);
2643 
2644 	vnode = lookup_vnode(mountID, vnodeID);
2645 	if (vnode)
2646 		vnode->remove = false;
2647 
2648 	mutex_unlock(&sVnodeMutex);
2649 	return B_OK;
2650 }
2651 
2652 
2653 extern "C" status_t
2654 get_vnode_removed(mount_id mountID, vnode_id vnodeID, bool* removed)
2655 {
2656 	mutex_lock(&sVnodeMutex);
2657 
2658 	status_t result;
2659 
2660 	if (struct vnode* vnode = lookup_vnode(mountID, vnodeID)) {
2661 		if (removed)
2662 			*removed = vnode->remove;
2663 		result = B_OK;
2664 	} else
2665 		result = B_BAD_VALUE;
2666 
2667 	mutex_unlock(&sVnodeMutex);
2668 	return result;
2669 }
2670 
2671 
2672 //	#pragma mark - private VFS API
2673 //	Functions the VFS exports for other parts of the kernel
2674 
2675 
2676 /** Acquires another reference to the vnode that has to be released
2677  *	by calling vfs_put_vnode().
2678  */
2679 
2680 void
2681 vfs_acquire_vnode(void *_vnode)
2682 {
2683 	inc_vnode_ref_count((struct vnode *)_vnode);
2684 }
2685 
2686 
2687 /** This is currently called from file_cache_create() only.
2688  *	It's probably a temporary solution as long as devfs requires that
2689  *	fs_read_pages()/fs_write_pages() are called with the standard
2690  *	open cookie and not with a device cookie.
2691  *	If that's done differently, remove this call; it has no other
2692  *	purpose.
2693  */
2694 
extern "C" status_t
vfs_get_cookie_from_fd(int fd, void **_cookie)
{
	// Returns the FS cookie of the given FD of the kernel I/O context.
	// NOTE(review): get_fd() acquires a descriptor reference that is never
	// released here (no put_fd()) - possibly intentional to keep the
	// cookie alive while in use, but verify against the callers.
	struct file_descriptor *descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	*_cookie = descriptor->cookie;
	return B_OK;
}
2707 
2708 
2709 extern "C" int
2710 vfs_get_vnode_from_fd(int fd, bool kernel, void **vnode)
2711 {
2712 	*vnode = get_vnode_from_fd(fd, kernel);
2713 
2714 	if (*vnode == NULL)
2715 		return B_FILE_ERROR;
2716 
2717 	return B_NO_ERROR;
2718 }
2719 
2720 
2721 extern "C" status_t
2722 vfs_get_vnode_from_path(const char *path, bool kernel, void **_vnode)
2723 {
2724 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n", path, kernel));
2725 
2726 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2727 	if (pathBuffer.InitCheck() != B_OK)
2728 		return B_NO_MEMORY;
2729 
2730 	char *buffer = pathBuffer.LockBuffer();
2731 	strlcpy(buffer, path, pathBuffer.BufferSize());
2732 
2733 	struct vnode *vnode;
2734 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2735 	if (status < B_OK)
2736 		return status;
2737 
2738 	*_vnode = vnode;
2739 	return B_OK;
2740 }
2741 
2742 
2743 extern "C" status_t
2744 vfs_get_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2745 {
2746 	struct vnode *vnode;
2747 
2748 	status_t status = get_vnode(mountID, vnodeID, &vnode, false);
2749 	if (status < B_OK)
2750 		return status;
2751 
2752 	*_vnode = vnode;
2753 	return B_OK;
2754 }
2755 
2756 
2757 extern "C" status_t
2758 vfs_entry_ref_to_vnode(mount_id mountID, vnode_id directoryID,
2759 	const char *name, void **_vnode)
2760 {
2761 	return entry_ref_to_vnode(mountID, directoryID, name, (struct vnode **)_vnode);
2762 }
2763 
2764 
2765 extern "C" void
2766 vfs_vnode_to_node_ref(void *_vnode, mount_id *_mountID, vnode_id *_vnodeID)
2767 {
2768 	struct vnode *vnode = (struct vnode *)_vnode;
2769 
2770 	*_mountID = vnode->device;
2771 	*_vnodeID = vnode->id;
2772 }
2773 
2774 
2775 /**	Looks up a vnode with the given mount and vnode ID.
2776  *	Must only be used with "in-use" vnodes as it doesn't grab a reference
2777  *	to the node.
2778  *	It's currently only be used by file_cache_create().
2779  */
2780 
2781 extern "C" status_t
2782 vfs_lookup_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2783 {
2784 	mutex_lock(&sVnodeMutex);
2785 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2786 	mutex_unlock(&sVnodeMutex);
2787 
2788 	if (vnode == NULL)
2789 		return B_ERROR;
2790 
2791 	*_vnode = vnode;
2792 	return B_OK;
2793 }
2794 
2795 
2796 extern "C" status_t
2797 vfs_get_fs_node_from_path(mount_id mountID, const char *path, bool kernel, void **_node)
2798 {
2799 	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
2800 		mountID, path, kernel));
2801 
2802 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2803 	if (pathBuffer.InitCheck() != B_OK)
2804 		return B_NO_MEMORY;
2805 
2806 	fs_mount *mount;
2807 	status_t status = get_mount(mountID, &mount);
2808 	if (status < B_OK)
2809 		return status;
2810 
2811 	char *buffer = pathBuffer.LockBuffer();
2812 	strlcpy(buffer, path, pathBuffer.BufferSize());
2813 
2814 	struct vnode *vnode = mount->root_vnode;
2815 
2816 	if (buffer[0] == '/')
2817 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
2818 	else {
2819 		inc_vnode_ref_count(vnode);
2820 			// vnode_path_to_vnode() releases a reference to the starting vnode
2821 		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
2822 	}
2823 
2824 	put_mount(mount);
2825 
2826 	if (status < B_OK)
2827 		return status;
2828 
2829 	if (vnode->device != mountID) {
2830 		// wrong mount ID - must not gain access on foreign file system nodes
2831 		put_vnode(vnode);
2832 		return B_BAD_VALUE;
2833 	}
2834 
2835 	*_node = vnode->private_node;
2836 	return B_OK;
2837 }
2838 
2839 
2840 /**	Finds the full path to the file that contains the module \a moduleName,
2841  *	puts it into \a pathBuffer, and returns B_OK for success.
2842  *	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
2843  *	\c B_ENTRY_NOT_FOUNT if no file could be found.
2844  *	\a pathBuffer is clobbered in any case and must not be relied on if this
2845  *	functions returns unsuccessfully.
2846  */
2847 
status_t
vfs_get_module_path(const char *basePath, const char *moduleName, char *pathBuffer,
	size_t bufferSize)
{
	// See the comment above for the contract; \a pathBuffer is clobbered
	// even on failure.
	struct vnode *dir, *file;
	status_t status;
	size_t length;
	char *path;

	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
		return B_BUFFER_OVERFLOW;

	// resolve the base directory (we hold a reference to "dir" from here on)
	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
	if (status < B_OK)
		return status;

	// the path buffer had been clobbered by the above call
	length = strlcpy(pathBuffer, basePath, bufferSize);
	if (pathBuffer[length - 1] != '/')
		pathBuffer[length++] = '/';

	// "path" points at the spot where the next component is appended
	path = pathBuffer + length;
	bufferSize -= length;

	// walk the module name component by component, descending directories
	while (moduleName) {
		int type;

		char *nextPath = strchr(moduleName, '/');
		if (nextPath == NULL)
			length = strlen(moduleName);
		else {
			length = nextPath - moduleName;
			nextPath++;
		}

		if (length + 1 >= bufferSize) {
			status = B_BUFFER_OVERFLOW;
			goto err;
		}

		// append the current component to the path built so far
		memcpy(path, moduleName, length);
		path[length] = '\0';
		moduleName = nextPath;

		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
		if (status < B_OK) {
			// vnode_path_to_vnode() has already released the reference to dir
			return status;
		}

		if (S_ISDIR(type)) {
			// goto the next directory
			path[length] = '/';
			path[length + 1] = '\0';
			path += length + 1;
			bufferSize -= length + 1;

			// "file" replaces "dir"; the old dir reference was already
			// released by vnode_path_to_vnode() above
			dir = file;
		} else if (S_ISREG(type)) {
			// it's a file so it should be what we've searched for
			put_vnode(file);

			return B_OK;
		} else {
			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
			status = B_ERROR;
			// let the common cleanup below release the node
			dir = file;
			goto err;
		}
	}

	// if we got here, the moduleName just pointed to a directory, not to
	// a real module - what should we do in this case?
	status = B_ENTRY_NOT_FOUND;

err:
	put_vnode(dir);
	return status;
}
2927 
2928 
2929 /**	\brief Normalizes a given path.
2930  *
2931  *	The path must refer to an existing or non-existing entry in an existing
2932  *	directory, that is chopping off the leaf component the remaining path must
2933  *	refer to an existing directory.
2934  *
2935  *	The returned will be canonical in that it will be absolute, will not
2936  *	contain any "." or ".." components or duplicate occurrences of '/'s,
2937  *	and none of the directory components will by symbolic links.
2938  *
2939  *	Any two paths referring to the same entry, will result in the same
2940  *	normalized path (well, that is pretty much the definition of `normalized',
2941  *	isn't it :-).
2942  *
2943  *	\param path The path to be normalized.
2944  *	\param buffer The buffer into which the normalized path will be written.
2945  *	\param bufferSize The size of \a buffer.
2946  *	\param kernel \c true, if the IO context of the kernel shall be used,
2947  *		   otherwise that of the team this thread belongs to. Only relevant,
2948  *		   if the path is relative (to get the CWD).
2949  *	\return \c B_OK if everything went fine, another error code otherwise.
2950  */
2951 
status_t
vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
	bool kernel)
{
	// See the doxygen comment above for the full contract.
	if (!path || !buffer || bufferSize < 1)
		return B_BAD_VALUE;

	TRACE(("vfs_normalize_path(`%s')\n", path));

	// copy the supplied path into a mutable buffer, so it can be modified
	// (KPath allocates the buffer on the heap, not the stack)
	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
	if (mutablePathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char *mutablePath = mutablePathBuffer.LockBuffer();
	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the dir vnode and the leaf name
	struct vnode *dirNode;
	char leaf[B_FILE_NAME_LENGTH];
	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
		return error;
	}

	// if the leaf is "." or "..", we directly get the correct directory
	// vnode and ignore the leaf later
	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
	if (isDir)
		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
	if (error != B_OK) {
		// only reachable for the isDir call above; path_to_dir_vnode()
		// errors were already handled
		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
			strerror(error)));
		return error;
	}

	// get the directory path
	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
	put_vnode(dirNode);
	if (error < B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
		return error;
	}

	// append the leaf name
	if (!isDir) {
		// insert a directory separator only if this is not the file system root
		if ((strcmp(buffer, "/") != 0
			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
			return B_NAME_TOO_LONG;
		}
	}

	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
	return B_OK;
}
3011 
3012 
3013 extern "C" void
3014 vfs_put_vnode(void *_vnode)
3015 {
3016 	put_vnode((struct vnode *)_vnode);
3017 }
3018 
3019 
3020 extern "C" status_t
3021 vfs_get_cwd(mount_id *_mountID, vnode_id *_vnodeID)
3022 {
3023 	// Get current working directory from io context
3024 	struct io_context *context = get_current_io_context(false);
3025 	status_t status = B_OK;
3026 
3027 	mutex_lock(&context->io_mutex);
3028 
3029 	if (context->cwd != NULL) {
3030 		*_mountID = context->cwd->device;
3031 		*_vnodeID = context->cwd->id;
3032 	} else
3033 		status = B_ERROR;
3034 
3035 	mutex_unlock(&context->io_mutex);
3036 	return status;
3037 }
3038 
3039 
3040 extern "C" status_t
3041 vfs_disconnect_vnode(mount_id mountID, vnode_id vnodeID)
3042 {
3043 	struct vnode *vnode;
3044 
3045 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
3046 	if (status < B_OK)
3047 		return status;
3048 
3049 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3050 	return B_OK;
3051 }
3052 
3053 
extern "C" void
vfs_free_unused_vnodes(int32 level)
{
	// Frees unused vnodes by invoking the VFS low memory handler directly
	// with the given low memory \a level.
	vnode_low_memory_handler(NULL, level);
}
3059 
3060 
3061 extern "C" bool
3062 vfs_can_page(void *_vnode, void *cookie)
3063 {
3064 	struct vnode *vnode = (struct vnode *)_vnode;
3065 
3066 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
3067 
3068 	if (FS_CALL(vnode, can_page))
3069 		return FS_CALL(vnode, can_page)(vnode->mount->cookie, vnode->private_node, cookie);
3070 
3071 	return false;
3072 }
3073 
3074 
extern "C" status_t
vfs_read_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
	size_t *_numBytes, bool fsReenter)
{
	// Forwards a scatter/gather read at offset \a pos directly to the file
	// system's read_pages() hook; *_numBytes reports the bytes transferred.
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));

	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
		cookie, pos, vecs, count, _numBytes, fsReenter);
}
3086 
3087 
extern "C" status_t
vfs_write_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
	size_t *_numBytes, bool fsReenter)
{
	// Forwards a scatter/gather write at offset \a pos directly to the file
	// system's write_pages() hook; *_numBytes reports the bytes transferred.
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));

	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
		cookie, pos, vecs, count, _numBytes, fsReenter);
}
3099 
3100 
3101 /** Gets the vnode's vm_cache object. If it didn't have one, it will be
3102  *	created if \a allocate is \c true.
3103  *	In case it's successful, it will also grab a reference to the cache
3104  *	it returns (and therefore, one from the \a vnode in question as well).
3105  */
3106 
extern "C" status_t
vfs_get_vnode_cache(void *_vnode, vm_cache_ref **_cache, bool allocate)
{
	// See the comment above: returns the vnode's vm_cache (creating it if
	// \a allocate), with a reference for the caller.
	struct vnode *vnode = (struct vnode *)_vnode;

	// fast path: the cache already exists, no locking needed
	if (vnode->cache != NULL) {
		vm_cache_acquire_ref(vnode->cache);
		*_cache = vnode->cache;
		return B_OK;
	}

	mutex_lock(&sVnodeMutex);

	status_t status = B_OK;

	// The cache could have been created in the meantime
	if (vnode->cache == NULL) {
		if (allocate) {
			// TODO: actually the vnode need to be busy already here, or
			//	else this won't work...
			bool wasBusy = vnode->busy;
			vnode->busy = true;
			mutex_unlock(&sVnodeMutex);
				// the mutex is dropped during creation; "busy" keeps
				// others away from the vnode in the meantime

			status = vm_create_vnode_cache(vnode, &vnode->cache);

			mutex_lock(&sVnodeMutex);
			vnode->busy = wasBusy;
		} else
			status = B_BAD_VALUE;
	} else
		vm_cache_acquire_ref(vnode->cache);

	if (status == B_OK)
		*_cache = vnode->cache;

	mutex_unlock(&sVnodeMutex);
	return status;
}
3146 
3147 
status_t
vfs_get_file_map(void *_vnode, off_t offset, size_t size, file_io_vec *vecs, size_t *_count)
{
	// Asks the file system for the I/O vectors (file map) backing the
	// given byte range of the vnode; *_count is in/out for the vec count.
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));

	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie, vnode->private_node, offset, size, vecs, _count);
}
3157 
3158 
3159 status_t
3160 vfs_stat_vnode(void *_vnode, struct stat *stat)
3161 {
3162 	struct vnode *vnode = (struct vnode *)_vnode;
3163 
3164 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3165 		vnode->private_node, stat);
3166 
3167 	// fill in the st_dev and st_ino fields
3168 	if (status == B_OK) {
3169 		stat->st_dev = vnode->device;
3170 		stat->st_ino = vnode->id;
3171 	}
3172 
3173 	return status;
3174 }
3175 
3176 
3177 status_t
3178 vfs_get_vnode_name(void *_vnode, char *name, size_t nameSize)
3179 {
3180 	return get_vnode_name((struct vnode *)_vnode, NULL, name, nameSize);
3181 }
3182 
3183 
3184 /**	If the given descriptor locked its vnode, that lock will be released.
3185  */
3186 
3187 void
3188 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3189 {
3190 	struct vnode *vnode = fd_vnode(descriptor);
3191 
3192 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3193 		vnode->mandatory_locked_by = NULL;
3194 }
3195 
3196 
3197 /**	Closes all file descriptors of the specified I/O context that
3198  *	don't have the O_CLOEXEC flag set.
3199  */
3200 
3201 void
3202 vfs_exec_io_context(void *_context)
3203 {
3204 	struct io_context *context = (struct io_context *)_context;
3205 	uint32 i;
3206 
3207 	for (i = 0; i < context->table_size; i++) {
3208 		mutex_lock(&context->io_mutex);
3209 
3210 		struct file_descriptor *descriptor = context->fds[i];
3211 		bool remove = false;
3212 
3213 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
3214 			context->fds[i] = NULL;
3215 			context->num_used_fds--;
3216 
3217 			remove = true;
3218 		}
3219 
3220 		mutex_unlock(&context->io_mutex);
3221 
3222 		if (remove) {
3223 			close_fd(descriptor);
3224 			put_fd(descriptor);
3225 		}
3226 	}
3227 }
3228 
3229 
/** Sets up a new io_context structure, and inherits the properties
 *	of the parent io_context if it is given.
 *	Returns the new context, or NULL if memory or the mutex could not
 *	be allocated. The returned context starts with one FD table, the
 *	close-on-exec bitmap placed directly behind it in the same
 *	allocation, and the parent's (or root) working directory.
 */

void *
vfs_new_io_context(void *_parentContext)
{
	size_t tableSize;
	struct io_context *context;
	struct io_context *parentContext;

	context = (io_context *)malloc(sizeof(struct io_context));
	if (context == NULL)
		return NULL;

	memset(context, 0, sizeof(struct io_context));

	// a child context inherits its parent's FD table size
	parentContext = (struct io_context *)_parentContext;
	if (parentContext)
		tableSize = parentContext->table_size;
	else
		tableSize = DEFAULT_FD_TABLE_SIZE;

	// allocate space for FDs and their close-on-exec flag
	// (single allocation: the bitmap lives right behind the FD array)
	context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize
		+ (tableSize + 7) / 8);
	if (context->fds == NULL) {
		free(context);
		return NULL;
	}

	memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize
		+ (tableSize + 7) / 8);
	context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);

	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
		free(context->fds);
		free(context);
		return NULL;
	}

	// Copy all parent files which don't have the O_CLOEXEC flag set

	if (parentContext) {
		size_t i;

		mutex_lock(&parentContext->io_mutex);

		// share the parent's working directory (extra reference for us)
		context->cwd = parentContext->cwd;
		if (context->cwd)
			inc_vnode_ref_count(context->cwd);

		for (i = 0; i < tableSize; i++) {
			struct file_descriptor *descriptor = parentContext->fds[i];

			if (descriptor != NULL && !fd_close_on_exec(parentContext, i)) {
				// share the descriptor itself; bump both counts so it
				// stays valid and open for this context, too
				context->fds[i] = descriptor;
				context->num_used_fds++;
				atomic_add(&descriptor->ref_count, 1);
				atomic_add(&descriptor->open_count, 1);
			}
		}

		mutex_unlock(&parentContext->io_mutex);
	} else {
		// no parent -- start out in the file system root
		context->cwd = sRoot;

		if (context->cwd)
			inc_vnode_ref_count(context->cwd);
	}

	context->table_size = tableSize;

	list_init(&context->node_monitors);
	context->max_monitors = DEFAULT_NODE_MONITORS;

	return context;
}
3308 
3309 
3310 status_t
3311 vfs_free_io_context(void *_ioContext)
3312 {
3313 	struct io_context *context = (struct io_context *)_ioContext;
3314 	uint32 i;
3315 
3316 	if (context->cwd)
3317 		dec_vnode_ref_count(context->cwd, false);
3318 
3319 	mutex_lock(&context->io_mutex);
3320 
3321 	for (i = 0; i < context->table_size; i++) {
3322 		if (struct file_descriptor *descriptor = context->fds[i]) {
3323 			close_fd(descriptor);
3324 			put_fd(descriptor);
3325 		}
3326 	}
3327 
3328 	mutex_destroy(&context->io_mutex);
3329 
3330 	remove_node_monitors(context);
3331 	free(context->fds);
3332 	free(context);
3333 
3334 	return B_OK;
3335 }
3336 
3337 
/*!	Resizes the FD table of \a context to \a newSize slots.
	The FD array and the close-on-exec bitmap share one allocation; the
	bitmap starts right behind the last FD slot, which is why it is
	addressed as "fds + size" below.
	Returns EINVAL for an out-of-range size, EBUSY when shrinking would
	drop slots that are still in use, ENOMEM on allocation failure.
*/
static status_t
vfs_resize_fd_table(struct io_context *context, const int newSize)
{
	struct file_descriptor **fds;
	int	status = B_OK;

	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
		return EINVAL;

	mutex_lock(&context->io_mutex);

	int oldSize = context->table_size;
	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
	int newCloseOnExitBitmapSize = (newSize + 7) / 8;

	if (newSize < oldSize) {
		// shrink the fd table

		// Make sure none of the fds being dropped are in use
		for (int i = oldSize; i-- > newSize;) {
			if (context->fds[i]) {
				status = EBUSY;
				goto out;
			}
		}

		fds = (struct file_descriptor **)malloc(
			sizeof(struct file_descriptor *) * newSize
			+ newCloseOnExitBitmapSize);
		if (fds == NULL) {
			status = ENOMEM;
			goto out;
		}

		memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize);

		// copy close-on-exit bitmap
		// (context->fds + oldSize is where the old bitmap starts)
		memcpy(fds + newSize, context->fds + oldSize, newCloseOnExitBitmapSize);
	} else {
		// enlarge the fd table

		fds = (struct file_descriptor **)malloc(
			sizeof(struct file_descriptor *) * newSize
			+ newCloseOnExitBitmapSize);
		if (fds == NULL) {
			status = ENOMEM;
			goto out;
		}

		// copy the fd array, and zero the additional slots
		memcpy(fds, context->fds, sizeof(void *) * oldSize);
		memset(fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));

		// copy close-on-exit bitmap, and zero out additional bytes
		memcpy(fds + newSize, context->fds + oldSize, oldCloseOnExitBitmapSize);
		memset((uint8*)(fds + newSize) + oldCloseOnExitBitmapSize, 0,
			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
	}

	// swap in the new table
	free(context->fds);
	context->fds = fds;
	context->fds_close_on_exec = (uint8 *)(context->fds + newSize);
	context->table_size = newSize;

out:
	mutex_unlock(&context->io_mutex);
	return status;
}
3406 
3407 
3408 static status_t
3409 vfs_resize_monitor_table(struct io_context *context, const int newSize)
3410 {
3411 	void *fds;
3412 	int	status = B_OK;
3413 
3414 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3415 		return EINVAL;
3416 
3417 	mutex_lock(&context->io_mutex);
3418 
3419 	if ((size_t)newSize < context->num_monitors) {
3420 		status = EBUSY;
3421 		goto out;
3422 	}
3423 	context->max_monitors = newSize;
3424 
3425 out:
3426 	mutex_unlock(&context->io_mutex);
3427 	return status;
3428 }
3429 
3430 
3431 int
3432 vfs_getrlimit(int resource, struct rlimit * rlp)
3433 {
3434 	if (!rlp)
3435 		return B_BAD_ADDRESS;
3436 
3437 	switch (resource) {
3438 		case RLIMIT_NOFILE:
3439 		{
3440 			struct io_context *ioctx = get_current_io_context(false);
3441 
3442 			mutex_lock(&ioctx->io_mutex);
3443 
3444 			rlp->rlim_cur = ioctx->table_size;
3445 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3446 
3447 			mutex_unlock(&ioctx->io_mutex);
3448 
3449 			return 0;
3450 		}
3451 
3452 		case RLIMIT_NOVMON:
3453 		{
3454 			struct io_context *ioctx = get_current_io_context(false);
3455 
3456 			mutex_lock(&ioctx->io_mutex);
3457 
3458 			rlp->rlim_cur = ioctx->max_monitors;
3459 			rlp->rlim_max = MAX_NODE_MONITORS;
3460 
3461 			mutex_unlock(&ioctx->io_mutex);
3462 
3463 			return 0;
3464 		}
3465 
3466 		default:
3467 			return EINVAL;
3468 	}
3469 }
3470 
3471 
3472 int
3473 vfs_setrlimit(int resource, const struct rlimit * rlp)
3474 {
3475 	if (!rlp)
3476 		return B_BAD_ADDRESS;
3477 
3478 	switch (resource) {
3479 		case RLIMIT_NOFILE:
3480 			/* TODO: check getuid() */
3481 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3482 			    rlp->rlim_max != MAX_FD_TABLE_SIZE)
3483 				return EPERM;
3484 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3485 
3486 		case RLIMIT_NOVMON:
3487 			/* TODO: check getuid() */
3488 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3489 			    rlp->rlim_max != MAX_NODE_MONITORS)
3490 				return EPERM;
3491 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
3492 
3493 		default:
3494 			return EINVAL;
3495 	}
3496 }
3497 
3498 
/*!	Initializes the VFS layer: creates the global vnode and mount hash
	tables and the unused-vnode list, sets up the node monitor subsystem
	and all VFS locks, registers the kernel debugger commands and the
	low memory handler, and initializes the block and file caches.
	Panics if any of the essential structures cannot be created.
	\a args is currently unused here.
*/
status_t
vfs_init(kernel_args *args)
{
	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
		&vnode_compare, &vnode_hash);
	if (sVnodeTable == NULL)
		panic("vfs_init: error creating vnode hash table\n");

	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));

	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
		&mount_compare, &mount_hash);
	if (sMountsTable == NULL)
		panic("vfs_init: error creating mounts hash table\n");

	node_monitor_init();

	// the root vnode is published when the root FS is mounted
	sRoot = NULL;

	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
		panic("vfs_init: error allocating file systems lock\n");

	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
		panic("vfs_init: error allocating mount op lock\n");

	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
		panic("vfs_init: error allocating mount lock\n");

	if (mutex_init(&sVnodeCoveredByMutex, "vfs_vnode_covered_by_lock") < 0)
		panic("vfs_init: error allocating vnode::covered_by lock\n");

	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
		panic("vfs_init: error allocating vnode lock\n");

	if (block_cache_init() != B_OK)
		return B_ERROR;

#ifdef ADD_DEBUGGER_COMMANDS
	// add some debugger commands
	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
#endif

	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);

	return file_cache_init();
}
3551 
3552 
3553 //	#pragma mark - fd_ops implementations
3554 
3555 
3556 /*!
3557 	Calls fs_open() on the given vnode and returns a new
3558 	file descriptor for it
3559 */
3560 static int
3561 create_vnode(struct vnode *directory, const char *name, int openMode,
3562 	int perms, bool kernel)
3563 {
3564 	struct vnode *vnode;
3565 	fs_cookie cookie;
3566 	vnode_id newID;
3567 	int status;
3568 
3569 	if (FS_CALL(directory, create) == NULL)
3570 		return EROFS;
3571 
3572 	status = FS_CALL(directory, create)(directory->mount->cookie,
3573 		directory->private_node, name, openMode, perms, &cookie, &newID);
3574 	if (status < B_OK)
3575 		return status;
3576 
3577 	mutex_lock(&sVnodeMutex);
3578 	vnode = lookup_vnode(directory->device, newID);
3579 	mutex_unlock(&sVnodeMutex);
3580 
3581 	if (vnode == NULL) {
3582 		panic("vfs: fs_create() returned success but there is no vnode, mount ID %ld!\n",
3583 			directory->device);
3584 		return B_BAD_VALUE;
3585 	}
3586 
3587 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
3588 		return status;
3589 
3590 	// something went wrong, clean up
3591 
3592 	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3593 	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3594 	put_vnode(vnode);
3595 
3596 	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);
3597 
3598 	return status;
3599 }
3600 
3601 
3602 /*!
3603 	Calls fs_open() on the given vnode and returns a new
3604 	file descriptor for it
3605 */
3606 static int
3607 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3608 {
3609 	fs_cookie cookie;
3610 	int status;
3611 
3612 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3613 	if (status < 0)
3614 		return status;
3615 
3616 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3617 	if (status < 0) {
3618 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3619 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3620 	}
3621 	return status;
3622 }
3623 
3624 
3625 /** Calls fs open_dir() on the given vnode and returns a new
3626  *	file descriptor for it
3627  */
3628 
3629 static int
3630 open_dir_vnode(struct vnode *vnode, bool kernel)
3631 {
3632 	fs_cookie cookie;
3633 	int status;
3634 
3635 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3636 	if (status < B_OK)
3637 		return status;
3638 
3639 	// file is opened, create a fd
3640 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3641 	if (status >= 0)
3642 		return status;
3643 
3644 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3645 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3646 
3647 	return status;
3648 }
3649 
3650 
3651 /** Calls fs open_attr_dir() on the given vnode and returns a new
3652  *	file descriptor for it.
3653  *	Used by attr_dir_open(), and attr_dir_open_fd().
3654  */
3655 
3656 static int
3657 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3658 {
3659 	fs_cookie cookie;
3660 	int status;
3661 
3662 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3663 		return EOPNOTSUPP;
3664 
3665 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3666 	if (status < 0)
3667 		return status;
3668 
3669 	// file is opened, create a fd
3670 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3671 	if (status >= 0)
3672 		return status;
3673 
3674 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3675 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3676 
3677 	return status;
3678 }
3679 
3680 
3681 static int
3682 file_create_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, int perms, bool kernel)
3683 {
3684 	struct vnode *directory;
3685 	int status;
3686 
3687 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
3688 
3689 	// get directory to put the new file in
3690 	status = get_vnode(mountID, directoryID, &directory, false);
3691 	if (status < B_OK)
3692 		return status;
3693 
3694 	status = create_vnode(directory, name, openMode, perms, kernel);
3695 	put_vnode(directory);
3696 
3697 	return status;
3698 }
3699 
3700 
3701 static int
3702 file_create(int fd, char *path, int openMode, int perms, bool kernel)
3703 {
3704 	char name[B_FILE_NAME_LENGTH];
3705 	struct vnode *directory;
3706 	int status;
3707 
3708 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
3709 
3710 	// get directory to put the new file in
3711 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3712 	if (status < 0)
3713 		return status;
3714 
3715 	status = create_vnode(directory, name, openMode, perms, kernel);
3716 
3717 	put_vnode(directory);
3718 	return status;
3719 }
3720 
3721 
3722 static int
3723 file_open_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, bool kernel)
3724 {
3725 	struct vnode *vnode;
3726 	int status;
3727 
3728 	if (name == NULL || *name == '\0')
3729 		return B_BAD_VALUE;
3730 
3731 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
3732 		mountID, directoryID, name, openMode));
3733 
3734 	// get the vnode matching the entry_ref
3735 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
3736 	if (status < B_OK)
3737 		return status;
3738 
3739 	status = open_vnode(vnode, openMode, kernel);
3740 	if (status < B_OK)
3741 		put_vnode(vnode);
3742 
3743 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
3744 	return status;
3745 }
3746 
3747 
3748 static int
3749 file_open(int fd, char *path, int openMode, bool kernel)
3750 {
3751 	int status = B_OK;
3752 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
3753 
3754 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
3755 		fd, path, openMode, kernel));
3756 
3757 	// get the vnode matching the vnode + path combination
3758 	struct vnode *vnode = NULL;
3759 	vnode_id parentID;
3760 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
3761 	if (status != B_OK)
3762 		return status;
3763 
3764 	// open the vnode
3765 	status = open_vnode(vnode, openMode, kernel);
3766 	// put only on error -- otherwise our reference was transferred to the FD
3767 	if (status < B_OK)
3768 		put_vnode(vnode);
3769 
3770 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
3771 		vnode->device, parentID, vnode->id, NULL);
3772 
3773 	return status;
3774 }
3775 
3776 
3777 static status_t
3778 file_close(struct file_descriptor *descriptor)
3779 {
3780 	struct vnode *vnode = descriptor->u.vnode;
3781 	status_t status = B_OK;
3782 
3783 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
3784 
3785 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
3786 	if (FS_CALL(vnode, close))
3787 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3788 
3789 	if (status == B_OK) {
3790 		// remove all outstanding locks for this team
3791 		release_advisory_lock(vnode, NULL);
3792 	}
3793 	return status;
3794 }
3795 
3796 
3797 static void
3798 file_free_fd(struct file_descriptor *descriptor)
3799 {
3800 	struct vnode *vnode = descriptor->u.vnode;
3801 
3802 	if (vnode != NULL) {
3803 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3804 		put_vnode(vnode);
3805 	}
3806 }
3807 
3808 
3809 static status_t
3810 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
3811 {
3812 	struct vnode *vnode = descriptor->u.vnode;
3813 
3814 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
3815 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3816 }
3817 
3818 
3819 static status_t
3820 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
3821 {
3822 	struct vnode *vnode = descriptor->u.vnode;
3823 
3824 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
3825 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3826 }
3827 
3828 
/*!	Implements lseek() semantics on the descriptor: computes the base
	offset for the seek type, adds \a pos with an overflow check, and
	stores/returns the new position (negative results are rejected).
	NOTE(review): the overflow check only covers a positive base offset;
	a large negative \a pos combined with a negative offset is not
	guarded against -- verify whether callers can produce that.
*/
static off_t
file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
{
	off_t offset;

	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
	// ToDo: seek should fail for pipes and FIFOs...

	switch (seekType) {
		case SEEK_SET:
			offset = 0;
			break;
		case SEEK_CUR:
			offset = descriptor->pos;
			break;
		case SEEK_END:
		{
			// SEEK_END needs the current file size from the FS
			struct vnode *vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (FS_CALL(vnode, read_stat) == NULL)
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
			if (status < B_OK)
				return status;

			offset = stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	// assumes off_t is 64 bits wide
	if (offset > 0 && LONGLONG_MAX - offset < pos)
		return EOVERFLOW;

	pos += offset;
	if (pos < 0)
		return B_BAD_VALUE;

	// update the descriptor and report the new absolute position
	return descriptor->pos = pos;
}
3874 
3875 
3876 static status_t
3877 file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
3878 	struct select_sync *sync)
3879 {
3880 	FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
3881 
3882 	struct vnode *vnode = descriptor->u.vnode;
3883 
3884 	// If the FS has no select() hook, notify select() now.
3885 	if (FS_CALL(vnode, select) == NULL)
3886 		return notify_select_event((selectsync*)sync, ref, event);
3887 
3888 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
3889 		descriptor->cookie, event, ref, (selectsync*)sync);
3890 }
3891 
3892 
3893 static status_t
3894 file_deselect(struct file_descriptor *descriptor, uint8 event,
3895 	struct select_sync *sync)
3896 {
3897 	struct vnode *vnode = descriptor->u.vnode;
3898 
3899 	if (FS_CALL(vnode, deselect) == NULL)
3900 		return B_OK;
3901 
3902 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
3903 		descriptor->cookie, event, (selectsync*)sync);
3904 }
3905 
3906 
3907 static status_t
3908 dir_create_entry_ref(mount_id mountID, vnode_id parentID, const char *name, int perms, bool kernel)
3909 {
3910 	struct vnode *vnode;
3911 	vnode_id newID;
3912 	status_t status;
3913 
3914 	if (name == NULL || *name == '\0')
3915 		return B_BAD_VALUE;
3916 
3917 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
3918 
3919 	status = get_vnode(mountID, parentID, &vnode, kernel);
3920 	if (status < B_OK)
3921 		return status;
3922 
3923 	if (FS_CALL(vnode, create_dir))
3924 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
3925 	else
3926 		status = EROFS;
3927 
3928 	put_vnode(vnode);
3929 	return status;
3930 }
3931 
3932 
3933 static status_t
3934 dir_create(int fd, char *path, int perms, bool kernel)
3935 {
3936 	char filename[B_FILE_NAME_LENGTH];
3937 	struct vnode *vnode;
3938 	vnode_id newID;
3939 	status_t status;
3940 
3941 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
3942 
3943 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3944 	if (status < 0)
3945 		return status;
3946 
3947 	if (FS_CALL(vnode, create_dir))
3948 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
3949 	else
3950 		status = EROFS;
3951 
3952 	put_vnode(vnode);
3953 	return status;
3954 }
3955 
3956 
3957 static int
3958 dir_open_entry_ref(mount_id mountID, vnode_id parentID, const char *name, bool kernel)
3959 {
3960 	struct vnode *vnode;
3961 	int status;
3962 
3963 	FUNCTION(("dir_open_entry_ref()\n"));
3964 
3965 	if (name && *name == '\0')
3966 		return B_BAD_VALUE;
3967 
3968 	// get the vnode matching the entry_ref/node_ref
3969 	if (name)
3970 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
3971 	else
3972 		status = get_vnode(mountID, parentID, &vnode, false);
3973 	if (status < B_OK)
3974 		return status;
3975 
3976 	status = open_dir_vnode(vnode, kernel);
3977 	if (status < B_OK)
3978 		put_vnode(vnode);
3979 
3980 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
3981 	return status;
3982 }
3983 
3984 
3985 static int
3986 dir_open(int fd, char *path, bool kernel)
3987 {
3988 	int status = B_OK;
3989 
3990 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
3991 
3992 	// get the vnode matching the vnode + path combination
3993 	struct vnode *vnode = NULL;
3994 	vnode_id parentID;
3995 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
3996 	if (status != B_OK)
3997 		return status;
3998 
3999 	// open the dir
4000 	status = open_dir_vnode(vnode, kernel);
4001 	if (status < B_OK)
4002 		put_vnode(vnode);
4003 
4004 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
4005 	return status;
4006 }
4007 
4008 
4009 static status_t
4010 dir_close(struct file_descriptor *descriptor)
4011 {
4012 	struct vnode *vnode = descriptor->u.vnode;
4013 
4014 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
4015 
4016 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
4017 	if (FS_CALL(vnode, close_dir))
4018 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4019 
4020 	return B_OK;
4021 }
4022 
4023 
4024 static void
4025 dir_free_fd(struct file_descriptor *descriptor)
4026 {
4027 	struct vnode *vnode = descriptor->u.vnode;
4028 
4029 	if (vnode != NULL) {
4030 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4031 		put_vnode(vnode);
4032 	}
4033 }
4034 
4035 
4036 static status_t
4037 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4038 {
4039 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
4040 }
4041 
4042 
/*!	Post-processes a dirent a file system handed back, before it goes to
	the caller: fills in the parent device/node IDs and rewrites entries
	that cross mount boundaries so they refer to the correct nodes.
*/
static void
fix_dirent(struct vnode *parent, struct dirent *entry)
{
	// set d_pdev and d_pino
	entry->d_pdev = parent->device;
	entry->d_pino = parent->id;

	// If this is the ".." entry and the directory is the root of a FS,
	// we need to replace d_dev and d_ino with the actual values.
	if (strcmp(entry->d_name, "..") == 0
		&& parent->mount->root_vnode == parent
		&& parent->mount->covers_vnode) {
		inc_vnode_ref_count(parent);
			// vnode_path_to_vnode() puts the node

		struct vnode *vnode;
		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
			NULL, NULL);

		if (status == B_OK) {
			entry->d_dev = vnode->device;
			entry->d_ino = vnode->id;
		}
		// NOTE(review): no put_vnode() is done for "vnode" here -- verify
		// whether vnode_path_to_vnode() returns a reference that leaks.
	} else {
		// resolve mount points
		struct vnode *vnode = NULL;
		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, false);
		if (status != B_OK)
			return;

		// if another FS is mounted on this node, report the covering
		// node instead, so the entry behaves like the mount point
		mutex_lock(&sVnodeCoveredByMutex);
		if (vnode->covered_by) {
			entry->d_dev = vnode->covered_by->device;
			entry->d_ino = vnode->covered_by->id;
		}
		mutex_unlock(&sVnodeCoveredByMutex);

		put_vnode(vnode);
	}
}
4083 
4084 
4085 static status_t
4086 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4087 {
4088 	if (!FS_CALL(vnode, read_dir))
4089 		return EOPNOTSUPP;
4090 
4091 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie,vnode->private_node,cookie,buffer,bufferSize,_count);
4092 	if (error != B_OK)
4093 		return error;
4094 
4095 	// we need to adjust the read dirents
4096 	if (*_count > 0) {
4097 		// XXX: Currently reading only one dirent is supported. Make this a loop!
4098 		fix_dirent(vnode, buffer);
4099 	}
4100 
4101 	return error;
4102 }
4103 
4104 
4105 static status_t
4106 dir_rewind(struct file_descriptor *descriptor)
4107 {
4108 	struct vnode *vnode = descriptor->u.vnode;
4109 
4110 	if (FS_CALL(vnode, rewind_dir))
4111 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie,vnode->private_node,descriptor->cookie);
4112 
4113 	return EOPNOTSUPP;
4114 }
4115 
4116 
/*!	Removes the directory specified by \a fd + \a path.
	A path ending in ".." is rejected; a trailing "/" or "/." is
	stripped in place so the FS gets the plain directory name.
*/
static status_t
dir_remove(int fd, char *path, bool kernel)
{
	char name[B_FILE_NAME_LENGTH];
	struct vnode *directory;
	status_t status;

	if (path != NULL) {
		// we need to make sure our path name doesn't stop with "/", ".", or ".."
		char *lastSlash = strrchr(path, '/');
		if (lastSlash != NULL) {
			char *leaf = lastSlash + 1;
			if (!strcmp(leaf, ".."))
				return B_NOT_ALLOWED;

			// omit multiple slashes
			while (lastSlash > path && lastSlash[-1] == '/') {
				lastSlash--;
			}

			if (!leaf[0]
				|| !strcmp(leaf, ".")) {
				// "name/" -> "name", or "name/." -> "name"
				lastSlash[0] = '\0';
			}
		} else if (!strcmp(path, ".."))
			return B_NOT_ALLOWED;
	}

	// split off the parent directory and the leaf name
	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
	if (status < B_OK)
		return status;

	// a file system without remove_dir() is treated as read-only
	if (FS_CALL(directory, remove_dir)) {
		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
			directory->private_node, name);
	} else
		status = EROFS;

	put_vnode(directory);
	return status;
}
4159 
4160 
4161 static status_t
4162 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
4163 {
4164 	struct vnode *vnode = descriptor->u.vnode;
4165 
4166 	if (FS_CALL(vnode, ioctl)) {
4167 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4168 			descriptor->cookie, op, buffer, length);
4169 	}
4170 
4171 	return EOPNOTSUPP;
4172 }
4173 
4174 
4175 static status_t
4176 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4177 {
4178 	struct file_descriptor *descriptor;
4179 	struct vnode *vnode;
4180 	struct flock flock;
4181 	status_t status;
4182 
4183 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4184 		fd, op, argument, kernel ? "kernel" : "user"));
4185 
4186 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4187 	if (descriptor == NULL)
4188 		return B_FILE_ERROR;
4189 
4190 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4191 		if (descriptor->type != FDTYPE_FILE)
4192 			return B_BAD_VALUE;
4193 		if (user_memcpy(&flock, (struct flock *)argument, sizeof(struct flock)) < B_OK)
4194 			return B_BAD_ADDRESS;
4195 	}
4196 
4197 	switch (op) {
4198 		case F_SETFD:
4199 		{
4200 			struct io_context *context = get_current_io_context(kernel);
4201 			// Set file descriptor flags
4202 
4203 			// O_CLOEXEC is the only flag available at this time
4204 			mutex_lock(&context->io_mutex);
4205 			fd_set_close_on_exec(context, fd, argument == FD_CLOEXEC);
4206 			mutex_unlock(&context->io_mutex);
4207 
4208 			status = B_OK;
4209 			break;
4210 		}
4211 
4212 		case F_GETFD:
4213 		{
4214 			struct io_context *context = get_current_io_context(kernel);
4215 
4216 			// Get file descriptor flags
4217 			mutex_lock(&context->io_mutex);
4218 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4219 			mutex_unlock(&context->io_mutex);
4220 			break;
4221 		}
4222 
4223 		case F_SETFL:
4224 			// Set file descriptor open mode
4225 			if (FS_CALL(vnode, set_flags)) {
4226 				// we only accept changes to O_APPEND and O_NONBLOCK
4227 				argument &= O_APPEND | O_NONBLOCK;
4228 
4229 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4230 					vnode->private_node, descriptor->cookie, (int)argument);
4231 				if (status == B_OK) {
4232 					// update this descriptor's open_mode field
4233 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK))
4234 						| argument;
4235 				}
4236 			} else
4237 				status = EOPNOTSUPP;
4238 			break;
4239 
4240 		case F_GETFL:
4241 			// Get file descriptor open mode
4242 			status = descriptor->open_mode;
4243 			break;
4244 
4245 		case F_DUPFD:
4246 		{
4247 			struct io_context *context = get_current_io_context(kernel);
4248 
4249 			status = new_fd_etc(context, descriptor, (int)argument);
4250 			if (status >= 0) {
4251 				mutex_lock(&context->io_mutex);
4252 				fd_set_close_on_exec(context, fd, false);
4253 				mutex_unlock(&context->io_mutex);
4254 
4255 				atomic_add(&descriptor->ref_count, 1);
4256 			}
4257 			break;
4258 		}
4259 
4260 		case F_GETLK:
4261 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4262 			if (status == B_OK) {
4263 				// copy back flock structure
4264 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
4265 			}
4266 			break;
4267 
4268 		case F_SETLK:
4269 		case F_SETLKW:
4270 			status = normalize_flock(descriptor, &flock);
4271 			if (status < B_OK)
4272 				break;
4273 
4274 			if (flock.l_type == F_UNLCK)
4275 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4276 			else {
4277 				// the open mode must match the lock type
4278 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK
4279 					|| (descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK)
4280 					status = B_FILE_ERROR;
4281 				else
4282 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
4283 			}
4284 			break;
4285 
4286 		// ToDo: add support for more ops?
4287 
4288 		default:
4289 			status = B_BAD_VALUE;
4290 	}
4291 
4292 	put_fd(descriptor);
4293 	return status;
4294 }
4295 
4296 
4297 static status_t
4298 common_sync(int fd, bool kernel)
4299 {
4300 	struct file_descriptor *descriptor;
4301 	struct vnode *vnode;
4302 	status_t status;
4303 
4304 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
4305 
4306 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4307 	if (descriptor == NULL)
4308 		return B_FILE_ERROR;
4309 
4310 	if (FS_CALL(vnode, fsync) != NULL)
4311 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4312 	else
4313 		status = EOPNOTSUPP;
4314 
4315 	put_fd(descriptor);
4316 	return status;
4317 }
4318 
4319 
4320 static status_t
4321 common_lock_node(int fd, bool kernel)
4322 {
4323 	struct file_descriptor *descriptor;
4324 	struct vnode *vnode;
4325 
4326 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4327 	if (descriptor == NULL)
4328 		return B_FILE_ERROR;
4329 
4330 	status_t status = B_OK;
4331 
4332 	// We need to set the locking atomically - someone
4333 	// else might set one at the same time
4334 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
4335 			(addr_t)descriptor, NULL) != NULL)
4336 		status = B_BUSY;
4337 
4338 	put_fd(descriptor);
4339 	return status;
4340 }
4341 
4342 
4343 static status_t
4344 common_unlock_node(int fd, bool kernel)
4345 {
4346 	struct file_descriptor *descriptor;
4347 	struct vnode *vnode;
4348 
4349 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4350 	if (descriptor == NULL)
4351 		return B_FILE_ERROR;
4352 
4353 	status_t status = B_OK;
4354 
4355 	// We need to set the locking atomically - someone
4356 	// else might set one at the same time
4357 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
4358 			NULL, (addr_t)descriptor) != (int32)descriptor)
4359 		status = B_BAD_VALUE;
4360 
4361 	put_fd(descriptor);
4362 	return status;
4363 }
4364 
4365 
4366 static status_t
4367 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4368 	bool kernel)
4369 {
4370 	struct vnode *vnode;
4371 	status_t status;
4372 
4373 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4374 	if (status < B_OK)
4375 		return status;
4376 
4377 	if (FS_CALL(vnode, read_symlink) != NULL) {
4378 		status = FS_CALL(vnode, read_symlink)(vnode->mount->cookie,
4379 			vnode->private_node, buffer, _bufferSize);
4380 	} else
4381 		status = B_BAD_VALUE;
4382 
4383 	put_vnode(vnode);
4384 	return status;
4385 }
4386 
4387 
4388 static status_t
4389 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4390 	bool kernel)
4391 {
4392 	// path validity checks have to be in the calling function!
4393 	char name[B_FILE_NAME_LENGTH];
4394 	struct vnode *vnode;
4395 	status_t status;
4396 
4397 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4398 
4399 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4400 	if (status < B_OK)
4401 		return status;
4402 
4403 	if (FS_CALL(vnode, create_symlink) != NULL)
4404 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4405 	else
4406 		status = EROFS;
4407 
4408 	put_vnode(vnode);
4409 
4410 	return status;
4411 }
4412 
4413 
4414 static status_t
4415 common_create_link(char *path, char *toPath, bool kernel)
4416 {
4417 	// path validity checks have to be in the calling function!
4418 	char name[B_FILE_NAME_LENGTH];
4419 	struct vnode *directory, *vnode;
4420 	status_t status;
4421 
4422 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4423 
4424 	status = path_to_dir_vnode(path, &directory, name, kernel);
4425 	if (status < B_OK)
4426 		return status;
4427 
4428 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4429 	if (status < B_OK)
4430 		goto err;
4431 
4432 	if (directory->mount != vnode->mount) {
4433 		status = B_CROSS_DEVICE_LINK;
4434 		goto err1;
4435 	}
4436 
4437 	if (FS_CALL(vnode, link) != NULL)
4438 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4439 	else
4440 		status = EROFS;
4441 
4442 err1:
4443 	put_vnode(vnode);
4444 err:
4445 	put_vnode(directory);
4446 
4447 	return status;
4448 }
4449 
4450 
4451 static status_t
4452 common_unlink(int fd, char *path, bool kernel)
4453 {
4454 	char filename[B_FILE_NAME_LENGTH];
4455 	struct vnode *vnode;
4456 	status_t status;
4457 
4458 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4459 
4460 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4461 	if (status < 0)
4462 		return status;
4463 
4464 	if (FS_CALL(vnode, unlink) != NULL)
4465 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4466 	else
4467 		status = EROFS;
4468 
4469 	put_vnode(vnode);
4470 
4471 	return status;
4472 }
4473 
4474 
4475 static status_t
4476 common_access(char *path, int mode, bool kernel)
4477 {
4478 	struct vnode *vnode;
4479 	status_t status;
4480 
4481 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4482 	if (status < B_OK)
4483 		return status;
4484 
4485 	if (FS_CALL(vnode, access) != NULL)
4486 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4487 	else
4488 		status = B_OK;
4489 
4490 	put_vnode(vnode);
4491 
4492 	return status;
4493 }
4494 
4495 
4496 static status_t
4497 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4498 {
4499 	struct vnode *fromVnode, *toVnode;
4500 	char fromName[B_FILE_NAME_LENGTH];
4501 	char toName[B_FILE_NAME_LENGTH];
4502 	status_t status;
4503 
4504 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4505 
4506 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4507 	if (status < 0)
4508 		return status;
4509 
4510 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4511 	if (status < 0)
4512 		goto err;
4513 
4514 	if (fromVnode->device != toVnode->device) {
4515 		status = B_CROSS_DEVICE_LINK;
4516 		goto err1;
4517 	}
4518 
4519 	if (FS_CALL(fromVnode, rename) != NULL)
4520 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4521 	else
4522 		status = EROFS;
4523 
4524 err1:
4525 	put_vnode(toVnode);
4526 err:
4527 	put_vnode(fromVnode);
4528 
4529 	return status;
4530 }
4531 
4532 
4533 static status_t
4534 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4535 {
4536 	struct vnode *vnode = descriptor->u.vnode;
4537 
4538 	FUNCTION(("common_read_stat: stat %p\n", stat));
4539 
4540 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4541 		vnode->private_node, stat);
4542 
4543 	// fill in the st_dev and st_ino fields
4544 	if (status == B_OK) {
4545 		stat->st_dev = vnode->device;
4546 		stat->st_ino = vnode->id;
4547 	}
4548 
4549 	return status;
4550 }
4551 
4552 
4553 static status_t
4554 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4555 {
4556 	struct vnode *vnode = descriptor->u.vnode;
4557 
4558 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4559 	if (!FS_CALL(vnode, write_stat))
4560 		return EROFS;
4561 
4562 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4563 }
4564 
4565 
4566 static status_t
4567 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4568 	struct stat *stat, bool kernel)
4569 {
4570 	struct vnode *vnode;
4571 	status_t status;
4572 
4573 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4574 
4575 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4576 	if (status < 0)
4577 		return status;
4578 
4579 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4580 
4581 	// fill in the st_dev and st_ino fields
4582 	if (status == B_OK) {
4583 		stat->st_dev = vnode->device;
4584 		stat->st_ino = vnode->id;
4585 	}
4586 
4587 	put_vnode(vnode);
4588 	return status;
4589 }
4590 
4591 
4592 static status_t
4593 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4594 	const struct stat *stat, int statMask, bool kernel)
4595 {
4596 	struct vnode *vnode;
4597 	status_t status;
4598 
4599 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4600 
4601 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4602 	if (status < 0)
4603 		return status;
4604 
4605 	if (FS_CALL(vnode, write_stat))
4606 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4607 	else
4608 		status = EROFS;
4609 
4610 	put_vnode(vnode);
4611 
4612 	return status;
4613 }
4614 
4615 
4616 static int
4617 attr_dir_open(int fd, char *path, bool kernel)
4618 {
4619 	struct vnode *vnode;
4620 	int status;
4621 
4622 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4623 
4624 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4625 	if (status < B_OK)
4626 		return status;
4627 
4628 	status = open_attr_dir_vnode(vnode, kernel);
4629 	if (status < 0)
4630 		put_vnode(vnode);
4631 
4632 	return status;
4633 }
4634 
4635 
4636 static status_t
4637 attr_dir_close(struct file_descriptor *descriptor)
4638 {
4639 	struct vnode *vnode = descriptor->u.vnode;
4640 
4641 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4642 
4643 	if (FS_CALL(vnode, close_attr_dir))
4644 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4645 
4646 	return B_OK;
4647 }
4648 
4649 
4650 static void
4651 attr_dir_free_fd(struct file_descriptor *descriptor)
4652 {
4653 	struct vnode *vnode = descriptor->u.vnode;
4654 
4655 	if (vnode != NULL) {
4656 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4657 		put_vnode(vnode);
4658 	}
4659 }
4660 
4661 
4662 static status_t
4663 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4664 {
4665 	struct vnode *vnode = descriptor->u.vnode;
4666 
4667 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
4668 
4669 	if (FS_CALL(vnode, read_attr_dir))
4670 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
4671 
4672 	return EOPNOTSUPP;
4673 }
4674 
4675 
4676 static status_t
4677 attr_dir_rewind(struct file_descriptor *descriptor)
4678 {
4679 	struct vnode *vnode = descriptor->u.vnode;
4680 
4681 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
4682 
4683 	if (FS_CALL(vnode, rewind_attr_dir))
4684 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4685 
4686 	return EOPNOTSUPP;
4687 }
4688 
4689 
static int
attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	int status;

	// an attribute must have a non-empty name
	if (name == NULL || *name == '\0')
		return B_BAD_VALUE;

	// get_vnode_from_fd() references the vnode; it is put on all
	// error paths below
	vnode = get_vnode_from_fd(fd, kernel);
	if (vnode == NULL)
		return B_FILE_ERROR;

	// without a create_attr() hook the volume is treated as read-only
	if (FS_CALL(vnode, create_attr) == NULL) {
		status = EROFS;
		goto err;
	}

	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
	if (status < B_OK)
		goto err;

	// on success the new descriptor takes over both the vnode reference
	// and the attribute cookie (see attr_free_fd())
	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// no fd could be allocated: close and free the cookie again,
	// and also remove the attribute we just created
	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);

	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);

err:
	put_vnode(vnode);

	return status;
}
4726 
4727 
static int
attr_open(int fd, const char *name, int openMode, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	int status;

	// attribute names must be non-empty
	if (name == NULL || *name == '\0')
		return B_BAD_VALUE;

	// get_vnode_from_fd() references the vnode; it is put on all
	// error paths below
	vnode = get_vnode_from_fd(fd, kernel);
	if (vnode == NULL)
		return B_FILE_ERROR;

	// attributes are an optional FS feature
	if (FS_CALL(vnode, open_attr) == NULL) {
		status = EOPNOTSUPP;
		goto err;
	}

	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
	if (status < B_OK)
		goto err;

	// now we only need a file descriptor for this attribute and we're done
	// (the descriptor takes over the vnode reference and the cookie)
	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// no fd could be allocated: close and free the cookie again
	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);

err:
	put_vnode(vnode);

	return status;
}
4763 
4764 
4765 static status_t
4766 attr_close(struct file_descriptor *descriptor)
4767 {
4768 	struct vnode *vnode = descriptor->u.vnode;
4769 
4770 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
4771 
4772 	if (FS_CALL(vnode, close_attr))
4773 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4774 
4775 	return B_OK;
4776 }
4777 
4778 
4779 static void
4780 attr_free_fd(struct file_descriptor *descriptor)
4781 {
4782 	struct vnode *vnode = descriptor->u.vnode;
4783 
4784 	if (vnode != NULL) {
4785 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4786 		put_vnode(vnode);
4787 	}
4788 }
4789 
4790 
4791 static status_t
4792 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4793 {
4794 	struct vnode *vnode = descriptor->u.vnode;
4795 
4796 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4797 	if (!FS_CALL(vnode, read_attr))
4798 		return EOPNOTSUPP;
4799 
4800 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4801 }
4802 
4803 
4804 static status_t
4805 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4806 {
4807 	struct vnode *vnode = descriptor->u.vnode;
4808 
4809 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4810 	if (!FS_CALL(vnode, write_attr))
4811 		return EOPNOTSUPP;
4812 
4813 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4814 }
4815 
4816 
4817 static off_t
4818 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4819 {
4820 	off_t offset;
4821 
4822 	switch (seekType) {
4823 		case SEEK_SET:
4824 			offset = 0;
4825 			break;
4826 		case SEEK_CUR:
4827 			offset = descriptor->pos;
4828 			break;
4829 		case SEEK_END:
4830 		{
4831 			struct vnode *vnode = descriptor->u.vnode;
4832 			struct stat stat;
4833 			status_t status;
4834 
4835 			if (FS_CALL(vnode, read_stat) == NULL)
4836 				return EOPNOTSUPP;
4837 
4838 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
4839 			if (status < B_OK)
4840 				return status;
4841 
4842 			offset = stat.st_size;
4843 			break;
4844 		}
4845 		default:
4846 			return B_BAD_VALUE;
4847 	}
4848 
4849 	// assumes off_t is 64 bits wide
4850 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4851 		return EOVERFLOW;
4852 
4853 	pos += offset;
4854 	if (pos < 0)
4855 		return B_BAD_VALUE;
4856 
4857 	return descriptor->pos = pos;
4858 }
4859 
4860 
4861 static status_t
4862 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4863 {
4864 	struct vnode *vnode = descriptor->u.vnode;
4865 
4866 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
4867 
4868 	if (!FS_CALL(vnode, read_attr_stat))
4869 		return EOPNOTSUPP;
4870 
4871 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4872 }
4873 
4874 
4875 static status_t
4876 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4877 {
4878 	struct vnode *vnode = descriptor->u.vnode;
4879 
4880 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
4881 
4882 	if (!FS_CALL(vnode, write_attr_stat))
4883 		return EROFS;
4884 
4885 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
4886 }
4887 
4888 
4889 static status_t
4890 attr_remove(int fd, const char *name, bool kernel)
4891 {
4892 	struct file_descriptor *descriptor;
4893 	struct vnode *vnode;
4894 	status_t status;
4895 
4896 	if (name == NULL || *name == '\0')
4897 		return B_BAD_VALUE;
4898 
4899 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
4900 
4901 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4902 	if (descriptor == NULL)
4903 		return B_FILE_ERROR;
4904 
4905 	if (FS_CALL(vnode, remove_attr))
4906 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4907 	else
4908 		status = EROFS;
4909 
4910 	put_fd(descriptor);
4911 
4912 	return status;
4913 }
4914 
4915 
4916 static status_t
4917 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
4918 {
4919 	struct file_descriptor *fromDescriptor, *toDescriptor;
4920 	struct vnode *fromVnode, *toVnode;
4921 	status_t status;
4922 
4923 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
4924 		return B_BAD_VALUE;
4925 
4926 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
4927 
4928 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
4929 	if (fromDescriptor == NULL)
4930 		return B_FILE_ERROR;
4931 
4932 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
4933 	if (toDescriptor == NULL) {
4934 		status = B_FILE_ERROR;
4935 		goto err;
4936 	}
4937 
4938 	// are the files on the same volume?
4939 	if (fromVnode->device != toVnode->device) {
4940 		status = B_CROSS_DEVICE_LINK;
4941 		goto err1;
4942 	}
4943 
4944 	if (FS_CALL(fromVnode, rename_attr))
4945 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4946 	else
4947 		status = EROFS;
4948 
4949 err1:
4950 	put_fd(toDescriptor);
4951 err:
4952 	put_fd(fromDescriptor);
4953 
4954 	return status;
4955 }
4956 
4957 
static status_t
index_dir_open(mount_id mountID, bool kernel)
{
	struct fs_mount *mount;
	fs_cookie cookie;

	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));

	// get_mount() references the mount; put_mount() runs on every exit below
	status_t status = get_mount(mountID, &mount);
	if (status < B_OK)
		return status;

	// index directories are an optional FS feature
	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
		status = EOPNOTSUPP;
		goto out;
	}

	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
	if (status < B_OK)
		goto out;

	// get fd for the index directory
	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
	if (status >= 0)
		goto out;

	// something went wrong - tear the cookie down again (close before free)
	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);

out:
	put_mount(mount);
	return status;
}
4992 
4993 
4994 static status_t
4995 index_dir_close(struct file_descriptor *descriptor)
4996 {
4997 	struct fs_mount *mount = descriptor->u.mount;
4998 
4999 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
5000 
5001 	if (FS_MOUNT_CALL(mount, close_index_dir))
5002 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
5003 
5004 	return B_OK;
5005 }
5006 
5007 
5008 static void
5009 index_dir_free_fd(struct file_descriptor *descriptor)
5010 {
5011 	struct fs_mount *mount = descriptor->u.mount;
5012 
5013 	if (mount != NULL) {
5014 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
5015 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5016 		//put_vnode(vnode);
5017 	}
5018 }
5019 
5020 
5021 static status_t
5022 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5023 {
5024 	struct fs_mount *mount = descriptor->u.mount;
5025 
5026 	if (FS_MOUNT_CALL(mount, read_index_dir))
5027 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5028 
5029 	return EOPNOTSUPP;
5030 }
5031 
5032 
5033 static status_t
5034 index_dir_rewind(struct file_descriptor *descriptor)
5035 {
5036 	struct fs_mount *mount = descriptor->u.mount;
5037 
5038 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
5039 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
5040 
5041 	return EOPNOTSUPP;
5042 }
5043 
5044 
5045 static status_t
5046 index_create(mount_id mountID, const char *name, uint32 type, uint32 flags, bool kernel)
5047 {
5048 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5049 
5050 	struct fs_mount *mount;
5051 	status_t status = get_mount(mountID, &mount);
5052 	if (status < B_OK)
5053 		return status;
5054 
5055 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
5056 		status = EROFS;
5057 		goto out;
5058 	}
5059 
5060 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
5061 
5062 out:
5063 	put_mount(mount);
5064 	return status;
5065 }
5066 
5067 
#if 0
// NOTE(review): this whole block is compiled out; it is kept as a
// reference for a future per-index descriptor implementation.
static status_t
index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
{
	struct vnode *vnode = descriptor->u.vnode;

	// ToDo: currently unused!
	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
	if (!FS_CALL(vnode, read_index_stat))
		return EOPNOTSUPP;

	// always unsupported for now - the real call is kept below for reference
	return EOPNOTSUPP;
	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
}


static void
index_free_fd(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;

	if (vnode != NULL) {
		// free the FS cookie first, then drop the vnode reference
		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
		put_vnode(vnode);
	}
}
#endif
5095 
5096 
5097 static status_t
5098 index_name_read_stat(mount_id mountID, const char *name, struct stat *stat, bool kernel)
5099 {
5100 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5101 
5102 	struct fs_mount *mount;
5103 	status_t status = get_mount(mountID, &mount);
5104 	if (status < B_OK)
5105 		return status;
5106 
5107 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
5108 		status = EOPNOTSUPP;
5109 		goto out;
5110 	}
5111 
5112 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
5113 
5114 out:
5115 	put_mount(mount);
5116 	return status;
5117 }
5118 
5119 
5120 static status_t
5121 index_remove(mount_id mountID, const char *name, bool kernel)
5122 {
5123 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5124 
5125 	struct fs_mount *mount;
5126 	status_t status = get_mount(mountID, &mount);
5127 	if (status < B_OK)
5128 		return status;
5129 
5130 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
5131 		status = EROFS;
5132 		goto out;
5133 	}
5134 
5135 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
5136 
5137 out:
5138 	put_mount(mount);
5139 	return status;
5140 }
5141 
5142 
5143 /**	ToDo: the query FS API is still the pretty much the same as in R5.
5144  *		It would be nice if the FS would find some more kernel support
5145  *		for them.
5146  *		For example, query parsing should be moved into the kernel.
5147  */
5148 
static int
query_open(dev_t device, const char *query, uint32 flags,
	port_id port, int32 token, bool kernel)
{
	struct fs_mount *mount;
	fs_cookie cookie;

	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));

	// get_mount() references the mount; put_mount() runs on every exit below
	status_t status = get_mount(device, &mount);
	if (status < B_OK)
		return status;

	// queries are an optional FS feature
	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
		status = EOPNOTSUPP;
		goto out;
	}

	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
	if (status < B_OK)
		goto out;

	// get fd for the query
	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
	if (status >= 0)
		goto out;

	// something went wrong - tear the cookie down again (close before free)
	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);

out:
	put_mount(mount);
	return status;
}
5184 
5185 
5186 static status_t
5187 query_close(struct file_descriptor *descriptor)
5188 {
5189 	struct fs_mount *mount = descriptor->u.mount;
5190 
5191 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5192 
5193 	if (FS_MOUNT_CALL(mount, close_query))
5194 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5195 
5196 	return B_OK;
5197 }
5198 
5199 
5200 static void
5201 query_free_fd(struct file_descriptor *descriptor)
5202 {
5203 	struct fs_mount *mount = descriptor->u.mount;
5204 
5205 	if (mount != NULL) {
5206 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5207 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5208 		//put_vnode(vnode);
5209 	}
5210 }
5211 
5212 
5213 static status_t
5214 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5215 {
5216 	struct fs_mount *mount = descriptor->u.mount;
5217 
5218 	if (FS_MOUNT_CALL(mount, read_query))
5219 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5220 
5221 	return EOPNOTSUPP;
5222 }
5223 
5224 
5225 static status_t
5226 query_rewind(struct file_descriptor *descriptor)
5227 {
5228 	struct fs_mount *mount = descriptor->u.mount;
5229 
5230 	if (FS_MOUNT_CALL(mount, rewind_query))
5231 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5232 
5233 	return EOPNOTSUPP;
5234 }
5235 
5236 
5237 //	#pragma mark - General File System functions
5238 
5239 
5240 static dev_t
5241 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5242 	const char *args, bool kernel)
5243 {
5244 	struct fs_mount *mount;
5245 	status_t status = 0;
5246 
5247 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5248 
5249 	// The path is always safe, we just have to make sure that fsName is
5250 	// almost valid - we can't make any assumptions about args, though.
5251 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5252 	// We'll get it from the DDM later.
5253 	if (fsName == NULL) {
5254 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
5255 			return B_BAD_VALUE;
5256 	} else if (fsName[0] == '\0')
5257 		return B_BAD_VALUE;
5258 
5259 	RecursiveLocker mountOpLocker(sMountOpLock);
5260 
5261 	// Helper to delete a newly created file device on failure.
5262 	// Not exactly beautiful, but helps to keep the code below cleaner.
5263 	struct FileDeviceDeleter {
5264 		FileDeviceDeleter() : id(-1) {}
5265 		~FileDeviceDeleter()
5266 		{
5267 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5268 		}
5269 
5270 		partition_id id;
5271 	} fileDeviceDeleter;
5272 
5273 	// If the file system is not a "virtual" one, the device argument should
5274 	// point to a real file/device (if given at all).
5275 	// get the partition
5276 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5277 	KPartition *partition = NULL;
5278 	KPath normalizedDevice;
5279 	bool newlyCreatedFileDevice = false;
5280 
5281 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5282 		// normalize the device path
5283 		status = normalizedDevice.SetTo(device, true);
5284 		if (status != B_OK)
5285 			return status;
5286 
5287 		// get a corresponding partition from the DDM
5288 		partition = ddm->RegisterPartition(normalizedDevice.Path(), true);
5289 
5290 		if (!partition) {
5291 			// Partition not found: This either means, the user supplied
5292 			// an invalid path, or the path refers to an image file. We try
5293 			// to let the DDM create a file device for the path.
5294 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5295 				&newlyCreatedFileDevice, false);
5296 			if (deviceID >= 0) {
5297 				partition = ddm->RegisterPartition(deviceID, true);
5298 				if (newlyCreatedFileDevice)
5299 					fileDeviceDeleter.id = deviceID;
5300 			}
5301 		}
5302 
5303 		if (!partition) {
5304 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5305 				normalizedDevice.Path()));
5306 			return B_ENTRY_NOT_FOUND;
5307 		}
5308 
5309 		device = normalizedDevice.Path();
5310 			// correct path to file device
5311 	}
5312 	PartitionRegistrar partitionRegistrar(partition, true);
5313 
5314 	// Write lock the partition's device. For the time being, we keep the lock
5315 	// until we're done mounting -- not nice, but ensure, that no-one is
5316 	// interfering.
5317 	// TODO: Find a better solution.
5318 	KDiskDevice *diskDevice = NULL;
5319 	if (partition) {
5320 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5321 		if (!diskDevice) {
5322 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5323 			return B_ERROR;
5324 		}
5325 	}
5326 
5327 	DeviceWriteLocker writeLocker(diskDevice, true);
5328 		// this takes over the write lock acquired before
5329 
5330 	if (partition) {
5331 		// make sure, that the partition is not busy
5332 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5333 			TRACE(("fs_mount(): Partition is busy.\n"));
5334 			return B_BUSY;
5335 		}
5336 
5337 		// if no FS name had been supplied, we get it from the partition
5338 		if (!fsName) {
5339 			KDiskSystem *diskSystem = partition->DiskSystem();
5340 			if (!diskSystem) {
5341 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5342 					"recognize it.\n"));
5343 				return B_BAD_VALUE;
5344 			}
5345 
5346 			if (!diskSystem->IsFileSystem()) {
5347 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5348 					"partitioning system.\n"));
5349 				return B_BAD_VALUE;
5350 			}
5351 
5352 			// The disk system name will not change, and the KDiskSystem
5353 			// object will not go away while the disk device is locked (and
5354 			// the partition has a reference to it), so this is safe.
5355 			fsName = diskSystem->Name();
5356 		}
5357 	}
5358 
5359 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5360 	if (mount == NULL)
5361 		return B_NO_MEMORY;
5362 
5363 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5364 
5365 	mount->fs_name = get_file_system_name(fsName);
5366 	if (mount->fs_name == NULL) {
5367 		status = B_NO_MEMORY;
5368 		goto err1;
5369 	}
5370 
5371 	mount->device_name = strdup(device);
5372 		// "device" can be NULL
5373 
5374 	mount->fs = get_file_system(fsName);
5375 	if (mount->fs == NULL) {
5376 		status = ENODEV;
5377 		goto err3;
5378 	}
5379 
5380 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5381 	if (status < B_OK)
5382 		goto err4;
5383 
5384 	// initialize structure
5385 	mount->id = sNextMountID++;
5386 	mount->partition = NULL;
5387 	mount->root_vnode = NULL;
5388 	mount->covers_vnode = NULL;
5389 	mount->cookie = NULL;
5390 	mount->unmounting = false;
5391 	mount->owns_file_device = false;
5392 
5393 	// insert mount struct into list before we call FS's mount() function
5394 	// so that vnodes can be created for this mount
5395 	mutex_lock(&sMountMutex);
5396 	hash_insert(sMountsTable, mount);
5397 	mutex_unlock(&sMountMutex);
5398 
5399 	vnode_id rootID;
5400 
5401 	if (!sRoot) {
5402 		// we haven't mounted anything yet
5403 		if (strcmp(path, "/") != 0) {
5404 			status = B_ERROR;
5405 			goto err5;
5406 		}
5407 
5408 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5409 		if (status < 0) {
5410 			// ToDo: why should we hide the error code from the file system here?
5411 			//status = ERR_VFS_GENERAL;
5412 			goto err5;
5413 		}
5414 	} else {
5415 		struct vnode *coveredVnode;
5416 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5417 		if (status < B_OK)
5418 			goto err5;
5419 
5420 		// make sure covered_vnode is a DIR
5421 		struct stat coveredNodeStat;
5422 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5423 			coveredVnode->private_node, &coveredNodeStat);
5424 		if (status < B_OK)
5425 			goto err5;
5426 
5427 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5428 			status = B_NOT_A_DIRECTORY;
5429 			goto err5;
5430 		}
5431 
5432 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5433 			// this is already a mount point
5434 			status = B_BUSY;
5435 			goto err5;
5436 		}
5437 
5438 		mount->covers_vnode = coveredVnode;
5439 
5440 		// mount it
5441 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5442 		if (status < B_OK)
5443 			goto err6;
5444 	}
5445 
5446 	// the root node is supposed to be owned by the file system - it must
5447 	// exist at this point
5448 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5449 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5450 		panic("fs_mount: file system does not own its root node!\n");
5451 		status = B_ERROR;
5452 		goto err7;
5453 	}
5454 
5455 	// No race here, since fs_mount() is the only function changing
5456 	// covers_vnode (and holds sMountOpLock at that time).
5457 	mutex_lock(&sVnodeCoveredByMutex);
5458 	if (mount->covers_vnode)
5459 		mount->covers_vnode->covered_by = mount->root_vnode;
5460 	mutex_unlock(&sVnodeCoveredByMutex);
5461 
5462 	if (!sRoot)
5463 		sRoot = mount->root_vnode;
5464 
5465 	// supply the partition (if any) with the mount cookie and mark it mounted
5466 	if (partition) {
5467 		partition->SetMountCookie(mount->cookie);
5468 		partition->SetVolumeID(mount->id);
5469 
5470 		// keep a partition reference as long as the partition is mounted
5471 		partitionRegistrar.Detach();
5472 		mount->partition = partition;
5473 		mount->owns_file_device = newlyCreatedFileDevice;
5474 		fileDeviceDeleter.id = -1;
5475 	}
5476 
5477 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
5478 		mount->covers_vnode ? mount->covers_vnode->id : -1);
5479 
5480 	return mount->id;
5481 
5482 err7:
5483 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5484 err6:
5485 	if (mount->covers_vnode)
5486 		put_vnode(mount->covers_vnode);
5487 err5:
5488 	mutex_lock(&sMountMutex);
5489 	hash_remove(sMountsTable, mount);
5490 	mutex_unlock(&sMountMutex);
5491 
5492 	recursive_lock_destroy(&mount->rlock);
5493 err4:
5494 	put_file_system(mount->fs);
5495 	free(mount->device_name);
5496 err3:
5497 	free(mount->fs_name);
5498 err1:
5499 	free(mount);
5500 
5501 	return status;
5502 }
5503 
5504 
/*!	Unmounts the file system whose root is mounted at \a path.
	Resolves \a path to a vnode, which must be the root vnode of a mount,
	and then tears the mount down. With B_FORCE_UNMOUNT set in \a flags,
	still-open file descriptors on the volume are forcibly disconnected;
	otherwise a busy volume makes the call fail with B_BUSY.
	\param path Path to the mount point's root (will be resolved).
	\param flags Unmount flags; only B_FORCE_UNMOUNT is evaluated here.
	\param kernel \c true, if invoked on behalf of the kernel.
	\return \c B_OK on success, an error code otherwise.
*/
static status_t
fs_unmount(char *path, uint32 flags, bool kernel)
{
	struct fs_mount *mount;
	struct vnode *vnode;
	status_t err;

	FUNCTION(("vfs_unmount: entry. path = '%s', kernel %d\n", path, kernel));

	// resolve the path; this leaves us with a reference to the vnode
	err = path_to_vnode(path, true, &vnode, NULL, kernel);
	if (err < 0)
		return B_ENTRY_NOT_FOUND;

	// serialize mount/unmount operations
	RecursiveLocker mountOpLocker(sMountOpLock);

	mount = find_mount(vnode->device);
	if (!mount)
		panic("vfs_unmount: find_mount() failed on root vnode @%p of mount\n", vnode);

	if (mount->root_vnode != vnode) {
		// not mountpoint
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	// if the volume is associated with a partition, lock the device of the
	// partition as long as we are unmounting
	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
	KPartition *partition = mount->partition;
	KDiskDevice *diskDevice = NULL;
	if (partition) {
		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
		if (!diskDevice) {
			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
			return B_ERROR;
		}
	}
	DeviceWriteLocker writeLocker(diskDevice, true);
		// unlocks the device on destruction

	// make sure, that the partition is not busy
	if (partition) {
		if (partition->IsBusy() || partition->IsDescendantBusy()) {
			TRACE(("fs_unmount(): Partition is busy.\n"));
			return B_BUSY;
		}
	}

	// grab the vnode master mutex to keep someone from creating
	// a vnode while we're figuring out if we can continue
	mutex_lock(&sVnodeMutex);

	bool disconnectedDescriptors = false;

	while (true) {
		bool busy = false;

		// cycle through the list of vnodes associated with this mount and
		// make sure all of them are not busy or have refs on them
		vnode = NULL;
		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
			// The root vnode ref_count needs to be 2 here: one for the file
			// system, one from the path_to_vnode() call above
			if (vnode->busy
				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
					|| (vnode->ref_count != 2 && mount->root_vnode == vnode))) {
				// there are still vnodes in use on this mount, so we cannot
				// unmount yet
				busy = true;
				break;
			}
		}

		if (!busy)
			break;

		if ((flags & B_FORCE_UNMOUNT) == 0) {
			// not forced: give the reference from path_to_vnode() back and bail
			mutex_unlock(&sVnodeMutex);
			put_vnode(mount->root_vnode);

			return B_BUSY;
		}

		if (disconnectedDescriptors) {
			// wait a bit until the last access is finished, and then try again
			mutex_unlock(&sVnodeMutex);
			snooze(100000);
			// TODO: if there is some kind of bug that prevents the ref counts
			//	from getting back to zero, this will fall into an endless loop...
			mutex_lock(&sVnodeMutex);
			continue;
		}

		// the file system is still busy - but we're forced to unmount it,
		// so let's disconnect all open file descriptors

		mount->unmounting = true;
			// prevent new vnodes from being created

		mutex_unlock(&sVnodeMutex);

		disconnect_mount_or_vnode_fds(mount, NULL);
		disconnectedDescriptors = true;

		mutex_lock(&sVnodeMutex);
	}

	// we can safely continue, mark all of the vnodes busy and this mount
	// structure in unmounting state
	mount->unmounting = true;

	// note: vnode is NULL here, so this iterates the whole list again
	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
		vnode->busy = true;

		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
	}

	// The ref_count of the root node is 2 at this point, see above why this is
	mount->root_vnode->ref_count -= 2;

	mutex_unlock(&sVnodeMutex);

	// detach from the covered vnode and release the reference the mount held
	// NOTE(review): covers_vnode appears to be assumed non-NULL here, i.e.
	// unmounting the root volume is presumably not supported — confirm.
	mutex_lock(&sVnodeCoveredByMutex);
	mount->covers_vnode->covered_by = NULL;
	mutex_unlock(&sVnodeCoveredByMutex);
	put_vnode(mount->covers_vnode);

	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so
	// we don't have to do this.
	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes)) != NULL) {
		free_vnode(vnode, false);
	}

	// remove the mount structure from the hash table
	mutex_lock(&sMountMutex);
	hash_remove(sMountsTable, mount);
	mutex_unlock(&sMountMutex);

	mountOpLocker.Unlock();

	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
	notify_unmount(mount->id);

	// release the file system
	put_file_system(mount->fs);

	// dereference the partition and mark it unmounted
	if (partition) {
		partition->SetVolumeID(-1);
		partition->SetMountCookie(NULL);

		if (mount->owns_file_device)
			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
		partition->Unregister();
	}

	free(mount->device_name);
	free(mount->fs_name);
	free(mount);

	return B_OK;
}
5671 
5672 
/*!	Flushes the volume with ID \a device to disk.
	First calls the file system's sync() hook (if any), then iterates over
	all vnodes of the mount and calls fsync() on each of them. The vnode
	list is only locked while fetching the next entry, so the set of synced
	vnodes is a best-effort snapshot, not an atomic one.
	\param device The ID of the volume to be synchronized.
	\return \c B_OK, or the error reported by the file system's sync() hook.
*/
static status_t
fs_sync(dev_t device)
{
	struct fs_mount *mount;
	status_t status = get_mount(device, &mount);
	if (status < B_OK)
		return status;

	// call the file system's volume-wide sync hook, if it has one
	mutex_lock(&sMountMutex);

	if (FS_MOUNT_CALL(mount, sync))
		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);

	mutex_unlock(&sMountMutex);

	struct vnode *previousVnode = NULL;
	while (true) {
		// synchronize access to vnode list
		recursive_lock_lock(&mount->rlock);

		struct vnode *vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
			previousVnode);

		// remember the ID before dropping the lock; the vnode pointer may
		// become stale once the list is unlocked
		vnode_id id = -1;
		if (vnode != NULL)
			id = vnode->id;

		recursive_lock_unlock(&mount->rlock);

		if (vnode == NULL)
			break;

		// acquire a reference to the vnode

		if (get_vnode(mount->id, id, &vnode, true) == B_OK) {
			// the previous vnode is no longer needed as an iteration anchor
			if (previousVnode != NULL)
				put_vnode(previousVnode);

			if (FS_CALL(vnode, fsync) != NULL)
				FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

			// the next vnode might change until we lock the vnode list again,
			// but this vnode won't go away since we keep a reference to it.
			previousVnode = vnode;
		} else {
			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n", mount->id, id);
			break;
		}
	}

	if (previousVnode != NULL)
		put_vnode(previousVnode);

	put_mount(mount);
	return status;
}
5729 
5730 
5731 static status_t
5732 fs_read_info(dev_t device, struct fs_info *info)
5733 {
5734 	struct fs_mount *mount;
5735 	status_t status = get_mount(device, &mount);
5736 	if (status < B_OK)
5737 		return status;
5738 
5739 	memset(info, 0, sizeof(struct fs_info));
5740 
5741 	if (FS_MOUNT_CALL(mount, read_fs_info))
5742 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
5743 
5744 	// fill in info the file system doesn't (have to) know about
5745 	if (status == B_OK) {
5746 		info->dev = mount->id;
5747 		info->root = mount->root_vnode->id;
5748 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
5749 		if (mount->device_name != NULL) {
5750 			strlcpy(info->device_name, mount->device_name,
5751 				sizeof(info->device_name));
5752 		}
5753 	}
5754 
5755 	// if the call is not supported by the file system, there are still
5756 	// the parts that we filled out ourselves
5757 
5758 	put_mount(mount);
5759 	return status;
5760 }
5761 
5762 
5763 static status_t
5764 fs_write_info(dev_t device, const struct fs_info *info, int mask)
5765 {
5766 	struct fs_mount *mount;
5767 	status_t status = get_mount(device, &mount);
5768 	if (status < B_OK)
5769 		return status;
5770 
5771 	if (FS_MOUNT_CALL(mount, write_fs_info))
5772 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
5773 	else
5774 		status = EROFS;
5775 
5776 	put_mount(mount);
5777 	return status;
5778 }
5779 
5780 
5781 static dev_t
5782 fs_next_device(int32 *_cookie)
5783 {
5784 	struct fs_mount *mount = NULL;
5785 	dev_t device = *_cookie;
5786 
5787 	mutex_lock(&sMountMutex);
5788 
5789 	// Since device IDs are assigned sequentially, this algorithm
5790 	// does work good enough. It makes sure that the device list
5791 	// returned is sorted, and that no device is skipped when an
5792 	// already visited device got unmounted.
5793 
5794 	while (device < sNextMountID) {
5795 		mount = find_mount(device++);
5796 		if (mount != NULL && mount->cookie != NULL)
5797 			break;
5798 	}
5799 
5800 	*_cookie = device;
5801 
5802 	if (mount != NULL)
5803 		device = mount->id;
5804 	else
5805 		device = B_BAD_VALUE;
5806 
5807 	mutex_unlock(&sMountMutex);
5808 
5809 	return device;
5810 }
5811 
5812 
5813 static status_t
5814 get_cwd(char *buffer, size_t size, bool kernel)
5815 {
5816 	// Get current working directory from io context
5817 	struct io_context *context = get_current_io_context(kernel);
5818 	status_t status;
5819 
5820 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
5821 
5822 	mutex_lock(&context->io_mutex);
5823 
5824 	if (context->cwd)
5825 		status = dir_vnode_to_path(context->cwd, buffer, size);
5826 	else
5827 		status = B_ERROR;
5828 
5829 	mutex_unlock(&context->io_mutex);
5830 	return status;
5831 }
5832 
5833 
5834 static status_t
5835 set_cwd(int fd, char *path, bool kernel)
5836 {
5837 	struct io_context *context;
5838 	struct vnode *vnode = NULL;
5839 	struct vnode *oldDirectory;
5840 	struct stat stat;
5841 	status_t status;
5842 
5843 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
5844 
5845 	// Get vnode for passed path, and bail if it failed
5846 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5847 	if (status < 0)
5848 		return status;
5849 
5850 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
5851 	if (status < 0)
5852 		goto err;
5853 
5854 	if (!S_ISDIR(stat.st_mode)) {
5855 		// nope, can't cwd to here
5856 		status = B_NOT_A_DIRECTORY;
5857 		goto err;
5858 	}
5859 
5860 	// Get current io context and lock
5861 	context = get_current_io_context(kernel);
5862 	mutex_lock(&context->io_mutex);
5863 
5864 	// save the old current working directory first
5865 	oldDirectory = context->cwd;
5866 	context->cwd = vnode;
5867 
5868 	mutex_unlock(&context->io_mutex);
5869 
5870 	if (oldDirectory)
5871 		put_vnode(oldDirectory);
5872 
5873 	return B_NO_ERROR;
5874 
5875 err:
5876 	put_vnode(vnode);
5877 	return status;
5878 }
5879 
5880 
5881 //	#pragma mark - kernel mirrored syscalls
5882 
5883 
/*!	Kernel-space entry point for mounting a file system.
	Copies \a path into a mutable KPath buffer and forwards to fs_mount()
	with the kernel flag set.
	Note: \a argsLength is currently not passed on to fs_mount().
	\return The ID of the newly mounted volume, or an error code.
*/
dev_t
_kern_mount(const char *path, const char *device, const char *fsName,
	uint32 flags, const char *args, size_t argsLength)
{
	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
}
5894 
5895 
/*!	Kernel-space entry point for unmounting a file system.
	Copies \a path into a mutable KPath buffer and forwards to fs_unmount()
	with the kernel flag set.
*/
status_t
_kern_unmount(const char *path, uint32 flags)
{
	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	return fs_unmount(pathBuffer.LockBuffer(), flags, true);
}
5905 
5906 
5907 status_t
5908 _kern_read_fs_info(dev_t device, struct fs_info *info)
5909 {
5910 	if (info == NULL)
5911 		return B_BAD_VALUE;
5912 
5913 	return fs_read_info(device, info);
5914 }
5915 
5916 
5917 status_t
5918 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
5919 {
5920 	if (info == NULL)
5921 		return B_BAD_VALUE;
5922 
5923 	return fs_write_info(device, info, mask);
5924 }
5925 
5926 
5927 status_t
5928 _kern_sync(void)
5929 {
5930 	// Note: _kern_sync() is also called from _user_sync()
5931 	int32 cookie = 0;
5932 	dev_t device;
5933 	while ((device = next_dev(&cookie)) >= 0) {
5934 		status_t status = fs_sync(device);
5935 		if (status != B_OK && status != B_BAD_VALUE)
5936 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
5937 	}
5938 
5939 	return B_OK;
5940 }
5941 
5942 
/*!	Kernel-space entry point for iterating over the mounted volumes.
	Simply forwards to fs_next_device().
*/
dev_t
_kern_next_device(int32 *_cookie)
{
	return fs_next_device(_cookie);
}
5948 
5949 
/*!	Retrieves information about the next open file descriptor of the
	team \a teamID, starting at the FD slot \a *_cookie.
	The team's io_context is looked up under the team lock, but its mutex
	has to be acquired manually afterwards, since we must not block while
	holding the team spinlock.
	\param teamID The team whose FDs shall be enumerated.
	\param _cookie In: the first FD slot to consider. Out: the slot to
			continue the iteration from.
	\param info Buffer the FD info is written into.
	\param infoSize Must be \c sizeof(fd_info).
	\return \c B_OK on success, \c B_ENTRY_NOT_FOUND when the iteration is
			done, \c B_BAD_TEAM_ID or \c B_BAD_VALUE on error.
*/
status_t
_kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
	size_t infoSize)
{
	if (infoSize != sizeof(fd_info))
		return B_BAD_VALUE;

	struct io_context *context = NULL;
	sem_id contextMutex = -1;
	struct team *team = NULL;

	cpu_status state = disable_interrupts();
	GRAB_TEAM_LOCK();

	team = team_get_team_struct_locked(teamID);
	if (team) {
		context = (io_context *)team->io_context;
		contextMutex = context->io_mutex.sem;
	}

	RELEASE_TEAM_LOCK();
	restore_interrupts(state);

	// we now have a context - since we couldn't lock it while having
	// safe access to the team structure, we now need to lock the mutex
	// manually

	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
		// team doesn't exist or seems to be gone
		return B_BAD_TEAM_ID;
	}

	// the team cannot be deleted completely while we're owning its
	// io_context mutex, so we can safely play with it now

	// fake the mutex ownership, since we acquired the semaphore directly
	context->io_mutex.holder = thread_get_current_thread_id();

	uint32 slot = *_cookie;

	// find the first occupied slot at or after the cookie position
	struct file_descriptor *descriptor;
	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
		slot++;

	if (slot >= context->table_size) {
		mutex_unlock(&context->io_mutex);
		return B_ENTRY_NOT_FOUND;
	}

	info->number = slot;
	info->open_mode = descriptor->open_mode;

	struct vnode *vnode = fd_vnode(descriptor);
	if (vnode != NULL) {
		info->device = vnode->device;
		info->node = vnode->id;
	} else if (descriptor->u.mount != NULL) {
		// descriptors without a vnode (e.g. on a mount) report the mount ID
		info->device = descriptor->u.mount->id;
		info->node = -1;
	}

	mutex_unlock(&context->io_mutex);

	*_cookie = slot + 1;
	return B_OK;
}
6015 
6016 
6017 int
6018 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
6019 {
6020 	if (openMode & O_CREAT)
6021 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
6022 
6023 	return file_open_entry_ref(device, inode, name, openMode, true);
6024 }
6025 
6026 
6027 /**	\brief Opens a node specified by a FD + path pair.
6028  *
6029  *	At least one of \a fd and \a path must be specified.
6030  *	If only \a fd is given, the function opens the node identified by this
6031  *	FD. If only a path is given, this path is opened. If both are given and
6032  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6033  *	of the directory (!) identified by \a fd.
6034  *
6035  *	\param fd The FD. May be < 0.
6036  *	\param path The absolute or relative path. May be \c NULL.
6037  *	\param openMode The open mode.
6038  *	\return A FD referring to the newly opened node, or an error code,
6039  *			if an error occurs.
6040  */
6041 
6042 int
6043 _kern_open(int fd, const char *path, int openMode, int perms)
6044 {
6045 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6046 	if (pathBuffer.InitCheck() != B_OK)
6047 		return B_NO_MEMORY;
6048 
6049 	if (openMode & O_CREAT)
6050 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
6051 
6052 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
6053 }
6054 
6055 
6056 /**	\brief Opens a directory specified by entry_ref or node_ref.
6057  *
6058  *	The supplied name may be \c NULL, in which case directory identified
6059  *	by \a device and \a inode will be opened. Otherwise \a device and
6060  *	\a inode identify the parent directory of the directory to be opened
6061  *	and \a name its entry name.
6062  *
6063  *	\param device If \a name is specified the ID of the device the parent
6064  *		   directory of the directory to be opened resides on, otherwise
6065  *		   the device of the directory itself.
6066  *	\param inode If \a name is specified the node ID of the parent
6067  *		   directory of the directory to be opened, otherwise node ID of the
6068  *		   directory itself.
6069  *	\param name The entry name of the directory to be opened. If \c NULL,
6070  *		   the \a device + \a inode pair identify the node to be opened.
6071  *	\return The FD of the newly opened directory or an error code, if
6072  *			something went wrong.
6073  */
6074 
// Kernel-space wrapper: forwards to dir_open_entry_ref() with the kernel
// flag set; see the documentation block above for parameter semantics.
int
_kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
{
	return dir_open_entry_ref(device, inode, name, true);
}
6080 
6081 
6082 /**	\brief Opens a directory specified by a FD + path pair.
6083  *
6084  *	At least one of \a fd and \a path must be specified.
6085  *	If only \a fd is given, the function opens the directory identified by this
6086  *	FD. If only a path is given, this path is opened. If both are given and
6087  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6088  *	of the directory (!) identified by \a fd.
6089  *
6090  *	\param fd The FD. May be < 0.
6091  *	\param path The absolute or relative path. May be \c NULL.
6092  *	\return A FD referring to the newly opened directory, or an error code,
6093  *			if an error occurs.
6094  */
6095 
6096 int
6097 _kern_open_dir(int fd, const char *path)
6098 {
6099 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6100 	if (pathBuffer.InitCheck() != B_OK)
6101 		return B_NO_MEMORY;
6102 
6103 	return dir_open(fd, pathBuffer.LockBuffer(), true);
6104 }
6105 
6106 
// Kernel-space fcntl(): forwards to the common implementation with the
// kernel flag set.
status_t
_kern_fcntl(int fd, int op, uint32 argument)
{
	return common_fcntl(fd, op, argument, true);
}
6112 
6113 
// Kernel-space fsync(): forwards to the common implementation with the
// kernel flag set.
status_t
_kern_fsync(int fd)
{
	return common_sync(fd, true);
}
6119 
6120 
// Kernel-space node locking: forwards to the common implementation with
// the kernel flag set.
status_t
_kern_lock_node(int fd)
{
	return common_lock_node(fd, true);
}
6126 
6127 
// Kernel-space node unlocking: forwards to the common implementation with
// the kernel flag set.
status_t
_kern_unlock_node(int fd)
{
	return common_unlock_node(fd, true);
}
6133 
6134 
// Creates a directory named \a name in the directory identified by the
// \a device + \a inode pair, with permissions \a perms.
status_t
_kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
{
	return dir_create_entry_ref(device, inode, name, perms, true);
}
6140 
6141 
6142 /**	\brief Creates a directory specified by a FD + path pair.
6143  *
6144  *	\a path must always be specified (it contains the name of the new directory
6145  *	at least). If only a path is given, this path identifies the location at
6146  *	which the directory shall be created. If both \a fd and \a path are given and
6147  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6148  *	of the directory (!) identified by \a fd.
6149  *
6150  *	\param fd The FD. May be < 0.
6151  *	\param path The absolute or relative path. Must not be \c NULL.
6152  *	\param perms The access permissions the new directory shall have.
6153  *	\return \c B_OK, if the directory has been created successfully, another
6154  *			error code otherwise.
6155  */
6156 
6157 status_t
6158 _kern_create_dir(int fd, const char *path, int perms)
6159 {
6160 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6161 	if (pathBuffer.InitCheck() != B_OK)
6162 		return B_NO_MEMORY;
6163 
6164 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6165 }
6166 
6167 
6168 status_t
6169 _kern_remove_dir(int fd, const char *path)
6170 {
6171 	if (path) {
6172 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6173 		if (pathBuffer.InitCheck() != B_OK)
6174 			return B_NO_MEMORY;
6175 
6176 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6177 	}
6178 
6179 	return dir_remove(fd, NULL, true);
6180 }
6181 
6182 
6183 /**	\brief Reads the contents of a symlink referred to by a FD + path pair.
6184  *
6185  *	At least one of \a fd and \a path must be specified.
 *	If only \a fd is given, the symlink to be read is the node
6187  *	identified by this FD. If only a path is given, this path identifies the
6188  *	symlink to be read. If both are given and the path is absolute, \a fd is
6189  *	ignored; a relative path is reckoned off of the directory (!) identified
6190  *	by \a fd.
6191  *	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6192  *	will still be updated to reflect the required buffer size.
6193  *
6194  *	\param fd The FD. May be < 0.
6195  *	\param path The absolute or relative path. May be \c NULL.
6196  *	\param buffer The buffer into which the contents of the symlink shall be
6197  *		   written.
6198  *	\param _bufferSize A pointer to the size of the supplied buffer.
6199  *	\return The length of the link on success or an appropriate error code
6200  */
6201 
6202 status_t
6203 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6204 {
6205 	status_t status;
6206 
6207 	if (path) {
6208 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6209 		if (pathBuffer.InitCheck() != B_OK)
6210 			return B_NO_MEMORY;
6211 
6212 		return common_read_link(fd, pathBuffer.LockBuffer(),
6213 			buffer, _bufferSize, true);
6214 	}
6215 
6216 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6217 }
6218 
6219 
6220 /**	\brief Creates a symlink specified by a FD + path pair.
6221  *
6222  *	\a path must always be specified (it contains the name of the new symlink
6223  *	at least). If only a path is given, this path identifies the location at
6224  *	which the symlink shall be created. If both \a fd and \a path are given and
6225  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6226  *	of the directory (!) identified by \a fd.
6227  *
6228  *	\param fd The FD. May be < 0.
6229  *	\param toPath The absolute or relative path. Must not be \c NULL.
6230  *	\param mode The access permissions the new symlink shall have.
6231  *	\return \c B_OK, if the symlink has been created successfully, another
6232  *			error code otherwise.
6233  */
6234 
6235 status_t
6236 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6237 {
6238 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6239 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6240 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6241 		return B_NO_MEMORY;
6242 
6243 	char *toBuffer = toPathBuffer.LockBuffer();
6244 
6245 	status_t status = check_path(toBuffer);
6246 	if (status < B_OK)
6247 		return status;
6248 
6249 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6250 		toBuffer, mode, true);
6251 }
6252 
6253 
6254 status_t
6255 _kern_create_link(const char *path, const char *toPath)
6256 {
6257 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6258 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6259 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6260 		return B_NO_MEMORY;
6261 
6262 	return common_create_link(pathBuffer.LockBuffer(),
6263 		toPathBuffer.LockBuffer(), true);
6264 }
6265 
6266 
6267 /**	\brief Removes an entry specified by a FD + path pair from its directory.
6268  *
6269  *	\a path must always be specified (it contains at least the name of the entry
6270  *	to be deleted). If only a path is given, this path identifies the entry
6271  *	directly. If both \a fd and \a path are given and the path is absolute,
6272  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6273  *	identified by \a fd.
6274  *
6275  *	\param fd The FD. May be < 0.
6276  *	\param path The absolute or relative path. Must not be \c NULL.
6277  *	\return \c B_OK, if the entry has been removed successfully, another
6278  *			error code otherwise.
6279  */
6280 
6281 status_t
6282 _kern_unlink(int fd, const char *path)
6283 {
6284 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6285 	if (pathBuffer.InitCheck() != B_OK)
6286 		return B_NO_MEMORY;
6287 
6288 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6289 }
6290 
6291 
6292 /**	\brief Moves an entry specified by a FD + path pair to a an entry specified
6293  *		   by another FD + path pair.
6294  *
6295  *	\a oldPath and \a newPath must always be specified (they contain at least
6296  *	the name of the entry). If only a path is given, this path identifies the
6297  *	entry directly. If both a FD and a path are given and the path is absolute,
6298  *	the FD is ignored; a relative path is reckoned off of the directory (!)
6299  *	identified by the respective FD.
6300  *
6301  *	\param oldFD The FD of the old location. May be < 0.
6302  *	\param oldPath The absolute or relative path of the old location. Must not
6303  *		   be \c NULL.
6304  *	\param newFD The FD of the new location. May be < 0.
6305  *	\param newPath The absolute or relative path of the new location. Must not
6306  *		   be \c NULL.
6307  *	\return \c B_OK, if the entry has been moved successfully, another
6308  *			error code otherwise.
6309  */
6310 
6311 status_t
6312 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6313 {
6314 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6315 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6316 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6317 		return B_NO_MEMORY;
6318 
6319 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6320 		newFD, newPathBuffer.LockBuffer(), true);
6321 }
6322 
6323 
6324 status_t
6325 _kern_access(const char *path, int mode)
6326 {
6327 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6328 	if (pathBuffer.InitCheck() != B_OK)
6329 		return B_NO_MEMORY;
6330 
6331 	return common_access(pathBuffer.LockBuffer(), mode, true);
6332 }
6333 
6334 
6335 /**	\brief Reads stat data of an entity specified by a FD + path pair.
6336  *
6337  *	If only \a fd is given, the stat operation associated with the type
6338  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6339  *	given, this path identifies the entry for whose node to retrieve the
6340  *	stat data. If both \a fd and \a path are given and the path is absolute,
6341  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6342  *	identified by \a fd and specifies the entry whose stat data shall be
6343  *	retrieved.
6344  *
6345  *	\param fd The FD. May be < 0.
6346  *	\param path The absolute or relative path. Must not be \c NULL.
6347  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6348  *		   function shall not stick to symlinks, but traverse them.
6349  *	\param stat The buffer the stat data shall be written into.
6350  *	\param statSize The size of the supplied stat buffer.
 *	\return \c B_OK, if the stat data have been read successfully, another
6352  *			error code otherwise.
6353  */
6354 
status_t
_kern_read_stat(int fd, const char *path, bool traverseLeafLink,
	struct stat *stat, size_t statSize)
{
	struct stat completeStat;
	struct stat *originalStat = NULL;
	status_t status;

	// callers may pass a smaller struct stat, never a larger one
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions
	// (for a smaller caller struct we stat into a full-sized kernel copy
	// and hand back only the first statSize bytes at the end)
	if (statSize < sizeof(struct stat)) {
		originalStat = stat;
		stat = &completeStat;
	}

	if (path) {
		// path given: get the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, stat);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	// copy the (possibly truncated) result into the caller's structure
	if (status == B_OK && originalStat != NULL)
		memcpy(originalStat, stat, statSize);

	return status;
}
6400 
6401 
6402 /**	\brief Writes stat data of an entity specified by a FD + path pair.
6403  *
6404  *	If only \a fd is given, the stat operation associated with the type
6405  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6406  *	given, this path identifies the entry for whose node to write the
6407  *	stat data. If both \a fd and \a path are given and the path is absolute,
6408  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6409  *	identified by \a fd and specifies the entry whose stat data shall be
6410  *	written.
6411  *
6412  *	\param fd The FD. May be < 0.
6413  *	\param path The absolute or relative path. Must not be \c NULL.
6414  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6415  *		   function shall not stick to symlinks, but traverse them.
6416  *	\param stat The buffer containing the stat data to be written.
6417  *	\param statSize The size of the supplied stat buffer.
6418  *	\param statMask A mask specifying which parts of the stat data shall be
6419  *		   written.
 *	\return \c B_OK, if the stat data have been written successfully,
6421  *			another error code otherwise.
6422  */
6423 
status_t
_kern_write_stat(int fd, const char *path, bool traverseLeafLink,
	const struct stat *stat, size_t statSize, int statMask)
{
	struct stat completeStat;

	// callers may pass a smaller struct stat, never a larger one
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions
	// (for a smaller caller struct, build a full-sized kernel copy with the
	// trailing, unknown fields zeroed out)
	if (statSize < sizeof(struct stat)) {
		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
		memcpy(&completeStat, stat, statSize);
		stat = &completeStat;
	}

	status_t status;

	if (path) {
		// path given: write the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, statMask, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	return status;
}
6467 
6468 
6469 int
6470 _kern_open_attr_dir(int fd, const char *path)
6471 {
6472 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6473 	if (pathBuffer.InitCheck() != B_OK)
6474 		return B_NO_MEMORY;
6475 
6476 	if (path != NULL)
6477 		pathBuffer.SetTo(path);
6478 
6479 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6480 }
6481 
6482 
/**	\brief Kernel-space entry: creates attribute \a name of \a type on the
 *		   node of \a fd; forwards to attr_create() with the kernel flag set.
 */
int
_kern_create_attr(int fd, const char *name, uint32 type, int openMode)
{
	return attr_create(fd, name, type, openMode, true);
}
6488 
6489 
/**	\brief Kernel-space entry: opens attribute \a name on the node of \a fd;
 *		   forwards to attr_open() with the kernel flag set.
 */
int
_kern_open_attr(int fd, const char *name, int openMode)
{
	return attr_open(fd, name, openMode, true);
}
6495 
6496 
/**	\brief Kernel-space entry: removes attribute \a name from the node of
 *		   \a fd; forwards to attr_remove() with the kernel flag set.
 */
status_t
_kern_remove_attr(int fd, const char *name)
{
	return attr_remove(fd, name, true);
}
6502 
6503 
/**	\brief Kernel-space entry: renames attribute \a fromName of \a fromFile
 *		   to \a toName on \a toFile; forwards to attr_rename() with the
 *		   kernel flag set.
 */
status_t
_kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
{
	return attr_rename(fromFile, fromName, toFile, toName, true);
}
6509 
6510 
/**	\brief Kernel-space entry: opens the index directory of \a device;
 *		   forwards to index_dir_open() with the kernel flag set.
 */
int
_kern_open_index_dir(dev_t device)
{
	return index_dir_open(device, true);
}
6516 
6517 
/**	\brief Kernel-space entry: creates index \a name on \a device; forwards
 *		   to index_create() with the kernel flag set.
 */
status_t
_kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
{
	return index_create(device, name, type, flags, true);
}
6523 
6524 
/**	\brief Kernel-space entry: reads the stat data of index \a name on
 *		   \a device; forwards to index_name_read_stat() with the kernel
 *		   flag set.
 */
status_t
_kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
{
	return index_name_read_stat(device, name, stat, true);
}
6530 
6531 
/**	\brief Kernel-space entry: removes index \a name from \a device;
 *		   forwards to index_remove() with the kernel flag set.
 */
status_t
_kern_remove_index(dev_t device, const char *name)
{
	return index_remove(device, name, true);
}
6537 
6538 
/**	\brief Kernel-space entry: copies the current working directory path of
 *		   the calling team into \a buffer (at most \a size bytes).
 */
status_t
_kern_getcwd(char *buffer, size_t size)
{
	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));

	// Call vfs to get current working directory
	return get_cwd(buffer, size, true);
}
6547 
6548 
6549 status_t
6550 _kern_setcwd(int fd, const char *path)
6551 {
6552 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6553 	if (pathBuffer.InitCheck() != B_OK)
6554 		return B_NO_MEMORY;
6555 
6556 	if (path != NULL)
6557 		pathBuffer.SetTo(path);
6558 
6559 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
6560 }
6561 
6562 
6563 //	#pragma mark - userland syscalls
6564 
6565 
6566 dev_t
6567 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
6568 	uint32 flags, const char *userArgs, size_t argsLength)
6569 {
6570 	char fileSystem[B_OS_NAME_LENGTH];
6571 	KPath path, device;
6572 	char *args = NULL;
6573 	status_t status;
6574 
6575 	if (!IS_USER_ADDRESS(userPath)
6576 		|| !IS_USER_ADDRESS(userFileSystem)
6577 		|| !IS_USER_ADDRESS(userDevice))
6578 		return B_BAD_ADDRESS;
6579 
6580 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
6581 		return B_NO_MEMORY;
6582 
6583 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
6584 		return B_BAD_ADDRESS;
6585 
6586 	if (userFileSystem != NULL
6587 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
6588 		return B_BAD_ADDRESS;
6589 
6590 	if (userDevice != NULL
6591 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
6592 		return B_BAD_ADDRESS;
6593 
6594 	if (userArgs != NULL && argsLength > 0) {
6595 		// this is a safety restriction
6596 		if (argsLength >= 65536)
6597 			return B_NAME_TOO_LONG;
6598 
6599 		args = (char *)malloc(argsLength + 1);
6600 		if (args == NULL)
6601 			return B_NO_MEMORY;
6602 
6603 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
6604 			free(args);
6605 			return B_BAD_ADDRESS;
6606 		}
6607 	}
6608 	path.UnlockBuffer();
6609 	device.UnlockBuffer();
6610 
6611 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
6612 		userFileSystem ? fileSystem : NULL, flags, args, false);
6613 
6614 	free(args);
6615 	return status;
6616 }
6617 
6618 
6619 status_t
6620 _user_unmount(const char *userPath, uint32 flags)
6621 {
6622 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6623 	if (pathBuffer.InitCheck() != B_OK)
6624 		return B_NO_MEMORY;
6625 
6626 	char *path = pathBuffer.LockBuffer();
6627 
6628 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6629 		return B_BAD_ADDRESS;
6630 
6631 	return fs_unmount(path, flags, false);
6632 }
6633 
6634 
6635 status_t
6636 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6637 {
6638 	struct fs_info info;
6639 	status_t status;
6640 
6641 	if (userInfo == NULL)
6642 		return B_BAD_VALUE;
6643 
6644 	if (!IS_USER_ADDRESS(userInfo))
6645 		return B_BAD_ADDRESS;
6646 
6647 	status = fs_read_info(device, &info);
6648 	if (status != B_OK)
6649 		return status;
6650 
6651 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6652 		return B_BAD_ADDRESS;
6653 
6654 	return B_OK;
6655 }
6656 
6657 
6658 status_t
6659 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6660 {
6661 	struct fs_info info;
6662 
6663 	if (userInfo == NULL)
6664 		return B_BAD_VALUE;
6665 
6666 	if (!IS_USER_ADDRESS(userInfo)
6667 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
6668 		return B_BAD_ADDRESS;
6669 
6670 	return fs_write_info(device, &info, mask);
6671 }
6672 
6673 
6674 dev_t
6675 _user_next_device(int32 *_userCookie)
6676 {
6677 	int32 cookie;
6678 	dev_t device;
6679 
6680 	if (!IS_USER_ADDRESS(_userCookie)
6681 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
6682 		return B_BAD_ADDRESS;
6683 
6684 	device = fs_next_device(&cookie);
6685 
6686 	if (device >= B_OK) {
6687 		// update user cookie
6688 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
6689 			return B_BAD_ADDRESS;
6690 	}
6691 
6692 	return device;
6693 }
6694 
6695 
/**	\brief Syscall: synchronizes all volumes; forwards to _kern_sync(). */
status_t
_user_sync(void)
{
	return _kern_sync();
}
6701 
6702 
6703 status_t
6704 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
6705 	size_t infoSize)
6706 {
6707 	struct fd_info info;
6708 	uint32 cookie;
6709 
6710 	// only root can do this (or should root's group be enough?)
6711 	if (geteuid() != 0)
6712 		return B_NOT_ALLOWED;
6713 
6714 	if (infoSize != sizeof(fd_info))
6715 		return B_BAD_VALUE;
6716 
6717 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
6718 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
6719 		return B_BAD_ADDRESS;
6720 
6721 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
6722 	if (status < B_OK)
6723 		return status;
6724 
6725 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
6726 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
6727 		return B_BAD_ADDRESS;
6728 
6729 	return status;
6730 }
6731 
6732 
/**	\brief Syscall: resolves the entry ref (\a device, \a inode, \a leaf) to
 *		   an absolute path and copies it into \a userPath.
 */
status_t
_user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
	char *userPath, size_t pathLength)
{
	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	struct vnode *vnode;
	status_t status;

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;

	// copy the leaf name onto the stack
	char stackLeaf[B_FILE_NAME_LENGTH];
	if (leaf) {
		if (!IS_USER_ADDRESS(leaf))
			return B_BAD_ADDRESS;

		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
		if (len < 0)
			return len;
		if (len >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
		leaf = stackLeaf;

		// filter invalid leaf names
		if (leaf[0] == '\0' || strchr(leaf, '/'))
			return B_BAD_VALUE;
	}

	// get the vnode matching the dir's node_ref
	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
		// special cases "." and "..": we can directly get the vnode of the
		// referenced directory
		// (leaf is cleared so nothing gets appended below)
		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
		leaf = NULL;
	} else
		status = get_vnode(device, inode, &vnode, false);
	if (status < B_OK)
		return status;

	char *path = pathBuffer.LockBuffer();

	// get the directory path
	status = dir_vnode_to_path(vnode, path, pathBuffer.BufferSize());
	put_vnode(vnode);
		// we don't need the vnode anymore
	if (status < B_OK)
		return status;

	// append the leaf name
	if (leaf) {
		// insert a directory separator if this is not the file system root
		if ((strcmp(path, "/") && strlcat(path, "/", pathBuffer.BufferSize())
				>= pathBuffer.BufferSize())
			|| strlcat(path, leaf, pathBuffer.BufferSize()) >= pathBuffer.BufferSize()) {
			return B_NAME_TOO_LONG;
		}
	}

	// copy the assembled path back to userland
	int len = user_strlcpy(userPath, path, pathLength);
	if (len < 0)
		return len;
	if (len >= (int)pathLength)
		return B_BUFFER_OVERFLOW;

	return B_OK;
}
6803 
6804 
6805 int
6806 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
6807 	int openMode, int perms)
6808 {
6809 	char name[B_FILE_NAME_LENGTH];
6810 
6811 	if (userName == NULL || device < 0 || inode < 0)
6812 		return B_BAD_VALUE;
6813 	if (!IS_USER_ADDRESS(userName)
6814 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6815 		return B_BAD_ADDRESS;
6816 
6817 	if (openMode & O_CREAT)
6818 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
6819 
6820 	return file_open_entry_ref(device, inode, name, openMode, false);
6821 }
6822 
6823 
6824 int
6825 _user_open(int fd, const char *userPath, int openMode, int perms)
6826 {
6827 	KPath path(B_PATH_NAME_LENGTH + 1);
6828 	if (path.InitCheck() != B_OK)
6829 		return B_NO_MEMORY;
6830 
6831 	char *buffer = path.LockBuffer();
6832 
6833 	if (!IS_USER_ADDRESS(userPath)
6834 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6835 		return B_BAD_ADDRESS;
6836 
6837 	if (openMode & O_CREAT)
6838 		return file_create(fd, buffer, openMode, perms, false);
6839 
6840 	return file_open(fd, buffer, openMode, false);
6841 }
6842 
6843 
6844 int
6845 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
6846 {
6847 	if (userName != NULL) {
6848 		char name[B_FILE_NAME_LENGTH];
6849 
6850 		if (!IS_USER_ADDRESS(userName)
6851 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6852 			return B_BAD_ADDRESS;
6853 
6854 		return dir_open_entry_ref(device, inode, name, false);
6855 	}
6856 	return dir_open_entry_ref(device, inode, NULL, false);
6857 }
6858 
6859 
6860 int
6861 _user_open_dir(int fd, const char *userPath)
6862 {
6863 	KPath path(B_PATH_NAME_LENGTH + 1);
6864 	if (path.InitCheck() != B_OK)
6865 		return B_NO_MEMORY;
6866 
6867 	char *buffer = path.LockBuffer();
6868 
6869 	if (!IS_USER_ADDRESS(userPath)
6870 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6871 		return B_BAD_ADDRESS;
6872 
6873 	return dir_open(fd, buffer, false);
6874 }
6875 
6876 
6877 /**	\brief Opens a directory's parent directory and returns the entry name
6878  *		   of the former.
6879  *
 *	Aside from the fact that it returns the directory's entry name, this method is
6881  *	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
6882  *	equivalent, if \a userName is \c NULL.
6883  *
6884  *	If a name buffer is supplied and the name does not fit the buffer, the
6885  *	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
6886  *
6887  *	\param fd A FD referring to a directory.
6888  *	\param userName Buffer the directory's entry name shall be written into.
6889  *		   May be \c NULL.
6890  *	\param nameLength Size of the name buffer.
6891  *	\return The file descriptor of the opened parent directory, if everything
6892  *			went fine, an error code otherwise.
6893  */
6894 
int
_user_open_parent_dir(int fd, char *userName, size_t nameLength)
{
	bool kernel = false;

	if (userName && !IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	// open the parent dir
	int parentFD = dir_open(fd, "..", kernel);
	if (parentFD < 0)
		return parentFD;
	FDCloser fdCloser(parentFD, kernel);
		// closes parentFD on early return unless Detach()ed below

	if (userName) {
		// get the vnodes
		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
		VNodePutter parentVNodePutter(parentVNode);
		VNodePutter dirVNodePutter(dirVNode);
			// the putters release the vnode references on every exit path
		if (!parentVNode || !dirVNode)
			return B_FILE_ERROR;

		// get the vnode name, using a stack dirent large enough for the
		// maximal file name
		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		struct dirent *buffer = (struct dirent*)_buffer;
		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
			sizeof(_buffer));
		if (status != B_OK)
			return status;

		// copy the name to the userland buffer
		int len = user_strlcpy(userName, buffer->d_name, nameLength);
		if (len < 0)
			return len;
		if (len >= (int)nameLength)
			return B_BUFFER_OVERFLOW;
	}

	// success: hand the parent FD to the caller without closing it
	return fdCloser.Detach();
}
6936 
6937 
/**	\brief Syscall: fcntl() on \a fd; forwards to common_fcntl() with the
 *		   kernel flag cleared.
 */
status_t
_user_fcntl(int fd, int op, uint32 argument)
{
	return common_fcntl(fd, op, argument, false);
}
6943 
6944 
/**	\brief Syscall: fsync() on \a fd; forwards to common_sync(). */
status_t
_user_fsync(int fd)
{
	return common_sync(fd, false);
}
6950 
6951 
/**	\brief Syscall: locks the node of \a fd; forwards to common_lock_node(). */
status_t
_user_lock_node(int fd)
{
	return common_lock_node(fd, false);
}
6957 
6958 
/**	\brief Syscall: unlocks the node of \a fd; forwards to
 *		   common_unlock_node().
 */
status_t
_user_unlock_node(int fd)
{
	return common_unlock_node(fd, false);
}
6964 
6965 
6966 status_t
6967 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
6968 {
6969 	char name[B_FILE_NAME_LENGTH];
6970 	status_t status;
6971 
6972 	if (!IS_USER_ADDRESS(userName))
6973 		return B_BAD_ADDRESS;
6974 
6975 	status = user_strlcpy(name, userName, sizeof(name));
6976 	if (status < 0)
6977 		return status;
6978 
6979 	return dir_create_entry_ref(device, inode, name, perms, false);
6980 }
6981 
6982 
6983 status_t
6984 _user_create_dir(int fd, const char *userPath, int perms)
6985 {
6986 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6987 	if (pathBuffer.InitCheck() != B_OK)
6988 		return B_NO_MEMORY;
6989 
6990 	char *path = pathBuffer.LockBuffer();
6991 
6992 	if (!IS_USER_ADDRESS(userPath)
6993 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6994 		return B_BAD_ADDRESS;
6995 
6996 	return dir_create(fd, path, perms, false);
6997 }
6998 
6999 
7000 status_t
7001 _user_remove_dir(int fd, const char *userPath)
7002 {
7003 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7004 	if (pathBuffer.InitCheck() != B_OK)
7005 		return B_NO_MEMORY;
7006 
7007 	char *path = pathBuffer.LockBuffer();
7008 
7009 	if (userPath != NULL) {
7010 		if (!IS_USER_ADDRESS(userPath)
7011 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7012 			return B_BAD_ADDRESS;
7013 	}
7014 
7015 	return dir_remove(fd, userPath ? path : NULL, false);
7016 }
7017 
7018 
7019 status_t
7020 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
7021 {
7022 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
7023 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
7024 		return B_NO_MEMORY;
7025 
7026 	size_t bufferSize;
7027 
7028 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
7029 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
7030 		return B_BAD_ADDRESS;
7031 
7032 	char *path = pathBuffer.LockBuffer();
7033 	char *buffer = linkBuffer.LockBuffer();
7034 
7035 	if (userPath) {
7036 		if (!IS_USER_ADDRESS(userPath)
7037 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7038 			return B_BAD_ADDRESS;
7039 
7040 		if (bufferSize > B_PATH_NAME_LENGTH)
7041 			bufferSize = B_PATH_NAME_LENGTH;
7042 	}
7043 
7044 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
7045 		&bufferSize, false);
7046 
7047 	// we also update the bufferSize in case of errors
7048 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
7049 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
7050 		return B_BAD_ADDRESS;
7051 
7052 	if (status < B_OK)
7053 		return status;
7054 
7055 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
7056 		return B_BAD_ADDRESS;
7057 
7058 	return B_OK;
7059 }
7060 
7061 
7062 status_t
7063 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
7064 	int mode)
7065 {
7066 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7067 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7068 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7069 		return B_NO_MEMORY;
7070 
7071 	char *path = pathBuffer.LockBuffer();
7072 	char *toPath = toPathBuffer.LockBuffer();
7073 
7074 	if (!IS_USER_ADDRESS(userPath)
7075 		|| !IS_USER_ADDRESS(userToPath)
7076 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7077 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7078 		return B_BAD_ADDRESS;
7079 
7080 	status_t status = check_path(toPath);
7081 	if (status < B_OK)
7082 		return status;
7083 
7084 	return common_create_symlink(fd, path, toPath, mode, false);
7085 }
7086 
7087 
7088 status_t
7089 _user_create_link(const char *userPath, const char *userToPath)
7090 {
7091 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7092 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7093 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7094 		return B_NO_MEMORY;
7095 
7096 	char *path = pathBuffer.LockBuffer();
7097 	char *toPath = toPathBuffer.LockBuffer();
7098 
7099 	if (!IS_USER_ADDRESS(userPath)
7100 		|| !IS_USER_ADDRESS(userToPath)
7101 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7102 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7103 		return B_BAD_ADDRESS;
7104 
7105 	status_t status = check_path(toPath);
7106 	if (status < B_OK)
7107 		return status;
7108 
7109 	return common_create_link(path, toPath, false);
7110 }
7111 
7112 
7113 status_t
7114 _user_unlink(int fd, const char *userPath)
7115 {
7116 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7117 	if (pathBuffer.InitCheck() != B_OK)
7118 		return B_NO_MEMORY;
7119 
7120 	char *path = pathBuffer.LockBuffer();
7121 
7122 	if (!IS_USER_ADDRESS(userPath)
7123 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7124 		return B_BAD_ADDRESS;
7125 
7126 	return common_unlink(fd, path, false);
7127 }
7128 
7129 
7130 status_t
7131 _user_rename(int oldFD, const char *userOldPath, int newFD,
7132 	const char *userNewPath)
7133 {
7134 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7135 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7136 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7137 		return B_NO_MEMORY;
7138 
7139 	char *oldPath = oldPathBuffer.LockBuffer();
7140 	char *newPath = newPathBuffer.LockBuffer();
7141 
7142 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7143 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7144 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7145 		return B_BAD_ADDRESS;
7146 
7147 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7148 }
7149 
7150 
7151 status_t
7152 _user_access(const char *userPath, int mode)
7153 {
7154 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7155 	if (pathBuffer.InitCheck() != B_OK)
7156 		return B_NO_MEMORY;
7157 
7158 	char *path = pathBuffer.LockBuffer();
7159 
7160 	if (!IS_USER_ADDRESS(userPath)
7161 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7162 		return B_BAD_ADDRESS;
7163 
7164 	return common_access(path, mode, false);
7165 }
7166 
7167 
/**	\brief Syscall: reads stat data for (\a fd, \a userPath) into
 *		   \a userStat; \a statSize supports smaller (older) userland
 *		   struct stat versions.
 */
status_t
_user_read_stat(int fd, const char *userPath, bool traverseLink,
	struct stat *userStat, size_t statSize)
{
	struct stat stat;
	status_t status;

	// callers may pass a smaller struct stat, never a larger one
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userStat))
		return B_BAD_ADDRESS;

	if (userPath) {
		// path given: get the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char *path = pathBuffer.LockBuffer();

		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
		if (length < B_OK)
			return length;
		if (length >= B_PATH_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, &stat);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	if (status < B_OK)
		return status;

	// only the first statSize bytes are copied back to userland
	return user_memcpy(userStat, &stat, statSize);
}
7219 
7220 
/**	\brief Syscall: writes the stat fields selected by \a statMask from
 *		   \a userStat to the node at (\a fd, \a userPath); \a statSize
 *		   supports smaller (older) userland struct stat versions.
 */
status_t
_user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
	const struct stat *userStat, size_t statSize, int statMask)
{
	// callers may pass a smaller struct stat, never a larger one
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	struct stat stat;

	if (!IS_USER_ADDRESS(userStat)
		|| user_memcpy(&stat, userStat, statSize) < B_OK)
		return B_BAD_ADDRESS;

	// clear additional stat fields
	if (statSize < sizeof(struct stat))
		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);

	status_t status;

	if (userPath) {
		// path given: write the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char *path = pathBuffer.LockBuffer();

		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
		if (length < B_OK)
			return length;
		if (length >= B_PATH_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
			statMask, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	return status;
}
7276 
7277 
7278 int
7279 _user_open_attr_dir(int fd, const char *userPath)
7280 {
7281 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7282 	if (pathBuffer.InitCheck() != B_OK)
7283 		return B_NO_MEMORY;
7284 
7285 	char *path = pathBuffer.LockBuffer();
7286 
7287 	if (userPath != NULL) {
7288 		if (!IS_USER_ADDRESS(userPath)
7289 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7290 			return B_BAD_ADDRESS;
7291 	}
7292 
7293 	return attr_dir_open(fd, userPath ? path : NULL, false);
7294 }
7295 
7296 
7297 int
7298 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7299 {
7300 	char name[B_FILE_NAME_LENGTH];
7301 
7302 	if (!IS_USER_ADDRESS(userName)
7303 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7304 		return B_BAD_ADDRESS;
7305 
7306 	return attr_create(fd, name, type, openMode, false);
7307 }
7308 
7309 
7310 int
7311 _user_open_attr(int fd, const char *userName, int openMode)
7312 {
7313 	char name[B_FILE_NAME_LENGTH];
7314 
7315 	if (!IS_USER_ADDRESS(userName)
7316 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7317 		return B_BAD_ADDRESS;
7318 
7319 	return attr_open(fd, name, openMode, false);
7320 }
7321 
7322 
7323 status_t
7324 _user_remove_attr(int fd, const char *userName)
7325 {
7326 	char name[B_FILE_NAME_LENGTH];
7327 
7328 	if (!IS_USER_ADDRESS(userName)
7329 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7330 		return B_BAD_ADDRESS;
7331 
7332 	return attr_remove(fd, name, false);
7333 }
7334 
7335 
7336 status_t
7337 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
7338 {
7339 	if (!IS_USER_ADDRESS(userFromName)
7340 		|| !IS_USER_ADDRESS(userToName))
7341 		return B_BAD_ADDRESS;
7342 
7343 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
7344 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
7345 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
7346 		return B_NO_MEMORY;
7347 
7348 	char *fromName = fromNameBuffer.LockBuffer();
7349 	char *toName = toNameBuffer.LockBuffer();
7350 
7351 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
7352 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
7353 		return B_BAD_ADDRESS;
7354 
7355 	return attr_rename(fromFile, fromName, toFile, toName, false);
7356 }
7357 
7358 
/**	\brief Syscall: opens the index directory of \a device; forwards to
 *		   index_dir_open() with the kernel flag cleared.
 */
int
_user_open_index_dir(dev_t device)
{
	return index_dir_open(device, false);
}
7364 
7365 
7366 status_t
7367 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7368 {
7369 	char name[B_FILE_NAME_LENGTH];
7370 
7371 	if (!IS_USER_ADDRESS(userName)
7372 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7373 		return B_BAD_ADDRESS;
7374 
7375 	return index_create(device, name, type, flags, false);
7376 }
7377 
7378 
7379 status_t
7380 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
7381 {
7382 	char name[B_FILE_NAME_LENGTH];
7383 	struct stat stat;
7384 	status_t status;
7385 
7386 	if (!IS_USER_ADDRESS(userName)
7387 		|| !IS_USER_ADDRESS(userStat)
7388 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7389 		return B_BAD_ADDRESS;
7390 
7391 	status = index_name_read_stat(device, name, &stat, false);
7392 	if (status == B_OK) {
7393 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
7394 			return B_BAD_ADDRESS;
7395 	}
7396 
7397 	return status;
7398 }
7399 
7400 
7401 status_t
7402 _user_remove_index(dev_t device, const char *userName)
7403 {
7404 	char name[B_FILE_NAME_LENGTH];
7405 
7406 	if (!IS_USER_ADDRESS(userName)
7407 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7408 		return B_BAD_ADDRESS;
7409 
7410 	return index_remove(device, name, false);
7411 }
7412 
7413 
7414 status_t
7415 _user_getcwd(char *userBuffer, size_t size)
7416 {
7417 	if (!IS_USER_ADDRESS(userBuffer))
7418 		return B_BAD_ADDRESS;
7419 
7420 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7421 	if (pathBuffer.InitCheck() != B_OK)
7422 		return B_NO_MEMORY;
7423 
7424 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
7425 
7426 	if (size > B_PATH_NAME_LENGTH)
7427 		size = B_PATH_NAME_LENGTH;
7428 
7429 	char *path = pathBuffer.LockBuffer();
7430 
7431 	status_t status = get_cwd(path, size, false);
7432 	if (status < B_OK)
7433 		return status;
7434 
7435 	// Copy back the result
7436 	if (user_strlcpy(userBuffer, path, size) < B_OK)
7437 		return B_BAD_ADDRESS;
7438 
7439 	return status;
7440 }
7441 
7442 
7443 status_t
7444 _user_setcwd(int fd, const char *userPath)
7445 {
7446 	TRACE(("user_setcwd: path = %p\n", userPath));
7447 
7448 	KPath pathBuffer(B_PATH_NAME_LENGTH);
7449 	if (pathBuffer.InitCheck() != B_OK)
7450 		return B_NO_MEMORY;
7451 
7452 	char *path = pathBuffer.LockBuffer();
7453 
7454 	if (userPath != NULL) {
7455 		if (!IS_USER_ADDRESS(userPath)
7456 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7457 			return B_BAD_ADDRESS;
7458 	}
7459 
7460 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
7461 }
7462 
7463 
7464 int
7465 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
7466 	uint32 flags, port_id port, int32 token)
7467 {
7468 	char *query;
7469 
7470 	if (device < 0 || userQuery == NULL || queryLength == 0)
7471 		return B_BAD_VALUE;
7472 
7473 	// this is a safety restriction
7474 	if (queryLength >= 65536)
7475 		return B_NAME_TOO_LONG;
7476 
7477 	query = (char *)malloc(queryLength + 1);
7478 	if (query == NULL)
7479 		return B_NO_MEMORY;
7480 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
7481 		free(query);
7482 		return B_BAD_ADDRESS;
7483 	}
7484 
7485 	int fd = query_open(device, query, flags, port, token, false);
7486 
7487 	free(query);
7488 	return fd;
7489 }
7490