xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 3e216965baa8d58a67bf7372e2bfa13d999f5a9d)
1 /*
2  * Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /* Virtual File System and File System Interface Layer */
10 
11 #include <OS.h>
12 #include <StorageDefs.h>
13 #include <fs_info.h>
14 #include <fs_interface.h>
15 #include <fs_volume.h>
16 
17 #include <block_cache.h>
18 #include <fd.h>
19 #include <file_cache.h>
20 #include <khash.h>
21 #include <KPath.h>
22 #include <lock.h>
23 #include <syscalls.h>
24 #include <vfs.h>
25 #include <vm.h>
26 #include <vm_cache.h>
27 #include <vm_low_memory.h>
28 
29 #include <boot/kernel_args.h>
30 #include <disk_device_manager/KDiskDevice.h>
31 #include <disk_device_manager/KDiskDeviceManager.h>
32 #include <disk_device_manager/KDiskDeviceUtils.h>
33 #include <disk_device_manager/KDiskSystem.h>
34 #include <fs/node_monitor.h>
35 #include <util/kernel_cpp.h>
36 
37 #include <string.h>
38 #include <stdio.h>
39 #include <ctype.h>
40 #include <unistd.h>
41 #include <sys/stat.h>
42 #include <sys/resource.h>
43 #include <fcntl.h>
44 #include <limits.h>
45 #include <stddef.h>
46 
47 //#define TRACE_VFS
48 #ifdef TRACE_VFS
49 #	define TRACE(x) dprintf x
50 #	define FUNCTION(x) dprintf x
51 #else
52 #	define TRACE(x) ;
53 #	define FUNCTION(x) ;
54 #endif
55 
56 #define ADD_DEBUGGER_COMMANDS
57 
58 const static uint32 kMaxUnusedVnodes = 8192;
59 	// This is the maximum number of unused vnodes that the system
60 	// will keep around (weak limit, if there is enough memory left,
61 	// they won't get flushed even when hitting that limit).
62 	// It may be chosen with respect to the available memory or enhanced
63  * by some timestamp/frequency heuristic.
64 
// In-memory representation of a file system node. One instance exists per
// (mount, node ID) pair; it lives in sVnodeTable and in its mount's vnode
// list until the last reference is gone and it gets flushed/freed.
struct vnode {
	struct vnode	*next;			// presumably the sVnodeTable chain link -- confirm
	vm_cache_ref	*cache;			// attached file cache, may be NULL
	mount_id		device;			// ID of the mount this node belongs to
	list_link		mount_link;		// link in fs_mount::vnodes
	list_link		unused_link;	// link in sUnusedVnodeList while ref_count == 0
	vnode_id		id;				// node ID, unique per mount
	fs_vnode		private_node;	// the file system's private node handle
	struct fs_mount	*mount;			// the mount this vnode belongs to
	struct vnode	*covered_by;	// vnode mounted on top of this one (guarded by sMountOpLock)
	int32			ref_count;		// reference count, modified via atomic_add()
	uint8			remove : 1;		// delete the node when the last reference goes away
	uint8			busy : 1;		// under construction/destruction -- do not touch
	uint8			unpublished : 1;	// created but not yet published by the FS
	struct advisory_locking	*advisory_locking;	// advisory file locks, created lazily
	struct file_descriptor *mandatory_locked_by;	// descriptor holding a mandatory lock, if any
};
82 
// Lookup key for sVnodeTable: a (mount ID, node ID) pair.
struct vnode_hash_key {
	mount_id	device;		// mount the node belongs to
	vnode_id	vnode;		// node ID within that mount
};
87 
88 #define FS_CALL(vnode, op) (vnode->mount->fs->op)
89 #define FS_MOUNT_CALL(mount, op) (mount->fs->op)
90 
// Describes one mounted file system instance; stored in sMountsTable
// (which is guarded by sMountMutex).
struct fs_mount {
	struct fs_mount	*next;			// presumably the sMountsTable chain link -- confirm
	file_system_module_info *fs;	// the file system module driving this mount
	mount_id		id;				// unique mount ID (assigned from sNextMountID)
	void			*cookie;		// FS-private volume cookie
	char			*device_name;	// underlying device name, may be NULL
	char			*fs_name;		// short FS name (e.g. "bfs"), see get_file_system_name()
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;	// root node of the volume
	struct vnode	*covers_vnode;	// the vnode this volume is mounted on
	KPartition		*partition;		// associated disk partition, if any
	struct list		vnodes;			// all vnodes that belong to this mount
	bool			unmounting;		// set while the volume is being unmounted
	bool			owns_file_device;	// NOTE(review): assumed to mean the mount created the file device -- verify
};
106 
// Per-vnode container for advisory (fcntl-style) file locks.
struct advisory_locking {
	sem_id			lock;		// serializes access to the locks list (semaphore used as a lock)
	sem_id			wait_sem;	// threads blocked on a colliding lock wait on this
	struct list		locks;		// list of advisory_lock entries
};
112 
// A single advisory lock a team holds on a byte range of a file.
struct advisory_lock {
	list_link		link;		// link in advisory_locking::locks
	team_id			team;		// team that owns this lock
	off_t			offset;		// start of the locked range
	off_t			length;		// length of the locked range
	bool			shared;		// true for shared (read) locks
};
120 
121 static mutex sFileSystemsMutex;
122 
123 /**	\brief Guards sMountsTable.
124  *
125  *	The holder is allowed to read/write access the sMountsTable.
126  *	Manipulation of the fs_mount structures themselves
127  *	(and their destruction) requires different locks though.
128  */
129 static mutex sMountMutex;
130 
131 /**	\brief Guards mount/unmount operations.
132  *
133  *	The fs_mount() and fs_unmount() hold the lock during their whole operation.
134  *	That is locking the lock ensures that no FS is mounted/unmounted. In
135  *	particular this means that
136  *	- sMountsTable will not be modified,
137  *	- the fields immutable after initialization of the fs_mount structures in
138  *	  sMountsTable will not be modified,
139  *	- vnode::covered_by of any vnode in sVnodeTable will not be modified,
140  *
141  *	The thread trying to lock the lock must not hold sVnodeMutex or
142  *	sMountMutex.
143  */
144 static recursive_lock sMountOpLock;
145 
146 /**	\brief Guards sVnodeTable.
147  *
148  *	The holder is allowed to read/write access sVnodeTable and
149  *	to any unbusy vnode in that table, save
150  *	to the immutable fields (device, id, private_node, mount) to which
151  *	only read-only access is allowed, and to the field covered_by, which is
152  *	guarded by sMountOpLock.
153  *
154  *	The thread trying to lock the mutex must not hold sMountMutex.
155  *	You must not have this mutex held when calling create_sem(), as this
156  *	might call vfs_free_unused_vnodes().
157  */
158 static mutex sVnodeMutex;
159 
160 #define VNODE_HASH_TABLE_SIZE 1024
161 static hash_table *sVnodeTable;
162 static list sUnusedVnodeList;
163 static uint32 sUnusedVnodes = 0;
164 static struct vnode *sRoot;
165 
166 #define MOUNTS_HASH_TABLE_SIZE 16
167 static hash_table *sMountsTable;
168 static mount_id sNextMountID = 1;
169 
170 mode_t __gUmask = 022;
171 
172 /* function declarations */
173 
174 // file descriptor operation prototypes
175 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
176 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
177 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
178 static void file_free_fd(struct file_descriptor *);
179 static status_t file_close(struct file_descriptor *);
180 static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
181 	struct select_sync *sync);
182 static status_t file_deselect(struct file_descriptor *, uint8 event,
183 	struct select_sync *sync);
184 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
185 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
186 static status_t dir_rewind(struct file_descriptor *);
187 static void dir_free_fd(struct file_descriptor *);
188 static status_t dir_close(struct file_descriptor *);
189 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
190 static status_t attr_dir_rewind(struct file_descriptor *);
191 static void attr_dir_free_fd(struct file_descriptor *);
192 static status_t attr_dir_close(struct file_descriptor *);
193 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
194 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
195 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
196 static void attr_free_fd(struct file_descriptor *);
197 static status_t attr_close(struct file_descriptor *);
198 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
199 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
200 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
201 static status_t index_dir_rewind(struct file_descriptor *);
202 static void index_dir_free_fd(struct file_descriptor *);
203 static status_t index_dir_close(struct file_descriptor *);
204 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
205 static status_t query_rewind(struct file_descriptor *);
206 static void query_free_fd(struct file_descriptor *);
207 static status_t query_close(struct file_descriptor *);
208 
209 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
210 static status_t common_read_stat(struct file_descriptor *, struct stat *);
211 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
212 
213 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
214 	bool traverseLeafLink, int count, struct vnode **_vnode, vnode_id *_parentID, int *_type);
215 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
216 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
217 	struct vnode **_vnode, vnode_id *_parentID, bool kernel);
218 static void inc_vnode_ref_count(struct vnode *vnode);
219 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
220 static inline void put_vnode(struct vnode *vnode);
221 
222 static struct fd_ops sFileOps = {
223 	file_read,
224 	file_write,
225 	file_seek,
226 	common_ioctl,
227 	file_select,
228 	file_deselect,
229 	NULL,		// read_dir()
230 	NULL,		// rewind_dir()
231 	common_read_stat,
232 	common_write_stat,
233 	file_close,
234 	file_free_fd
235 };
236 
237 static struct fd_ops sDirectoryOps = {
238 	NULL,		// read()
239 	NULL,		// write()
240 	NULL,		// seek()
241 	common_ioctl,
242 	NULL,		// select()
243 	NULL,		// deselect()
244 	dir_read,
245 	dir_rewind,
246 	common_read_stat,
247 	common_write_stat,
248 	dir_close,
249 	dir_free_fd
250 };
251 
252 static struct fd_ops sAttributeDirectoryOps = {
253 	NULL,		// read()
254 	NULL,		// write()
255 	NULL,		// seek()
256 	common_ioctl,
257 	NULL,		// select()
258 	NULL,		// deselect()
259 	attr_dir_read,
260 	attr_dir_rewind,
261 	common_read_stat,
262 	common_write_stat,
263 	attr_dir_close,
264 	attr_dir_free_fd
265 };
266 
267 static struct fd_ops sAttributeOps = {
268 	attr_read,
269 	attr_write,
270 	attr_seek,
271 	common_ioctl,
272 	NULL,		// select()
273 	NULL,		// deselect()
274 	NULL,		// read_dir()
275 	NULL,		// rewind_dir()
276 	attr_read_stat,
277 	attr_write_stat,
278 	attr_close,
279 	attr_free_fd
280 };
281 
282 static struct fd_ops sIndexDirectoryOps = {
283 	NULL,		// read()
284 	NULL,		// write()
285 	NULL,		// seek()
286 	NULL,		// ioctl()
287 	NULL,		// select()
288 	NULL,		// deselect()
289 	index_dir_read,
290 	index_dir_rewind,
291 	NULL,		// read_stat()
292 	NULL,		// write_stat()
293 	index_dir_close,
294 	index_dir_free_fd
295 };
296 
297 #if 0
298 static struct fd_ops sIndexOps = {
299 	NULL,		// read()
300 	NULL,		// write()
301 	NULL,		// seek()
302 	NULL,		// ioctl()
303 	NULL,		// select()
304 	NULL,		// deselect()
305 	NULL,		// dir_read()
306 	NULL,		// dir_rewind()
307 	index_read_stat,	// read_stat()
308 	NULL,		// write_stat()
309 	NULL,		// dir_close()
310 	NULL		// free_fd()
311 };
312 #endif
313 
314 static struct fd_ops sQueryOps = {
315 	NULL,		// read()
316 	NULL,		// write()
317 	NULL,		// seek()
318 	NULL,		// ioctl()
319 	NULL,		// select()
320 	NULL,		// deselect()
321 	query_read,
322 	query_rewind,
323 	NULL,		// read_stat()
324 	NULL,		// write_stat()
325 	query_close,
326 	query_free_fd
327 };
328 
329 
330 // VNodePutter
331 class VNodePutter {
332 public:
333 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
334 
335 	~VNodePutter()
336 	{
337 		Put();
338 	}
339 
340 	void SetTo(struct vnode *vnode)
341 	{
342 		Put();
343 		fVNode = vnode;
344 	}
345 
346 	void Put()
347 	{
348 		if (fVNode) {
349 			put_vnode(fVNode);
350 			fVNode = NULL;
351 		}
352 	}
353 
354 	struct vnode *Detach()
355 	{
356 		struct vnode *vnode = fVNode;
357 		fVNode = NULL;
358 		return vnode;
359 	}
360 
361 private:
362 	struct vnode *fVNode;
363 };
364 
365 
366 class FDCloser {
367 public:
368 	FDCloser() : fFD(-1), fKernel(true) {}
369 
370 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
371 
372 	~FDCloser()
373 	{
374 		Close();
375 	}
376 
377 	void SetTo(int fd, bool kernel)
378 	{
379 		Close();
380 		fFD = fd;
381 		fKernel = kernel;
382 	}
383 
384 	void Close()
385 	{
386 		if (fFD >= 0) {
387 			if (fKernel)
388 				_kern_close(fFD);
389 			else
390 				_user_close(fFD);
391 			fFD = -1;
392 		}
393 	}
394 
395 	int Detach()
396 	{
397 		int fd = fFD;
398 		fFD = -1;
399 		return fd;
400 	}
401 
402 private:
403 	int		fFD;
404 	bool	fKernel;
405 };
406 
407 
408 static int
409 mount_compare(void *_m, const void *_key)
410 {
411 	struct fs_mount *mount = (fs_mount *)_m;
412 	const mount_id *id = (mount_id *)_key;
413 
414 	if (mount->id == *id)
415 		return 0;
416 
417 	return -1;
418 }
419 
420 
421 static uint32
422 mount_hash(void *_m, const void *_key, uint32 range)
423 {
424 	struct fs_mount *mount = (fs_mount *)_m;
425 	const mount_id *id = (mount_id *)_key;
426 
427 	if (mount)
428 		return mount->id % range;
429 
430 	return (uint32)*id % range;
431 }
432 
433 
/** Finds the mounted device (the fs_mount structure) with the given ID.
 *	Note, you must hold the sMountMutex lock when you call this function.
 *	Returns NULL if no mount with that ID exists.
 */

static struct fs_mount *
find_mount(mount_id id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}
445 
446 
447 static status_t
448 get_mount(mount_id id, struct fs_mount **_mount)
449 {
450 	struct fs_mount *mount;
451 	status_t status;
452 
453 	mutex_lock(&sMountMutex);
454 
455 	mount = find_mount(id);
456 	if (mount) {
457 		// ToDo: the volume is locked (against removal) by locking
458 		//	its root node - investigate if that's a good idea
459 		if (mount->root_vnode)
460 			inc_vnode_ref_count(mount->root_vnode);
461 		else {
462 			// might have been called during a mount operation in which
463 			// case the root node may still be NULL
464 			mount = NULL;
465 		}
466 	} else
467 		status = B_BAD_VALUE;
468 
469 	mutex_unlock(&sMountMutex);
470 
471 	if (mount == NULL)
472 		return B_BUSY;
473 
474 	*_mount = mount;
475 	return B_OK;
476 }
477 
478 
479 static void
480 put_mount(struct fs_mount *mount)
481 {
482 	if (mount)
483 		put_vnode(mount->root_vnode);
484 }
485 
486 
/*!	Releases the reference to a file system module previously obtained
	via get_file_system().
*/
static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}
492 
493 
494 /**	Tries to open the specified file system module.
495  *	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
496  *	Returns a pointer to file system module interface, or NULL if it
497  *	could not open the module.
498  */
499 
500 static file_system_module_info *
501 get_file_system(const char *fsName)
502 {
503 	char name[B_FILE_NAME_LENGTH];
504 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
505 		// construct module name if we didn't get one
506 		// (we currently support only one API)
507 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
508 		fsName = NULL;
509 	}
510 
511 	file_system_module_info *info;
512 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
513 		return NULL;
514 
515 	return info;
516 }
517 
518 
519 /**	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
520  *	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
521  *	The name is allocated for you, and you have to free() it when you're
522  *	done with it.
523  *	Returns NULL if the required memory is no available.
524  */
525 
526 static char *
527 get_file_system_name(const char *fsName)
528 {
529 	const size_t length = strlen("file_systems/");
530 
531 	if (strncmp(fsName, "file_systems/", length)) {
532 		// the name already seems to be the module's file name
533 		return strdup(fsName);
534 	}
535 
536 	fsName += length;
537 	const char *end = strchr(fsName, '/');
538 	if (end == NULL) {
539 		// this doesn't seem to be a valid name, but well...
540 		return strdup(fsName);
541 	}
542 
543 	// cut off the trailing /v1
544 
545 	char *name = (char *)malloc(end + 1 - fsName);
546 	if (name == NULL)
547 		return NULL;
548 
549 	strlcpy(name, fsName, end + 1 - fsName);
550 	return name;
551 }
552 
553 
554 static int
555 vnode_compare(void *_vnode, const void *_key)
556 {
557 	struct vnode *vnode = (struct vnode *)_vnode;
558 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
559 
560 	if (vnode->device == key->device && vnode->id == key->vnode)
561 		return 0;
562 
563 	return -1;
564 }
565 
566 
567 static uint32
568 vnode_hash(void *_vnode, const void *_key, uint32 range)
569 {
570 	struct vnode *vnode = (struct vnode *)_vnode;
571 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
572 
573 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
574 
575 	if (vnode != NULL)
576 		return VHASH(vnode->device, vnode->id) % range;
577 
578 	return VHASH(key->device, key->vnode) % range;
579 
580 #undef VHASH
581 }
582 
583 
/*!	Adds \a vnode to the head of its mount's vnode list.
	The mount's rlock guards the list manipulation.
*/
static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}
593 
594 
/*!	Removes \a vnode from its mount's vnode list and clears the link
	pointers so a stale link cannot be followed afterwards.
	The mount's rlock guards the list manipulation.
*/
static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}
605 
606 
/*!	Allocates a new vnode for mount \a mountID / node \a vnodeID, inserts
	it into the global vnode hash table and its mount's vnode list, and
	returns it via \a _vnode with an initial reference count of 1.

	NOTE(review): sVnodeTable is modified here without taking sVnodeMutex,
	so the caller is presumably expected to hold it already (get_vnode()
	does) -- confirm.

	\return \c B_OK on success, \c B_NO_MEMORY if allocation failed,
		\c B_ENTRY_NOT_FOUND if the mount doesn't exist or is unmounting.
*/
static status_t
create_new_vnode(struct vnode **_vnode, mount_id mountID, vnode_id vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		// the mount vanished (or is going away) -- bail out
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}
640 
641 
/**	Frees the vnode and all resources it has acquired, and removes
 *	it from the vnode hash as well as from its mount structure.
 *	Will also make sure that any cache modifications are written back.
 *	The vnode must already be marked busy and must not have any
 *	references left (asserted below).
 */

static void
free_vnode(struct vnode *vnode, bool reenter)
{
	ASSERT(vnode->ref_count == 0 && vnode->busy);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

	if (!vnode->unpublished) {
		// Let the file system dispose of its private node:
		// remove_vnode() deletes the underlying node, put_vnode()
		// merely releases it.
		if (vnode->remove)
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
		else
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
	}

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}
682 
683 
/**	\brief Decrements the reference counter of the given vnode and deletes it,
 *	if the counter dropped to 0.
 *
 *	The caller must, of course, own a reference to the vnode to call this
 *	function.
 *	The caller must not hold the sVnodeMutex or the sMountMutex.
 *
 *	\param vnode the vnode.
 *	\param reenter \c true, if this function is called (indirectly) from within
 *		   a file system.
 *	\return \c B_OK, if everything went fine, an error code otherwise.
 */

static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	mutex_lock(&sVnodeMutex);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		// we just released the last reference
		if (vnode->busy)
			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			// marked for deletion -- free it right away
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes
				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				// (note: "vnode" is rebound to the list head here, which may
				// be a different node than the one we were called with)
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		mutex_unlock(&sVnodeMutex);

		// free_vnode() must be called without sVnodeMutex held
		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}
739 
740 
/**	\brief Increments the reference counter of the given vnode.
 *
 *	The caller must either already have a reference to the vnode or hold
 *	the sVnodeMutex (otherwise the node could be freed underneath us).
 *
 *	\param vnode the vnode.
 */

static void
inc_vnode_ref_count(struct vnode *vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}
755 
756 
757 /**	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
758  *
759  *	The caller must hold the sVnodeMutex.
760  *
761  *	\param mountID the mount ID.
762  *	\param vnodeID the node ID.
763  *
764  *	\return The vnode structure, if it was found in the hash table, \c NULL
765  *			otherwise.
766  */
767 
768 static struct vnode *
769 lookup_vnode(mount_id mountID, vnode_id vnodeID)
770 {
771 	struct vnode_hash_key key;
772 
773 	key.device = mountID;
774 	key.vnode = vnodeID;
775 
776 	return (vnode *)hash_lookup(sVnodeTable, &key);
777 }
778 
779 
/**	\brief Retrieves a vnode for a given mount ID, node ID pair.
 *
 *	If the node is not yet in memory, it will be loaded.
 *
 *	The caller must not hold the sVnodeMutex or the sMountMutex.
 *
 *	\param mountID the mount ID.
 *	\param vnodeID the node ID.
 *	\param _vnode Pointer to a vnode* variable into which the pointer to the
 *		   retrieved vnode structure shall be written.
 *	\param reenter \c true, if this function is called (indirectly) from within
 *		   a file system.
 *	\return \c B_OK, if everything when fine, an error code otherwise.
 */

static status_t
get_vnode(mount_id mountID, vnode_id vnodeID, struct vnode **_vnode, int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

	int32 tries = 300;
		// try for 3 secs
restart:
	// If the node is currently busy (being constructed or torn down by
	// another thread), drop the lock, sleep a bit and retry -- up to the
	// limit above, after which we panic.
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		mutex_unlock(&sVnodeMutex);
		if (--tries < 0) {
			// vnode doesn't seem to become unbusy
			panic("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
			return B_BUSY;
		}
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before; take it off the unused list
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		// mark it busy so concurrent lookups wait (see restart loop above)
		// while we call into the file system without holding the mutex
		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID, &vnode->private_node, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	// the FS failed to provide the node -- undo create_new_vnode()
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}
867 
868 
869 /**	\brief Decrements the reference counter of the given vnode and deletes it,
870  *	if the counter dropped to 0.
871  *
872  *	The caller must, of course, own a reference to the vnode to call this
873  *	function.
874  *	The caller must not hold the sVnodeMutex or the sMountMutex.
875  *
876  *	\param vnode the vnode.
877  */
878 
static inline void
put_vnode(struct vnode *vnode)
{
	// convenience wrapper -- "reenter" is false since this is not called
	// from within a file system
	dec_vnode_ref_count(vnode, false);
}
884 
885 
/*!	Low memory handler: frees unused vnodes, the amount scaled by how
	severe the shortage is (1% of the unused list on a note, 10% on a
	warning, everything on critical; at least one node otherwise).
*/
static void
vnode_low_memory_handler(void */*data*/, int32 level)
{
	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));

	int32 count = 1;
	switch (level) {
		case B_NO_LOW_MEMORY:
			return;
		case B_LOW_MEMORY_NOTE:
			count = sUnusedVnodes / 100;
			break;
		case B_LOW_MEMORY_WARNING:
			count = sUnusedVnodes / 10;
			break;
		case B_LOW_MEMORY_CRITICAL:
			count = sUnusedVnodes;
			break;
	}

	for (int32 i = 0; i < count; i++) {
		mutex_lock(&sVnodeMutex);

		// always take the oldest (head) unused vnode
		struct vnode *vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
		if (vnode == NULL) {
			// the list ran empty early -- nothing more to free
			mutex_unlock(&sVnodeMutex);
			break;
		}
		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));

		// mark busy so no one else grabs it while we free it unlocked
		vnode->busy = true;
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		free_vnode(vnode, false);
	}
}
924 
925 
/*!	Releases the advisory_locking's lock semaphore that was acquired by
	get_advisory_locking() (or create_advisory_locking()).
*/
static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}
931 
932 
/**	Returns the advisory_locking object of the \a vnode in case it
 *	has one, and locks it.
 *	You have to call put_advisory_locking() when you're done with
 *	it.
 *	Note, you must not have the vnode mutex locked when calling
 *	this function.
 *	Returns NULL if the vnode has no advisory locking, or if the
 *	locking was deleted while we were waiting for its semaphore.
 */

static struct advisory_locking *
get_advisory_locking(struct vnode *vnode)
{
	mutex_lock(&sVnodeMutex);

	// snapshot the pointer and its lock semaphore under the mutex;
	// the structure itself may go away once we drop it
	struct advisory_locking *locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	mutex_unlock(&sVnodeMutex);

	if (lock >= B_OK)
		lock = acquire_sem(lock);
	if (lock < B_OK) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
962 
963 
/**	Creates a locked advisory_locking object, and attaches it to the
 *	given \a vnode.
 *	Returns B_OK in case of success - also if the vnode got such an
 *	object from someone else in the mean time, you'll still get this
 *	one locked then.
 *	On error (semaphore creation or allocation failure) an error code
 *	is returned and nothing is attached.
 */

static status_t
create_advisory_locking(struct vnode *vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	struct advisory_locking *locking = (struct advisory_locking *)malloc(
		sizeof(struct advisory_locking));
	if (locking == NULL)
		return B_NO_MEMORY;

	status_t status;

	// both semaphores start at 0, i.e. "lock" is created in locked state
	locking->wait_sem = create_sem(0, "advisory lock");
	if (locking->wait_sem < B_OK) {
		status = locking->wait_sem;
		goto err1;
	}

	locking->lock = create_sem(0, "advisory locking");
	if (locking->lock < B_OK) {
		status = locking->lock;
		goto err2;
	}

	list_init(&locking->locks);

	// We need to set the locking structure atomically - someone
	// else might set one at the same time
	do {
		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking, (addr_t)locking,
				NULL) == NULL)
			return B_OK;
	} while (get_advisory_locking(vnode) == NULL);
		// if someone else won the race but their structure vanished
		// again before we could lock it, retry installing ours

	status = B_OK;
		// we delete the one we've just created, but nevertheless, the vnode
		// does have a locking structure now

	// goto-style cleanup: fall through deletes everything we created
	delete_sem(locking->lock);
err2:
	delete_sem(locking->wait_sem);
err1:
	free(locking);
	return status;
}
1017 
1018 
/**	Retrieves the first lock that has been set by the current team,
 *	and copies its range into \a flock (l_start/l_len).
 *	Returns B_BAD_VALUE if the vnode has no locking structure or the
 *	team holds no lock.
 */

static status_t
get_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_BAD_VALUE;

	// TODO: this should probably get the flock by its file descriptor!
	team_id team = team_get_current_team_id();
	status_t status = B_BAD_VALUE;

	// walk the lock list until we find one owned by this team
	struct advisory_lock *lock = NULL;
	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
		if (lock->team == team) {
			flock->l_start = lock->offset;
			flock->l_len = lock->length;
			status = B_OK;
			break;
		}
	}

	put_advisory_locking(locking);
	return status;
}
1046 
1047 
/**	Removes the specified lock, or all locks of the calling team
 *	if \a flock is NULL.
 *	Wakes up all waiters on the lock, and deletes the vnode's
 *	advisory_locking structure if it became empty.
 */

static status_t
release_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return flock != NULL ? B_BAD_VALUE : B_OK;

	team_id team = team_get_current_team_id();

	// find matching lock entry

	status_t status = B_BAD_VALUE;
	struct advisory_lock *lock = NULL;
	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
		// either remove the exact range given in flock, or (flock == NULL)
		// the first lock owned by this team
		if (lock->team == team && (flock == NULL || (flock != NULL
			&& lock->offset == flock->l_start
			&& lock->length == flock->l_len))) {
			// we found our lock, free it
			list_remove_item(&locking->locks, lock);
			free(lock);
			status = B_OK;
			break;
		}
	}

	bool removeLocking = list_is_empty(&locking->locks);
	// wake up everyone waiting for a lock to be released; they will
	// re-check for collisions themselves
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (status < B_OK)
		return status;

	if (removeLocking) {
		// we can remove the whole advisory locking structure; it's no longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			// the locking could have been changed in the mean time
			if (list_is_empty(&locking->locks)) {
				vnode->advisory_locking = NULL;

				// we've detached the locking from the vnode, so we can safely delete it
				delete_sem(locking->lock);
				delete_sem(locking->wait_sem);
				free(locking);
			} else {
				// the locking is in use again
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}
1108 
1109 
static status_t
acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
{
	// Installs an advisory (fcntl()-style) lock described by \a flock on
	// \a vnode. If a colliding lock is held by someone else, the call either
	// fails with B_PERMISSION_DENIED (\a wait == false) or blocks
	// interruptibly until the collision goes away (\a wait == true).
	// \a flock must already be normalized (see normalize_flock()).
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	// read locks may share their region with other read locks
	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding file lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	sem_id waitForLock = -1;
		// stays negative while no collision has been found

	if (locking != NULL) {
		// test for collisions
		struct advisory_lock *lock = NULL;
		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
			// NOTE(review): the "<=" also treats a lock that merely touches
			// the end of the requested region as overlapping — confirm
			// whether a strict "<" is intended here
			if (lock->offset <= flock->l_start + flock->l_len
				&& lock->offset + lock->length > flock->l_start) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		// keep the locking structure locked only if we are going to wait on it
		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = B_PERMISSION_DENIED;
		else {
			// atomically release locking->lock and block on wait_sem;
			// the wait_sem is released whenever a lock is removed
			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install new lock

	locking = get_advisory_locking(vnode);
	if (locking == NULL) {
		// we need to create a new locking object
		status = create_advisory_locking(vnode);
		if (status < B_OK)
			return status;

		locking = vnode->advisory_locking;
			// we own the locking object, so it can't go away
	}

	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
	if (lock == NULL) {
		// wake up other waiters before bailing out, so they re-test
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	// values must already be normalized when getting here
	lock->offset = flock->l_start;
	lock->length = flock->l_len;
	lock->shared = shared;

	list_add_item(&locking->locks, lock);
	put_advisory_locking(locking);

	return status;
}
1193 
1194 
1195 static status_t
1196 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1197 {
1198 	switch (flock->l_whence) {
1199 		case SEEK_SET:
1200 			break;
1201 		case SEEK_CUR:
1202 			flock->l_start += descriptor->pos;
1203 			break;
1204 		case SEEK_END:
1205 		{
1206 			struct vnode *vnode = descriptor->u.vnode;
1207 			struct stat stat;
1208 			status_t status;
1209 
1210 			if (FS_CALL(vnode, read_stat) == NULL)
1211 				return EOPNOTSUPP;
1212 
1213 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
1214 			if (status < B_OK)
1215 				return status;
1216 
1217 			flock->l_start += stat.st_size;
1218 			break;
1219 		}
1220 		default:
1221 			return B_BAD_VALUE;
1222 	}
1223 
1224 	if (flock->l_start < 0)
1225 		flock->l_start = 0;
1226 	if (flock->l_len == 0)
1227 		flock->l_len = OFF_MAX;
1228 
1229 	// don't let the offset and length overflow
1230 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1231 		flock->l_len = OFF_MAX - flock->l_start;
1232 
1233 	if (flock->l_len < 0) {
1234 		// a negative length reverses the region
1235 		flock->l_start += flock->l_len;
1236 		flock->l_len = -flock->l_len;
1237 	}
1238 
1239 	return B_OK;
1240 }
1241 
1242 
1243 /**	Disconnects all file descriptors that are associated with the
1244  *	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1245  *	\a mount object.
1246  *
1247  *	Note, after you've called this function, there might still be ongoing
1248  *	accesses - they won't be interrupted if they already happened before.
1249  *	However, any subsequent access will fail.
1250  *
1251  *	This is not a cheap function and should be used with care and rarely.
1252  *	TODO: there is currently no means to stop a blocking read/write!
1253  */
1254 
void
disconnect_mount_or_vnode_fds(struct fs_mount *mount,
	struct vnode *vnodeToDisconnect)
{
	// iterate over all teams and peek into their file descriptors
	int32 nextTeamID = 0;

	while (true) {
		struct io_context *context = NULL;
		sem_id contextMutex = -1;
		struct team *team = NULL;
		team_id lastTeamID;

		// the team list may only be walked with interrupts disabled
		// and the team spinlock held
		cpu_status state = disable_interrupts();
		GRAB_TEAM_LOCK();

		// NOTE(review): uses peek_next_thread_id() as the upper bound for
		// team IDs — confirm thread and team IDs share one ID namespace here
		lastTeamID = peek_next_thread_id();
		if (nextTeamID < lastTeamID) {
			// get next valid team
			while (nextTeamID < lastTeamID
				&& !(team = team_get_team_struct_locked(nextTeamID))) {
				nextTeamID++;
			}

			if (team) {
				// remember the context and its mutex semaphore; we cannot
				// acquire the mutex while holding the team spinlock
				context = (io_context *)team->io_context;
				contextMutex = context->io_mutex.sem;
				nextTeamID++;
			}
		}

		RELEASE_TEAM_LOCK();
		restore_interrupts(state);

		// no more teams left - we're done
		if (context == NULL)
			break;

		// we now have a context - since we couldn't lock it while having
		// safe access to the team structure, we now need to lock the mutex
		// manually

		if (acquire_sem(contextMutex) != B_OK) {
			// team seems to be gone, go over to the next team
			continue;
		}

		// the team cannot be deleted completely while we're owning its
		// io_context mutex, so we can safely play with it now

		// complete the manual mutex_lock() by recording ourselves as holder
		context->io_mutex.holder = thread_get_current_thread_id();

		if (context->cwd != NULL && context->cwd->mount == mount) {
			put_vnode(context->cwd);

			if (context->cwd == mount->root_vnode) {
				// redirect the current working directory to the covered vnode
				context->cwd = mount->covers_vnode;
				inc_vnode_ref_count(context->cwd);
			} else
				context->cwd = NULL;
		}

		for (uint32 i = 0; i < context->table_size; i++) {
			if (struct file_descriptor *descriptor = context->fds[i]) {
				inc_fd_ref_count(descriptor);

				// if this descriptor points at this mount, we
				// need to disconnect it to be able to unmount
				struct vnode *vnode = fd_vnode(descriptor);
				if (vnodeToDisconnect != NULL) {
					if (vnode == vnodeToDisconnect)
						disconnect_fd(descriptor);
				} else if (vnode != NULL && vnode->mount == mount
					|| vnode == NULL && descriptor->u.mount == mount)
					// (&& binds tighter than ||: this matches either a vnode
					// FD on this mount, or a mount FD for this mount)
					disconnect_fd(descriptor);

				put_fd(descriptor);
			}
		}

		mutex_unlock(&context->io_mutex);
	}
}
1338 
1339 
1340 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1341  *		   by.
1342  *
1343  *	Given an arbitrary vnode, the function checks, whether the node is covered
1344  *	by the root of a volume. If it is the function obtains a reference to the
1345  *	volume root node and returns it.
1346  *
1347  *	\param vnode The vnode in question.
1348  *	\return The volume root vnode the vnode cover is covered by, if it is
1349  *			indeed a mount point, or \c NULL otherwise.
1350  */
1351 
1352 static struct vnode *
1353 resolve_mount_point_to_volume_root(struct vnode *vnode)
1354 {
1355 	if (!vnode)
1356 		return NULL;
1357 
1358 	struct vnode *volumeRoot = NULL;
1359 
1360 	recursive_lock_lock(&sMountOpLock);
1361 	if (vnode->covered_by) {
1362 		volumeRoot = vnode->covered_by;
1363 		inc_vnode_ref_count(volumeRoot);
1364 	}
1365 	recursive_lock_unlock(&sMountOpLock);
1366 
1367 	return volumeRoot;
1368 }
1369 
1370 
1371 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1372  *		   by.
1373  *
1374  *	Given an arbitrary vnode (identified by mount and node ID), the function
1375  *	checks, whether the node is covered by the root of a volume. If it is the
1376  *	function returns the mount and node ID of the volume root node. Otherwise
1377  *	it simply returns the supplied mount and node ID.
1378  *
1379  *	In case of error (e.g. the supplied node could not be found) the variables
1380  *	for storing the resolved mount and node ID remain untouched and an error
1381  *	code is returned.
1382  *
1383  *	\param mountID The mount ID of the vnode in question.
1384  *	\param nodeID The node ID of the vnode in question.
1385  *	\param resolvedMountID Pointer to storage for the resolved mount ID.
1386  *	\param resolvedNodeID Pointer to storage for the resolved node ID.
1387  *	\return
1388  *	- \c B_OK, if everything went fine,
1389  *	- another error code, if something went wrong.
1390  */
1391 
1392 status_t
1393 resolve_mount_point_to_volume_root(mount_id mountID, vnode_id nodeID,
1394 	mount_id *resolvedMountID, vnode_id *resolvedNodeID)
1395 {
1396 	// get the node
1397 	struct vnode *node;
1398 	status_t error = get_vnode(mountID, nodeID, &node, false);
1399 	if (error != B_OK)
1400 		return error;
1401 
1402 	// resolve the node
1403 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1404 	if (resolvedNode) {
1405 		put_vnode(node);
1406 		node = resolvedNode;
1407 	}
1408 
1409 	// set the return values
1410 	*resolvedMountID = node->device;
1411 	*resolvedNodeID = node->id;
1412 
1413 	put_vnode(node);
1414 
1415 	return B_OK;
1416 }
1417 
1418 
1419 /**	\brief Resolves a volume root vnode to the underlying mount point vnode.
1420  *
1421  *	Given an arbitrary vnode, the function checks, whether the node is the
1422  *	root of a volume. If it is (and if it is not "/"), the function obtains
1423  *	a reference to the underlying mount point node and returns it.
1424  *
1425  *	\param vnode The vnode in question.
1426  *	\return The mount point vnode the vnode covers, if it is indeed a volume
1427  *			root and not "/", or \c NULL otherwise.
1428  */
1429 
1430 static struct vnode *
1431 resolve_volume_root_to_mount_point(struct vnode *vnode)
1432 {
1433 	if (!vnode)
1434 		return NULL;
1435 
1436 	struct vnode *mountPoint = NULL;
1437 
1438 	recursive_lock_lock(&sMountOpLock);
1439 	struct fs_mount *mount = vnode->mount;
1440 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1441 		mountPoint = mount->covers_vnode;
1442 		inc_vnode_ref_count(mountPoint);
1443 	}
1444 	recursive_lock_unlock(&sMountOpLock);
1445 
1446 	return mountPoint;
1447 }
1448 
1449 
1450 /**	\brief Gets the directory path and leaf name for a given path.
1451  *
1452  *	The supplied \a path is transformed to refer to the directory part of
1453  *	the entry identified by the original path, and into the buffer \a filename
1454  *	the leaf name of the original entry is written.
1455  *	Neither the returned path nor the leaf name can be expected to be
1456  *	canonical.
1457  *
1458  *	\param path The path to be analyzed. Must be able to store at least one
1459  *		   additional character.
1460  *	\param filename The buffer into which the leaf name will be written.
1461  *		   Must be of size B_FILE_NAME_LENGTH at least.
1462  *	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1463  *		   name is longer than \c B_FILE_NAME_LENGTH.
1464  */
1465 
1466 static status_t
1467 get_dir_path_and_leaf(char *path, char *filename)
1468 {
1469 	char *p = strrchr(path, '/');
1470 		// '/' are not allowed in file names!
1471 
1472 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1473 
1474 	if (!p) {
1475 		// this path is single segment with no '/' in it
1476 		// ex. "foo"
1477 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1478 			return B_NAME_TOO_LONG;
1479 		strcpy(path, ".");
1480 	} else {
1481 		p++;
1482 		if (*p == '\0') {
1483 			// special case: the path ends in '/'
1484 			strcpy(filename, ".");
1485 		} else {
1486 			// normal leaf: replace the leaf portion of the path with a '.'
1487 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1488 				>= B_FILE_NAME_LENGTH) {
1489 				return B_NAME_TOO_LONG;
1490 			}
1491 		}
1492 		p[0] = '.';
1493 		p[1] = '\0';
1494 	}
1495 	return B_OK;
1496 }
1497 
1498 
1499 static status_t
1500 entry_ref_to_vnode(mount_id mountID, vnode_id directoryID, const char *name, struct vnode **_vnode)
1501 {
1502 	char clonedName[B_FILE_NAME_LENGTH + 1];
1503 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1504 		return B_NAME_TOO_LONG;
1505 
1506 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1507 	struct vnode *directory;
1508 
1509 	status_t status = get_vnode(mountID, directoryID, &directory, false);
1510 	if (status < 0)
1511 		return status;
1512 
1513 	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL, NULL);
1514 }
1515 
1516 
1517 /**	Returns the vnode for the relative path starting at the specified \a vnode.
1518  *	\a path must not be NULL.
1519  *	If it returns successfully, \a path contains the name of the last path
1520  *	component.
1521  *	Note, this reduces the ref_count of the starting \a vnode, no matter if
1522  *	it is successful or not!
1523  */
1524 
static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, vnode_id *_parentID, int *_type)
{
	// Walks \a path component by component starting at \a vnode, following
	// mount points and (optionally) symlinks; \a count tracks the symlink
	// recursion depth against B_MAX_SYMLINKS. Consumes the caller's
	// reference to \a vnode in every case.
	status_t status = 0;
	vnode_id lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL) {
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	while (true) {
		struct vnode *nextVnode;
		vnode_id vnodeID;
		char *nextPath;

		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			// terminate the current component, then skip the run of slashes
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying filesystem
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the filesystem to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
		// from inside the filesystem, thus the vnode would have to be in the list and it's
		// ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		// (the leaf component is only traversed when traverseLeafLink is set)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			TRACE(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			if (count + 1 > B_MAX_SYMLINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			status = FS_CALL(nextVnode, read_link)(nextVnode->mount->cookie,
				nextVnode->private_node, buffer, &bufferSize);
			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				// common error exit: drop both references we still hold
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);
			}
			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the ref_count
				// of the vnode, no matter if we succeeded or not

			// resolve the remainder of the link target recursively
			status = vnode_path_to_vnode(vnode, path, traverseLeafLink, count + 1,
				&nextVnode, &lastParentID, _type);

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}
1688 
1689 
1690 static status_t
1691 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1692 	vnode_id *_parentID, bool kernel)
1693 {
1694 	struct vnode *start = NULL;
1695 
1696 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1697 
1698 	if (!path)
1699 		return B_BAD_VALUE;
1700 
1701 	// figure out if we need to start at root or at cwd
1702 	if (*path == '/') {
1703 		if (sRoot == NULL) {
1704 			// we're a bit early, aren't we?
1705 			return B_ERROR;
1706 		}
1707 
1708 		while (*++path == '/')
1709 			;
1710 		start = sRoot;
1711 		inc_vnode_ref_count(start);
1712 	} else {
1713 		struct io_context *context = get_current_io_context(kernel);
1714 
1715 		mutex_lock(&context->io_mutex);
1716 		start = context->cwd;
1717 		if (start != NULL)
1718 			inc_vnode_ref_count(start);
1719 		mutex_unlock(&context->io_mutex);
1720 
1721 		if (start == NULL)
1722 			return B_ERROR;
1723 	}
1724 
1725 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1726 }
1727 
1728 
1729 /** Returns the vnode in the next to last segment of the path, and returns
1730  *	the last portion in filename.
1731  *	The path buffer must be able to store at least one additional character.
1732  */
1733 
1734 static status_t
1735 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1736 {
1737 	status_t status = get_dir_path_and_leaf(path, filename);
1738 	if (status != B_OK)
1739 		return status;
1740 
1741 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1742 }
1743 
1744 
1745 /**	\brief Retrieves the directory vnode and the leaf name of an entry referred
1746  *		   to by a FD + path pair.
1747  *
1748  *	\a path must be given in either case. \a fd might be omitted, in which
1749  *	case \a path is either an absolute path or one relative to the current
1750  *	directory. If both a supplied and \a path is relative it is reckoned off
1751  *	of the directory referred to by \a fd. If \a path is absolute \a fd is
1752  *	ignored.
1753  *
1754  *	The caller has the responsibility to call put_vnode() on the returned
1755  *	directory vnode.
1756  *
1757  *	\param fd The FD. May be < 0.
1758  *	\param path The absolute or relative path. Must not be \c NULL. The buffer
1759  *	       is modified by this function. It must have at least room for a
1760  *	       string one character longer than the path it contains.
1761  *	\param _vnode A pointer to a variable the directory vnode shall be written
1762  *		   into.
1763  *	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1764  *		   the leaf name of the specified entry will be written.
1765  *	\param kernel \c true, if invoked from inside the kernel, \c false if
1766  *		   invoked from userland.
1767  *	\return \c B_OK, if everything went fine, another error code otherwise.
1768  */
1769 
1770 static status_t
1771 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1772 	char *filename, bool kernel)
1773 {
1774 	if (!path)
1775 		return B_BAD_VALUE;
1776 	if (fd < 0)
1777 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1778 
1779 	status_t status = get_dir_path_and_leaf(path, filename);
1780 	if (status != B_OK)
1781 		return status;
1782 
1783 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1784 }
1785 
1786 
1787 static status_t
1788 get_vnode_name(struct vnode *vnode, struct vnode *parent,
1789 	char *name, size_t nameSize)
1790 {
1791 	VNodePutter vnodePutter;
1792 
1793 	// See if vnode is the root of a mount and move to the covered
1794 	// vnode so we get the underlying file system
1795 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
1796 		vnode = vnode->mount->covers_vnode;
1797 		inc_vnode_ref_count(vnode);
1798 		vnodePutter.SetTo(vnode);
1799 	}
1800 
1801 	if (FS_CALL(vnode, get_vnode_name)) {
1802 		// The FS supports getting the name of a vnode.
1803 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
1804 			vnode->private_node, name, nameSize);
1805 	}
1806 
1807 	// The FS doesn't support getting the name of a vnode. So we search the
1808 	// parent directory for the vnode, if the caller let us.
1809 
1810 	if (parent == NULL)
1811 		return EOPNOTSUPP;
1812 
1813 	fs_cookie cookie;
1814 
1815 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
1816 		parent->private_node, &cookie);
1817 	if (status >= B_OK) {
1818 		char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1819 		struct dirent *dirent = (struct dirent *)buffer;
1820 		while (true) {
1821 			uint32 num = 1;
1822 			status = dir_read(parent, cookie, dirent, sizeof(buffer), &num);
1823 			if (status < B_OK)
1824 				break;
1825 
1826 			if (vnode->id == dirent->d_ino) {
1827 				// found correct entry!
1828 				if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
1829 					status = B_BUFFER_OVERFLOW;
1830 				break;
1831 			}
1832 		}
1833 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
1834 	}
1835 	return status;
1836 }
1837 
1838 
1839 /**	Gets the full path to a given directory vnode.
1840  *	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
1841  *	file system doesn't support this call, it will fall back to iterating
1842  *	through the parent directory to get the name of the child.
1843  *
1844  *	To protect against circular loops, it supports a maximum tree depth
1845  *	of 256 levels.
1846  *
1847  *	Note that the path may not be correct the time this function returns!
1848  *	It doesn't use any locking to prevent returning the correct path, as
1849  *	paths aren't safe anyway: the path to a file can change at any time.
1850  *
1851  *	It might be a good idea, though, to check if the returned path exists
1852  *	in the calling function (it's not done here because of efficiency)
1853  */
1854 
static status_t
dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
{
	// Builds the absolute path of directory \a vnode into \a buffer by
	// walking ".." upwards and prepending each component's name, until the
	// root ("/") is reached. See the comment block above for the caveats
	// (no locking, bounded depth of 256).
	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));

	if (vnode == NULL || buffer == NULL)
		return B_BAD_VALUE;

	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char *path = pathBuffer.LockBuffer();
	int32 insert = pathBuffer.BufferSize();
		// the path is assembled back-to-front, "insert" is the write cursor
	int32 maxLevel = 256;
	int32 length;
	status_t status;

	// we don't use get_vnode() here because this call is more
	// efficient and does all we need from get_vnode()
	inc_vnode_ref_count(vnode);

	// resolve a volume root to its mount point
	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
	if (mountPoint) {
		put_vnode(vnode);
		vnode = mountPoint;
	}

	path[--insert] = '\0';

	while (true) {
		// the name buffer is also used for fs_read_dir()
		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		char *name = &((struct dirent *)nameBuffer)->d_name[0];
		struct vnode *parentVnode;
		vnode_id parentID, id;
		int type;

		// lookup the parent vnode
		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
			&parentID, &type);
		if (status < B_OK)
			goto out;

		// the lookup must have put the parent into the global vnode table
		mutex_lock(&sVnodeMutex);
		parentVnode = lookup_vnode(vnode->device, parentID);
		mutex_unlock(&sVnodeMutex);

		if (parentVnode == NULL) {
			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, parentID);
			status = B_ENTRY_NOT_FOUND;
			goto out;
		}

		// resolve a volume root to its mount point
		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
		if (mountPoint) {
			put_vnode(parentVnode);
			parentVnode = mountPoint;
			parentID = parentVnode->id;
		}

		// a directory that is its own parent is the root of the tree
		bool hitRoot = (parentVnode == vnode);

		// Does the file system support getting the name of a vnode?
		// If so, get it here...
		if (status == B_OK && FS_CALL(vnode, get_vnode_name)) {
			status = FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie, vnode->private_node,
				name, B_FILE_NAME_LENGTH);
		}

		// ... if not, find it out later (by iterating through
		// the parent directory, searching for the id)
		id = vnode->id;

		// release the current vnode, we only need its parent from now on
		put_vnode(vnode);
		vnode = parentVnode;

		if (status < B_OK)
			goto out;

		// ToDo: add an explicit check for loops in about 10 levels to do
		// real loop detection

		// don't go deeper as 'maxLevel' to prevent circular loops
		if (maxLevel-- < 0) {
			status = ELOOP;
			goto out;
		}

		if (hitRoot) {
			// we have reached "/", which means we have constructed the full
			// path
			break;
		}

		if (!FS_CALL(vnode, get_vnode_name)) {
			// If we haven't got the vnode's name yet, we have to search for it
			// in the parent directory now
			fs_cookie cookie;

			status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node,
				&cookie);
			if (status >= B_OK) {
				// dir_read() writes the entry name into nameBuffer's
				// d_name field, which "name" already points at
				struct dirent *dirent = (struct dirent *)nameBuffer;
				while (true) {
					uint32 num = 1;
					status = dir_read(vnode, cookie, dirent, sizeof(nameBuffer),
						&num);

					if (status < B_OK)
						break;

					if (id == dirent->d_ino)
						// found correct entry!
						break;
				}
				FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
			}

			if (status < B_OK)
				goto out;
		}

		// add the name infront of the current path
		name[B_FILE_NAME_LENGTH - 1] = '\0';
		length = strlen(name);
		insert -= length;
		if (insert <= 0) {
			status = ENOBUFS;
			goto out;
		}
		memcpy(path + insert, name, length);
		path[--insert] = '/';
	}

	// the root dir will result in an empty path: fix it
	if (path[insert] == '\0')
		path[--insert] = '/';

	TRACE(("  path is: %s\n", path + insert));

	// copy the path to the output buffer
	length = pathBuffer.BufferSize() - insert;
	if (length <= (int)bufferSize)
		memcpy(buffer, path + insert, length);
	else
		status = ENOBUFS;

out:
	put_vnode(vnode);
	return status;
}
2012 
2013 
2014 /**	Checks the length of every path component, and adds a '.'
2015  *	if the path ends in a slash.
2016  *	The given path buffer must be able to store at least one
2017  *	additional character.
2018  */
2019 
2020 static status_t
2021 check_path(char *to)
2022 {
2023 	int32 length = 0;
2024 
2025 	// check length of every path component
2026 
2027 	while (*to) {
2028 		char *begin;
2029 		if (*to == '/')
2030 			to++, length++;
2031 
2032 		begin = to;
2033 		while (*to != '/' && *to)
2034 			to++, length++;
2035 
2036 		if (to - begin > B_FILE_NAME_LENGTH)
2037 			return B_NAME_TOO_LONG;
2038 	}
2039 
2040 	if (length == 0)
2041 		return B_ENTRY_NOT_FOUND;
2042 
2043 	// complete path if there is a slash at the end
2044 
2045 	if (*(to - 1) == '/') {
2046 		if (length > B_PATH_NAME_LENGTH - 2)
2047 			return B_NAME_TOO_LONG;
2048 
2049 		to[0] = '.';
2050 		to[1] = '\0';
2051 	}
2052 
2053 	return B_OK;
2054 }
2055 
2056 
2057 static struct file_descriptor *
2058 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2059 {
2060 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2061 	if (descriptor == NULL)
2062 		return NULL;
2063 
2064 	if (fd_vnode(descriptor) == NULL) {
2065 		put_fd(descriptor);
2066 		return NULL;
2067 	}
2068 
2069 	// ToDo: when we can close a file descriptor at any point, investigate
2070 	//	if this is still valid to do (accessing the vnode without ref_count
2071 	//	or locking)
2072 	*_vnode = descriptor->u.vnode;
2073 	return descriptor;
2074 }
2075 
2076 
2077 static struct vnode *
2078 get_vnode_from_fd(int fd, bool kernel)
2079 {
2080 	struct file_descriptor *descriptor;
2081 	struct vnode *vnode;
2082 
2083 	descriptor = get_fd(get_current_io_context(kernel), fd);
2084 	if (descriptor == NULL)
2085 		return NULL;
2086 
2087 	vnode = fd_vnode(descriptor);
2088 	if (vnode != NULL)
2089 		inc_vnode_ref_count(vnode);
2090 
2091 	put_fd(descriptor);
2092 	return vnode;
2093 }
2094 
2095 
2096 /**	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2097  *	only the path will be considered. In this case, the \a path must not be
2098  *	NULL.
2099  *	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2100  *	and should be NULL for files.
2101  */
2102 
static status_t
fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
	struct vnode **_vnode, vnode_id *_parentID, bool kernel)
{
	// at least one of FD and path must be supplied
	if (fd < 0 && !path)
		return B_BAD_VALUE;

	if (fd < 0 || (path != NULL && path[0] == '/')) {
		// no FD or absolute path
		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
	}

	// FD only, or FD + relative path
	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
	if (!vnode)
		return B_FILE_ERROR;

	if (path != NULL) {
		// vnode_path_to_vnode() takes over the reference to the
		// starting vnode (cf. the comment in vfs_get_fs_node_from_path())
		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
			_vnode, _parentID, NULL);
	}

	// there is no relative path to take into account

	*_vnode = vnode;
	if (_parentID)
		*_parentID = -1;

	return B_OK;
}
2133 
2134 
/**	Allocates a file descriptor of the given type for the vnode or mount,
 *	installs it in the current (kernel or team) I/O context, and returns
 *	the new FD number, or a negative error code.
 *	Ownership of \a cookie passes to the descriptor.
 */

static int
get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
	fs_cookie cookie, int openMode, bool kernel)
{
	struct file_descriptor *descriptor;
	int fd;

	// if the vnode is locked, we don't allow creating a new file descriptor for it
	if (vnode && vnode->mandatory_locked_by != NULL)
		return B_BUSY;

	descriptor = alloc_fd();
	if (!descriptor)
		return B_NO_MEMORY;

	// the descriptor refers either to a vnode, or - for index and query
	// FDs - directly to the mount
	if (vnode)
		descriptor->u.vnode = vnode;
	else
		descriptor->u.mount = mount;
	descriptor->cookie = cookie;

	switch (type) {
		// vnode types
		case FDTYPE_FILE:
			descriptor->ops = &sFileOps;
			break;
		case FDTYPE_DIR:
			descriptor->ops = &sDirectoryOps;
			break;
		case FDTYPE_ATTR:
			descriptor->ops = &sAttributeOps;
			break;
		case FDTYPE_ATTR_DIR:
			descriptor->ops = &sAttributeDirectoryOps;
			break;

		// mount types
		case FDTYPE_INDEX_DIR:
			descriptor->ops = &sIndexDirectoryOps;
			break;
		case FDTYPE_QUERY:
			descriptor->ops = &sQueryOps;
			break;

		default:
			panic("get_new_fd() called with unknown type %d\n", type);
			break;
	}
	descriptor->type = type;
	descriptor->open_mode = openMode;

	fd = new_fd(get_current_io_context(kernel), descriptor);
	if (fd < 0) {
		// the descriptor was never published in the FD table, so it can
		// simply be freed
		free(descriptor);
		return B_NO_MORE_FDS;
	}

	return fd;
}
2194 
2195 #ifdef ADD_DEBUGGER_COMMANDS
2196 
2197 
2198 static void
2199 _dump_advisory_locking(advisory_locking *locking)
2200 {
2201 	if (locking == NULL)
2202 		return;
2203 
2204 	kprintf("   lock:        %ld", locking->lock);
2205 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2206 
2207 	struct advisory_lock *lock = NULL;
2208 	int32 index = 0;
2209 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
2210 		kprintf("   [%2ld] team:   %ld\n", index, lock->team);
2211 		kprintf("        offset: %Ld\n", lock->offset);
2212 		kprintf("        length: %Ld\n", lock->length);
2213 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2214 	}
2215 }
2216 
2217 
/**	KDL helper: prints the interesting fields of an fs_mount structure.
 *	Must only be called from within the kernel debugger.
 */

static void
_dump_mount(struct fs_mount *mount)
{
	kprintf("MOUNT: %p\n", mount);
	kprintf(" id:            %ld\n", mount->id);
	kprintf(" device_name:   %s\n", mount->device_name);
	kprintf(" fs_name:       %s\n", mount->fs_name);
	kprintf(" cookie:        %p\n", mount->cookie);
	kprintf(" root_vnode:    %p\n", mount->root_vnode);
	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
	kprintf(" partition:     %p\n", mount->partition);
	kprintf(" lock:          %ld\n", mount->rlock.sem);
	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
		mount->owns_file_device ? " owns_file_device" : "");
}
2233 
2234 
/**	KDL helper: prints the interesting fields of a vnode, including its
 *	advisory locks, if any. Must only be called from within the kernel
 *	debugger.
 */

static void
_dump_vnode(struct vnode *vnode)
{
	kprintf("VNODE: %p\n", vnode);
	kprintf(" device:        %ld\n", vnode->device);
	kprintf(" id:            %Ld\n", vnode->id);
	kprintf(" ref_count:     %ld\n", vnode->ref_count);
	kprintf(" private_node:  %p\n", vnode->private_node);
	kprintf(" mount:         %p\n", vnode->mount);
	kprintf(" covered_by:    %p\n", vnode->covered_by);
	kprintf(" cache_ref:     %p\n", vnode->cache);
	// flags: r = marked for removal, b = busy, u = not yet published
	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);

	_dump_advisory_locking(vnode->advisory_locking);
}
2252 
2253 
/**	KDL command handler: "mount <id/address>".
 *	The argument is either a mount ID, or - if given as 0x-prefixed hex -
 *	the kernel address of an fs_mount structure.
 */

static int
dump_mount(int argc, char **argv)
{
	if (argc != 2) {
		kprintf("usage: mount [id/address]\n");
		return 0;
	}

	struct fs_mount *mount = NULL;

	// if the argument looks like a hex number, treat it as such
	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
		mount = (fs_mount *)strtoul(argv[1], NULL, 16);
		// reject addresses that cannot be kernel pointers
		if (IS_USER_ADDRESS(mount)) {
			kprintf("invalid fs_mount address\n");
			return 0;
		}
	} else {
		mount_id id = atoll(argv[1]);
		mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
		if (mount == NULL) {
			kprintf("fs_mount not found\n");
			return 0;
		}
	}

	_dump_mount(mount);
	return 0;
}
2283 
2284 
/**	KDL command handler: "mounts" - lists all entries of the global
 *	mounts table.
 */

static int
dump_mounts(int argc, char **argv)
{
	struct hash_iterator iterator;
	struct fs_mount *mount;

	kprintf("address     id root       covers     fs_name\n");

	hash_open(sMountsTable, &iterator);
	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
		kprintf("%p%4ld %p %p %s\n", mount, mount->id, mount->root_vnode,
			mount->covers_vnode, mount->fs_name);
	}

	hash_close(sMountsTable, &iterator, false);
	return 0;
}
2302 
2303 
2304 static int
2305 dump_vnode(int argc, char **argv)
2306 {
2307 	if (argc < 2) {
2308 		kprintf("usage: vnode [id/device id/address]\n");
2309 		return 0;
2310 	}
2311 
2312 	struct vnode *vnode = NULL;
2313 
2314 	// if the argument looks like a hex number, treat it as such
2315 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2316 		vnode = (struct vnode *)strtoul(argv[1], NULL, 16);
2317 		if (IS_USER_ADDRESS(vnode)) {
2318 			kprintf("invalid vnode address\n");
2319 			return 0;
2320 		}
2321 		_dump_vnode(vnode);
2322 		return 0;
2323 	}
2324 
2325 	struct hash_iterator iterator;
2326 	mount_id device = -1;
2327 	vnode_id id;
2328 	if (argc > 2) {
2329 		device = atoi(argv[1]);
2330 		id = atoll(argv[2]);
2331 	} else
2332 		id = atoll(argv[1]);
2333 
2334 	hash_open(sVnodeTable, &iterator);
2335 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2336 		if (vnode->id != id || device != -1 && vnode->device != device)
2337 			continue;
2338 
2339 		_dump_vnode(vnode);
2340 	}
2341 
2342 	hash_close(sVnodeTable, &iterator, false);
2343 	return 0;
2344 }
2345 
2346 
/**	KDL command handler: "vnodes [device]" - lists all vnodes in the
 *	global vnode table, optionally restricted to the given device
 *	(mount) ID.
 */

static int
dump_vnodes(int argc, char **argv)
{
	// restrict dumped nodes to a certain device if requested
	mount_id device = -1;
	if (argc > 1)
		device = atoi(argv[1]);

	struct hash_iterator iterator;
	struct vnode *vnode;

	kprintf("address    dev     inode  ref cache      locking    flags\n");

	hash_open(sVnodeTable, &iterator);
	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
		if (device != -1 && vnode->device != device)
			continue;

		// flags: r = marked for removal, b = busy, u = not yet published
		kprintf("%p%4ld%10Ld%5ld %p %p %s%s%s\n", vnode, vnode->device, vnode->id,
			vnode->ref_count, vnode->cache, vnode->advisory_locking,
			vnode->remove ? "r" : "-", vnode->busy ? "b" : "-",
			vnode->unpublished ? "u" : "-");
	}

	hash_close(sVnodeTable, &iterator, false);
	return 0;
}
2374 
2375 
/**	KDL command handler: "vnode_caches [device]" - lists all vnodes that
 *	have a VM cache attached, along with the cache's virtual size (in
 *	pages) and the number of pages it currently holds.
 */

static int
dump_vnode_caches(int argc, char **argv)
{
	struct hash_iterator iterator;
	struct vnode *vnode;

	// restrict dumped nodes to a certain device if requested
	mount_id device = -1;
	if (argc > 1)
		device = atoi(argv[1]);

	kprintf("address    dev     inode cache          size   pages\n");

	hash_open(sVnodeTable, &iterator);
	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
		if (vnode->cache == NULL)
			continue;
		if (device != -1 && vnode->device != device)
			continue;

		// count pages in cache
		// (vnode->cache is a vm_cache_ref; the vm_cache hangs off ->cache)
		size_t numPages = 0;
		for (struct vm_page *page = vnode->cache->cache->page_list;
				page != NULL; page = page->cache_next) {
			numPages++;
		}

		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id, vnode->cache,
			(vnode->cache->cache->virtual_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE, numPages);
	}

	hash_close(sVnodeTable, &iterator, false);
	return 0;
}
2410 
2411 
/**	KDL command handler: "io_context [team id/address]".
 *	Without an argument, the I/O context of the current thread's team is
 *	dumped; otherwise the argument is interpreted as a team ID or, if it
 *	is a kernel address, directly as an io_context pointer.
 */

int
dump_io_context(int argc, char **argv)
{
	if (argc > 2) {
		kprintf("usage: io_context [team id/address]\n");
		return 0;
	}

	struct io_context *context = NULL;

	if (argc > 1) {
		uint32 num = strtoul(argv[1], NULL, 0);
		if (IS_KERNEL_ADDRESS(num))
			context = (struct io_context *)num;
		else {
			// NOTE(review): team_get_team_struct_locked() is called without
			// holding the team lock - presumably acceptable inside KDL only;
			// confirm
			struct team *team = team_get_team_struct_locked(num);
			if (team == NULL) {
				kprintf("could not find team with ID %ld\n", num);
				return 0;
			}
			context = (struct io_context *)team->io_context;
		}
	} else
		context = get_current_io_context(true);

	kprintf("I/O CONTEXT: %p\n", context);
	kprintf(" cwd vnode:\t%p\n", context->cwd);
	kprintf(" used fds:\t%lu\n", context->num_used_fds);
	kprintf(" max fds:\t%lu\n", context->table_size);

	if (context->num_used_fds)
		kprintf("   no. type     ops ref open mode        pos cookie\n");

	for (uint32 i = 0; i < context->table_size; i++) {
		struct file_descriptor *fd = context->fds[i];
		if (fd == NULL)
			continue;

		// index/query descriptors point at a mount, all others at a vnode
		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
			fd->u.vnode);
	}

	kprintf(" used monitors:\t%lu\n", context->num_monitors);
	kprintf(" max monitors:\t%lu\n", context->max_monitors);

	return 0;
}
2461 
2462 
2463 int
2464 dump_vnode_usage(int argc, char **argv)
2465 {
2466 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes, kMaxUnusedVnodes);
2467 
2468 	struct hash_iterator iterator;
2469 	hash_open(sVnodeTable, &iterator);
2470 
2471 	uint32 count = 0;
2472 	struct vnode *vnode;
2473 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2474 		count++;
2475 	}
2476 
2477 	hash_close(sVnodeTable, &iterator, false);
2478 
2479 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2480 	return 0;
2481 }
2482 
2483 #endif	// ADD_DEBUGGER_COMMANDS
2484 
2485 
2486 //	#pragma mark - public VFS API
2487 
2488 
/**	Part of the public FS API: creates the VFS-side vnode for a node the
 *	file system has just created. The vnode starts out busy and
 *	unpublished - it only becomes generally accessible after a subsequent
 *	publish_vnode().
 *	Fails with B_BAD_VALUE if \a privateNode is NULL; panics if a vnode
 *	with the same (mountID, vnodeID) pair already exists.
 */

extern "C" status_t
new_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
{
	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
		mountID, vnodeID, privateNode));

	if (privateNode == NULL)
		return B_BAD_VALUE;

	mutex_lock(&sVnodeMutex);

	// file system integrity check:
	// test if the vnode already exists and bail out if this is the case!

	// ToDo: the R5 implementation obviously checks for a different cookie
	//	and doesn't panic if they are equal

	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode != NULL)
		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);

	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
	if (status == B_OK) {
		vnode->private_node = privateNode;
		vnode->busy = true;
		vnode->unpublished = true;
	}

	TRACE(("returns: %s\n", strerror(status)));

	mutex_unlock(&sVnodeMutex);
	return status;
}
2522 
2523 
/**	Part of the public FS API: makes a vnode previously created with
 *	new_vnode() generally accessible by clearing its busy and unpublished
 *	flags. If no such vnode exists yet, it is created and published in
 *	one step (i.e. publish_vnode() without a preceding new_vnode()).
 *	Returns B_BAD_VALUE on any mismatch (wrong private node, vnode in an
 *	unexpected state, or NULL private node for a new vnode).
 */

extern "C" status_t
publish_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
{
	FUNCTION(("publish_vnode()\n"));

	mutex_lock(&sVnodeMutex);

	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	status_t status = B_OK;

	if (vnode != NULL && vnode->busy && vnode->unpublished
		&& vnode->private_node == privateNode) {
		// normal case: finish what new_vnode() started
		vnode->busy = false;
		vnode->unpublished = false;
	} else if (vnode == NULL && privateNode != NULL) {
		// create-and-publish in one step
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status == B_OK)
			vnode->private_node = privateNode;
	} else
		status = B_BAD_VALUE;

	TRACE(("returns: %s\n", strerror(status)));

	mutex_unlock(&sVnodeMutex);
	return status;
}
2550 
2551 
2552 extern "C" status_t
2553 get_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode *_fsNode)
2554 {
2555 	struct vnode *vnode;
2556 
2557 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2558 	if (status < B_OK)
2559 		return status;
2560 
2561 	*_fsNode = vnode->private_node;
2562 	return B_OK;
2563 }
2564 
2565 
/**	Part of the public FS API: releases one reference to the given node.
 *	NOTE(review): if the node cannot be found in the vnode table, B_OK is
 *	returned anyway, so callers cannot detect a stale ID here - confirm
 *	this is intended.
 */

extern "C" status_t
put_vnode(mount_id mountID, vnode_id vnodeID)
{
	struct vnode *vnode;

	mutex_lock(&sVnodeMutex);
	vnode = lookup_vnode(mountID, vnodeID);
	mutex_unlock(&sVnodeMutex);

	if (vnode)
		dec_vnode_ref_count(vnode, true);

	return B_OK;
}
2580 
2581 
/**	Part of the public FS API: marks the node for removal, so that it
 *	will be deleted when the last reference to it is released.
 *	Fails with B_BUSY if the vnode is covered by another file system
 *	(i.e. it is a mount point).
 *	If the vnode was never published, it is torn down right away.
 */

extern "C" status_t
remove_vnode(mount_id mountID, vnode_id vnodeID)
{
	struct vnode *vnode;
	bool remove = false;

	mutex_lock(&sVnodeMutex);

	vnode = lookup_vnode(mountID, vnodeID);
	if (vnode != NULL) {
		if (vnode->covered_by != NULL) {
			// this vnode is in use
			mutex_unlock(&sVnodeMutex);
			return B_BUSY;
		}

		vnode->remove = true;
		if (vnode->unpublished) {
			// prepare the vnode for deletion
			vnode->busy = true;
			remove = true;
		}
	}

	mutex_unlock(&sVnodeMutex);

	if (remove) {
		// if the vnode hasn't been published yet, we delete it here
		atomic_add(&vnode->ref_count, -1);
		free_vnode(vnode, true);
	}

	return B_OK;
}
2616 
2617 
2618 extern "C" status_t
2619 unremove_vnode(mount_id mountID, vnode_id vnodeID)
2620 {
2621 	struct vnode *vnode;
2622 
2623 	mutex_lock(&sVnodeMutex);
2624 
2625 	vnode = lookup_vnode(mountID, vnodeID);
2626 	if (vnode)
2627 		vnode->remove = false;
2628 
2629 	mutex_unlock(&sVnodeMutex);
2630 	return B_OK;
2631 }
2632 
2633 
2634 //	#pragma mark - private VFS API
2635 //	Functions the VFS exports for other parts of the kernel
2636 
2637 
2638 /** Acquires another reference to the vnode that has to be released
2639  *	by calling vfs_put_vnode().
2640  */
2641 
2642 void
2643 vfs_acquire_vnode(void *_vnode)
2644 {
2645 	inc_vnode_ref_count((struct vnode *)_vnode);
2646 }
2647 
2648 
2649 /** This is currently called from file_cache_create() only.
2650  *	It's probably a temporary solution as long as devfs requires that
2651  *	fs_read_pages()/fs_write_pages() are called with the standard
2652  *	open cookie and not with a device cookie.
2653  *	If that's done differently, remove this call; it has no other
2654  *	purpose.
2655  */
2656 
extern "C" status_t
vfs_get_cookie_from_fd(int fd, void **_cookie)
{
	struct file_descriptor *descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// NOTE(review): get_fd() acquired a reference to the descriptor that is
	// never released here (no put_fd()) - though releasing it could make the
	// returned cookie stale. Verify whether keeping the reference is
	// intentional for the devfs special case described above.
	*_cookie = descriptor->cookie;
	return B_OK;
}
2669 
2670 
2671 extern "C" int
2672 vfs_get_vnode_from_fd(int fd, bool kernel, void **vnode)
2673 {
2674 	*vnode = get_vnode_from_fd(fd, kernel);
2675 
2676 	if (*vnode == NULL)
2677 		return B_FILE_ERROR;
2678 
2679 	return B_NO_ERROR;
2680 }
2681 
2682 
2683 extern "C" status_t
2684 vfs_get_vnode_from_path(const char *path, bool kernel, void **_vnode)
2685 {
2686 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n", path, kernel));
2687 
2688 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2689 	if (pathBuffer.InitCheck() != B_OK)
2690 		return B_NO_MEMORY;
2691 
2692 	char *buffer = pathBuffer.LockBuffer();
2693 	strlcpy(buffer, path, pathBuffer.BufferSize());
2694 
2695 	struct vnode *vnode;
2696 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2697 	if (status < B_OK)
2698 		return status;
2699 
2700 	*_vnode = vnode;
2701 	return B_OK;
2702 }
2703 
2704 
2705 extern "C" status_t
2706 vfs_get_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2707 {
2708 	struct vnode *vnode;
2709 
2710 	status_t status = get_vnode(mountID, vnodeID, &vnode, false);
2711 	if (status < B_OK)
2712 		return status;
2713 
2714 	*_vnode = vnode;
2715 	return B_OK;
2716 }
2717 
2718 
2719 extern "C" status_t
2720 vfs_entry_ref_to_vnode(mount_id mountID, vnode_id directoryID,
2721 	const char *name, void **_vnode)
2722 {
2723 	return entry_ref_to_vnode(mountID, directoryID, name, (struct vnode **)_vnode);
2724 }
2725 
2726 
2727 extern "C" void
2728 vfs_vnode_to_node_ref(void *_vnode, mount_id *_mountID, vnode_id *_vnodeID)
2729 {
2730 	struct vnode *vnode = (struct vnode *)_vnode;
2731 
2732 	*_mountID = vnode->device;
2733 	*_vnodeID = vnode->id;
2734 }
2735 
2736 
2737 /**	Looks up a vnode with the given mount and vnode ID.
2738  *	Must only be used with "in-use" vnodes as it doesn't grab a reference
2739  *	to the node.
2740  *	It's currently only be used by file_cache_create().
2741  */
2742 
2743 extern "C" status_t
2744 vfs_lookup_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2745 {
2746 	mutex_lock(&sVnodeMutex);
2747 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2748 	mutex_unlock(&sVnodeMutex);
2749 
2750 	if (vnode == NULL)
2751 		return B_ERROR;
2752 
2753 	*_vnode = vnode;
2754 	return B_OK;
2755 }
2756 
2757 
/**	Resolves \a path - absolute, or relative to the root of the mount
 *	\a mountID - and returns the file system's private node in \a _node.
 *	Access is denied (B_BAD_VALUE) if the resolved node does not live on
 *	the given mount.
 *	On success the vnode reference obtained by the lookup is kept -
 *	presumably for the caller to release via put_vnode(); confirm with
 *	the callers.
 */

extern "C" status_t
vfs_get_fs_node_from_path(mount_id mountID, const char *path, bool kernel, void **_node)
{
	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
		mountID, path, kernel));

	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	fs_mount *mount;
	status_t status = get_mount(mountID, &mount);
	if (status < B_OK)
		return status;

	// the lookup functions modify the path, so work on a copy
	char *buffer = pathBuffer.LockBuffer();
	strlcpy(buffer, path, pathBuffer.BufferSize());

	struct vnode *vnode = mount->root_vnode;

	if (buffer[0] == '/')
		status = path_to_vnode(buffer, true, &vnode, NULL, true);
	else {
		inc_vnode_ref_count(vnode);
			// vnode_path_to_vnode() releases a reference to the starting vnode
		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
	}

	put_mount(mount);

	if (status < B_OK)
		return status;

	if (vnode->device != mountID) {
		// wrong mount ID - must not gain access on foreign file system nodes
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	*_node = vnode->private_node;
	return B_OK;
}
2800 
2801 
2802 /**	Finds the full path to the file that contains the module \a moduleName,
2803  *	puts it into \a pathBuffer, and returns B_OK for success.
2804  *	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
2805  *	\c B_ENTRY_NOT_FOUNT if no file could be found.
2806  *	\a pathBuffer is clobbered in any case and must not be relied on if this
2807  *	functions returns unsuccessfully.
2808  */
2809 
status_t
vfs_get_module_path(const char *basePath, const char *moduleName, char *pathBuffer,
	size_t bufferSize)
{
	struct vnode *dir, *file;
	status_t status;
	size_t length;
	char *path;

	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
		return B_BUFFER_OVERFLOW;

	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
	if (status < B_OK)
		return status;

	// the path buffer had been clobbered by the above call
	length = strlcpy(pathBuffer, basePath, bufferSize);
	// NOTE(review): an empty basePath would make length 0 and the next line
	// read pathBuffer[-1] - presumably path_to_vnode() already fails for an
	// empty path before we get here; confirm
	if (pathBuffer[length - 1] != '/')
		pathBuffer[length++] = '/';

	path = pathBuffer + length;
	bufferSize -= length;

	// walk the module name component by component, descending into
	// directories until a regular file - the module binary - is reached
	while (moduleName) {
		int type;

		char *nextPath = strchr(moduleName, '/');
		if (nextPath == NULL)
			length = strlen(moduleName);
		else {
			length = nextPath - moduleName;
			nextPath++;
		}

		if (length + 1 >= bufferSize) {
			status = B_BUFFER_OVERFLOW;
			goto err;
		}

		// append the current component to the path buffer
		memcpy(path, moduleName, length);
		path[length] = '\0';
		moduleName = nextPath;

		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
		if (status < B_OK) {
			// vnode_path_to_vnode() has already released the reference to dir
			return status;
		}

		if (S_ISDIR(type)) {
			// goto the next directory
			path[length] = '/';
			path[length + 1] = '\0';
			path += length + 1;
			bufferSize -= length + 1;

			dir = file;
		} else if (S_ISREG(type)) {
			// it's a file so it should be what we've searched for
			put_vnode(file);

			return B_OK;
		} else {
			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
			status = B_ERROR;
			dir = file;
			goto err;
		}
	}

	// if we got here, the moduleName just pointed to a directory, not to
	// a real module - what should we do in this case?
	status = B_ENTRY_NOT_FOUND;

err:
	put_vnode(dir);
	return status;
}
2889 
2890 
2891 /**	\brief Normalizes a given path.
2892  *
2893  *	The path must refer to an existing or non-existing entry in an existing
2894  *	directory, that is chopping off the leaf component the remaining path must
2895  *	refer to an existing directory.
2896  *
2897  *	The returned will be canonical in that it will be absolute, will not
2898  *	contain any "." or ".." components or duplicate occurrences of '/'s,
2899  *	and none of the directory components will by symbolic links.
2900  *
2901  *	Any two paths referring to the same entry, will result in the same
2902  *	normalized path (well, that is pretty much the definition of `normalized',
2903  *	isn't it :-).
2904  *
2905  *	\param path The path to be normalized.
2906  *	\param buffer The buffer into which the normalized path will be written.
2907  *	\param bufferSize The size of \a buffer.
2908  *	\param kernel \c true, if the IO context of the kernel shall be used,
2909  *		   otherwise that of the team this thread belongs to. Only relevant,
2910  *		   if the path is relative (to get the CWD).
2911  *	\return \c B_OK if everything went fine, another error code otherwise.
2912  */
2913 
status_t
vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
	bool kernel)
{
	if (!path || !buffer || bufferSize < 1)
		return B_BAD_VALUE;

	TRACE(("vfs_normalize_path(`%s')\n", path));

	// copy the supplied path into a mutable buffer, so it can be modified
	// (KPath allocates the storage)
	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
	if (mutablePathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char *mutablePath = mutablePathBuffer.LockBuffer();
	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the dir vnode and the leaf name
	struct vnode *dirNode;
	char leaf[B_FILE_NAME_LENGTH];
	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
		return error;
	}

	// if the leaf is "." or "..", we directly get the correct directory
	// vnode and ignore the leaf later
	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
	if (isDir)
		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
			strerror(error)));
		return error;
	}

	// get the directory path
	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
	put_vnode(dirNode);
	if (error < B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
		return error;
	}

	// append the leaf name
	if (!isDir) {
		// insert a directory separator only if this is not the file system root
		if ((strcmp(buffer, "/") != 0
			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
			return B_NAME_TOO_LONG;
		}
	}

	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
	return B_OK;
}
2973 
2974 
2975 extern "C" void
2976 vfs_put_vnode(void *_vnode)
2977 {
2978 	put_vnode((struct vnode *)_vnode);
2979 }
2980 
2981 
2982 extern "C" status_t
2983 vfs_get_cwd(mount_id *_mountID, vnode_id *_vnodeID)
2984 {
2985 	// Get current working directory from io context
2986 	struct io_context *context = get_current_io_context(false);
2987 	status_t status = B_OK;
2988 
2989 	mutex_lock(&context->io_mutex);
2990 
2991 	if (context->cwd != NULL) {
2992 		*_mountID = context->cwd->device;
2993 		*_vnodeID = context->cwd->id;
2994 	} else
2995 		status = B_ERROR;
2996 
2997 	mutex_unlock(&context->io_mutex);
2998 	return status;
2999 }
3000 
3001 
3002 extern "C" status_t
3003 vfs_disconnect_vnode(mount_id mountID, vnode_id vnodeID)
3004 {
3005 	struct vnode *vnode;
3006 
3007 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
3008 	if (status < B_OK)
3009 		return status;
3010 
3011 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3012 	return B_OK;
3013 }
3014 
3015 
/**	Low memory hook: frees unused vnodes according to the given low
 *	memory \a level by delegating to the vnode low memory handler.
 */

extern "C" void
vfs_free_unused_vnodes(int32 level)
{
	vnode_low_memory_handler(NULL, level);
}
3021 
3022 
/**	Returns whether the file system allows paging on the given node;
 *	\c false if the FS doesn't implement the can_page() hook at all.
 */

extern "C" bool
vfs_can_page(void *_vnode, void *cookie)
{
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));

	// FS_CALL(vnode, can_page) evaluates to the hook pointer here, so the
	// hook is only invoked if the file system actually provides it
	if (FS_CALL(vnode, can_page))
		return FS_CALL(vnode, can_page)(vnode->mount->cookie, vnode->private_node, cookie);

	return false;
}
3035 
3036 
/**	Reads the given I/O vectors from the node at position \a pos by
 *	calling the file system's read_pages() hook directly.
 *	\a fsReenter marks calls originating from within the file system
 *	itself - presumably to avoid recursive locking; confirm against the
 *	FS interface documentation.
 */

extern "C" status_t
vfs_read_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
	size_t *_numBytes, bool fsReenter)
{
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));

	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
		cookie, pos, vecs, count, _numBytes, fsReenter);
}
3048 
3049 
/**	Writes the given I/O vectors to the node at position \a pos by
 *	calling the file system's write_pages() hook directly.
 *	\a fsReenter marks calls originating from within the file system
 *	itself (see vfs_read_pages()).
 */

extern "C" status_t
vfs_write_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
	size_t *_numBytes, bool fsReenter)
{
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));

	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
		cookie, pos, vecs, count, _numBytes, fsReenter);
}
3061 
3062 
3063 /** Gets the vnode's vm_cache object. If it didn't have one, it will be
3064  *	created if \a allocate is \c true.
3065  *	In case it's successful, it will also grab a reference to the cache
3066  *	it returns (and therefore, one from the \a vnode in question as well).
3067  */
3068 
extern "C" status_t
vfs_get_vnode_cache(void *_vnode, vm_cache_ref **_cache, bool allocate)
{
	struct vnode *vnode = (struct vnode *)_vnode;

	// fast path: the node already has a cache - just reference it
	// NOTE(review): this check is done without holding sVnodeMutex; it is
	// only safe if vnode->cache is never reset once set - confirm
	if (vnode->cache != NULL) {
		vm_cache_acquire_ref(vnode->cache);
		*_cache = vnode->cache;
		return B_OK;
	}

	mutex_lock(&sVnodeMutex);

	status_t status = B_OK;

	// The cache could have been created in the meantime
	if (vnode->cache == NULL) {
		if (allocate) {
			// TODO: actually the vnode need to be busy already here, or
			//	else this won't work...
			bool wasBusy = vnode->busy;
			vnode->busy = true;
			mutex_unlock(&sVnodeMutex);

			// create the cache without holding the mutex
			status = vm_create_vnode_cache(vnode, &vnode->cache);

			mutex_lock(&sVnodeMutex);
			vnode->busy = wasBusy;
		} else
			status = B_BAD_VALUE;
	} else
		vm_cache_acquire_ref(vnode->cache);

	if (status == B_OK)
		*_cache = vnode->cache;

	mutex_unlock(&sVnodeMutex);
	return status;
}
3108 
3109 
/**	Queries the file system for the on-disk extents backing the given
 *	byte range of the node, via its get_file_map() hook. The extents are
 *	returned in \a vecs; \a _count is in/out: capacity in, used out.
 */

status_t
vfs_get_file_map(void *_vnode, off_t offset, size_t size, file_io_vec *vecs, size_t *_count)
{
	struct vnode *vnode = (struct vnode *)_vnode;

	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));

	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie, vnode->private_node, offset, size, vecs, _count);
}
3119 
3120 
3121 status_t
3122 vfs_stat_vnode(void *_vnode, struct stat *stat)
3123 {
3124 	struct vnode *vnode = (struct vnode *)_vnode;
3125 
3126 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3127 		vnode->private_node, stat);
3128 
3129 	// fill in the st_dev and st_ino fields
3130 	if (status == B_OK) {
3131 		stat->st_dev = vnode->device;
3132 		stat->st_ino = vnode->id;
3133 	}
3134 
3135 	return status;
3136 }
3137 
3138 
3139 status_t
3140 vfs_get_vnode_name(void *_vnode, char *name, size_t nameSize)
3141 {
3142 	return get_vnode_name((struct vnode *)_vnode, NULL, name, nameSize);
3143 }
3144 
3145 
3146 /**	If the given descriptor locked its vnode, that lock will be released.
3147  */
3148 
3149 void
3150 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3151 {
3152 	struct vnode *vnode = fd_vnode(descriptor);
3153 
3154 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3155 		vnode->mandatory_locked_by = NULL;
3156 }
3157 
3158 
3159 /**	Closes all file descriptors of the specified I/O context that
3160  *	don't have the O_CLOEXEC flag set.
3161  */
3162 
void
vfs_exec_io_context(void *_context)
{
	struct io_context *context = (struct io_context *)_context;
	uint32 i;

	for (i = 0; i < context->table_size; i++) {
		// the lock is taken and released per slot, so that closing the
		// descriptor (which may block) happens outside the context lock
		mutex_lock(&context->io_mutex);

		struct file_descriptor *descriptor = context->fds[i];
		bool remove = false;

		if (descriptor != NULL && fd_close_on_exec(context, i)) {
			// detach the descriptor from the table under the lock ...
			context->fds[i] = NULL;
			context->num_used_fds--;

			remove = true;
		}

		mutex_unlock(&context->io_mutex);

		if (remove) {
			// ... and close and release it afterwards
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}
}
3190 
3191 
3192 /** Sets up a new io_control structure, and inherits the properties
3193  *	of the parent io_control if it is given.
3194  */
3195 
void *
vfs_new_io_context(void *_parentContext)
{
	size_t tableSize;
	struct io_context *context;
	struct io_context *parentContext;

	context = (io_context *)malloc(sizeof(struct io_context));
	if (context == NULL)
		return NULL;

	memset(context, 0, sizeof(struct io_context));

	// a child context inherits the parent's FD table size
	parentContext = (struct io_context *)_parentContext;
	if (parentContext)
		tableSize = parentContext->table_size;
	else
		tableSize = DEFAULT_FD_TABLE_SIZE;

	// allocate space for FDs and their close-on-exec flag
	// NOTE(review): the close-on-exec bitmap gets tableSize / 8 bytes,
	// which rounds down for table sizes that aren't a multiple of 8 -
	// presumably all table sizes are; confirm
	context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize
		+ tableSize / 8);
	if (context->fds == NULL) {
		free(context);
		return NULL;
	}

	memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize
		+ tableSize / 8);
	// the bitmap lives directly behind the FD pointer array
	context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);

	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
		free(context->fds);
		free(context);
		return NULL;
	}

	// Copy all parent files which don't have the O_CLOEXEC flag set

	if (parentContext) {
		size_t i;

		mutex_lock(&parentContext->io_mutex);

		// inherit the parent's working directory
		context->cwd = parentContext->cwd;
		if (context->cwd)
			inc_vnode_ref_count(context->cwd);

		for (i = 0; i < tableSize; i++) {
			struct file_descriptor *descriptor = parentContext->fds[i];

			if (descriptor != NULL && !fd_close_on_exec(parentContext, i)) {
				// share the descriptor with the parent - both contexts
				// now hold a reference and an open count
				context->fds[i] = descriptor;
				context->num_used_fds++;
				atomic_add(&descriptor->ref_count, 1);
				atomic_add(&descriptor->open_count, 1);
			}
		}

		mutex_unlock(&parentContext->io_mutex);
	} else {
		// without a parent, start out in the file system root
		context->cwd = sRoot;

		if (context->cwd)
			inc_vnode_ref_count(context->cwd);
	}

	context->table_size = tableSize;

	list_init(&context->node_monitors);
	context->max_monitors = MAX_NODE_MONITORS;

	return context;
}
3270 
3271 
/**	Destroys the given I/O context: releases the current working
 *	directory, closes and releases every descriptor still in the FD
 *	table, and frees all associated resources.
 */

status_t
vfs_free_io_context(void *_ioContext)
{
	struct io_context *context = (struct io_context *)_ioContext;
	uint32 i;

	// give back our reference to the current working directory
	if (context->cwd)
		dec_vnode_ref_count(context->cwd, false);

	mutex_lock(&context->io_mutex);

	// close and release all descriptors that are still open
	for (i = 0; i < context->table_size; i++) {
		if (struct file_descriptor *descriptor = context->fds[i]) {
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}

	mutex_destroy(&context->io_mutex);

	remove_node_monitors(context);
	free(context->fds);
	free(context);

	return B_OK;
}
3298 
3299 
3300 static status_t
3301 vfs_resize_fd_table(struct io_context *context, const int newSize)
3302 {
3303 	void *fds;
3304 	int	status = B_OK;
3305 
3306 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3307 		return EINVAL;
3308 
3309 	mutex_lock(&context->io_mutex);
3310 
3311 	if ((size_t)newSize < context->table_size) {
3312 		// shrink the fd table
3313 		int i;
3314 
3315 		// Make sure none of the fds being dropped are in use
3316 		for(i = context->table_size; i-- > newSize;) {
3317 			if (context->fds[i]) {
3318 				status = EBUSY;
3319 				goto out;
3320 			}
3321 		}
3322 
3323 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3324 		if (fds == NULL) {
3325 			status = ENOMEM;
3326 			goto out;
3327 		}
3328 
3329 		memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize);
3330 	} else {
3331 		// enlarge the fd table
3332 
3333 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3334 		if (fds == NULL) {
3335 			status = ENOMEM;
3336 			goto out;
3337 		}
3338 
3339 		// copy the fd array, and zero the additional slots
3340 		memcpy(fds, context->fds, sizeof(void *) * context->table_size);
3341 		memset((char *)fds + (sizeof(void *) * context->table_size), 0,
3342 			sizeof(void *) * (newSize - context->table_size));
3343 	}
3344 
3345 	free(context->fds);
3346 	context->fds = (file_descriptor **)fds;
3347 	context->table_size = newSize;
3348 
3349 out:
3350 	mutex_unlock(&context->io_mutex);
3351 	return status;
3352 }
3353 
3354 
3355 int
3356 vfs_getrlimit(int resource, struct rlimit * rlp)
3357 {
3358 	if (!rlp)
3359 		return -1;
3360 
3361 	switch (resource) {
3362 		case RLIMIT_NOFILE:
3363 		{
3364 			struct io_context *ioctx = get_current_io_context(false);
3365 
3366 			mutex_lock(&ioctx->io_mutex);
3367 
3368 			rlp->rlim_cur = ioctx->table_size;
3369 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3370 
3371 			mutex_unlock(&ioctx->io_mutex);
3372 
3373 			return 0;
3374 		}
3375 
3376 		default:
3377 			return -1;
3378 	}
3379 }
3380 
3381 
3382 int
3383 vfs_setrlimit(int resource, const struct rlimit * rlp)
3384 {
3385 	if (!rlp)
3386 		return -1;
3387 
3388 	switch (resource) {
3389 		case RLIMIT_NOFILE:
3390 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3391 
3392 		default:
3393 			return -1;
3394 	}
3395 }
3396 
3397 
/**	Initializes the VFS: creates the global vnode and mount hash
 *	tables and their locks, initializes node monitoring and the block
 *	cache, registers the debugger commands and the low memory handler
 *	for flushing unused vnodes, and finally brings up the file cache.
 */

status_t
vfs_init(kernel_args *args)
{
	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
		&vnode_compare, &vnode_hash);
	if (sVnodeTable == NULL)
		panic("vfs_init: error creating vnode hash table\n");

	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));

	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
		&mount_compare, &mount_hash);
	if (sMountsTable == NULL)
		panic("vfs_init: error creating mounts hash table\n");

	node_monitor_init();

	// the root vnode is set when the root file system is mounted
	sRoot = NULL;

	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
		panic("vfs_init: error allocating file systems lock\n");

	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
		panic("vfs_init: error allocating mount op lock\n");

	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
		panic("vfs_init: error allocating mount lock\n");

	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
		panic("vfs_init: error allocating vnode lock\n");

	if (block_cache_init() != B_OK)
		return B_ERROR;

#ifdef ADD_DEBUGGER_COMMANDS
	// add some debugger commands
	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
#endif

	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);

	return file_cache_init();
}
3447 
3448 
3449 //	#pragma mark -
3450 //	The filetype-dependent implementations (fd_ops + open/create/rename/remove, ...)
3451 
3452 
3453 /** Calls fs_open() on the given vnode and returns a new
3454  *	file descriptor for it
3455  */
3456 
3457 static int
3458 create_vnode(struct vnode *directory, const char *name, int openMode, int perms, bool kernel)
3459 {
3460 	struct vnode *vnode;
3461 	fs_cookie cookie;
3462 	vnode_id newID;
3463 	int status;
3464 
3465 	if (FS_CALL(directory, create) == NULL)
3466 		return EROFS;
3467 
3468 	status = FS_CALL(directory, create)(directory->mount->cookie, directory->private_node, name, openMode, perms, &cookie, &newID);
3469 	if (status < B_OK)
3470 		return status;
3471 
3472 	mutex_lock(&sVnodeMutex);
3473 	vnode = lookup_vnode(directory->device, newID);
3474 	mutex_unlock(&sVnodeMutex);
3475 
3476 	if (vnode == NULL) {
3477 		dprintf("vfs: fs_create() returned success but there is no vnode!");
3478 		return EINVAL;
3479 	}
3480 
3481 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
3482 		return status;
3483 
3484 	// something went wrong, clean up
3485 
3486 	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3487 	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3488 	put_vnode(vnode);
3489 
3490 	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);
3491 
3492 	return status;
3493 }
3494 
3495 
3496 /** Calls fs_open() on the given vnode and returns a new
3497  *	file descriptor for it
3498  */
3499 
3500 static int
3501 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3502 {
3503 	fs_cookie cookie;
3504 	int status;
3505 
3506 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3507 	if (status < 0)
3508 		return status;
3509 
3510 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3511 	if (status < 0) {
3512 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3513 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3514 	}
3515 	return status;
3516 }
3517 
3518 
3519 /** Calls fs open_dir() on the given vnode and returns a new
3520  *	file descriptor for it
3521  */
3522 
3523 static int
3524 open_dir_vnode(struct vnode *vnode, bool kernel)
3525 {
3526 	fs_cookie cookie;
3527 	int status;
3528 
3529 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3530 	if (status < B_OK)
3531 		return status;
3532 
3533 	// file is opened, create a fd
3534 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3535 	if (status >= 0)
3536 		return status;
3537 
3538 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3539 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3540 
3541 	return status;
3542 }
3543 
3544 
3545 /** Calls fs open_attr_dir() on the given vnode and returns a new
3546  *	file descriptor for it.
3547  *	Used by attr_dir_open(), and attr_dir_open_fd().
3548  */
3549 
3550 static int
3551 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3552 {
3553 	fs_cookie cookie;
3554 	int status;
3555 
3556 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3557 		return EOPNOTSUPP;
3558 
3559 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3560 	if (status < 0)
3561 		return status;
3562 
3563 	// file is opened, create a fd
3564 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3565 	if (status >= 0)
3566 		return status;
3567 
3568 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3569 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3570 
3571 	return status;
3572 }
3573 
3574 
3575 static int
3576 file_create_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, int perms, bool kernel)
3577 {
3578 	struct vnode *directory;
3579 	int status;
3580 
3581 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
3582 
3583 	// get directory to put the new file in
3584 	status = get_vnode(mountID, directoryID, &directory, false);
3585 	if (status < B_OK)
3586 		return status;
3587 
3588 	status = create_vnode(directory, name, openMode, perms, kernel);
3589 	put_vnode(directory);
3590 
3591 	return status;
3592 }
3593 
3594 
3595 static int
3596 file_create(int fd, char *path, int openMode, int perms, bool kernel)
3597 {
3598 	char name[B_FILE_NAME_LENGTH];
3599 	struct vnode *directory;
3600 	int status;
3601 
3602 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
3603 
3604 	// get directory to put the new file in
3605 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3606 	if (status < 0)
3607 		return status;
3608 
3609 	status = create_vnode(directory, name, openMode, perms, kernel);
3610 
3611 	put_vnode(directory);
3612 	return status;
3613 }
3614 
3615 
3616 static int
3617 file_open_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, bool kernel)
3618 {
3619 	struct vnode *vnode;
3620 	int status;
3621 
3622 	if (name == NULL || *name == '\0')
3623 		return B_BAD_VALUE;
3624 
3625 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
3626 		mountID, directoryID, name, openMode));
3627 
3628 	// get the vnode matching the entry_ref
3629 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
3630 	if (status < B_OK)
3631 		return status;
3632 
3633 	status = open_vnode(vnode, openMode, kernel);
3634 	if (status < B_OK)
3635 		put_vnode(vnode);
3636 
3637 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
3638 	return status;
3639 }
3640 
3641 
3642 static int
3643 file_open(int fd, char *path, int openMode, bool kernel)
3644 {
3645 	int status = B_OK;
3646 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
3647 
3648 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
3649 		fd, path, openMode, kernel));
3650 
3651 	// get the vnode matching the vnode + path combination
3652 	struct vnode *vnode = NULL;
3653 	vnode_id parentID;
3654 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
3655 	if (status != B_OK)
3656 		return status;
3657 
3658 	// open the vnode
3659 	status = open_vnode(vnode, openMode, kernel);
3660 	// put only on error -- otherwise our reference was transferred to the FD
3661 	if (status < B_OK)
3662 		put_vnode(vnode);
3663 
3664 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
3665 		vnode->device, parentID, vnode->id, NULL);
3666 
3667 	return status;
3668 }
3669 
3670 
/**	Closes the given file descriptor's node: notifies the file cache,
 *	calls the FS close() hook (if the FS provides one), and, on
 *	success, drops all advisory locks held by this team on the node.
 */

static status_t
file_close(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;
	status_t status = B_OK;

	FUNCTION(("file_close(descriptor = %p)\n", descriptor));

	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
	// the close() hook is optional for a file system
	if (FS_CALL(vnode, close))
		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);

	if (status == B_OK) {
		// remove all outstanding locks for this team
		release_advisory_lock(vnode, NULL);
	}
	return status;
}
3689 
3690 
3691 static void
3692 file_free_fd(struct file_descriptor *descriptor)
3693 {
3694 	struct vnode *vnode = descriptor->u.vnode;
3695 
3696 	if (vnode != NULL) {
3697 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3698 		put_vnode(vnode);
3699 	}
3700 }
3701 
3702 
3703 static status_t
3704 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
3705 {
3706 	struct vnode *vnode = descriptor->u.vnode;
3707 
3708 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
3709 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3710 }
3711 
3712 
3713 static status_t
3714 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
3715 {
3716 	struct vnode *vnode = descriptor->u.vnode;
3717 
3718 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
3719 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3720 }
3721 
3722 
/**	Computes the descriptor's new position according to \a seekType
 *	(SEEK_SET/SEEK_CUR/SEEK_END), stores it, and returns it.
 *	Returns a negative error code on failure.
 */

static off_t
file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
{
	off_t offset;

	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
	// ToDo: seek should fail for pipes and FIFOs...

	switch (seekType) {
		case SEEK_SET:
			offset = 0;
			break;
		case SEEK_CUR:
			offset = descriptor->pos;
			break;
		case SEEK_END:
		{
			// SEEK_END needs the current file size from the FS
			struct vnode *vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (FS_CALL(vnode, read_stat) == NULL)
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
			if (status < B_OK)
				return status;

			offset = stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	// assumes off_t is 64 bits wide
	if (offset > 0 && LONGLONG_MAX - offset < pos)
		return EOVERFLOW;

	pos += offset;
	if (pos < 0)
		return B_BAD_VALUE;

	// the new position doubles as the return value
	return descriptor->pos = pos;
}
3768 
3769 
3770 static status_t
3771 file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
3772 	struct select_sync *sync)
3773 {
3774 	FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
3775 
3776 	struct vnode *vnode = descriptor->u.vnode;
3777 
3778 	// If the FS has no select() hook, notify select() now.
3779 	if (FS_CALL(vnode, select) == NULL)
3780 		return notify_select_event((selectsync*)sync, ref, event);
3781 
3782 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
3783 		descriptor->cookie, event, ref, (selectsync*)sync);
3784 }
3785 
3786 
3787 static status_t
3788 file_deselect(struct file_descriptor *descriptor, uint8 event,
3789 	struct select_sync *sync)
3790 {
3791 	struct vnode *vnode = descriptor->u.vnode;
3792 
3793 	if (FS_CALL(vnode, deselect) == NULL)
3794 		return B_OK;
3795 
3796 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
3797 		descriptor->cookie, event, (selectsync*)sync);
3798 }
3799 
3800 
3801 static status_t
3802 dir_create_entry_ref(mount_id mountID, vnode_id parentID, const char *name, int perms, bool kernel)
3803 {
3804 	struct vnode *vnode;
3805 	vnode_id newID;
3806 	status_t status;
3807 
3808 	if (name == NULL || *name == '\0')
3809 		return B_BAD_VALUE;
3810 
3811 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
3812 
3813 	status = get_vnode(mountID, parentID, &vnode, kernel);
3814 	if (status < B_OK)
3815 		return status;
3816 
3817 	if (FS_CALL(vnode, create_dir))
3818 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
3819 	else
3820 		status = EROFS;
3821 
3822 	put_vnode(vnode);
3823 	return status;
3824 }
3825 
3826 
3827 static status_t
3828 dir_create(int fd, char *path, int perms, bool kernel)
3829 {
3830 	char filename[B_FILE_NAME_LENGTH];
3831 	struct vnode *vnode;
3832 	vnode_id newID;
3833 	status_t status;
3834 
3835 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
3836 
3837 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3838 	if (status < 0)
3839 		return status;
3840 
3841 	if (FS_CALL(vnode, create_dir))
3842 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
3843 	else
3844 		status = EROFS;
3845 
3846 	put_vnode(vnode);
3847 	return status;
3848 }
3849 
3850 
3851 static int
3852 dir_open_entry_ref(mount_id mountID, vnode_id parentID, const char *name, bool kernel)
3853 {
3854 	struct vnode *vnode;
3855 	int status;
3856 
3857 	FUNCTION(("dir_open_entry_ref()\n"));
3858 
3859 	if (name && *name == '\0')
3860 		return B_BAD_VALUE;
3861 
3862 	// get the vnode matching the entry_ref/node_ref
3863 	if (name)
3864 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
3865 	else
3866 		status = get_vnode(mountID, parentID, &vnode, false);
3867 	if (status < B_OK)
3868 		return status;
3869 
3870 	status = open_dir_vnode(vnode, kernel);
3871 	if (status < B_OK)
3872 		put_vnode(vnode);
3873 
3874 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
3875 	return status;
3876 }
3877 
3878 
3879 static int
3880 dir_open(int fd, char *path, bool kernel)
3881 {
3882 	int status = B_OK;
3883 
3884 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
3885 
3886 	// get the vnode matching the vnode + path combination
3887 	struct vnode *vnode = NULL;
3888 	vnode_id parentID;
3889 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
3890 	if (status != B_OK)
3891 		return status;
3892 
3893 	// open the dir
3894 	status = open_dir_vnode(vnode, kernel);
3895 	if (status < B_OK)
3896 		put_vnode(vnode);
3897 
3898 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
3899 	return status;
3900 }
3901 
3902 
3903 static status_t
3904 dir_close(struct file_descriptor *descriptor)
3905 {
3906 	struct vnode *vnode = descriptor->u.vnode;
3907 
3908 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
3909 
3910 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
3911 	if (FS_CALL(vnode, close_dir))
3912 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3913 
3914 	return B_OK;
3915 }
3916 
3917 
3918 static void
3919 dir_free_fd(struct file_descriptor *descriptor)
3920 {
3921 	struct vnode *vnode = descriptor->u.vnode;
3922 
3923 	if (vnode != NULL) {
3924 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3925 		put_vnode(vnode);
3926 	}
3927 }
3928 
3929 
3930 static status_t
3931 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3932 {
3933 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
3934 }
3935 
3936 
/**	Adjusts a just read dirent for the caller's point of view: fills
 *	in the parent device/node IDs, and translates the IDs of entries
 *	that cross file system boundaries (the ".." of an FS root, and
 *	covered mount points) to the vnodes actually visible there.
 */

static void
fix_dirent(struct vnode *parent, struct dirent *entry)
{
	// set d_pdev and d_pino
	entry->d_pdev = parent->device;
	entry->d_pino = parent->id;

	// If this is the ".." entry and the directory is the root of a FS,
	// we need to replace d_dev and d_ino with the actual values.
	if (strcmp(entry->d_name, "..") == 0
		&& parent->mount->root_vnode == parent
		&& parent->mount->covers_vnode) {
		inc_vnode_ref_count(parent);
			// vnode_path_to_vnode() puts the node

		struct vnode *vnode;
		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
			NULL, NULL);

		if (status == B_OK) {
			entry->d_dev = vnode->device;
			entry->d_ino = vnode->id;
		}
		// NOTE(review): the vnode resolved here does not appear to be put
		// again -- confirm whether vnode_path_to_vnode() returns an
		// unreferenced node in this case, otherwise a reference is leaked
	} else {
		// resolve mount points
		struct vnode *vnode = NULL;
		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, false);
		if (status != B_OK)
			return;

		// if the node is covered by another mount, report the covering node
		recursive_lock_lock(&sMountOpLock);
		if (vnode->covered_by) {
			entry->d_dev = vnode->covered_by->device;
			entry->d_ino = vnode->covered_by->id;
		}
		recursive_lock_unlock(&sMountOpLock);

		put_vnode(vnode);
	}
}
3977 
3978 
3979 static status_t
3980 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3981 {
3982 	if (!FS_CALL(vnode, read_dir))
3983 		return EOPNOTSUPP;
3984 
3985 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie,vnode->private_node,cookie,buffer,bufferSize,_count);
3986 	if (error != B_OK)
3987 		return error;
3988 
3989 	// we need to adjust the read dirents
3990 	if (*_count > 0) {
3991 		// XXX: Currently reading only one dirent is supported. Make this a loop!
3992 		fix_dirent(vnode, buffer);
3993 	}
3994 
3995 	return error;
3996 }
3997 
3998 
3999 static status_t
4000 dir_rewind(struct file_descriptor *descriptor)
4001 {
4002 	struct vnode *vnode = descriptor->u.vnode;
4003 
4004 	if (FS_CALL(vnode, rewind_dir))
4005 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie,vnode->private_node,descriptor->cookie);
4006 
4007 	return EOPNOTSUPP;
4008 }
4009 
4010 
4011 static status_t
4012 dir_remove(int fd, char *path, bool kernel)
4013 {
4014 	char name[B_FILE_NAME_LENGTH];
4015 	struct vnode *directory;
4016 	status_t status;
4017 
4018 	if (path != NULL) {
4019 		// we need to make sure our path name doesn't stop with "/", ".", or ".."
4020 		char *lastSlash = strrchr(path, '/');
4021 		if (lastSlash != NULL) {
4022 			char *leaf = lastSlash + 1;
4023 			if (!strcmp(leaf, ".."))
4024 				return B_NOT_ALLOWED;
4025 
4026 			// omit multiple slashes
4027 			while (lastSlash > path && lastSlash[-1] == '/') {
4028 				lastSlash--;
4029 			}
4030 
4031 			if (!leaf[0]
4032 				|| !strcmp(leaf, ".")) {
4033 				// "name/" -> "name", or "name/." -> "name"
4034 				lastSlash[0] = '\0';
4035 			}
4036 		} else if (!strcmp(path, ".."))
4037 			return B_NOT_ALLOWED;
4038 	}
4039 
4040 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4041 	if (status < B_OK)
4042 		return status;
4043 
4044 	if (FS_CALL(directory, remove_dir)) {
4045 		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
4046 			directory->private_node, name);
4047 	} else
4048 		status = EROFS;
4049 
4050 	put_vnode(directory);
4051 	return status;
4052 }
4053 
4054 
4055 static status_t
4056 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
4057 {
4058 	struct vnode *vnode = descriptor->u.vnode;
4059 
4060 	if (FS_CALL(vnode, ioctl)) {
4061 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4062 			descriptor->cookie, op, buffer, length);
4063 	}
4064 
4065 	return EOPNOTSUPP;
4066 }
4067 
4068 
4069 static status_t
4070 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4071 {
4072 	struct file_descriptor *descriptor;
4073 	struct vnode *vnode;
4074 	struct flock flock;
4075 	status_t status;
4076 
4077 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4078 		fd, op, argument, kernel ? "kernel" : "user"));
4079 
4080 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4081 	if (descriptor == NULL)
4082 		return B_FILE_ERROR;
4083 
4084 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4085 		if (descriptor->type != FDTYPE_FILE)
4086 			return B_BAD_VALUE;
4087 		if (user_memcpy(&flock, (struct flock *)argument, sizeof(struct flock)) < B_OK)
4088 			return B_BAD_ADDRESS;
4089 	}
4090 
4091 	switch (op) {
4092 		case F_SETFD:
4093 		{
4094 			struct io_context *context = get_current_io_context(kernel);
4095 			// Set file descriptor flags
4096 
4097 			// O_CLOEXEC is the only flag available at this time
4098 			mutex_lock(&context->io_mutex);
4099 			fd_set_close_on_exec(context, fd, argument == FD_CLOEXEC);
4100 			mutex_unlock(&context->io_mutex);
4101 
4102 			status = B_OK;
4103 			break;
4104 		}
4105 
4106 		case F_GETFD:
4107 		{
4108 			struct io_context *context = get_current_io_context(kernel);
4109 
4110 			// Get file descriptor flags
4111 			mutex_lock(&context->io_mutex);
4112 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4113 			mutex_unlock(&context->io_mutex);
4114 			break;
4115 		}
4116 
4117 		case F_SETFL:
4118 			// Set file descriptor open mode
4119 			if (FS_CALL(vnode, set_flags)) {
4120 				// we only accept changes to O_APPEND and O_NONBLOCK
4121 				argument &= O_APPEND | O_NONBLOCK;
4122 
4123 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4124 					vnode->private_node, descriptor->cookie, (int)argument);
4125 				if (status == B_OK) {
4126 					// update this descriptor's open_mode field
4127 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK))
4128 						| argument;
4129 				}
4130 			} else
4131 				status = EOPNOTSUPP;
4132 			break;
4133 
4134 		case F_GETFL:
4135 			// Get file descriptor open mode
4136 			status = descriptor->open_mode;
4137 			break;
4138 
4139 		case F_DUPFD:
4140 		{
4141 			struct io_context *context = get_current_io_context(kernel);
4142 
4143 			status = new_fd_etc(context, descriptor, (int)argument);
4144 			if (status >= 0) {
4145 				mutex_lock(&context->io_mutex);
4146 				fd_set_close_on_exec(context, fd, false);
4147 				mutex_unlock(&context->io_mutex);
4148 
4149 				atomic_add(&descriptor->ref_count, 1);
4150 			}
4151 			break;
4152 		}
4153 
4154 		case F_GETLK:
4155 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4156 			if (status == B_OK) {
4157 				// copy back flock structure
4158 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
4159 			}
4160 			break;
4161 
4162 		case F_SETLK:
4163 		case F_SETLKW:
4164 			status = normalize_flock(descriptor, &flock);
4165 			if (status < B_OK)
4166 				break;
4167 
4168 			if (flock.l_type == F_UNLCK)
4169 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4170 			else {
4171 				// the open mode must match the lock type
4172 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK
4173 					|| (descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK)
4174 					status = B_FILE_ERROR;
4175 				else
4176 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
4177 			}
4178 			break;
4179 
4180 		// ToDo: add support for more ops?
4181 
4182 		default:
4183 			status = B_BAD_VALUE;
4184 	}
4185 
4186 	put_fd(descriptor);
4187 	return status;
4188 }
4189 
4190 
4191 static status_t
4192 common_sync(int fd, bool kernel)
4193 {
4194 	struct file_descriptor *descriptor;
4195 	struct vnode *vnode;
4196 	status_t status;
4197 
4198 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
4199 
4200 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4201 	if (descriptor == NULL)
4202 		return B_FILE_ERROR;
4203 
4204 	if (FS_CALL(vnode, fsync) != NULL)
4205 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4206 	else
4207 		status = EOPNOTSUPP;
4208 
4209 	put_fd(descriptor);
4210 	return status;
4211 }
4212 
4213 
/**	Acquires the mandatory lock on the node referred to by \a fd for
 *	the calling descriptor. Fails with \c B_BUSY if another
 *	descriptor already holds the lock.
 *	NOTE(review): the CAS treats the pointer field as a vint32, which
 *	assumes 32 bit wide pointers -- verify before porting to 64 bit.
 */

static status_t
common_lock_node(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = B_OK;

	// We need to set the locking atomically - someone
	// else might set one at the same time
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			(addr_t)descriptor, NULL) != NULL)
		status = B_BUSY;

	put_fd(descriptor);
	return status;
}
4235 
4236 
/**	Releases the mandatory lock on the node referred to by \a fd.
 *	Only the descriptor that acquired the lock may release it;
 *	otherwise \c B_BAD_VALUE is returned.
 *	NOTE(review): the CAS treats the pointer field as a vint32, which
 *	assumes 32 bit wide pointers -- verify before porting to 64 bit.
 */

static status_t
common_unlock_node(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = B_OK;

	// We need to set the locking atomically - someone
	// else might set one at the same time
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			NULL, (addr_t)descriptor) != (int32)descriptor)
		status = B_BAD_VALUE;

	put_fd(descriptor);
	return status;
}
4258 
4259 
4260 static status_t
4261 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4262 	bool kernel)
4263 {
4264 	struct vnode *vnode;
4265 	status_t status;
4266 
4267 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4268 	if (status < B_OK)
4269 		return status;
4270 
4271 	if (FS_CALL(vnode, read_link) != NULL) {
4272 		status = FS_CALL(vnode, read_link)(vnode->mount->cookie,
4273 			vnode->private_node, buffer, _bufferSize);
4274 	} else
4275 		status = B_BAD_VALUE;
4276 
4277 	put_vnode(vnode);
4278 	return status;
4279 }
4280 
4281 
4282 static status_t
4283 common_write_link(char *path, char *toPath, bool kernel)
4284 {
4285 	struct vnode *vnode;
4286 	status_t status;
4287 
4288 	status = path_to_vnode(path, false, &vnode, NULL, kernel);
4289 	if (status < B_OK)
4290 		return status;
4291 
4292 	if (FS_CALL(vnode, write_link) != NULL)
4293 		status = FS_CALL(vnode, write_link)(vnode->mount->cookie, vnode->private_node, toPath);
4294 	else
4295 		status = EOPNOTSUPP;
4296 
4297 	put_vnode(vnode);
4298 
4299 	return status;
4300 }
4301 
4302 
4303 static status_t
4304 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4305 	bool kernel)
4306 {
4307 	// path validity checks have to be in the calling function!
4308 	char name[B_FILE_NAME_LENGTH];
4309 	struct vnode *vnode;
4310 	status_t status;
4311 
4312 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4313 
4314 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4315 	if (status < B_OK)
4316 		return status;
4317 
4318 	if (FS_CALL(vnode, create_symlink) != NULL)
4319 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4320 	else
4321 		status = EROFS;
4322 
4323 	put_vnode(vnode);
4324 
4325 	return status;
4326 }
4327 
4328 
4329 static status_t
4330 common_create_link(char *path, char *toPath, bool kernel)
4331 {
4332 	// path validity checks have to be in the calling function!
4333 	char name[B_FILE_NAME_LENGTH];
4334 	struct vnode *directory, *vnode;
4335 	status_t status;
4336 
4337 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4338 
4339 	status = path_to_dir_vnode(path, &directory, name, kernel);
4340 	if (status < B_OK)
4341 		return status;
4342 
4343 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4344 	if (status < B_OK)
4345 		goto err;
4346 
4347 	if (directory->mount != vnode->mount) {
4348 		status = B_CROSS_DEVICE_LINK;
4349 		goto err1;
4350 	}
4351 
4352 	if (FS_CALL(vnode, link) != NULL)
4353 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4354 	else
4355 		status = EROFS;
4356 
4357 err1:
4358 	put_vnode(vnode);
4359 err:
4360 	put_vnode(directory);
4361 
4362 	return status;
4363 }
4364 
4365 
4366 static status_t
4367 common_unlink(int fd, char *path, bool kernel)
4368 {
4369 	char filename[B_FILE_NAME_LENGTH];
4370 	struct vnode *vnode;
4371 	status_t status;
4372 
4373 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4374 
4375 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4376 	if (status < 0)
4377 		return status;
4378 
4379 	if (FS_CALL(vnode, unlink) != NULL)
4380 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4381 	else
4382 		status = EROFS;
4383 
4384 	put_vnode(vnode);
4385 
4386 	return status;
4387 }
4388 
4389 
4390 static status_t
4391 common_access(char *path, int mode, bool kernel)
4392 {
4393 	struct vnode *vnode;
4394 	status_t status;
4395 
4396 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4397 	if (status < B_OK)
4398 		return status;
4399 
4400 	if (FS_CALL(vnode, access) != NULL)
4401 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4402 	else
4403 		status = B_OK;
4404 
4405 	put_vnode(vnode);
4406 
4407 	return status;
4408 }
4409 
4410 
4411 static status_t
4412 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4413 {
4414 	struct vnode *fromVnode, *toVnode;
4415 	char fromName[B_FILE_NAME_LENGTH];
4416 	char toName[B_FILE_NAME_LENGTH];
4417 	status_t status;
4418 
4419 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4420 
4421 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4422 	if (status < 0)
4423 		return status;
4424 
4425 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4426 	if (status < 0)
4427 		goto err;
4428 
4429 	if (fromVnode->device != toVnode->device) {
4430 		status = B_CROSS_DEVICE_LINK;
4431 		goto err1;
4432 	}
4433 
4434 	if (FS_CALL(fromVnode, rename) != NULL)
4435 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4436 	else
4437 		status = EROFS;
4438 
4439 err1:
4440 	put_vnode(toVnode);
4441 err:
4442 	put_vnode(fromVnode);
4443 
4444 	return status;
4445 }
4446 
4447 
4448 static status_t
4449 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4450 {
4451 	struct vnode *vnode = descriptor->u.vnode;
4452 
4453 	FUNCTION(("common_read_stat: stat %p\n", stat));
4454 
4455 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4456 		vnode->private_node, stat);
4457 
4458 	// fill in the st_dev and st_ino fields
4459 	if (status == B_OK) {
4460 		stat->st_dev = vnode->device;
4461 		stat->st_ino = vnode->id;
4462 	}
4463 
4464 	return status;
4465 }
4466 
4467 
4468 static status_t
4469 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4470 {
4471 	struct vnode *vnode = descriptor->u.vnode;
4472 
4473 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4474 	if (!FS_CALL(vnode, write_stat))
4475 		return EROFS;
4476 
4477 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4478 }
4479 
4480 
4481 static status_t
4482 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4483 	struct stat *stat, bool kernel)
4484 {
4485 	struct vnode *vnode;
4486 	status_t status;
4487 
4488 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4489 
4490 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4491 	if (status < 0)
4492 		return status;
4493 
4494 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4495 
4496 	// fill in the st_dev and st_ino fields
4497 	if (status == B_OK) {
4498 		stat->st_dev = vnode->device;
4499 		stat->st_ino = vnode->id;
4500 	}
4501 
4502 	put_vnode(vnode);
4503 	return status;
4504 }
4505 
4506 
4507 static status_t
4508 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4509 	const struct stat *stat, int statMask, bool kernel)
4510 {
4511 	struct vnode *vnode;
4512 	status_t status;
4513 
4514 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4515 
4516 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4517 	if (status < 0)
4518 		return status;
4519 
4520 	if (FS_CALL(vnode, write_stat))
4521 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4522 	else
4523 		status = EROFS;
4524 
4525 	put_vnode(vnode);
4526 
4527 	return status;
4528 }
4529 
4530 
4531 static int
4532 attr_dir_open(int fd, char *path, bool kernel)
4533 {
4534 	struct vnode *vnode;
4535 	int status;
4536 
4537 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4538 
4539 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4540 	if (status < B_OK)
4541 		return status;
4542 
4543 	status = open_attr_dir_vnode(vnode, kernel);
4544 	if (status < 0)
4545 		put_vnode(vnode);
4546 
4547 	return status;
4548 }
4549 
4550 
4551 static status_t
4552 attr_dir_close(struct file_descriptor *descriptor)
4553 {
4554 	struct vnode *vnode = descriptor->u.vnode;
4555 
4556 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4557 
4558 	if (FS_CALL(vnode, close_attr_dir))
4559 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4560 
4561 	return B_OK;
4562 }
4563 
4564 
4565 static void
4566 attr_dir_free_fd(struct file_descriptor *descriptor)
4567 {
4568 	struct vnode *vnode = descriptor->u.vnode;
4569 
4570 	if (vnode != NULL) {
4571 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4572 		put_vnode(vnode);
4573 	}
4574 }
4575 
4576 
4577 static status_t
4578 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4579 {
4580 	struct vnode *vnode = descriptor->u.vnode;
4581 
4582 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
4583 
4584 	if (FS_CALL(vnode, read_attr_dir))
4585 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
4586 
4587 	return EOPNOTSUPP;
4588 }
4589 
4590 
4591 static status_t
4592 attr_dir_rewind(struct file_descriptor *descriptor)
4593 {
4594 	struct vnode *vnode = descriptor->u.vnode;
4595 
4596 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
4597 
4598 	if (FS_CALL(vnode, rewind_attr_dir))
4599 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4600 
4601 	return EOPNOTSUPP;
4602 }
4603 
4604 
4605 static int
4606 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
4607 {
4608 	struct vnode *vnode;
4609 	fs_cookie cookie;
4610 	int status;
4611 
4612 	if (name == NULL || *name == '\0')
4613 		return B_BAD_VALUE;
4614 
4615 	vnode = get_vnode_from_fd(fd, kernel);
4616 	if (vnode == NULL)
4617 		return B_FILE_ERROR;
4618 
4619 	if (FS_CALL(vnode, create_attr) == NULL) {
4620 		status = EROFS;
4621 		goto err;
4622 	}
4623 
4624 	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
4625 	if (status < B_OK)
4626 		goto err;
4627 
4628 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4629 		return status;
4630 
4631 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4632 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4633 
4634 	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4635 
4636 err:
4637 	put_vnode(vnode);
4638 
4639 	return status;
4640 }
4641 
4642 
4643 static int
4644 attr_open(int fd, const char *name, int openMode, bool kernel)
4645 {
4646 	struct vnode *vnode;
4647 	fs_cookie cookie;
4648 	int status;
4649 
4650 	if (name == NULL || *name == '\0')
4651 		return B_BAD_VALUE;
4652 
4653 	vnode = get_vnode_from_fd(fd, kernel);
4654 	if (vnode == NULL)
4655 		return B_FILE_ERROR;
4656 
4657 	if (FS_CALL(vnode, open_attr) == NULL) {
4658 		status = EOPNOTSUPP;
4659 		goto err;
4660 	}
4661 
4662 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
4663 	if (status < B_OK)
4664 		goto err;
4665 
4666 	// now we only need a file descriptor for this attribute and we're done
4667 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4668 		return status;
4669 
4670 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4671 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4672 
4673 err:
4674 	put_vnode(vnode);
4675 
4676 	return status;
4677 }
4678 
4679 
4680 static status_t
4681 attr_close(struct file_descriptor *descriptor)
4682 {
4683 	struct vnode *vnode = descriptor->u.vnode;
4684 
4685 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
4686 
4687 	if (FS_CALL(vnode, close_attr))
4688 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4689 
4690 	return B_OK;
4691 }
4692 
4693 
4694 static void
4695 attr_free_fd(struct file_descriptor *descriptor)
4696 {
4697 	struct vnode *vnode = descriptor->u.vnode;
4698 
4699 	if (vnode != NULL) {
4700 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4701 		put_vnode(vnode);
4702 	}
4703 }
4704 
4705 
4706 static status_t
4707 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4708 {
4709 	struct vnode *vnode = descriptor->u.vnode;
4710 
4711 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4712 	if (!FS_CALL(vnode, read_attr))
4713 		return EOPNOTSUPP;
4714 
4715 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4716 }
4717 
4718 
4719 static status_t
4720 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4721 {
4722 	struct vnode *vnode = descriptor->u.vnode;
4723 
4724 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4725 	if (!FS_CALL(vnode, write_attr))
4726 		return EOPNOTSUPP;
4727 
4728 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4729 }
4730 
4731 
4732 static off_t
4733 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4734 {
4735 	off_t offset;
4736 
4737 	switch (seekType) {
4738 		case SEEK_SET:
4739 			offset = 0;
4740 			break;
4741 		case SEEK_CUR:
4742 			offset = descriptor->pos;
4743 			break;
4744 		case SEEK_END:
4745 		{
4746 			struct vnode *vnode = descriptor->u.vnode;
4747 			struct stat stat;
4748 			status_t status;
4749 
4750 			if (FS_CALL(vnode, read_stat) == NULL)
4751 				return EOPNOTSUPP;
4752 
4753 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
4754 			if (status < B_OK)
4755 				return status;
4756 
4757 			offset = stat.st_size;
4758 			break;
4759 		}
4760 		default:
4761 			return B_BAD_VALUE;
4762 	}
4763 
4764 	// assumes off_t is 64 bits wide
4765 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4766 		return EOVERFLOW;
4767 
4768 	pos += offset;
4769 	if (pos < 0)
4770 		return B_BAD_VALUE;
4771 
4772 	return descriptor->pos = pos;
4773 }
4774 
4775 
4776 static status_t
4777 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4778 {
4779 	struct vnode *vnode = descriptor->u.vnode;
4780 
4781 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
4782 
4783 	if (!FS_CALL(vnode, read_attr_stat))
4784 		return EOPNOTSUPP;
4785 
4786 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4787 }
4788 
4789 
4790 static status_t
4791 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4792 {
4793 	struct vnode *vnode = descriptor->u.vnode;
4794 
4795 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
4796 
4797 	if (!FS_CALL(vnode, write_attr_stat))
4798 		return EROFS;
4799 
4800 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
4801 }
4802 
4803 
4804 static status_t
4805 attr_remove(int fd, const char *name, bool kernel)
4806 {
4807 	struct file_descriptor *descriptor;
4808 	struct vnode *vnode;
4809 	status_t status;
4810 
4811 	if (name == NULL || *name == '\0')
4812 		return B_BAD_VALUE;
4813 
4814 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
4815 
4816 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4817 	if (descriptor == NULL)
4818 		return B_FILE_ERROR;
4819 
4820 	if (FS_CALL(vnode, remove_attr))
4821 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4822 	else
4823 		status = EROFS;
4824 
4825 	put_fd(descriptor);
4826 
4827 	return status;
4828 }
4829 
4830 
4831 static status_t
4832 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
4833 {
4834 	struct file_descriptor *fromDescriptor, *toDescriptor;
4835 	struct vnode *fromVnode, *toVnode;
4836 	status_t status;
4837 
4838 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
4839 		return B_BAD_VALUE;
4840 
4841 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
4842 
4843 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
4844 	if (fromDescriptor == NULL)
4845 		return B_FILE_ERROR;
4846 
4847 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
4848 	if (toDescriptor == NULL) {
4849 		status = B_FILE_ERROR;
4850 		goto err;
4851 	}
4852 
4853 	// are the files on the same volume?
4854 	if (fromVnode->device != toVnode->device) {
4855 		status = B_CROSS_DEVICE_LINK;
4856 		goto err1;
4857 	}
4858 
4859 	if (FS_CALL(fromVnode, rename_attr))
4860 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4861 	else
4862 		status = EROFS;
4863 
4864 err1:
4865 	put_fd(toDescriptor);
4866 err:
4867 	put_fd(fromDescriptor);
4868 
4869 	return status;
4870 }
4871 
4872 
4873 static status_t
4874 index_dir_open(mount_id mountID, bool kernel)
4875 {
4876 	struct fs_mount *mount;
4877 	fs_cookie cookie;
4878 
4879 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
4880 
4881 	status_t status = get_mount(mountID, &mount);
4882 	if (status < B_OK)
4883 		return status;
4884 
4885 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
4886 		status = EOPNOTSUPP;
4887 		goto out;
4888 	}
4889 
4890 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
4891 	if (status < B_OK)
4892 		goto out;
4893 
4894 	// get fd for the index directory
4895 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
4896 	if (status >= 0)
4897 		goto out;
4898 
4899 	// something went wrong
4900 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
4901 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
4902 
4903 out:
4904 	put_mount(mount);
4905 	return status;
4906 }
4907 
4908 
4909 static status_t
4910 index_dir_close(struct file_descriptor *descriptor)
4911 {
4912 	struct fs_mount *mount = descriptor->u.mount;
4913 
4914 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
4915 
4916 	if (FS_MOUNT_CALL(mount, close_index_dir))
4917 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
4918 
4919 	return B_OK;
4920 }
4921 
4922 
4923 static void
4924 index_dir_free_fd(struct file_descriptor *descriptor)
4925 {
4926 	struct fs_mount *mount = descriptor->u.mount;
4927 
4928 	if (mount != NULL) {
4929 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
4930 		// ToDo: find a replacement ref_count object - perhaps the root dir?
4931 		//put_vnode(vnode);
4932 	}
4933 }
4934 
4935 
4936 static status_t
4937 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4938 {
4939 	struct fs_mount *mount = descriptor->u.mount;
4940 
4941 	if (FS_MOUNT_CALL(mount, read_index_dir))
4942 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
4943 
4944 	return EOPNOTSUPP;
4945 }
4946 
4947 
4948 static status_t
4949 index_dir_rewind(struct file_descriptor *descriptor)
4950 {
4951 	struct fs_mount *mount = descriptor->u.mount;
4952 
4953 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
4954 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
4955 
4956 	return EOPNOTSUPP;
4957 }
4958 
4959 
4960 static status_t
4961 index_create(mount_id mountID, const char *name, uint32 type, uint32 flags, bool kernel)
4962 {
4963 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4964 
4965 	struct fs_mount *mount;
4966 	status_t status = get_mount(mountID, &mount);
4967 	if (status < B_OK)
4968 		return status;
4969 
4970 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
4971 		status = EROFS;
4972 		goto out;
4973 	}
4974 
4975 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
4976 
4977 out:
4978 	put_mount(mount);
4979 	return status;
4980 }
4981 
4982 
#if 0
// Disabled: per-index file descriptors are not in use; index stat data is
// obtained by name via index_name_read_stat() below instead.
static status_t
index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
{
	struct vnode *vnode = descriptor->u.vnode;

	// ToDo: currently unused!
	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
	if (!FS_CALL(vnode, read_index_stat))
		return EOPNOTSUPP;

	// even with the hook present this is answered as "unsupported"
	return EOPNOTSUPP;
	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
}


// Would release the index cookie and the vnode reference held by the FD.
static void
index_free_fd(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;

	if (vnode != NULL) {
		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
		put_vnode(vnode);
	}
}
#endif
5010 
5011 
5012 static status_t
5013 index_name_read_stat(mount_id mountID, const char *name, struct stat *stat, bool kernel)
5014 {
5015 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5016 
5017 	struct fs_mount *mount;
5018 	status_t status = get_mount(mountID, &mount);
5019 	if (status < B_OK)
5020 		return status;
5021 
5022 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
5023 		status = EOPNOTSUPP;
5024 		goto out;
5025 	}
5026 
5027 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
5028 
5029 out:
5030 	put_mount(mount);
5031 	return status;
5032 }
5033 
5034 
5035 static status_t
5036 index_remove(mount_id mountID, const char *name, bool kernel)
5037 {
5038 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5039 
5040 	struct fs_mount *mount;
5041 	status_t status = get_mount(mountID, &mount);
5042 	if (status < B_OK)
5043 		return status;
5044 
5045 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
5046 		status = EROFS;
5047 		goto out;
5048 	}
5049 
5050 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
5051 
5052 out:
5053 	put_mount(mount);
5054 	return status;
5055 }
5056 
5057 
/**	ToDo: the query FS API is still pretty much the same as in R5.
 *		It would be nice if the file systems received some more kernel
 *		support for queries; for example, query parsing should be moved
 *		into the kernel.
 */
5063 
5064 static int
5065 query_open(dev_t device, const char *query, uint32 flags,
5066 	port_id port, int32 token, bool kernel)
5067 {
5068 	struct fs_mount *mount;
5069 	fs_cookie cookie;
5070 
5071 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
5072 
5073 	status_t status = get_mount(device, &mount);
5074 	if (status < B_OK)
5075 		return status;
5076 
5077 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
5078 		status = EOPNOTSUPP;
5079 		goto out;
5080 	}
5081 
5082 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
5083 	if (status < B_OK)
5084 		goto out;
5085 
5086 	// get fd for the index directory
5087 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
5088 	if (status >= 0)
5089 		goto out;
5090 
5091 	// something went wrong
5092 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
5093 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
5094 
5095 out:
5096 	put_mount(mount);
5097 	return status;
5098 }
5099 
5100 
5101 static status_t
5102 query_close(struct file_descriptor *descriptor)
5103 {
5104 	struct fs_mount *mount = descriptor->u.mount;
5105 
5106 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5107 
5108 	if (FS_MOUNT_CALL(mount, close_query))
5109 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5110 
5111 	return B_OK;
5112 }
5113 
5114 
5115 static void
5116 query_free_fd(struct file_descriptor *descriptor)
5117 {
5118 	struct fs_mount *mount = descriptor->u.mount;
5119 
5120 	if (mount != NULL) {
5121 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5122 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5123 		//put_vnode(vnode);
5124 	}
5125 }
5126 
5127 
5128 static status_t
5129 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5130 {
5131 	struct fs_mount *mount = descriptor->u.mount;
5132 
5133 	if (FS_MOUNT_CALL(mount, read_query))
5134 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5135 
5136 	return EOPNOTSUPP;
5137 }
5138 
5139 
5140 static status_t
5141 query_rewind(struct file_descriptor *descriptor)
5142 {
5143 	struct fs_mount *mount = descriptor->u.mount;
5144 
5145 	if (FS_MOUNT_CALL(mount, rewind_query))
5146 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5147 
5148 	return EOPNOTSUPP;
5149 }
5150 
5151 
5152 //	#pragma mark -
5153 //	General File System functions
5154 
5155 
5156 static dev_t
5157 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5158 	const char *args, bool kernel)
5159 {
5160 	struct fs_mount *mount;
5161 	status_t status = 0;
5162 
5163 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5164 
5165 	// The path is always safe, we just have to make sure that fsName is
5166 	// almost valid - we can't make any assumptions about args, though.
5167 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5168 	// We'll get it from the DDM later.
5169 	if (fsName == NULL) {
5170 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
5171 			return B_BAD_VALUE;
5172 	} else if (fsName[0] == '\0')
5173 		return B_BAD_VALUE;
5174 
5175 	RecursiveLocker mountOpLocker(sMountOpLock);
5176 
5177 	// Helper to delete a newly created file device on failure.
5178 	// Not exactly beautiful, but helps to keep the code below cleaner.
5179 	struct FileDeviceDeleter {
5180 		FileDeviceDeleter() : id(-1) {}
5181 		~FileDeviceDeleter()
5182 		{
5183 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5184 		}
5185 
5186 		partition_id id;
5187 	} fileDeviceDeleter;
5188 
5189 	// If the file system is not a "virtual" one, the device argument should
5190 	// point to a real file/device (if given at all).
5191 	// get the partition
5192 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5193 	KPartition *partition = NULL;
5194 	KPath normalizedDevice;
5195 	bool newlyCreatedFileDevice = false;
5196 
5197 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5198 		// normalize the device path
5199 		status = normalizedDevice.SetTo(device, true);
5200 		if (status != B_OK)
5201 			return status;
5202 
5203 		// get a corresponding partition from the DDM
5204 		partition = ddm->RegisterPartition(normalizedDevice.Path(), true);
5205 
5206 		if (!partition) {
5207 			// Partition not found: This either means, the user supplied
5208 			// an invalid path, or the path refers to an image file. We try
5209 			// to let the DDM create a file device for the path.
5210 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5211 				&newlyCreatedFileDevice, false);
5212 			if (deviceID >= 0) {
5213 				partition = ddm->RegisterPartition(deviceID, true);
5214 				if (newlyCreatedFileDevice)
5215 					fileDeviceDeleter.id = deviceID;
5216 			}
5217 		}
5218 
5219 		if (!partition) {
5220 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5221 				normalizedDevice.Path()));
5222 			return B_ENTRY_NOT_FOUND;
5223 		}
5224 
5225 		device = normalizedDevice.Path();
5226 			// correct path to file device
5227 	}
5228 	PartitionRegistrar partitionRegistrar(partition, true);
5229 
5230 	// Write lock the partition's device. For the time being, we keep the lock
5231 	// until we're done mounting -- not nice, but ensure, that no-one is
5232 	// interfering.
5233 	// TODO: Find a better solution.
5234 	KDiskDevice *diskDevice = NULL;
5235 	if (partition) {
5236 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5237 		if (!diskDevice) {
5238 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5239 			return B_ERROR;
5240 		}
5241 	}
5242 
5243 	DeviceWriteLocker writeLocker(diskDevice, true);
5244 		// this takes over the write lock acquired before
5245 
5246 	if (partition) {
5247 		// make sure, that the partition is not busy
5248 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5249 			TRACE(("fs_mount(): Partition is busy.\n"));
5250 			return B_BUSY;
5251 		}
5252 
5253 		// if no FS name had been supplied, we get it from the partition
5254 		if (!fsName) {
5255 			KDiskSystem *diskSystem = partition->DiskSystem();
5256 			if (!diskSystem) {
5257 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5258 					"recognize it.\n"));
5259 				return B_BAD_VALUE;
5260 			}
5261 
5262 			if (!diskSystem->IsFileSystem()) {
5263 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5264 					"partitioning system.\n"));
5265 				return B_BAD_VALUE;
5266 			}
5267 
5268 			// The disk system name will not change, and the KDiskSystem
5269 			// object will not go away while the disk device is locked (and
5270 			// the partition has a reference to it), so this is safe.
5271 			fsName = diskSystem->Name();
5272 		}
5273 	}
5274 
5275 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5276 	if (mount == NULL)
5277 		return B_NO_MEMORY;
5278 
5279 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5280 
5281 	mount->fs_name = get_file_system_name(fsName);
5282 	if (mount->fs_name == NULL) {
5283 		status = B_NO_MEMORY;
5284 		goto err1;
5285 	}
5286 
5287 	mount->device_name = strdup(device);
5288 		// "device" can be NULL
5289 
5290 	mount->fs = get_file_system(fsName);
5291 	if (mount->fs == NULL) {
5292 		status = ENODEV;
5293 		goto err3;
5294 	}
5295 
5296 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5297 	if (status < B_OK)
5298 		goto err4;
5299 
5300 	// initialize structure
5301 	mount->id = sNextMountID++;
5302 	mount->partition = NULL;
5303 	mount->root_vnode = NULL;
5304 	mount->covers_vnode = NULL;
5305 	mount->cookie = NULL;
5306 	mount->unmounting = false;
5307 	mount->owns_file_device = false;
5308 
5309 	// insert mount struct into list before we call FS's mount() function
5310 	// so that vnodes can be created for this mount
5311 	mutex_lock(&sMountMutex);
5312 	hash_insert(sMountsTable, mount);
5313 	mutex_unlock(&sMountMutex);
5314 
5315 	vnode_id rootID;
5316 
5317 	if (!sRoot) {
5318 		// we haven't mounted anything yet
5319 		if (strcmp(path, "/") != 0) {
5320 			status = B_ERROR;
5321 			goto err5;
5322 		}
5323 
5324 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5325 		if (status < 0) {
5326 			// ToDo: why should we hide the error code from the file system here?
5327 			//status = ERR_VFS_GENERAL;
5328 			goto err5;
5329 		}
5330 	} else {
5331 		struct vnode *coveredVnode;
5332 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5333 		if (status < B_OK)
5334 			goto err5;
5335 
5336 		// make sure covered_vnode is a DIR
5337 		struct stat coveredNodeStat;
5338 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5339 			coveredVnode->private_node, &coveredNodeStat);
5340 		if (status < B_OK)
5341 			goto err5;
5342 
5343 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5344 			status = B_NOT_A_DIRECTORY;
5345 			goto err5;
5346 		}
5347 
5348 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5349 			// this is already a mount point
5350 			status = B_BUSY;
5351 			goto err5;
5352 		}
5353 
5354 		mount->covers_vnode = coveredVnode;
5355 
5356 		// mount it
5357 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5358 		if (status < B_OK)
5359 			goto err6;
5360 	}
5361 
5362 	// the root node is supposed to be owned by the file system - it must
5363 	// exist at this point
5364 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5365 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5366 		panic("fs_mount: file system does not own its root node!\n");
5367 		status = B_ERROR;
5368 		goto err7;
5369 	}
5370 
5371 	// No race here, since fs_mount() is the only function changing
5372 	// covers_vnode (and holds sMountOpLock at that time).
5373 	if (mount->covers_vnode)
5374 		mount->covers_vnode->covered_by = mount->root_vnode;
5375 
5376 	if (!sRoot)
5377 		sRoot = mount->root_vnode;
5378 
5379 	// supply the partition (if any) with the mount cookie and mark it mounted
5380 	if (partition) {
5381 		partition->SetMountCookie(mount->cookie);
5382 		partition->SetVolumeID(mount->id);
5383 
5384 		// keep a partition reference as long as the partition is mounted
5385 		partitionRegistrar.Detach();
5386 		mount->partition = partition;
5387 		mount->owns_file_device = newlyCreatedFileDevice;
5388 		fileDeviceDeleter.id = -1;
5389 	}
5390 
5391 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
5392 		mount->covers_vnode ? mount->covers_vnode->id : -1);
5393 
5394 	return mount->id;
5395 
5396 err7:
5397 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5398 err6:
5399 	if (mount->covers_vnode)
5400 		put_vnode(mount->covers_vnode);
5401 err5:
5402 	mutex_lock(&sMountMutex);
5403 	hash_remove(sMountsTable, mount);
5404 	mutex_unlock(&sMountMutex);
5405 
5406 	recursive_lock_destroy(&mount->rlock);
5407 err4:
5408 	put_file_system(mount->fs);
5409 	free(mount->device_name);
5410 err3:
5411 	free(mount->fs_name);
5412 err1:
5413 	free(mount);
5414 
5415 	return status;
5416 }
5417 
5418 
5419 static status_t
5420 fs_unmount(char *path, uint32 flags, bool kernel)
5421 {
5422 	struct fs_mount *mount;
5423 	struct vnode *vnode;
5424 	status_t err;
5425 
5426 	FUNCTION(("vfs_unmount: entry. path = '%s', kernel %d\n", path, kernel));
5427 
5428 	err = path_to_vnode(path, true, &vnode, NULL, kernel);
5429 	if (err < 0)
5430 		return B_ENTRY_NOT_FOUND;
5431 
5432 	RecursiveLocker mountOpLocker(sMountOpLock);
5433 
5434 	mount = find_mount(vnode->device);
5435 	if (!mount)
5436 		panic("vfs_unmount: find_mount() failed on root vnode @%p of mount\n", vnode);
5437 
5438 	if (mount->root_vnode != vnode) {
5439 		// not mountpoint
5440 		put_vnode(vnode);
5441 		return B_BAD_VALUE;
5442 	}
5443 
5444 	// if the volume is associated with a partition, lock the device of the
5445 	// partition as long as we are unmounting
5446 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
5447 	KPartition *partition = mount->partition;
5448 	KDiskDevice *diskDevice = NULL;
5449 	if (partition) {
5450 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5451 		if (!diskDevice) {
5452 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
5453 			return B_ERROR;
5454 		}
5455 	}
5456 	DeviceWriteLocker writeLocker(diskDevice, true);
5457 
5458 	// make sure, that the partition is not busy
5459 	if (partition) {
5460 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5461 			TRACE(("fs_unmount(): Partition is busy.\n"));
5462 			return B_BUSY;
5463 		}
5464 	}
5465 
5466 	// grab the vnode master mutex to keep someone from creating
5467 	// a vnode while we're figuring out if we can continue
5468 	mutex_lock(&sVnodeMutex);
5469 
5470 	bool disconnectedDescriptors = false;
5471 
5472 	while (true) {
5473 		bool busy = false;
5474 
5475 		// cycle through the list of vnodes associated with this mount and
5476 		// make sure all of them are not busy or have refs on them
5477 		vnode = NULL;
5478 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5479 			// The root vnode ref_count needs to be 2 here: one for the file
5480 			// system, one from the path_to_vnode() call above
5481 			if (vnode->busy
5482 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
5483 					|| (vnode->ref_count != 2 && mount->root_vnode == vnode))) {
5484 				// there are still vnodes in use on this mount, so we cannot
5485 				// unmount yet
5486 				busy = true;
5487 				break;
5488 			}
5489 		}
5490 
5491 		if (!busy)
5492 			break;
5493 
5494 		if ((flags & B_FORCE_UNMOUNT) == 0) {
5495 			mutex_unlock(&sVnodeMutex);
5496 			put_vnode(mount->root_vnode);
5497 
5498 			return B_BUSY;
5499 		}
5500 
5501 		if (disconnectedDescriptors) {
5502 			// wait a bit until the last access is finished, and then try again
5503 			mutex_unlock(&sVnodeMutex);
5504 			snooze(100000);
5505 			// TODO: if there is some kind of bug that prevents the ref counts
5506 			//	from getting back to zero, this will fall into an endless loop...
5507 			mutex_lock(&sVnodeMutex);
5508 			continue;
5509 		}
5510 
5511 		// the file system is still busy - but we're forced to unmount it,
5512 		// so let's disconnect all open file descriptors
5513 
5514 		mount->unmounting = true;
5515 			// prevent new vnodes from being created
5516 
5517 		mutex_unlock(&sVnodeMutex);
5518 
5519 		disconnect_mount_or_vnode_fds(mount, NULL);
5520 		disconnectedDescriptors = true;
5521 
5522 		mutex_lock(&sVnodeMutex);
5523 	}
5524 
5525 	// we can safely continue, mark all of the vnodes busy and this mount
5526 	// structure in unmounting state
5527 	mount->unmounting = true;
5528 
5529 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5530 		vnode->busy = true;
5531 
5532 		if (vnode->ref_count == 0) {
5533 			// this vnode has been unused before
5534 			list_remove_item(&sUnusedVnodeList, vnode);
5535 			sUnusedVnodes--;
5536 		}
5537 	}
5538 
5539 	// The ref_count of the root node is 2 at this point, see above why this is
5540 	mount->root_vnode->ref_count -= 2;
5541 
5542 	mutex_unlock(&sVnodeMutex);
5543 
5544 	mount->covers_vnode->covered_by = NULL;
5545 	put_vnode(mount->covers_vnode);
5546 
5547 	// Free all vnodes associated with this mount.
5548 	// They will be removed from the mount list by free_vnode(), so
5549 	// we don't have to do this.
5550 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes)) != NULL) {
5551 		free_vnode(vnode, false);
5552 	}
5553 
5554 	// remove the mount structure from the hash table
5555 	mutex_lock(&sMountMutex);
5556 	hash_remove(sMountsTable, mount);
5557 	mutex_unlock(&sMountMutex);
5558 
5559 	mountOpLocker.Unlock();
5560 
5561 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5562 	notify_unmount(mount->id);
5563 
5564 	// release the file system
5565 	put_file_system(mount->fs);
5566 
5567 	// dereference the partition and mark it unmounted
5568 	if (partition) {
5569 		partition->SetVolumeID(-1);
5570 		partition->SetMountCookie(NULL);
5571 
5572 		if (mount->owns_file_device)
5573 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
5574 		partition->Unregister();
5575 	}
5576 
5577 	free(mount->device_name);
5578 	free(mount->fs_name);
5579 	free(mount);
5580 
5581 	return B_OK;
5582 }
5583 
5584 
/**	Synchronizes the volume \a device: first calls the file system's
 *	sync() hook (if it has one), then fsync() on every vnode of the
 *	mount that can still be referenced.
 *	Returns the status of get_mount()/sync(); vnode-level fsync failures
 *	are not reported.
 */
static status_t
fs_sync(dev_t device)
{
	struct fs_mount *mount;
	status_t status = get_mount(device, &mount);
	if (status < B_OK)
		return status;

	// sMountMutex is held across the sync() hook here; the hook must not
	// try to acquire it itself
	mutex_lock(&sMountMutex);

	if (FS_MOUNT_CALL(mount, sync))
		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);

	mutex_unlock(&sMountMutex);

	// Walk the mount's vnode list, keeping a reference to the vnode last
	// synced so that list iteration can be resumed from it safely.
	struct vnode *previousVnode = NULL;
	while (true) {
		// synchronize access to vnode list
		recursive_lock_lock(&mount->rlock);

		struct vnode *vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
			previousVnode);

		// remember the ID only; the vnode pointer may become invalid once
		// the list lock is dropped below
		vnode_id id = -1;
		if (vnode != NULL)
			id = vnode->id;

		recursive_lock_unlock(&mount->rlock);

		if (vnode == NULL)
			break;

		// acquire a reference to the vnode

		if (get_vnode(mount->id, id, &vnode, true) == B_OK) {
			if (previousVnode != NULL)
				put_vnode(previousVnode);

			if (FS_CALL(vnode, fsync) != NULL)
				FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

			// the next vnode might change until we lock the vnode list again,
			// but this vnode won't go away since we keep a reference to it.
			previousVnode = vnode;
		} else {
			// the vnode vanished between dropping the list lock and
			// get_vnode() - give up rather than risk skipping entries
			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n", mount->id, id);
			break;
		}
	}

	if (previousVnode != NULL)
		put_vnode(previousVnode);

	put_mount(mount);
	return status;
}
5641 
5642 
5643 static status_t
5644 fs_read_info(dev_t device, struct fs_info *info)
5645 {
5646 	struct fs_mount *mount;
5647 	status_t status = get_mount(device, &mount);
5648 	if (status < B_OK)
5649 		return status;
5650 
5651 	// fill in info the file system doesn't (have to) know about
5652 	memset(info, 0, sizeof(struct fs_info));
5653 	info->dev = mount->id;
5654 	info->root = mount->root_vnode->id;
5655 	strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
5656 	if (mount->device_name != NULL)
5657 		strlcpy(info->device_name, mount->device_name, sizeof(info->device_name));
5658 
5659 	if (FS_MOUNT_CALL(mount, read_fs_info))
5660 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
5661 
5662 	// if the call is not supported by the file system, there are still
5663 	// the parts that we filled out ourselves
5664 
5665 	put_mount(mount);
5666 	return status;
5667 }
5668 
5669 
5670 static status_t
5671 fs_write_info(dev_t device, const struct fs_info *info, int mask)
5672 {
5673 	struct fs_mount *mount;
5674 	status_t status = get_mount(device, &mount);
5675 	if (status < B_OK)
5676 		return status;
5677 
5678 	if (FS_MOUNT_CALL(mount, write_fs_info))
5679 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
5680 	else
5681 		status = EROFS;
5682 
5683 	put_mount(mount);
5684 	return status;
5685 }
5686 
5687 
5688 static dev_t
5689 fs_next_device(int32 *_cookie)
5690 {
5691 	struct fs_mount *mount = NULL;
5692 	dev_t device = *_cookie;
5693 
5694 	mutex_lock(&sMountMutex);
5695 
5696 	// Since device IDs are assigned sequentially, this algorithm
5697 	// does work good enough. It makes sure that the device list
5698 	// returned is sorted, and that no device is skipped when an
5699 	// already visited device got unmounted.
5700 
5701 	while (device < sNextMountID) {
5702 		mount = find_mount(device++);
5703 		if (mount != NULL && mount->cookie != NULL)
5704 			break;
5705 	}
5706 
5707 	*_cookie = device;
5708 
5709 	if (mount != NULL)
5710 		device = mount->id;
5711 	else
5712 		device = B_BAD_VALUE;
5713 
5714 	mutex_unlock(&sMountMutex);
5715 
5716 	return device;
5717 }
5718 
5719 
5720 static status_t
5721 get_cwd(char *buffer, size_t size, bool kernel)
5722 {
5723 	// Get current working directory from io context
5724 	struct io_context *context = get_current_io_context(kernel);
5725 	status_t status;
5726 
5727 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
5728 
5729 	mutex_lock(&context->io_mutex);
5730 
5731 	if (context->cwd)
5732 		status = dir_vnode_to_path(context->cwd, buffer, size);
5733 	else
5734 		status = B_ERROR;
5735 
5736 	mutex_unlock(&context->io_mutex);
5737 	return status;
5738 }
5739 
5740 
5741 static status_t
5742 set_cwd(int fd, char *path, bool kernel)
5743 {
5744 	struct io_context *context;
5745 	struct vnode *vnode = NULL;
5746 	struct vnode *oldDirectory;
5747 	struct stat stat;
5748 	status_t status;
5749 
5750 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
5751 
5752 	// Get vnode for passed path, and bail if it failed
5753 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5754 	if (status < 0)
5755 		return status;
5756 
5757 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
5758 	if (status < 0)
5759 		goto err;
5760 
5761 	if (!S_ISDIR(stat.st_mode)) {
5762 		// nope, can't cwd to here
5763 		status = B_NOT_A_DIRECTORY;
5764 		goto err;
5765 	}
5766 
5767 	// Get current io context and lock
5768 	context = get_current_io_context(kernel);
5769 	mutex_lock(&context->io_mutex);
5770 
5771 	// save the old current working directory first
5772 	oldDirectory = context->cwd;
5773 	context->cwd = vnode;
5774 
5775 	mutex_unlock(&context->io_mutex);
5776 
5777 	if (oldDirectory)
5778 		put_vnode(oldDirectory);
5779 
5780 	return B_NO_ERROR;
5781 
5782 err:
5783 	put_vnode(vnode);
5784 	return status;
5785 }
5786 
5787 
5788 //	#pragma mark -
5789 //	Calls from within the kernel
5790 
5791 
5792 dev_t
5793 _kern_mount(const char *path, const char *device, const char *fsName,
5794 	uint32 flags, const char *args, size_t argsLength)
5795 {
5796 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5797 	if (pathBuffer.InitCheck() != B_OK)
5798 		return B_NO_MEMORY;
5799 
5800 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
5801 }
5802 
5803 
5804 status_t
5805 _kern_unmount(const char *path, uint32 flags)
5806 {
5807 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5808 	if (pathBuffer.InitCheck() != B_OK)
5809 		return B_NO_MEMORY;
5810 
5811 	return fs_unmount(pathBuffer.LockBuffer(), flags, true);
5812 }
5813 
5814 
5815 status_t
5816 _kern_read_fs_info(dev_t device, struct fs_info *info)
5817 {
5818 	if (info == NULL)
5819 		return B_BAD_VALUE;
5820 
5821 	return fs_read_info(device, info);
5822 }
5823 
5824 
5825 status_t
5826 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
5827 {
5828 	if (info == NULL)
5829 		return B_BAD_VALUE;
5830 
5831 	return fs_write_info(device, info, mask);
5832 }
5833 
5834 
5835 status_t
5836 _kern_sync(void)
5837 {
5838 	// Note: _kern_sync() is also called from _user_sync()
5839 	int32 cookie = 0;
5840 	dev_t device;
5841 	while ((device = next_dev(&cookie)) >= 0) {
5842 		status_t status = fs_sync(device);
5843 		if (status != B_OK && status != B_BAD_VALUE)
5844 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
5845 	}
5846 
5847 	return B_OK;
5848 }
5849 
5850 
5851 dev_t
5852 _kern_next_device(int32 *_cookie)
5853 {
5854 	return fs_next_device(_cookie);
5855 }
5856 
5857 
/**	Retrieves information about the next open file descriptor (at or
 *	after slot \a *_cookie) of team \a teamID. On success fills in
 *	\a info and advances \a *_cookie past the reported slot.
 *	Returns \c B_BAD_VALUE for a mismatching \a infoSize,
 *	\c B_BAD_TEAM_ID if the team is gone, and \c B_ENTRY_NOT_FOUND when
 *	there are no more descriptors.
 */
status_t
_kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
	size_t infoSize)
{
	if (infoSize != sizeof(fd_info))
		return B_BAD_VALUE;

	struct io_context *context = NULL;
	sem_id contextMutex = -1;
	struct team *team = NULL;

	// grab the team's io_context and its mutex semaphore while the team
	// structure is protected by the team lock
	cpu_status state = disable_interrupts();
	GRAB_TEAM_LOCK();

	team = team_get_team_struct_locked(teamID);
	if (team) {
		context = (io_context *)team->io_context;
		contextMutex = context->io_mutex.sem;
	}

	RELEASE_TEAM_LOCK();
	restore_interrupts(state);

	// we now have a context - since we couldn't lock it while having
	// safe access to the team structure, we now need to lock the mutex
	// manually

	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
		// team doesn't exist or seems to be gone
		return B_BAD_TEAM_ID;
	}

	// the team cannot be deleted completely while we're owning its
	// io_context mutex, so we can safely play with it now

	// we acquired the semaphore directly, so set the mutex holder by hand
	context->io_mutex.holder = thread_get_current_thread_id();

	uint32 slot = *_cookie;

	// skip to the next occupied FD slot
	struct file_descriptor *descriptor;
	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
		slot++;

	if (slot >= context->table_size) {
		mutex_unlock(&context->io_mutex);
		return B_ENTRY_NOT_FOUND;
	}

	info->number = slot;
	info->open_mode = descriptor->open_mode;

	// descriptors without a vnode (e.g. index/query FDs) report the
	// mount's device and a node ID of -1
	struct vnode *vnode = fd_vnode(descriptor);
	if (vnode != NULL) {
		info->device = vnode->device;
		info->node = vnode->id;
	} else if (descriptor->u.mount != NULL) {
		info->device = descriptor->u.mount->id;
		info->node = -1;
	}

	mutex_unlock(&context->io_mutex);

	*_cookie = slot + 1;
	return B_OK;
}
5923 
5924 
5925 int
5926 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
5927 {
5928 	if (openMode & O_CREAT)
5929 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
5930 
5931 	return file_open_entry_ref(device, inode, name, openMode, true);
5932 }
5933 
5934 
5935 /**	\brief Opens a node specified by a FD + path pair.
5936  *
5937  *	At least one of \a fd and \a path must be specified.
5938  *	If only \a fd is given, the function opens the node identified by this
5939  *	FD. If only a path is given, this path is opened. If both are given and
5940  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5941  *	of the directory (!) identified by \a fd.
5942  *
5943  *	\param fd The FD. May be < 0.
5944  *	\param path The absolute or relative path. May be \c NULL.
5945  *	\param openMode The open mode.
5946  *	\return A FD referring to the newly opened node, or an error code,
5947  *			if an error occurs.
5948  */
5949 
5950 int
5951 _kern_open(int fd, const char *path, int openMode, int perms)
5952 {
5953 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5954 	if (pathBuffer.InitCheck() != B_OK)
5955 		return B_NO_MEMORY;
5956 
5957 	if (openMode & O_CREAT)
5958 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
5959 
5960 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
5961 }
5962 
5963 
5964 /**	\brief Opens a directory specified by entry_ref or node_ref.
5965  *
5966  *	The supplied name may be \c NULL, in which case directory identified
5967  *	by \a device and \a inode will be opened. Otherwise \a device and
5968  *	\a inode identify the parent directory of the directory to be opened
5969  *	and \a name its entry name.
5970  *
5971  *	\param device If \a name is specified the ID of the device the parent
5972  *		   directory of the directory to be opened resides on, otherwise
5973  *		   the device of the directory itself.
5974  *	\param inode If \a name is specified the node ID of the parent
5975  *		   directory of the directory to be opened, otherwise node ID of the
5976  *		   directory itself.
5977  *	\param name The entry name of the directory to be opened. If \c NULL,
5978  *		   the \a device + \a inode pair identify the node to be opened.
5979  *	\return The FD of the newly opened directory or an error code, if
5980  *			something went wrong.
5981  */
5982 
5983 int
5984 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
5985 {
5986 	return dir_open_entry_ref(device, inode, name, true);
5987 }
5988 
5989 
5990 /**	\brief Opens a directory specified by a FD + path pair.
5991  *
5992  *	At least one of \a fd and \a path must be specified.
5993  *	If only \a fd is given, the function opens the directory identified by this
5994  *	FD. If only a path is given, this path is opened. If both are given and
5995  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5996  *	of the directory (!) identified by \a fd.
5997  *
5998  *	\param fd The FD. May be < 0.
5999  *	\param path The absolute or relative path. May be \c NULL.
6000  *	\return A FD referring to the newly opened directory, or an error code,
6001  *			if an error occurs.
6002  */
6003 
6004 int
6005 _kern_open_dir(int fd, const char *path)
6006 {
6007 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6008 	if (pathBuffer.InitCheck() != B_OK)
6009 		return B_NO_MEMORY;
6010 
6011 	return dir_open(fd, pathBuffer.LockBuffer(), true);
6012 }
6013 
6014 
6015 status_t
6016 _kern_fcntl(int fd, int op, uint32 argument)
6017 {
6018 	return common_fcntl(fd, op, argument, true);
6019 }
6020 
6021 
6022 status_t
6023 _kern_fsync(int fd)
6024 {
6025 	return common_sync(fd, true);
6026 }
6027 
6028 
6029 status_t
6030 _kern_lock_node(int fd)
6031 {
6032 	return common_lock_node(fd, true);
6033 }
6034 
6035 
6036 status_t
6037 _kern_unlock_node(int fd)
6038 {
6039 	return common_unlock_node(fd, true);
6040 }
6041 
6042 
6043 status_t
6044 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
6045 {
6046 	return dir_create_entry_ref(device, inode, name, perms, true);
6047 }
6048 
6049 
6050 /**	\brief Creates a directory specified by a FD + path pair.
6051  *
6052  *	\a path must always be specified (it contains the name of the new directory
6053  *	at least). If only a path is given, this path identifies the location at
6054  *	which the directory shall be created. If both \a fd and \a path are given and
6055  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6056  *	of the directory (!) identified by \a fd.
6057  *
6058  *	\param fd The FD. May be < 0.
6059  *	\param path The absolute or relative path. Must not be \c NULL.
6060  *	\param perms The access permissions the new directory shall have.
6061  *	\return \c B_OK, if the directory has been created successfully, another
6062  *			error code otherwise.
6063  */
6064 
6065 status_t
6066 _kern_create_dir(int fd, const char *path, int perms)
6067 {
6068 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6069 	if (pathBuffer.InitCheck() != B_OK)
6070 		return B_NO_MEMORY;
6071 
6072 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6073 }
6074 
6075 
6076 status_t
6077 _kern_remove_dir(int fd, const char *path)
6078 {
6079 	if (path) {
6080 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6081 		if (pathBuffer.InitCheck() != B_OK)
6082 			return B_NO_MEMORY;
6083 
6084 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6085 	}
6086 
6087 	return dir_remove(fd, NULL, true);
6088 }
6089 
6090 
6091 /**	\brief Reads the contents of a symlink referred to by a FD + path pair.
6092  *
6093  *	At least one of \a fd and \a path must be specified.
6094  *	If only \a fd is given, the function the symlink to be read is the node
6095  *	identified by this FD. If only a path is given, this path identifies the
6096  *	symlink to be read. If both are given and the path is absolute, \a fd is
6097  *	ignored; a relative path is reckoned off of the directory (!) identified
6098  *	by \a fd.
6099  *	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6100  *	will still be updated to reflect the required buffer size.
6101  *
6102  *	\param fd The FD. May be < 0.
6103  *	\param path The absolute or relative path. May be \c NULL.
6104  *	\param buffer The buffer into which the contents of the symlink shall be
6105  *		   written.
6106  *	\param _bufferSize A pointer to the size of the supplied buffer.
6107  *	\return The length of the link on success or an appropriate error code
6108  */
6109 
6110 status_t
6111 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6112 {
6113 	status_t status;
6114 
6115 	if (path) {
6116 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6117 		if (pathBuffer.InitCheck() != B_OK)
6118 			return B_NO_MEMORY;
6119 
6120 		return common_read_link(fd, pathBuffer.LockBuffer(),
6121 			buffer, _bufferSize, true);
6122 	}
6123 
6124 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6125 }
6126 
6127 
6128 status_t
6129 _kern_write_link(const char *path, const char *toPath)
6130 {
6131 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6132 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6133 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6134 		return B_NO_MEMORY;
6135 
6136 	char *toBuffer = toPathBuffer.LockBuffer();
6137 
6138 	status_t status = check_path(toBuffer);
6139 	if (status < B_OK)
6140 		return status;
6141 
6142 	return common_write_link(pathBuffer.LockBuffer(), toBuffer, true);
6143 }
6144 
6145 
6146 /**	\brief Creates a symlink specified by a FD + path pair.
6147  *
6148  *	\a path must always be specified (it contains the name of the new symlink
6149  *	at least). If only a path is given, this path identifies the location at
6150  *	which the symlink shall be created. If both \a fd and \a path are given and
6151  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6152  *	of the directory (!) identified by \a fd.
6153  *
 *	\param fd The FD. May be < 0.
 *	\param path The absolute or relative path of the symlink to be created.
 *		   Must not be \c NULL.
 *	\param toPath The path the symlink shall point to.
 *	\param mode The access permissions the new symlink shall have.
6157  *	\return \c B_OK, if the symlink has been created successfully, another
6158  *			error code otherwise.
6159  */
6160 
6161 status_t
6162 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6163 {
6164 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6165 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6166 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6167 		return B_NO_MEMORY;
6168 
6169 	char *toBuffer = toPathBuffer.LockBuffer();
6170 
6171 	status_t status = check_path(toBuffer);
6172 	if (status < B_OK)
6173 		return status;
6174 
6175 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6176 		toBuffer, mode, true);
6177 }
6178 
6179 
6180 status_t
6181 _kern_create_link(const char *path, const char *toPath)
6182 {
6183 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6184 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6185 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6186 		return B_NO_MEMORY;
6187 
6188 	return common_create_link(pathBuffer.LockBuffer(),
6189 		toPathBuffer.LockBuffer(), true);
6190 }
6191 
6192 
6193 /**	\brief Removes an entry specified by a FD + path pair from its directory.
6194  *
6195  *	\a path must always be specified (it contains at least the name of the entry
6196  *	to be deleted). If only a path is given, this path identifies the entry
6197  *	directly. If both \a fd and \a path are given and the path is absolute,
6198  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6199  *	identified by \a fd.
6200  *
6201  *	\param fd The FD. May be < 0.
6202  *	\param path The absolute or relative path. Must not be \c NULL.
6203  *	\return \c B_OK, if the entry has been removed successfully, another
6204  *			error code otherwise.
6205  */
6206 
6207 status_t
6208 _kern_unlink(int fd, const char *path)
6209 {
6210 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6211 	if (pathBuffer.InitCheck() != B_OK)
6212 		return B_NO_MEMORY;
6213 
6214 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6215 }
6216 
6217 
6218 /**	\brief Moves an entry specified by a FD + path pair to a an entry specified
6219  *		   by another FD + path pair.
6220  *
6221  *	\a oldPath and \a newPath must always be specified (they contain at least
6222  *	the name of the entry). If only a path is given, this path identifies the
6223  *	entry directly. If both a FD and a path are given and the path is absolute,
6224  *	the FD is ignored; a relative path is reckoned off of the directory (!)
6225  *	identified by the respective FD.
6226  *
6227  *	\param oldFD The FD of the old location. May be < 0.
6228  *	\param oldPath The absolute or relative path of the old location. Must not
6229  *		   be \c NULL.
6230  *	\param newFD The FD of the new location. May be < 0.
6231  *	\param newPath The absolute or relative path of the new location. Must not
6232  *		   be \c NULL.
6233  *	\return \c B_OK, if the entry has been moved successfully, another
6234  *			error code otherwise.
6235  */
6236 
6237 status_t
6238 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6239 {
6240 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6241 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6242 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6243 		return B_NO_MEMORY;
6244 
6245 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6246 		newFD, newPathBuffer.LockBuffer(), true);
6247 }
6248 
6249 
6250 status_t
6251 _kern_access(const char *path, int mode)
6252 {
6253 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6254 	if (pathBuffer.InitCheck() != B_OK)
6255 		return B_NO_MEMORY;
6256 
6257 	return common_access(pathBuffer.LockBuffer(), mode, true);
6258 }
6259 
6260 
6261 /**	\brief Reads stat data of an entity specified by a FD + path pair.
6262  *
6263  *	If only \a fd is given, the stat operation associated with the type
6264  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6265  *	given, this path identifies the entry for whose node to retrieve the
6266  *	stat data. If both \a fd and \a path are given and the path is absolute,
6267  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6268  *	identified by \a fd and specifies the entry whose stat data shall be
6269  *	retrieved.
6270  *
6271  *	\param fd The FD. May be < 0.
6272  *	\param path The absolute or relative path. Must not be \c NULL.
6273  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6274  *		   function shall not stick to symlinks, but traverse them.
6275  *	\param stat The buffer the stat data shall be written into.
6276  *	\param statSize The size of the supplied stat buffer.
 *	\return \c B_OK, if the stat data have been read successfully, another
6278  *			error code otherwise.
6279  */
6280 
6281 status_t
6282 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
6283 	struct stat *stat, size_t statSize)
6284 {
6285 	struct stat completeStat;
6286 	struct stat *originalStat = NULL;
6287 	status_t status;
6288 
6289 	if (statSize > sizeof(struct stat))
6290 		return B_BAD_VALUE;
6291 
6292 	// this supports different stat extensions
6293 	if (statSize < sizeof(struct stat)) {
6294 		originalStat = stat;
6295 		stat = &completeStat;
6296 	}
6297 
6298 	if (path) {
6299 		// path given: get the stat of the node referred to by (fd, path)
6300 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6301 		if (pathBuffer.InitCheck() != B_OK)
6302 			return B_NO_MEMORY;
6303 
6304 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
6305 			traverseLeafLink, stat, true);
6306 	} else {
6307 		// no path given: get the FD and use the FD operation
6308 		struct file_descriptor *descriptor
6309 			= get_fd(get_current_io_context(true), fd);
6310 		if (descriptor == NULL)
6311 			return B_FILE_ERROR;
6312 
6313 		if (descriptor->ops->fd_read_stat)
6314 			status = descriptor->ops->fd_read_stat(descriptor, stat);
6315 		else
6316 			status = EOPNOTSUPP;
6317 
6318 		put_fd(descriptor);
6319 	}
6320 
6321 	if (status == B_OK && originalStat != NULL)
6322 		memcpy(originalStat, stat, statSize);
6323 
6324 	return status;
6325 }
6326 
6327 
6328 /**	\brief Writes stat data of an entity specified by a FD + path pair.
6329  *
6330  *	If only \a fd is given, the stat operation associated with the type
6331  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6332  *	given, this path identifies the entry for whose node to write the
6333  *	stat data. If both \a fd and \a path are given and the path is absolute,
6334  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6335  *	identified by \a fd and specifies the entry whose stat data shall be
6336  *	written.
6337  *
6338  *	\param fd The FD. May be < 0.
6339  *	\param path The absolute or relative path. Must not be \c NULL.
6340  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6341  *		   function shall not stick to symlinks, but traverse them.
6342  *	\param stat The buffer containing the stat data to be written.
6343  *	\param statSize The size of the supplied stat buffer.
6344  *	\param statMask A mask specifying which parts of the stat data shall be
6345  *		   written.
 *	\return \c B_OK, if the stat data have been written successfully,
6347  *			another error code otherwise.
6348  */
6349 
6350 status_t
6351 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
6352 	const struct stat *stat, size_t statSize, int statMask)
6353 {
6354 	struct stat completeStat;
6355 
6356 	if (statSize > sizeof(struct stat))
6357 		return B_BAD_VALUE;
6358 
6359 	// this supports different stat extensions
6360 	if (statSize < sizeof(struct stat)) {
6361 		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
6362 		memcpy(&completeStat, stat, statSize);
6363 		stat = &completeStat;
6364 	}
6365 
6366 	status_t status;
6367 
6368 	if (path) {
6369 		// path given: write the stat of the node referred to by (fd, path)
6370 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6371 		if (pathBuffer.InitCheck() != B_OK)
6372 			return B_NO_MEMORY;
6373 
6374 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
6375 			traverseLeafLink, stat, statMask, true);
6376 	} else {
6377 		// no path given: get the FD and use the FD operation
6378 		struct file_descriptor *descriptor
6379 			= get_fd(get_current_io_context(true), fd);
6380 		if (descriptor == NULL)
6381 			return B_FILE_ERROR;
6382 
6383 		if (descriptor->ops->fd_write_stat)
6384 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
6385 		else
6386 			status = EOPNOTSUPP;
6387 
6388 		put_fd(descriptor);
6389 	}
6390 
6391 	return status;
6392 }
6393 
6394 
6395 int
6396 _kern_open_attr_dir(int fd, const char *path)
6397 {
6398 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6399 	if (pathBuffer.InitCheck() != B_OK)
6400 		return B_NO_MEMORY;
6401 
6402 	if (path != NULL)
6403 		pathBuffer.SetTo(path);
6404 
6405 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6406 }
6407 
6408 
int
_kern_create_attr(int fd, const char *name, uint32 type, int openMode)
{
	// Kernel-space entry: creates and opens attribute \a name of \a type on
	// the node of \a fd (the final "true" marks a kernel caller, cf. the
	// _user_* variant passing false).
	return attr_create(fd, name, type, openMode, true);
}
6414 
6415 
int
_kern_open_attr(int fd, const char *name, int openMode)
{
	// Kernel-space entry: opens attribute \a name on the node of \a fd
	// (the final "true" marks a kernel caller).
	return attr_open(fd, name, openMode, true);
}
6421 
6422 
status_t
_kern_remove_attr(int fd, const char *name)
{
	// Kernel-space entry: removes attribute \a name from the node of \a fd.
	return attr_remove(fd, name, true);
}
6428 
6429 
status_t
_kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
{
	// Kernel-space entry: renames attribute \a fromName on \a fromFile's node
	// to \a toName on \a toFile's node.
	return attr_rename(fromFile, fromName, toFile, toName, true);
}
6435 
6436 
int
_kern_open_index_dir(dev_t device)
{
	// Kernel-space entry: opens the index directory of volume \a device.
	return index_dir_open(device, true);
}
6442 
6443 
status_t
_kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
{
	// Kernel-space entry: creates index \a name of \a type on volume
	// \a device.
	return index_create(device, name, type, flags, true);
}
6449 
6450 
status_t
_kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
{
	// Kernel-space entry: reads the stat data of index \a name on volume
	// \a device into \a stat.
	return index_name_read_stat(device, name, stat, true);
}
6456 
6457 
status_t
_kern_remove_index(dev_t device, const char *name)
{
	// Kernel-space entry: removes index \a name from volume \a device.
	return index_remove(device, name, true);
}
6463 
6464 
status_t
_kern_getcwd(char *buffer, size_t size)
{
	// Kernel-space entry: copies the current working directory path into
	// \a buffer (at most \a size bytes).
	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));

	// Call vfs to get current working directory
	return get_cwd(buffer, size, true);
}
6473 
6474 
6475 status_t
6476 _kern_setcwd(int fd, const char *path)
6477 {
6478 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6479 	if (pathBuffer.InitCheck() != B_OK)
6480 		return B_NO_MEMORY;
6481 
6482 	if (path != NULL)
6483 		pathBuffer.SetTo(path);
6484 
6485 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
6486 }
6487 
6488 
6489 //	#pragma mark -
6490 //	Calls from userland (with extra address checks)
6491 
6492 
6493 dev_t
6494 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
6495 	uint32 flags, const char *userArgs, size_t argsLength)
6496 {
6497 	char fileSystem[B_OS_NAME_LENGTH];
6498 	KPath path, device;
6499 	char *args = NULL;
6500 	status_t status;
6501 
6502 	if (!IS_USER_ADDRESS(userPath)
6503 		|| !IS_USER_ADDRESS(userFileSystem)
6504 		|| !IS_USER_ADDRESS(userDevice))
6505 		return B_BAD_ADDRESS;
6506 
6507 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
6508 		return B_NO_MEMORY;
6509 
6510 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
6511 		return B_BAD_ADDRESS;
6512 
6513 	if (userFileSystem != NULL
6514 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
6515 		return B_BAD_ADDRESS;
6516 
6517 	if (userDevice != NULL
6518 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
6519 		return B_BAD_ADDRESS;
6520 
6521 	if (userArgs != NULL && argsLength > 0) {
6522 		// this is a safety restriction
6523 		if (argsLength >= 65536)
6524 			return B_NAME_TOO_LONG;
6525 
6526 		args = (char *)malloc(argsLength + 1);
6527 		if (args == NULL)
6528 			return B_NO_MEMORY;
6529 
6530 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
6531 			free(args);
6532 			return B_BAD_ADDRESS;
6533 		}
6534 	}
6535 	path.UnlockBuffer();
6536 	device.UnlockBuffer();
6537 
6538 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
6539 		userFileSystem ? fileSystem : NULL, flags, args, false);
6540 
6541 	free(args);
6542 	return status;
6543 }
6544 
6545 
6546 status_t
6547 _user_unmount(const char *userPath, uint32 flags)
6548 {
6549 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6550 	if (pathBuffer.InitCheck() != B_OK)
6551 		return B_NO_MEMORY;
6552 
6553 	char *path = pathBuffer.LockBuffer();
6554 
6555 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6556 		return B_BAD_ADDRESS;
6557 
6558 	return fs_unmount(path, flags, false);
6559 }
6560 
6561 
6562 status_t
6563 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6564 {
6565 	struct fs_info info;
6566 	status_t status;
6567 
6568 	if (userInfo == NULL)
6569 		return B_BAD_VALUE;
6570 
6571 	if (!IS_USER_ADDRESS(userInfo))
6572 		return B_BAD_ADDRESS;
6573 
6574 	status = fs_read_info(device, &info);
6575 	if (status != B_OK)
6576 		return status;
6577 
6578 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6579 		return B_BAD_ADDRESS;
6580 
6581 	return B_OK;
6582 }
6583 
6584 
6585 status_t
6586 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6587 {
6588 	struct fs_info info;
6589 
6590 	if (userInfo == NULL)
6591 		return B_BAD_VALUE;
6592 
6593 	if (!IS_USER_ADDRESS(userInfo)
6594 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
6595 		return B_BAD_ADDRESS;
6596 
6597 	return fs_write_info(device, &info, mask);
6598 }
6599 
6600 
6601 dev_t
6602 _user_next_device(int32 *_userCookie)
6603 {
6604 	int32 cookie;
6605 	dev_t device;
6606 
6607 	if (!IS_USER_ADDRESS(_userCookie)
6608 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
6609 		return B_BAD_ADDRESS;
6610 
6611 	device = fs_next_device(&cookie);
6612 
6613 	if (device >= B_OK) {
6614 		// update user cookie
6615 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
6616 			return B_BAD_ADDRESS;
6617 	}
6618 
6619 	return device;
6620 }
6621 
6622 
status_t
_user_sync(void)
{
	// Syscall entry: simply forwards to the kernel-side sync implementation.
	return _kern_sync();
}
6628 
6629 
6630 status_t
6631 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
6632 	size_t infoSize)
6633 {
6634 	struct fd_info info;
6635 	uint32 cookie;
6636 
6637 	// only root can do this (or should root's group be enough?)
6638 	if (geteuid() != 0)
6639 		return B_NOT_ALLOWED;
6640 
6641 	if (infoSize != sizeof(fd_info))
6642 		return B_BAD_VALUE;
6643 
6644 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
6645 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
6646 		return B_BAD_ADDRESS;
6647 
6648 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
6649 	if (status < B_OK)
6650 		return status;
6651 
6652 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
6653 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
6654 		return B_BAD_ADDRESS;
6655 
6656 	return status;
6657 }
6658 
6659 
status_t
_user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
	char *userPath, size_t pathLength)
{
	// Resolves the entry_ref <device, inode, leaf> to an absolute path and
	// copies it into \a userPath. \a leaf may be NULL, in which case the
	// node itself (a directory) is resolved.
	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	struct vnode *vnode;
	status_t status;

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;

	// copy the leaf name onto the stack
	char stackLeaf[B_FILE_NAME_LENGTH];
	if (leaf) {
		if (!IS_USER_ADDRESS(leaf))
			return B_BAD_ADDRESS;

		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
		if (len < 0)
			return len;
		if (len >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
		// from here on, "leaf" aliases the kernel-side copy
		leaf = stackLeaf;

		// filter invalid leaf names
		if (leaf[0] == '\0' || strchr(leaf, '/'))
			return B_BAD_VALUE;
	}

	// get the vnode matching the dir's node_ref
	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
		// special cases "." and "..": we can directly get the vnode of the
		// referenced directory (and must not append the leaf afterwards,
		// hence leaf is reset to NULL)
		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
		leaf = NULL;
	} else
		status = get_vnode(device, inode, &vnode, false);
	if (status < B_OK)
		return status;

	char *path = pathBuffer.LockBuffer();

	// get the directory path
	status = dir_vnode_to_path(vnode, path, pathBuffer.BufferSize());
	put_vnode(vnode);
		// we don't need the vnode anymore
	if (status < B_OK)
		return status;

	// append the leaf name
	if (leaf) {
		// insert a directory separator if this is not the file system root
		if ((strcmp(path, "/") && strlcat(path, "/", pathBuffer.BufferSize())
				>= pathBuffer.BufferSize())
			|| strlcat(path, leaf, pathBuffer.BufferSize()) >= pathBuffer.BufferSize()) {
			return B_NAME_TOO_LONG;
		}
	}

	// copy the assembled path back to userland
	int len = user_strlcpy(userPath, path, pathLength);
	if (len < 0)
		return len;
	if (len >= (int)pathLength)
		return B_BUFFER_OVERFLOW;

	return B_OK;
}
6730 
6731 
6732 int
6733 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
6734 	int openMode, int perms)
6735 {
6736 	char name[B_FILE_NAME_LENGTH];
6737 
6738 	if (userName == NULL || device < 0 || inode < 0)
6739 		return B_BAD_VALUE;
6740 	if (!IS_USER_ADDRESS(userName)
6741 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6742 		return B_BAD_ADDRESS;
6743 
6744 	if (openMode & O_CREAT)
6745 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
6746 
6747 	return file_open_entry_ref(device, inode, name, openMode, false);
6748 }
6749 
6750 
6751 int
6752 _user_open(int fd, const char *userPath, int openMode, int perms)
6753 {
6754 	KPath path(B_PATH_NAME_LENGTH + 1);
6755 	if (path.InitCheck() != B_OK)
6756 		return B_NO_MEMORY;
6757 
6758 	char *buffer = path.LockBuffer();
6759 
6760 	if (!IS_USER_ADDRESS(userPath)
6761 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6762 		return B_BAD_ADDRESS;
6763 
6764 	if (openMode & O_CREAT)
6765 		return file_create(fd, buffer, openMode, perms, false);
6766 
6767 	return file_open(fd, buffer, openMode, false);
6768 }
6769 
6770 
6771 int
6772 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
6773 {
6774 	if (userName != NULL) {
6775 		char name[B_FILE_NAME_LENGTH];
6776 
6777 		if (!IS_USER_ADDRESS(userName)
6778 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6779 			return B_BAD_ADDRESS;
6780 
6781 		return dir_open_entry_ref(device, inode, name, false);
6782 	}
6783 	return dir_open_entry_ref(device, inode, NULL, false);
6784 }
6785 
6786 
6787 int
6788 _user_open_dir(int fd, const char *userPath)
6789 {
6790 	KPath path(B_PATH_NAME_LENGTH + 1);
6791 	if (path.InitCheck() != B_OK)
6792 		return B_NO_MEMORY;
6793 
6794 	char *buffer = path.LockBuffer();
6795 
6796 	if (!IS_USER_ADDRESS(userPath)
6797 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6798 		return B_BAD_ADDRESS;
6799 
6800 	return dir_open(fd, buffer, false);
6801 }
6802 
6803 
6804 /**	\brief Opens a directory's parent directory and returns the entry name
6805  *		   of the former.
6806  *
 *	Aside from that it returns the directory's entry name, this method is
6808  *	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
6809  *	equivalent, if \a userName is \c NULL.
6810  *
6811  *	If a name buffer is supplied and the name does not fit the buffer, the
6812  *	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
6813  *
6814  *	\param fd A FD referring to a directory.
6815  *	\param userName Buffer the directory's entry name shall be written into.
6816  *		   May be \c NULL.
6817  *	\param nameLength Size of the name buffer.
6818  *	\return The file descriptor of the opened parent directory, if everything
6819  *			went fine, an error code otherwise.
6820  */
6821 
int
_user_open_parent_dir(int fd, char *userName, size_t nameLength)
{
	// Opens the parent directory of the directory denoted by \a fd and, if
	// \a userName is given, also reports the entry name of that directory
	// within its parent.
	bool kernel = false;

	if (userName && !IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	// open the parent dir
	int parentFD = dir_open(fd, "..", kernel);
	if (parentFD < 0)
		return parentFD;
	FDCloser fdCloser(parentFD, kernel);
		// presumably closes parentFD on the error paths below;
		// Detach() at the end hands ownership to the caller

	if (userName) {
		// get the vnodes of both the parent and the directory itself
		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
		VNodePutter parentVNodePutter(parentVNode);
		VNodePutter dirVNodePutter(dirVNode);
		if (!parentVNode || !dirVNode)
			return B_FILE_ERROR;

		// get the vnode name
		char name[B_FILE_NAME_LENGTH];
		status_t status = get_vnode_name(dirVNode, parentVNode,
			name, sizeof(name));
		if (status != B_OK)
			return status;

		// copy the name to the userland buffer
		int len = user_strlcpy(userName, name, nameLength);
		if (len < 0)
			return len;
		if (len >= (int)nameLength)
			return B_BUFFER_OVERFLOW;
	}

	// success: release the FD from the closer and return it
	return fdCloser.Detach();
}
6862 
6863 
status_t
_user_fcntl(int fd, int op, uint32 argument)
{
	// Syscall entry: fcntl() on \a fd; all validation is left to
	// common_fcntl() (the "false" marks a userland caller).
	return common_fcntl(fd, op, argument, false);
}
6869 
6870 
status_t
_user_fsync(int fd)
{
	// Syscall entry: flushes the data of \a fd via common_sync().
	return common_sync(fd, false);
}
6876 
6877 
status_t
_user_lock_node(int fd)
{
	// Syscall entry: locks the node referred to by \a fd.
	return common_lock_node(fd, false);
}
6883 
6884 
status_t
_user_unlock_node(int fd)
{
	// Syscall entry: unlocks the node referred to by \a fd.
	return common_unlock_node(fd, false);
}
6890 
6891 
6892 status_t
6893 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
6894 {
6895 	char name[B_FILE_NAME_LENGTH];
6896 	status_t status;
6897 
6898 	if (!IS_USER_ADDRESS(userName))
6899 		return B_BAD_ADDRESS;
6900 
6901 	status = user_strlcpy(name, userName, sizeof(name));
6902 	if (status < 0)
6903 		return status;
6904 
6905 	return dir_create_entry_ref(device, inode, name, perms, false);
6906 }
6907 
6908 
6909 status_t
6910 _user_create_dir(int fd, const char *userPath, int perms)
6911 {
6912 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6913 	if (pathBuffer.InitCheck() != B_OK)
6914 		return B_NO_MEMORY;
6915 
6916 	char *path = pathBuffer.LockBuffer();
6917 
6918 	if (!IS_USER_ADDRESS(userPath)
6919 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6920 		return B_BAD_ADDRESS;
6921 
6922 	return dir_create(fd, path, perms, false);
6923 }
6924 
6925 
6926 status_t
6927 _user_remove_dir(int fd, const char *userPath)
6928 {
6929 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6930 	if (pathBuffer.InitCheck() != B_OK)
6931 		return B_NO_MEMORY;
6932 
6933 	char *path = pathBuffer.LockBuffer();
6934 
6935 	if (userPath != NULL) {
6936 		if (!IS_USER_ADDRESS(userPath)
6937 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6938 			return B_BAD_ADDRESS;
6939 	}
6940 
6941 	return dir_remove(fd, userPath ? path : NULL, false);
6942 }
6943 
6944 
status_t
_user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
{
	// Reads the contents of the symlink denoted by (fd, path) into
	// \a userBuffer. *userBufferSize is an in/out parameter: on input the
	// buffer capacity, on output the link's length (also updated on error,
	// see below).
	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	size_t bufferSize;

	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
		return B_BAD_ADDRESS;

	char *path = pathBuffer.LockBuffer();
	char *buffer = linkBuffer.LockBuffer();

	if (userPath) {
		if (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
			return B_BAD_ADDRESS;

		// clamp the request to the size of our temporary link buffer
		if (bufferSize > B_PATH_NAME_LENGTH)
			bufferSize = B_PATH_NAME_LENGTH;
	}

	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
		&bufferSize, false);

	// we also update the bufferSize in case of errors
	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
		return B_BAD_ADDRESS;

	if (status < B_OK)
		return status;

	// NOTE(review): assumes common_read_link() NUL-terminates \a buffer —
	// user_strlcpy() relies on the terminator; verify in common_read_link().
	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
		return B_BAD_ADDRESS;

	return B_OK;
}
6986 
6987 
6988 status_t
6989 _user_write_link(const char *userPath, const char *userToPath)
6990 {
6991 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6992 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
6993 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6994 		return B_NO_MEMORY;
6995 
6996 	char *path = pathBuffer.LockBuffer();
6997 	char *toPath = toPathBuffer.LockBuffer();
6998 
6999 	if (!IS_USER_ADDRESS(userPath)
7000 		|| !IS_USER_ADDRESS(userToPath)
7001 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7002 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7003 		return B_BAD_ADDRESS;
7004 
7005 	status_t status = check_path(toPath);
7006 	if (status < B_OK)
7007 		return status;
7008 
7009 	return common_write_link(path, toPath, false);
7010 }
7011 
7012 
7013 status_t
7014 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
7015 	int mode)
7016 {
7017 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7018 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7019 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7020 		return B_NO_MEMORY;
7021 
7022 	char *path = pathBuffer.LockBuffer();
7023 	char *toPath = toPathBuffer.LockBuffer();
7024 
7025 	if (!IS_USER_ADDRESS(userPath)
7026 		|| !IS_USER_ADDRESS(userToPath)
7027 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7028 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7029 		return B_BAD_ADDRESS;
7030 
7031 	status_t status = check_path(toPath);
7032 	if (status < B_OK)
7033 		return status;
7034 
7035 	return common_create_symlink(fd, path, toPath, mode, false);
7036 }
7037 
7038 
7039 status_t
7040 _user_create_link(const char *userPath, const char *userToPath)
7041 {
7042 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7043 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7044 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7045 		return B_NO_MEMORY;
7046 
7047 	char *path = pathBuffer.LockBuffer();
7048 	char *toPath = toPathBuffer.LockBuffer();
7049 
7050 	if (!IS_USER_ADDRESS(userPath)
7051 		|| !IS_USER_ADDRESS(userToPath)
7052 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7053 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7054 		return B_BAD_ADDRESS;
7055 
7056 	status_t status = check_path(toPath);
7057 	if (status < B_OK)
7058 		return status;
7059 
7060 	return common_create_link(path, toPath, false);
7061 }
7062 
7063 
7064 status_t
7065 _user_unlink(int fd, const char *userPath)
7066 {
7067 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7068 	if (pathBuffer.InitCheck() != B_OK)
7069 		return B_NO_MEMORY;
7070 
7071 	char *path = pathBuffer.LockBuffer();
7072 
7073 	if (!IS_USER_ADDRESS(userPath)
7074 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7075 		return B_BAD_ADDRESS;
7076 
7077 	return common_unlink(fd, path, false);
7078 }
7079 
7080 
7081 status_t
7082 _user_rename(int oldFD, const char *userOldPath, int newFD,
7083 	const char *userNewPath)
7084 {
7085 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7086 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7087 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7088 		return B_NO_MEMORY;
7089 
7090 	char *oldPath = oldPathBuffer.LockBuffer();
7091 	char *newPath = newPathBuffer.LockBuffer();
7092 
7093 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7094 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7095 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7096 		return B_BAD_ADDRESS;
7097 
7098 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7099 }
7100 
7101 
7102 status_t
7103 _user_access(const char *userPath, int mode)
7104 {
7105 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7106 	if (pathBuffer.InitCheck() != B_OK)
7107 		return B_NO_MEMORY;
7108 
7109 	char *path = pathBuffer.LockBuffer();
7110 
7111 	if (!IS_USER_ADDRESS(userPath)
7112 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7113 		return B_BAD_ADDRESS;
7114 
7115 	return common_access(path, mode, false);
7116 }
7117 
7118 
7119 status_t
7120 _user_read_stat(int fd, const char *userPath, bool traverseLink,
7121 	struct stat *userStat, size_t statSize)
7122 {
7123 	struct stat stat;
7124 	status_t status;
7125 
7126 	if (statSize > sizeof(struct stat))
7127 		return B_BAD_VALUE;
7128 
7129 	if (!IS_USER_ADDRESS(userStat))
7130 		return B_BAD_ADDRESS;
7131 
7132 	if (userPath) {
7133 		// path given: get the stat of the node referred to by (fd, path)
7134 		if (!IS_USER_ADDRESS(userPath))
7135 			return B_BAD_ADDRESS;
7136 
7137 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7138 		if (pathBuffer.InitCheck() != B_OK)
7139 			return B_NO_MEMORY;
7140 
7141 		char *path = pathBuffer.LockBuffer();
7142 
7143 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7144 		if (length < B_OK)
7145 			return length;
7146 		if (length >= B_PATH_NAME_LENGTH)
7147 			return B_NAME_TOO_LONG;
7148 
7149 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
7150 	} else {
7151 		// no path given: get the FD and use the FD operation
7152 		struct file_descriptor *descriptor
7153 			= get_fd(get_current_io_context(false), fd);
7154 		if (descriptor == NULL)
7155 			return B_FILE_ERROR;
7156 
7157 		if (descriptor->ops->fd_read_stat)
7158 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
7159 		else
7160 			status = EOPNOTSUPP;
7161 
7162 		put_fd(descriptor);
7163 	}
7164 
7165 	if (status < B_OK)
7166 		return status;
7167 
7168 	return user_memcpy(userStat, &stat, statSize);
7169 }
7170 
7171 
7172 status_t
7173 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
7174 	const struct stat *userStat, size_t statSize, int statMask)
7175 {
7176 	if (statSize > sizeof(struct stat))
7177 		return B_BAD_VALUE;
7178 
7179 	struct stat stat;
7180 
7181 	if (!IS_USER_ADDRESS(userStat)
7182 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
7183 		return B_BAD_ADDRESS;
7184 
7185 	// clear additional stat fields
7186 	if (statSize < sizeof(struct stat))
7187 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
7188 
7189 	status_t status;
7190 
7191 	if (userPath) {
7192 		// path given: write the stat of the node referred to by (fd, path)
7193 		if (!IS_USER_ADDRESS(userPath))
7194 			return B_BAD_ADDRESS;
7195 
7196 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7197 		if (pathBuffer.InitCheck() != B_OK)
7198 			return B_NO_MEMORY;
7199 
7200 		char *path = pathBuffer.LockBuffer();
7201 
7202 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7203 		if (length < B_OK)
7204 			return length;
7205 		if (length >= B_PATH_NAME_LENGTH)
7206 			return B_NAME_TOO_LONG;
7207 
7208 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
7209 			statMask, false);
7210 	} else {
7211 		// no path given: get the FD and use the FD operation
7212 		struct file_descriptor *descriptor
7213 			= get_fd(get_current_io_context(false), fd);
7214 		if (descriptor == NULL)
7215 			return B_FILE_ERROR;
7216 
7217 		if (descriptor->ops->fd_write_stat)
7218 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
7219 		else
7220 			status = EOPNOTSUPP;
7221 
7222 		put_fd(descriptor);
7223 	}
7224 
7225 	return status;
7226 }
7227 
7228 
7229 int
7230 _user_open_attr_dir(int fd, const char *userPath)
7231 {
7232 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7233 	if (pathBuffer.InitCheck() != B_OK)
7234 		return B_NO_MEMORY;
7235 
7236 	char *path = pathBuffer.LockBuffer();
7237 
7238 	if (userPath != NULL) {
7239 		if (!IS_USER_ADDRESS(userPath)
7240 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7241 			return B_BAD_ADDRESS;
7242 	}
7243 
7244 	return attr_dir_open(fd, userPath ? path : NULL, false);
7245 }
7246 
7247 
7248 int
7249 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7250 {
7251 	char name[B_FILE_NAME_LENGTH];
7252 
7253 	if (!IS_USER_ADDRESS(userName)
7254 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7255 		return B_BAD_ADDRESS;
7256 
7257 	return attr_create(fd, name, type, openMode, false);
7258 }
7259 
7260 
7261 int
7262 _user_open_attr(int fd, const char *userName, int openMode)
7263 {
7264 	char name[B_FILE_NAME_LENGTH];
7265 
7266 	if (!IS_USER_ADDRESS(userName)
7267 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7268 		return B_BAD_ADDRESS;
7269 
7270 	return attr_open(fd, name, openMode, false);
7271 }
7272 
7273 
7274 status_t
7275 _user_remove_attr(int fd, const char *userName)
7276 {
7277 	char name[B_FILE_NAME_LENGTH];
7278 
7279 	if (!IS_USER_ADDRESS(userName)
7280 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7281 		return B_BAD_ADDRESS;
7282 
7283 	return attr_remove(fd, name, false);
7284 }
7285 
7286 
7287 status_t
7288 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
7289 {
7290 	if (!IS_USER_ADDRESS(userFromName)
7291 		|| !IS_USER_ADDRESS(userToName))
7292 		return B_BAD_ADDRESS;
7293 
7294 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
7295 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
7296 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
7297 		return B_NO_MEMORY;
7298 
7299 	char *fromName = fromNameBuffer.LockBuffer();
7300 	char *toName = toNameBuffer.LockBuffer();
7301 
7302 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
7303 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
7304 		return B_BAD_ADDRESS;
7305 
7306 	return attr_rename(fromFile, fromName, toFile, toName, false);
7307 }
7308 
7309 
int
_user_open_index_dir(dev_t device)
{
	// Syscall entry: opens the index directory of volume \a device.
	return index_dir_open(device, false);
}
7315 
7316 
7317 status_t
7318 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7319 {
7320 	char name[B_FILE_NAME_LENGTH];
7321 
7322 	if (!IS_USER_ADDRESS(userName)
7323 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7324 		return B_BAD_ADDRESS;
7325 
7326 	return index_create(device, name, type, flags, false);
7327 }
7328 
7329 
7330 status_t
7331 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
7332 {
7333 	char name[B_FILE_NAME_LENGTH];
7334 	struct stat stat;
7335 	status_t status;
7336 
7337 	if (!IS_USER_ADDRESS(userName)
7338 		|| !IS_USER_ADDRESS(userStat)
7339 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7340 		return B_BAD_ADDRESS;
7341 
7342 	status = index_name_read_stat(device, name, &stat, false);
7343 	if (status == B_OK) {
7344 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
7345 			return B_BAD_ADDRESS;
7346 	}
7347 
7348 	return status;
7349 }
7350 
7351 
7352 status_t
7353 _user_remove_index(dev_t device, const char *userName)
7354 {
7355 	char name[B_FILE_NAME_LENGTH];
7356 
7357 	if (!IS_USER_ADDRESS(userName)
7358 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7359 		return B_BAD_ADDRESS;
7360 
7361 	return index_remove(device, name, false);
7362 }
7363 
7364 
7365 status_t
7366 _user_getcwd(char *userBuffer, size_t size)
7367 {
7368 	if (!IS_USER_ADDRESS(userBuffer))
7369 		return B_BAD_ADDRESS;
7370 
7371 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7372 	if (pathBuffer.InitCheck() != B_OK)
7373 		return B_NO_MEMORY;
7374 
7375 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
7376 
7377 	if (size > B_PATH_NAME_LENGTH)
7378 		size = B_PATH_NAME_LENGTH;
7379 
7380 	char *path = pathBuffer.LockBuffer();
7381 
7382 	status_t status = get_cwd(path, size, false);
7383 	if (status < B_OK)
7384 		return status;
7385 
7386 	// Copy back the result
7387 	if (user_strlcpy(userBuffer, path, size) < B_OK)
7388 		return B_BAD_ADDRESS;
7389 
7390 	return status;
7391 }
7392 
7393 
7394 status_t
7395 _user_setcwd(int fd, const char *userPath)
7396 {
7397 	TRACE(("user_setcwd: path = %p\n", userPath));
7398 
7399 	KPath pathBuffer(B_PATH_NAME_LENGTH);
7400 	if (pathBuffer.InitCheck() != B_OK)
7401 		return B_NO_MEMORY;
7402 
7403 	char *path = pathBuffer.LockBuffer();
7404 
7405 	if (userPath != NULL) {
7406 		if (!IS_USER_ADDRESS(userPath)
7407 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7408 			return B_BAD_ADDRESS;
7409 	}
7410 
7411 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
7412 }
7413 
7414 
7415 int
7416 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
7417 	uint32 flags, port_id port, int32 token)
7418 {
7419 	char *query;
7420 
7421 	if (device < 0 || userQuery == NULL || queryLength == 0)
7422 		return B_BAD_VALUE;
7423 
7424 	// this is a safety restriction
7425 	if (queryLength >= 65536)
7426 		return B_NAME_TOO_LONG;
7427 
7428 	query = (char *)malloc(queryLength + 1);
7429 	if (query == NULL)
7430 		return B_NO_MEMORY;
7431 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
7432 		free(query);
7433 		return B_BAD_ADDRESS;
7434 	}
7435 
7436 	int fd = query_open(device, query, flags, port, token, false);
7437 
7438 	free(query);
7439 	return fd;
7440 }
7441