xref: /haiku/src/system/kernel/fs/vfs.cpp (revision cd552c7a15cc10c36dae8d7439ba1d6c0bb168c5)
1 /*
2  * Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /* Virtual File System and File System Interface Layer */
10 
11 #include <OS.h>
12 #include <StorageDefs.h>
13 #include <fs_info.h>
14 #include <fs_interface.h>
15 #include <fs_volume.h>
16 
17 #include <block_cache.h>
18 #include <fd.h>
19 #include <file_cache.h>
20 #include <khash.h>
21 #include <KPath.h>
22 #include <lock.h>
23 #include <syscalls.h>
24 #include <vfs.h>
25 #include <vm.h>
26 #include <vm_cache.h>
27 #include <vm_low_memory.h>
28 
29 #include <boot/kernel_args.h>
30 #include <disk_device_manager/KDiskDevice.h>
31 #include <disk_device_manager/KDiskDeviceManager.h>
32 #include <disk_device_manager/KDiskDeviceUtils.h>
33 #include <disk_device_manager/KDiskSystem.h>
34 #include <fs/node_monitor.h>
35 #include <util/kernel_cpp.h>
36 
37 #include <string.h>
38 #include <stdio.h>
39 #include <ctype.h>
40 #include <unistd.h>
41 #include <sys/stat.h>
42 #include <sys/resource.h>
43 #include <fcntl.h>
44 #include <limits.h>
45 #include <stddef.h>
46 
47 //#define TRACE_VFS
48 #ifdef TRACE_VFS
49 #	define TRACE(x) dprintf x
50 #	define FUNCTION(x) dprintf x
51 #else
52 #	define TRACE(x) ;
53 #	define FUNCTION(x) ;
54 #endif
55 
56 #define ADD_DEBUGGER_COMMANDS
57 
58 #define MAX_SYM_LINKS SYMLINKS_MAX
59 
60 const static uint32 kMaxUnusedVnodes = 8192;
61 	// This is the maximum number of unused vnodes that the system
62 	// will keep around.
63 	// It may be chosen with respect to the available memory or enhanced
64 	// by some timestamp/frequency heuristic.
65 
66 struct vnode {
67 	struct vnode	*next;
68 	vm_cache_ref	*cache;
69 	mount_id		device;
70 	list_link		mount_link;
71 	list_link		unused_link;
72 	vnode_id		id;
73 	fs_vnode		private_node;
74 	struct fs_mount	*mount;
75 	struct vnode	*covered_by;
76 	int32			ref_count;
77 	uint8			remove : 1;
78 	uint8			busy : 1;
79 	uint8			unpublished : 1;
80 	struct advisory_locking	*advisory_locking;
81 };
82 
83 struct vnode_hash_key {
84 	mount_id	device;
85 	vnode_id	vnode;
86 };
87 
88 #define FS_CALL(vnode, op) (vnode->mount->fs->op)
89 #define FS_MOUNT_CALL(mount, op) (mount->fs->op)
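// Both macros merely fetch the hook from the file system module; the caller
// supplies the arguments itself, as in this (hypothetical) sketch:
//
//	if (FS_CALL(vnode, read_stat) != NULL)
//		status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
//			vnode->private_node, &stat);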
90 
91 struct fs_mount {
92 	struct fs_mount	*next;
93 	file_system_module_info *fs;
94 	mount_id		id;
95 	void			*cookie;
96 	char			*device_name;
97 	char			*fs_name;
98 	recursive_lock	rlock;	// guards the vnodes list
99 	struct vnode	*root_vnode;
100 	struct vnode	*covers_vnode;
101 	KPartition		*partition;
102 	struct list		vnodes;
103 	bool			unmounting;
104 	bool			owns_file_device;
105 };
106 
107 struct advisory_locking {
108 	sem_id			lock;
109 	sem_id			wait_sem;
110 	struct list		locks;
111 };
112 
113 struct advisory_lock {
114 	list_link		link;
115 	team_id			team;
116 	off_t			offset;
117 	off_t			length;
118 	bool			shared;
119 };
120 
121 static mutex sFileSystemsMutex;
122 
123 /**	\brief Guards sMountsTable.
124  *
125  *	The holder is allowed read/write access to sMountsTable.
126  *	Manipulation of the fs_mount structures themselves
127  *	(and their destruction) requires different locks though.
128  */
129 static mutex sMountMutex;
130 
131 /**	\brief Guards mount/unmount operations.
132  *
133  *	fs_mount() and fs_unmount() hold the lock during their whole operation.
134  *	That is, holding the lock ensures that no FS is mounted/unmounted. In
135  *	particular this means that
136  *	- sMountsTable will not be modified,
137  *	- the fields immutable after initialization of the fs_mount structures in
138  *	  sMountsTable will not be modified,
139  *	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
140  *
141  *	The thread trying to lock the lock must not hold sVnodeMutex or
142  *	sMountMutex.
143  */
144 static recursive_lock sMountOpLock;
145 
146 /**	\brief Guards sVnodeTable.
147  *
148  *	The holder is allowed read/write access to sVnodeTable and to
149  *	any unbusy vnode in that table, save
150  *	for the immutable fields (device, id, private_node, mount), to which
151  *	only read-only access is allowed, and for the field covered_by, which is
152  *	guarded by sMountOpLock.
153  *
154  *	The thread trying to lock the mutex must not hold sMountMutex.
155  *	You must not have this mutex held when calling create_sem(), as this
156  *	might call vfs_free_unused_vnodes().
157  */
158 static mutex sVnodeMutex;
159 
160 #define VNODE_HASH_TABLE_SIZE 1024
161 static hash_table *sVnodeTable;
162 static list sUnusedVnodeList;
163 static uint32 sUnusedVnodes = 0;
164 static struct vnode *sRoot;
165 
166 #define MOUNTS_HASH_TABLE_SIZE 16
167 static hash_table *sMountsTable;
168 static mount_id sNextMountID = 1;
169 
170 mode_t __gUmask = 022;
171 
172 /* function declarations */
173 
174 // file descriptor operation prototypes
175 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
176 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
177 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
178 static void file_free_fd(struct file_descriptor *);
179 static status_t file_close(struct file_descriptor *);
180 static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
181 	struct select_sync *sync);
182 static status_t file_deselect(struct file_descriptor *, uint8 event,
183 	struct select_sync *sync);
184 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
185 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
186 static status_t dir_rewind(struct file_descriptor *);
187 static void dir_free_fd(struct file_descriptor *);
188 static status_t dir_close(struct file_descriptor *);
189 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
190 static status_t attr_dir_rewind(struct file_descriptor *);
191 static void attr_dir_free_fd(struct file_descriptor *);
192 static status_t attr_dir_close(struct file_descriptor *);
193 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
194 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
195 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
196 static void attr_free_fd(struct file_descriptor *);
197 static status_t attr_close(struct file_descriptor *);
198 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
199 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
200 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
201 static status_t index_dir_rewind(struct file_descriptor *);
202 static void index_dir_free_fd(struct file_descriptor *);
203 static status_t index_dir_close(struct file_descriptor *);
204 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
205 static status_t query_rewind(struct file_descriptor *);
206 static void query_free_fd(struct file_descriptor *);
207 static status_t query_close(struct file_descriptor *);
208 
209 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
210 static status_t common_read_stat(struct file_descriptor *, struct stat *);
211 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
212 
213 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
214 	bool traverseLeafLink, int count, struct vnode **_vnode, vnode_id *_parentID, int *_type);
215 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
216 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
217 	struct vnode **_vnode, vnode_id *_parentID, bool kernel);
218 static void inc_vnode_ref_count(struct vnode *vnode);
219 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
220 static inline void put_vnode(struct vnode *vnode);
221 
222 static struct fd_ops sFileOps = {
223 	file_read,
224 	file_write,
225 	file_seek,
226 	common_ioctl,
227 	file_select,
228 	file_deselect,
229 	NULL,		// read_dir()
230 	NULL,		// rewind_dir()
231 	common_read_stat,
232 	common_write_stat,
233 	file_close,
234 	file_free_fd
235 };
236 
237 static struct fd_ops sDirectoryOps = {
238 	NULL,		// read()
239 	NULL,		// write()
240 	NULL,		// seek()
241 	common_ioctl,
242 	NULL,		// select()
243 	NULL,		// deselect()
244 	dir_read,
245 	dir_rewind,
246 	common_read_stat,
247 	common_write_stat,
248 	dir_close,
249 	dir_free_fd
250 };
251 
252 static struct fd_ops sAttributeDirectoryOps = {
253 	NULL,		// read()
254 	NULL,		// write()
255 	NULL,		// seek()
256 	common_ioctl,
257 	NULL,		// select()
258 	NULL,		// deselect()
259 	attr_dir_read,
260 	attr_dir_rewind,
261 	common_read_stat,
262 	common_write_stat,
263 	attr_dir_close,
264 	attr_dir_free_fd
265 };
266 
267 static struct fd_ops sAttributeOps = {
268 	attr_read,
269 	attr_write,
270 	attr_seek,
271 	common_ioctl,
272 	NULL,		// select()
273 	NULL,		// deselect()
274 	NULL,		// read_dir()
275 	NULL,		// rewind_dir()
276 	attr_read_stat,
277 	attr_write_stat,
278 	attr_close,
279 	attr_free_fd
280 };
281 
282 static struct fd_ops sIndexDirectoryOps = {
283 	NULL,		// read()
284 	NULL,		// write()
285 	NULL,		// seek()
286 	NULL,		// ioctl()
287 	NULL,		// select()
288 	NULL,		// deselect()
289 	index_dir_read,
290 	index_dir_rewind,
291 	NULL,		// read_stat()
292 	NULL,		// write_stat()
293 	index_dir_close,
294 	index_dir_free_fd
295 };
296 
297 #if 0
298 static struct fd_ops sIndexOps = {
299 	NULL,		// read()
300 	NULL,		// write()
301 	NULL,		// seek()
302 	NULL,		// ioctl()
303 	NULL,		// select()
304 	NULL,		// deselect()
305 	NULL,		// dir_read()
306 	NULL,		// dir_rewind()
307 	index_read_stat,	// read_stat()
308 	NULL,		// write_stat()
309 	NULL,		// dir_close()
310 	NULL		// free_fd()
311 };
312 #endif
313 
314 static struct fd_ops sQueryOps = {
315 	NULL,		// read()
316 	NULL,		// write()
317 	NULL,		// seek()
318 	NULL,		// ioctl()
319 	NULL,		// select()
320 	NULL,		// deselect()
321 	query_read,
322 	query_rewind,
323 	NULL,		// read_stat()
324 	NULL,		// write_stat()
325 	query_close,
326 	query_free_fd
327 };
328 
329 
330 // VNodePutter
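// Small RAII helper that releases a vnode reference when it goes out of
// scope (unless the vnode has been Detach()ed before). A hypothetical
// usage sketch:
//
//	struct vnode *vnode;
//	if (get_vnode(mountID, vnodeID, &vnode, false) == B_OK) {
//		VNodePutter putter(vnode);
//		// ... work with the vnode ...
//	}	// put_vnode() is called automatically here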
331 class VNodePutter {
332 public:
333 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
334 
335 	~VNodePutter()
336 	{
337 		Put();
338 	}
339 
340 	void SetTo(struct vnode *vnode)
341 	{
342 		Put();
343 		fVNode = vnode;
344 	}
345 
346 	void Put()
347 	{
348 		if (fVNode) {
349 			put_vnode(fVNode);
350 			fVNode = NULL;
351 		}
352 	}
353 
354 	struct vnode *Detach()
355 	{
356 		struct vnode *vnode = fVNode;
357 		fVNode = NULL;
358 		return vnode;
359 	}
360 
361 private:
362 	struct vnode *fVNode;
363 };
364 
365 
366 class FDCloser {
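// RAII companion for file descriptors: closes the FD (via the kernel or
// the userland close()) unless it is Detach()ed first. A hypothetical sketch:
//
//	FDCloser fdCloser(fd, kernel);
//	if (status != B_OK)
//		return status;	// the descriptor is closed automatically
//	return fdCloser.Detach();	// success - hand the FD to the caller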
367 public:
368 	FDCloser() : fFD(-1), fKernel(true) {}
369 
370 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
371 
372 	~FDCloser()
373 	{
374 		Close();
375 	}
376 
377 	void SetTo(int fd, bool kernel)
378 	{
379 		Close();
380 		fFD = fd;
381 		fKernel = kernel;
382 	}
383 
384 	void Close()
385 	{
386 		if (fFD >= 0) {
387 			if (fKernel)
388 				_kern_close(fFD);
389 			else
390 				_user_close(fFD);
391 			fFD = -1;
392 		}
393 	}
394 
395 	int Detach()
396 	{
397 		int fd = fFD;
398 		fFD = -1;
399 		return fd;
400 	}
401 
402 private:
403 	int		fFD;
404 	bool	fKernel;
405 };
406 
407 
408 static int
409 mount_compare(void *_m, const void *_key)
410 {
411 	struct fs_mount *mount = (fs_mount *)_m;
412 	const mount_id *id = (mount_id *)_key;
413 
414 	if (mount->id == *id)
415 		return 0;
416 
417 	return -1;
418 }
419 
420 
421 static uint32
422 mount_hash(void *_m, const void *_key, uint32 range)
423 {
424 	struct fs_mount *mount = (fs_mount *)_m;
425 	const mount_id *id = (mount_id *)_key;
426 
427 	if (mount)
428 		return mount->id % range;
429 
430 	return (uint32)*id % range;
431 }
432 
433 
434 /** Finds the mounted device (the fs_mount structure) with the given ID.
435  *	Note, you must hold the sMountMutex lock when you call this function.
436  */
437 
438 static struct fs_mount *
439 find_mount(mount_id id)
440 {
441 	ASSERT_LOCKED_MUTEX(&sMountMutex);
442 
443 	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
444 }
445 
446 
447 static status_t
448 get_mount(mount_id id, struct fs_mount **_mount)
449 {
450 	struct fs_mount *mount;
451 	status_t status = B_BUSY;
452 
453 	mutex_lock(&sMountMutex);
454 
455 	mount = find_mount(id);
456 	if (mount) {
457 		// ToDo: the volume is locked (against removal) by locking
458 		//	its root node - investigate if that's a good idea
459 		if (mount->root_vnode)
460 			inc_vnode_ref_count(mount->root_vnode);
461 		else {
462 			// might have been called during a mount operation in which
463 			// case the root node may still be NULL
464 			mount = NULL;
465 		}
466 	} else
467 		status = B_BAD_VALUE;
468 
469 	mutex_unlock(&sMountMutex);
470 
471 	if (mount == NULL)
472 		return status;
473 
474 	*_mount = mount;
475 	return B_OK;
476 }
477 
478 
479 static void
480 put_mount(struct fs_mount *mount)
481 {
482 	if (mount)
483 		put_vnode(mount->root_vnode);
484 }
485 
486 
487 static status_t
488 put_file_system(file_system_module_info *fs)
489 {
490 	return put_module(fs->info.name);
491 }
492 
493 
494 /**	Tries to open the specified file system module.
495  *	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
496  *	Returns a pointer to the file system module interface, or NULL if it
497  *	could not open the module.
498  */
499 
500 static file_system_module_info *
501 get_file_system(const char *fsName)
502 {
503 	char name[B_FILE_NAME_LENGTH];
504 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
505 		// construct module name if we didn't get one
506 		// (we currently support only one API)
507 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
508 		fsName = NULL;
509 	}
510 
511 	file_system_module_info *info;
512 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
513 		return NULL;
514 
515 	return info;
516 }
517 
518 
519 /**	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
520  *	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
521  *	The name is allocated for you, and you have to free() it when you're
522  *	done with it.
523  *	Returns NULL if the required memory is not available.
524  */
525 
526 static char *
527 get_file_system_name(const char *fsName)
528 {
529 	const size_t length = strlen("file_systems/");
530 
531 	if (strncmp(fsName, "file_systems/", length)) {
532 		// the name already seems to be the module's file name
533 		return strdup(fsName);
534 	}
535 
536 	fsName += length;
537 	const char *end = strchr(fsName, '/');
538 	if (end == NULL) {
539 		// this doesn't seem to be a valid name, but well...
540 		return strdup(fsName);
541 	}
542 
543 	// cut off the trailing /v1
544 
545 	char *name = (char *)malloc(end + 1 - fsName);
546 	if (name == NULL)
547 		return NULL;
548 
549 	strlcpy(name, fsName, end + 1 - fsName);
550 	return name;
551 }
552 
553 
554 static int
555 vnode_compare(void *_vnode, const void *_key)
556 {
557 	struct vnode *vnode = (struct vnode *)_vnode;
558 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
559 
560 	if (vnode->device == key->device && vnode->id == key->vnode)
561 		return 0;
562 
563 	return -1;
564 }
565 
566 
567 static uint32
568 vnode_hash(void *_vnode, const void *_key, uint32 range)
569 {
570 	struct vnode *vnode = (struct vnode *)_vnode;
571 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
572 
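	// The hash folds the 64 bit vnode ID into 32 bits (upper half added to
	// the lower half) and XORs in the mount ID, so that the vnodes of
	// different volumes spread over the table even for equal node IDs.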
573 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
574 
575 	if (vnode != NULL)
576 		return VHASH(vnode->device, vnode->id) % range;
577 
578 	return VHASH(key->device, key->vnode) % range;
579 
580 #undef VHASH
581 }
582 
583 
584 static void
585 add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
586 {
587 	recursive_lock_lock(&mount->rlock);
588 
589 	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);
590 
591 	recursive_lock_unlock(&mount->rlock);
592 }
593 
594 
595 static void
596 remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
597 {
598 	recursive_lock_lock(&mount->rlock);
599 
600 	list_remove_link(&vnode->mount_link);
601 	vnode->mount_link.next = vnode->mount_link.prev = NULL;
602 
603 	recursive_lock_unlock(&mount->rlock);
604 }
605 
606 
607 static status_t
608 create_new_vnode(struct vnode **_vnode, mount_id mountID, vnode_id vnodeID)
609 {
610 	FUNCTION(("create_new_vnode()\n"));
611 
612 	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
613 	if (vnode == NULL)
614 		return B_NO_MEMORY;
615 
616 	// initialize basic values
617 	memset(vnode, 0, sizeof(struct vnode));
618 	vnode->device = mountID;
619 	vnode->id = vnodeID;
620 
621 	// add the vnode to the mount structure
622 	mutex_lock(&sMountMutex);
623 	vnode->mount = find_mount(mountID);
624 	if (!vnode->mount || vnode->mount->unmounting) {
625 		mutex_unlock(&sMountMutex);
626 		free(vnode);
627 		return B_ENTRY_NOT_FOUND;
628 	}
629 
630 	hash_insert(sVnodeTable, vnode);
631 	add_vnode_to_mount_list(vnode, vnode->mount);
632 
633 	mutex_unlock(&sMountMutex);
634 
635 	vnode->ref_count = 1;
636 	*_vnode = vnode;
637 
638 	return B_OK;
639 }
640 
641 
642 /**	Frees the vnode and all resources it has acquired, and removes
643  *	it from the vnode hash as well as from its mount structure.
644  *	Will also make sure that any cache modifications are written back.
645  */
646 
647 static void
648 free_vnode(struct vnode *vnode, bool reenter)
649 {
650 	ASSERT(vnode->ref_count == 0 && vnode->busy);
651 
652 	// write back any changes in this vnode's cache -- but only
653 	// if the vnode won't be deleted, in which case the changes
654 	// will be discarded
655 
656 	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
657 		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
658 
659 	if (!vnode->unpublished) {
660 		if (vnode->remove)
661 			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
662 		else
663 			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
664 	}
665 
666 	// The file system has removed the resources of the vnode now, so we can
667 	// make it available again (and remove the busy vnode from the hash)
668 	mutex_lock(&sVnodeMutex);
669 	hash_remove(sVnodeTable, vnode);
670 	mutex_unlock(&sVnodeMutex);
671 
672 	// if we have a vm_cache attached, remove it
673 	if (vnode->cache)
674 		vm_cache_release_ref(vnode->cache);
675 
676 	vnode->cache = NULL;
677 
678 	remove_vnode_from_mount_list(vnode, vnode->mount);
679 
680 	free(vnode);
681 }
682 
683 
684 /**	\brief Decrements the reference counter of the given vnode and deletes it,
685  *	if the counter dropped to 0.
686  *
687  *	The caller must, of course, own a reference to the vnode to call this
688  *	function.
689  *	The caller must not hold the sVnodeMutex or the sMountMutex.
690  *
691  *	\param vnode the vnode.
692  *	\param reenter \c true, if this function is called (indirectly) from within
693  *		   a file system.
694  *	\return \c B_OK, if everything went fine, an error code otherwise.
695  */
696 
697 static status_t
698 dec_vnode_ref_count(struct vnode *vnode, bool reenter)
699 {
700 	mutex_lock(&sVnodeMutex);
701 
702 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
703 
704 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
705 
706 	if (oldRefCount == 1) {
707 		if (vnode->busy)
708 			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
709 
710 		bool freeNode = false;
711 
712 		// Just insert the vnode into an unused list if we don't need
713 		// to delete it
714 		if (vnode->remove) {
715 			vnode->busy = true;
716 			freeNode = true;
717 		} else {
718 			list_add_item(&sUnusedVnodeList, vnode);
719 			if (++sUnusedVnodes > kMaxUnusedVnodes
720 				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
721 				// there are too many unused vnodes so we free the oldest one
722 				// ToDo: evaluate this mechanism
723 				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
724 				vnode->busy = true;
725 				freeNode = true;
726 				sUnusedVnodes--;
727 			}
728 		}
729 
730 		mutex_unlock(&sVnodeMutex);
731 
732 		if (freeNode)
733 			free_vnode(vnode, reenter);
734 	} else
735 		mutex_unlock(&sVnodeMutex);
736 
737 	return B_OK;
738 }
739 
740 
741 /**	\brief Increments the reference counter of the given vnode.
742  *
743  *	The caller must either already have a reference to the vnode or hold
744  *	the sVnodeMutex.
745  *
746  *	\param vnode the vnode.
747  */
748 
749 static void
750 inc_vnode_ref_count(struct vnode *vnode)
751 {
752 	atomic_add(&vnode->ref_count, 1);
753 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
754 }
755 
756 
757 /**	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
758  *
759  *	The caller must hold the sVnodeMutex.
760  *
761  *	\param mountID the mount ID.
762  *	\param vnodeID the node ID.
763  *
764  *	\return The vnode structure, if it was found in the hash table, \c NULL
765  *			otherwise.
766  */
767 
768 static struct vnode *
769 lookup_vnode(mount_id mountID, vnode_id vnodeID)
770 {
771 	struct vnode_hash_key key;
772 
773 	key.device = mountID;
774 	key.vnode = vnodeID;
775 
776 	return (vnode *)hash_lookup(sVnodeTable, &key);
777 }
778 
779 
780 /**	\brief Retrieves a vnode for a given mount ID, node ID pair.
781  *
782  *	If the node is not yet in memory, it will be loaded.
783  *
784  *	The caller must not hold the sVnodeMutex or the sMountMutex.
785  *
786  *	\param mountID the mount ID.
787  *	\param vnodeID the node ID.
788  *	\param _vnode Pointer to a vnode* variable into which the pointer to the
789  *		   retrieved vnode structure shall be written.
790  *	\param reenter \c true, if this function is called (indirectly) from within
791  *		   a file system.
792  *	\return \c B_OK, if everything went fine, an error code otherwise.
793  */
794 
795 static status_t
796 get_vnode(mount_id mountID, vnode_id vnodeID, struct vnode **_vnode, int reenter)
797 {
798 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));
799 
800 	mutex_lock(&sVnodeMutex);
801 
802 	int32 tries = 300;
803 		// try for 3 secs
804 restart:
805 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
806 	if (vnode && vnode->busy) {
807 		mutex_unlock(&sVnodeMutex);
808 		if (--tries < 0) {
809 			// vnode doesn't seem to become unbusy
810 			panic("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
811 			return B_BUSY;
812 		}
813 		snooze(10000); // 10 ms
814 		mutex_lock(&sVnodeMutex);
815 		goto restart;
816 	}
817 
818 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
819 
820 	status_t status;
821 
822 	if (vnode) {
823 		if (vnode->ref_count == 0) {
824 			// this vnode has been unused before
825 			list_remove_item(&sUnusedVnodeList, vnode);
826 			sUnusedVnodes--;
827 		}
828 		inc_vnode_ref_count(vnode);
829 	} else {
830 		// we need to create a new vnode and read it in
831 		status = create_new_vnode(&vnode, mountID, vnodeID);
832 		if (status < B_OK)
833 			goto err;
834 
835 		vnode->busy = true;
836 		mutex_unlock(&sVnodeMutex);
837 
838 		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID, &vnode->private_node, reenter);
839 		if (status == B_OK && vnode->private_node == NULL)
840 			status = B_BAD_VALUE;
841 
842 		mutex_lock(&sVnodeMutex);
843 
844 		if (status < B_OK)
845 			goto err1;
846 
847 		vnode->busy = false;
848 	}
849 
850 	mutex_unlock(&sVnodeMutex);
851 
852 	TRACE(("get_vnode: returning %p\n", vnode));
853 
854 	*_vnode = vnode;
855 	return B_OK;
856 
857 err1:
858 	hash_remove(sVnodeTable, vnode);
859 	remove_vnode_from_mount_list(vnode, vnode->mount);
860 err:
861 	mutex_unlock(&sVnodeMutex);
862 	if (vnode)
863 		free(vnode);
864 
865 	return status;
866 }
867 
868 
869 /**	\brief Decrements the reference counter of the given vnode and deletes it,
870  *	if the counter dropped to 0.
871  *
872  *	The caller must, of course, own a reference to the vnode to call this
873  *	function.
874  *	The caller must not hold the sVnodeMutex or the sMountMutex.
875  *
876  *	\param vnode the vnode.
877  */
878 
879 static inline void
880 put_vnode(struct vnode *vnode)
881 {
882 	dec_vnode_ref_count(vnode, false);
883 }
884 
885 
886 static void
887 vnode_low_memory_handler(void */*data*/, int32 level)
888 {
889 	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));
890 
891 	int32 count = 1;
892 	switch (level) {
893 		case B_NO_LOW_MEMORY:
894 			return;
895 		case B_LOW_MEMORY_NOTE:
896 			count = sUnusedVnodes / 100;
897 			break;
898 		case B_LOW_MEMORY_WARNING:
899 			count = sUnusedVnodes / 10;
900 			break;
901 		case B_LOW_MEMORY_CRITICAL:
902 			count = sUnusedVnodes;
903 			break;
904 	}
905 
906 	for (int32 i = 0; i < count; i++) {
907 		mutex_lock(&sVnodeMutex);
908 
909 		struct vnode *vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
910 		if (vnode == NULL) {
911 			mutex_unlock(&sVnodeMutex);
912 			break;
913 		}
914 		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));
915 
916 		vnode->busy = true;
917 		sUnusedVnodes--;
918 
919 		mutex_unlock(&sVnodeMutex);
920 
921 		free_vnode(vnode, false);
922 	}
923 }
924 
925 
926 static inline void
927 put_advisory_locking(struct advisory_locking *locking)
928 {
929 	release_sem(locking->lock);
930 }
931 
932 
933 /**	Returns the advisory_locking object of the \a vnode in case it
934  *	has one, and locks it.
935  *	You have to call put_advisory_locking() when you're done with
936  *	it.
937  *	Note, you must not have the vnode mutex locked when calling
938  *	this function.
939  */
940 
941 static struct advisory_locking *
942 get_advisory_locking(struct vnode *vnode)
943 {
944 	mutex_lock(&sVnodeMutex);
945 
946 	struct advisory_locking *locking = vnode->advisory_locking;
947 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
948 
949 	mutex_unlock(&sVnodeMutex);
950 
951 	if (lock >= B_OK)
952 		lock = acquire_sem(lock);
953 	if (lock < B_OK) {
954 		// This means the locking has been deleted in the mean time
955 		// or had never existed in the first place - otherwise, we
956 		// would get the lock at some point.
957 		return NULL;
958 	}
959 
960 	return locking;
961 }
962 
963 
964 /**	Creates a locked advisory_locking object, and attaches it to the
965  *	given \a vnode.
966  *	Returns B_OK in case of success - also if the vnode got such an
967  *	object from someone else in the mean time; in that case, the existing
968  *	object will be the one that is locked for you.
969  */
970 
971 static status_t
972 create_advisory_locking(struct vnode *vnode)
973 {
974 	if (vnode == NULL)
975 		return B_FILE_ERROR;
976 
977 	struct advisory_locking *locking = (struct advisory_locking *)malloc(
978 		sizeof(struct advisory_locking));
979 	if (locking == NULL)
980 		return B_NO_MEMORY;
981 
982 	status_t status;
983 
984 	locking->wait_sem = create_sem(0, "advisory lock");
985 	if (locking->wait_sem < B_OK) {
986 		status = locking->wait_sem;
987 		goto err1;
988 	}
989 
990 	locking->lock = create_sem(0, "advisory locking");
991 	if (locking->lock < B_OK) {
992 		status = locking->lock;
993 		goto err2;
994 	}
995 
996 	list_init(&locking->locks);
997 
998 	// We need to set the locking structure atomically - someone
999 	// else might set one at the same time
1000 	do {
1001 		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking, (addr_t)locking,
1002 				NULL) == NULL)
1003 			return B_OK;
1004 	} while (get_advisory_locking(vnode) == NULL);
1005 
1006 	status = B_OK;
1007 		// we delete the one we've just created, but nevertheless, the vnode
1008 		// does have a locking structure now
1009 
1010 	delete_sem(locking->lock);
1011 err2:
1012 	delete_sem(locking->wait_sem);
1013 err1:
1014 	free(locking);
1015 	return status;
1016 }
1017 
1018 
1019 /**	Retrieves the first lock that has been set by the current team.
1020  */
1021 
1022 static status_t
1023 get_advisory_lock(struct vnode *vnode, struct flock *flock)
1024 {
1025 	struct advisory_locking *locking = get_advisory_locking(vnode);
1026 	if (locking == NULL)
1027 		return B_BAD_VALUE;
1028 
1029 	// TODO: this should probably get the flock by its file descriptor!
1030 	team_id team = team_get_current_team_id();
1031 	status_t status = B_BAD_VALUE;
1032 
1033 	struct advisory_lock *lock = NULL;
1034 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1035 		if (lock->team == team) {
1036 			flock->l_start = lock->offset;
1037 			flock->l_len = lock->length;
1038 			status = B_OK;
1039 			break;
1040 		}
1041 	}
1042 
1043 	put_advisory_locking(locking);
1044 	return status;
1045 }
1046 
1047 
1048 /**	Removes the specified lock, or all locks of the calling team
1049  *	if \a flock is NULL.
1050  */
1051 
1052 static status_t
1053 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1054 {
1055 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1056 
1057 	struct advisory_locking *locking = get_advisory_locking(vnode);
1058 	if (locking == NULL)
1059 		return flock != NULL ? B_BAD_VALUE : B_OK;
1060 
1061 	team_id team = team_get_current_team_id();
1062 
1063 	// find matching lock entry
1064 
1065 	status_t status = B_BAD_VALUE;
1066 	struct advisory_lock *lock = NULL;
1067 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1068 		if (lock->team == team && (flock == NULL
1069 			|| (lock->offset == flock->l_start
1070 			&& lock->length == flock->l_len))) {
1071 			// we found our lock, free it
1072 			list_remove_item(&locking->locks, lock);
1073 			free(lock);
1074 			status = B_OK;
1075 			break;
1076 		}
1077 	}
1078 
1079 	bool removeLocking = list_is_empty(&locking->locks);
1080 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1081 
1082 	put_advisory_locking(locking);
1083 
1084 	if (status < B_OK)
1085 		return status;
1086 
1087 	if (removeLocking) {
1088 		// we can remove the whole advisory locking structure; it's no longer used
1089 		locking = get_advisory_locking(vnode);
1090 		if (locking != NULL) {
1091 			// the locking could have been changed in the mean time
1092 			if (list_is_empty(&locking->locks)) {
1093 				vnode->advisory_locking = NULL;
1094 
1095 				// we've detached the locking from the vnode, so we can safely delete it
1096 				delete_sem(locking->lock);
1097 				delete_sem(locking->wait_sem);
1098 				free(locking);
1099 			} else {
1100 				// the locking is in use again
1101 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1102 			}
1103 		}
1104 	}
1105 
1106 	return B_OK;
1107 }
1108 
1109 
1110 static status_t
1111 acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
1112 {
1113 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1114 		vnode, flock, wait ? "yes" : "no"));
1115 
1116 	bool shared = flock->l_type == F_RDLCK;
1117 	status_t status = B_OK;
1118 
1119 restart:
1120 	// if this vnode has an advisory_locking structure attached,
1121 	// lock that one and search for any colliding file lock
1122 	struct advisory_locking *locking = get_advisory_locking(vnode);
1123 	sem_id waitForLock = -1;
1124 
1125 	if (locking != NULL) {
1126 		// test for collisions
1127 		struct advisory_lock *lock = NULL;
1128 		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1129 			if (lock->offset <= flock->l_start + flock->l_len
1130 				&& lock->offset + lock->length > flock->l_start) {
1131 				// locks do overlap
1132 				if (!shared || !lock->shared) {
1133 					// we need to wait
1134 					waitForLock = locking->wait_sem;
1135 					break;
1136 				}
1137 			}
1138 		}
1139 
1140 		if (waitForLock < B_OK || !wait)
1141 			put_advisory_locking(locking);
1142 	}
1143 
1144 	// wait for the lock if we have to, or else return immediately
1145 
1146 	if (waitForLock >= B_OK) {
1147 		if (!wait)
1148 			status = B_PERMISSION_DENIED;
1149 		else {
1150 			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
1151 			if (status == B_OK) {
1152 				// see if we're still colliding
1153 				goto restart;
1154 			}
1155 		}
1156 	}
1157 
1158 	if (status < B_OK)
1159 		return status;
1160 
1161 	// install new lock
1162 
1163 	locking = get_advisory_locking(vnode);
1164 	if (locking == NULL) {
1165 		// we need to create a new locking object
1166 		status = create_advisory_locking(vnode);
1167 		if (status < B_OK)
1168 			return status;
1169 
1170 		locking = vnode->advisory_locking;
1171 			// we own the locking object, so it can't go away
1172 	}
1173 
1174 	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
1175 	if (lock == NULL) {
1176 		if (waitForLock >= B_OK)
1177 			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
1178 		release_sem(locking->lock);
1179 		return B_NO_MEMORY;
1180 	}
1181 
1182 	lock->team = team_get_current_team_id();
1183 	// values must already be normalized when getting here
1184 	lock->offset = flock->l_start;
1185 	lock->length = flock->l_len;
1186 	lock->shared = shared;
1187 
1188 	list_add_item(&locking->locks, lock);
1189 	put_advisory_locking(locking);
1190 
1191 	return status;
1192 }
1193 
1194 
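/**	Normalizes the \a flock structure so that it describes the region with
 *	an absolute start offset and a non-negative length.
 *	A hypothetical example: on a 100 byte file, { SEEK_END, l_start = -10,
 *	l_len = 0 } becomes l_start = 90 with l_len clamped to OFF_MAX - 90,
 *	and { SEEK_SET, l_start = 100, l_len = -10 } becomes l_start = 90,
 *	l_len = 10.
 */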
1195 static status_t
1196 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1197 {
1198 	switch (flock->l_whence) {
1199 		case SEEK_SET:
1200 			break;
1201 		case SEEK_CUR:
1202 			flock->l_start += descriptor->pos;
1203 			break;
1204 		case SEEK_END:
1205 		{
1206 			struct vnode *vnode = descriptor->u.vnode;
1207 			struct stat stat;
1208 			status_t status;
1209 
1210 			if (FS_CALL(vnode, read_stat) == NULL)
1211 				return EOPNOTSUPP;
1212 
1213 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
1214 			if (status < B_OK)
1215 				return status;
1216 
1217 			flock->l_start += stat.st_size;
1218 			break;
1219 		}
1220 		default:
1221 			return B_BAD_VALUE;
1222 	}
1223 
1224 	if (flock->l_start < 0)
1225 		flock->l_start = 0;
1226 	if (flock->l_len == 0)
1227 		flock->l_len = OFF_MAX;
1228 
1229 	// don't let the offset and length overflow
1230 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1231 		flock->l_len = OFF_MAX - flock->l_start;
1232 
1233 	if (flock->l_len < 0) {
1234 		// a negative length reverses the region
1235 		flock->l_start += flock->l_len;
1236 		flock->l_len = -flock->l_len;
1237 	}
1238 
1239 	return B_OK;
1240 }
1241 
1242 
1243 /**	Disconnects all file descriptors that are associated with the
1244  *	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1245  *	\a mount object.
1246  *
1247  *	Note, after you've called this function, there might still be ongoing
1248  *	accesses - they won't be interrupted if they were already in progress.
1249  *	However, any subsequent access will fail.
1250  *
1251  *	This is not a cheap function and should be used with care and rarely.
1252  *	TODO: there is currently no means to stop a blocking read/write!
1253  */
1254 
1255 void
1256 disconnect_mount_or_vnode_fds(struct fs_mount *mount,
1257 	struct vnode *vnodeToDisconnect)
1258 {
1259 	// iterate over all teams and peek into their file descriptors
1260 	int32 nextTeamID = 0;
1261 
1262 	while (true) {
1263 		struct io_context *context = NULL;
1264 		sem_id contextMutex = -1;
1265 		struct team *team = NULL;
1266 		team_id lastTeamID;
1267 
1268 		cpu_status state = disable_interrupts();
1269 		GRAB_TEAM_LOCK();
1270 
1271 		lastTeamID = peek_next_thread_id();
1272 		if (nextTeamID < lastTeamID) {
1273 			// get next valid team
1274 			while (nextTeamID < lastTeamID
1275 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1276 				nextTeamID++;
1277 			}
1278 
1279 			if (team) {
1280 				context = (io_context *)team->io_context;
1281 				contextMutex = context->io_mutex.sem;
1282 				nextTeamID++;
1283 			}
1284 		}
1285 
1286 		RELEASE_TEAM_LOCK();
1287 		restore_interrupts(state);
1288 
1289 		if (context == NULL)
1290 			break;
1291 
1292 		// we now have a context - since we couldn't lock it while having
1293 		// safe access to the team structure, we now need to lock the mutex
1294 		// manually
1295 
1296 		if (acquire_sem(contextMutex) != B_OK) {
1297 			// team seems to be gone, go over to the next team
1298 			continue;
1299 		}
1300 
1301 		// the team cannot be deleted completely while we're owning its
1302 		// io_context mutex, so we can safely play with it now
1303 
1304 		context->io_mutex.holder = thread_get_current_thread_id();
1305 
1306 		if (context->cwd != NULL && context->cwd->mount == mount) {
1307 			put_vnode(context->cwd);
1308 
1309 			if (context->cwd == mount->root_vnode) {
1310 				// redirect the current working directory to the covered vnode
1311 				context->cwd = mount->covers_vnode;
1312 				inc_vnode_ref_count(context->cwd);
1313 			} else
1314 				context->cwd = NULL;
1315 		}
1316 
1317 		for (uint32 i = 0; i < context->table_size; i++) {
1318 			if (struct file_descriptor *descriptor = context->fds[i]) {
1319 				inc_fd_ref_count(descriptor);
1320 
1321 				// if this descriptor points at this mount, we
1322 				// need to disconnect it to be able to unmount
1323 				struct vnode *vnode = fd_vnode(descriptor);
1324 				if (vnodeToDisconnect != NULL) {
1325 					if (vnode == vnodeToDisconnect)
1326 						disconnect_fd(descriptor);
1327 				} else if ((vnode != NULL && vnode->mount == mount)
1328 					|| (vnode == NULL && descriptor->u.mount == mount))
1329 					disconnect_fd(descriptor);
1330 
1331 				put_fd(descriptor);
1332 			}
1333 		}
1334 
1335 		mutex_unlock(&context->io_mutex);
1336 	}
1337 }
1338 
1339 
1340 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1341  *		   by.
1342  *
1343  *	Given an arbitrary vnode, the function checks whether the node is covered
1344  *	by the root of a volume. If it is, the function obtains a reference to the
1345  *	volume root node and returns it.
1346  *
1347  *	\param vnode The vnode in question.
1348  *	\return The volume root vnode the given vnode is covered by, if it is
1349  *			indeed a mount point, or \c NULL otherwise.
1350  */
1351 
1352 static struct vnode *
1353 resolve_mount_point_to_volume_root(struct vnode *vnode)
1354 {
1355 	if (!vnode)
1356 		return NULL;
1357 
1358 	struct vnode *volumeRoot = NULL;
1359 
1360 	recursive_lock_lock(&sMountOpLock);
1361 	if (vnode->covered_by) {
1362 		volumeRoot = vnode->covered_by;
1363 		inc_vnode_ref_count(volumeRoot);
1364 	}
1365 	recursive_lock_unlock(&sMountOpLock);
1366 
1367 	return volumeRoot;
1368 }
1369 
1370 
1371 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1372  *		   by.
1373  *
1374  *	Given an arbitrary vnode (identified by mount and node ID), the function
1375  *	checks whether the node is covered by the root of a volume. If it is, the
1376  *	function returns the mount and node ID of the volume root node. Otherwise
1377  *	it simply returns the supplied mount and node ID.
1378  *
1379  *	In case of error (e.g. the supplied node could not be found) the variables
1380  *	for storing the resolved mount and node ID remain untouched and an error
1381  *	code is returned.
1382  *
1383  *	\param mountID The mount ID of the vnode in question.
1384  *	\param nodeID The node ID of the vnode in question.
1385  *	\param resolvedMountID Pointer to storage for the resolved mount ID.
1386  *	\param resolvedNodeID Pointer to storage for the resolved node ID.
1387  *	\return
1388  *	- \c B_OK, if everything went fine,
1389  *	- another error code, if something went wrong.
1390  */
1391 
1392 status_t
1393 resolve_mount_point_to_volume_root(mount_id mountID, vnode_id nodeID,
1394 	mount_id *resolvedMountID, vnode_id *resolvedNodeID)
1395 {
1396 	// get the node
1397 	struct vnode *node;
1398 	status_t error = get_vnode(mountID, nodeID, &node, false);
1399 	if (error != B_OK)
1400 		return error;
1401 
1402 	// resolve the node
1403 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1404 	if (resolvedNode) {
1405 		put_vnode(node);
1406 		node = resolvedNode;
1407 	}
1408 
1409 	// set the return values
1410 	*resolvedMountID = node->device;
1411 	*resolvedNodeID = node->id;
1412 
1413 	put_vnode(node);
1414 
1415 	return B_OK;
1416 }
1417 
1418 
1419 /**	\brief Resolves a volume root vnode to the underlying mount point vnode.
1420  *
1421  *	Given an arbitrary vnode, the function checks whether the node is the
1422  *	root of a volume. If it is (and if it is not "/"), the function obtains
1423  *	a reference to the underlying mount point node and returns it.
1424  *
1425  *	\param vnode The vnode in question.
1426  *	\return The mount point vnode the vnode covers, if it is indeed a volume
1427  *			root and not "/", or \c NULL otherwise.
1428  */
1429 
1430 static struct vnode *
1431 resolve_volume_root_to_mount_point(struct vnode *vnode)
1432 {
1433 	if (!vnode)
1434 		return NULL;
1435 
1436 	struct vnode *mountPoint = NULL;
1437 
1438 	recursive_lock_lock(&sMountOpLock);
1439 	struct fs_mount *mount = vnode->mount;
1440 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1441 		mountPoint = mount->covers_vnode;
1442 		inc_vnode_ref_count(mountPoint);
1443 	}
1444 	recursive_lock_unlock(&sMountOpLock);
1445 
1446 	return mountPoint;
1447 }
1448 
1449 
1450 /**	\brief Gets the directory path and leaf name for a given path.
1451  *
1452  *	The supplied \a path is transformed to refer to the directory part of
1453  *	the entry identified by the original path, and into the buffer \a filename
1454  *	the leaf name of the original entry is written.
1455  *	Neither the returned path nor the leaf name can be expected to be
1456  *	canonical.
1457  *
1458  *	\param path The path to be analyzed. Must be able to store at least one
1459  *		   additional character.
1460  *	\param filename The buffer into which the leaf name will be written.
1461  *		   Must be of size B_FILE_NAME_LENGTH at least.
1462  *	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1463  *		   name is longer than \c B_FILE_NAME_LENGTH.
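 *
 *	For example, "a/b/c" is transformed into "a/b/." with leaf name "c",
 *	while a single segment "foo" becomes "." with leaf name "foo".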
1464  */
1465 
1466 static status_t
1467 get_dir_path_and_leaf(char *path, char *filename)
1468 {
1469 	char *p = strrchr(path, '/');
1470 		// '/' are not allowed in file names!
1471 
1472 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1473 
1474 	if (!p) {
1475 		// this path is single segment with no '/' in it
1476 		// ex. "foo"
1477 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1478 			return B_NAME_TOO_LONG;
1479 		strcpy(path, ".");
1480 	} else {
1481 		p++;
1482 		if (*p == '\0') {
1483 			// special case: the path ends in '/'
1484 			strcpy(filename, ".");
1485 		} else {
1486 			// normal leaf: replace the leaf portion of the path with a '.'
1487 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1488 				>= B_FILE_NAME_LENGTH) {
1489 				return B_NAME_TOO_LONG;
1490 			}
1491 		}
1492 		p[0] = '.';
1493 		p[1] = '\0';
1494 	}
1495 	return B_OK;
1496 }
1497 
1498 
1499 static status_t
1500 entry_ref_to_vnode(mount_id mountID, vnode_id directoryID, const char *name, struct vnode **_vnode)
1501 {
1502 	char clonedName[B_FILE_NAME_LENGTH + 1];
1503 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1504 		return B_NAME_TOO_LONG;
1505 
1506 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1507 	struct vnode *directory;
1508 
1509 	status_t status = get_vnode(mountID, directoryID, &directory, false);
1510 	if (status < 0)
1511 		return status;
1512 
1513 	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL, NULL);
1514 }
1515 
1516 
1517 /**	Returns the vnode for the relative path starting at the specified \a vnode.
1518  *	\a path must not be NULL.
1519  *	If it returns successfully, \a path contains the name of the last path
1520  *	component.
1521  *	Note, this reduces the ref_count of the starting \a vnode, no matter if
1522  *	it is successful or not!
1523  */
1524 
1525 static status_t
1526 vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
1527 	int count, struct vnode **_vnode, vnode_id *_parentID, int *_type)
1528 {
1529 	status_t status = 0;
1530 	vnode_id lastParentID = vnode->id;
1531 	int type = 0;
1532 
1533 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
1534 
1535 	if (path == NULL) {
1536 		put_vnode(vnode);
1537 		return B_BAD_VALUE;
1538 	}
1539 
1540 	while (true) {
1541 		struct vnode *nextVnode;
1542 		vnode_id vnodeID;
1543 		char *nextPath;
1544 
1545 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));
1546 
1547 		// done?
1548 		if (path[0] == '\0')
1549 			break;
1550 
1551 		// walk to find the next path component ("path" will point to a single
1552 		// path component), and filter out multiple slashes
1553 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);
1554 
1555 		if (*nextPath == '/') {
1556 			*nextPath = '\0';
1557 			do
1558 				nextPath++;
1559 			while (*nextPath == '/');
1560 		}
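		// e.g. "foo//bar" is now split into path = "foo", nextPath = "bar"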
1561 
1562 		// See if the '..' is at the root of a mount and move to the covered
1563 		// vnode so we pass the '..' path to the underlying filesystem
1564 		if (!strcmp("..", path)
1565 			&& vnode->mount->root_vnode == vnode
1566 			&& vnode->mount->covers_vnode) {
1567 			nextVnode = vnode->mount->covers_vnode;
1568 			inc_vnode_ref_count(nextVnode);
1569 			put_vnode(vnode);
1570 			vnode = nextVnode;
1571 		}
1572 
1573 		// Check if we have the right to search the current directory vnode.
1574 		// If a file system doesn't have the access() function, we assume that
1575 		// searching a directory is always allowed
1576 		if (FS_CALL(vnode, access))
1577 			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);
1578 
1579 		// Tell the filesystem to get the vnode of this path component (if we got the
1580 		// permission from the call above)
1581 		if (status >= B_OK)
1582 			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);
1583 
1584 		if (status < B_OK) {
1585 			put_vnode(vnode);
1586 			return status;
1587 		}
1588 
1589 		// Lookup the vnode: the call to fs_lookup should have caused a get_vnode to be called
1590 		// from inside the file system, thus the vnode has to be in the table and its
1591 		// ref count incremented at this point
1592 		mutex_lock(&sVnodeMutex);
1593 		nextVnode = lookup_vnode(vnode->device, vnodeID);
1594 		mutex_unlock(&sVnodeMutex);
1595 
1596 		if (!nextVnode) {
1597 			// pretty screwed up here - the file system found the vnode, but the hash
1598 			// lookup failed, so our internal structures are messed up
1599 			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
1600 				vnode->device, vnodeID);
1601 			put_vnode(vnode);
1602 			return B_ENTRY_NOT_FOUND;
1603 		}
1604 
1605 		// If the new node is a symbolic link, resolve it (if we've been told to do it)
1606 		if (S_ISLNK(type) && (traverseLeafLink || nextPath[0] != '\0')) {
1607 			size_t bufferSize;
1608 			char *buffer;
1609 
1610 			TRACE(("traverse link\n"));
1611 
1612 			// it's not exactly nice style using goto in this way, but hey, it works :-/
1613 			if (count + 1 > MAX_SYM_LINKS) {
1614 				status = B_LINK_LIMIT;
1615 				goto resolve_link_error;
1616 			}
1617 
1618 			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
1619 			if (buffer == NULL) {
1620 				status = B_NO_MEMORY;
1621 				goto resolve_link_error;
1622 			}
1623 
1624 			status = FS_CALL(nextVnode, read_link)(nextVnode->mount->cookie,
1625 				nextVnode->private_node, buffer, &bufferSize);
1626 			if (status < B_OK) {
1627 				free(buffer);
1628 
1629 		resolve_link_error:
1630 				put_vnode(vnode);
1631 				put_vnode(nextVnode);
1632 
1633 				return status;
1634 			}
1635 			put_vnode(nextVnode);
1636 
1637 			// Check if we start from the root directory or the current
1638 			// directory ("vnode" still points to that one).
1639 			// Cut off all leading slashes if it's the root directory
1640 			path = buffer;
1641 			if (path[0] == '/') {
1642 				// we don't need the old directory anymore
1643 				put_vnode(vnode);
1644 
1645 				while (*++path == '/')
1646 					;
1647 				vnode = sRoot;
1648 				inc_vnode_ref_count(vnode);
1649 			}
1650 			inc_vnode_ref_count(vnode);
1651 				// balance the next recursion - we will decrement the ref_count
1652 				// of the vnode, no matter if we succeeded or not
1653 
1654 			status = vnode_path_to_vnode(vnode, path, traverseLeafLink, count + 1,
1655 				&nextVnode, &lastParentID, _type);
1656 
1657 			free(buffer);
1658 
1659 			if (status < B_OK) {
1660 				put_vnode(vnode);
1661 				return status;
1662 			}
1663 		} else
1664 			lastParentID = vnode->id;
1665 
1666 		// decrease the ref count on the old dir we just looked up into
1667 		put_vnode(vnode);
1668 
1669 		path = nextPath;
1670 		vnode = nextVnode;
1671 
1672 		// see if we hit a mount point
1673 		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
1674 		if (mountPoint) {
1675 			put_vnode(vnode);
1676 			vnode = mountPoint;
1677 		}
1678 	}
1679 
1680 	*_vnode = vnode;
1681 	if (_type)
1682 		*_type = type;
1683 	if (_parentID)
1684 		*_parentID = lastParentID;
1685 
1686 	return B_OK;
1687 }
1688 
1689 
1690 static status_t
1691 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1692 	vnode_id *_parentID, bool kernel)
1693 {
1694 	struct vnode *start = NULL;
1695 
1696 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1697 
1698 	if (!path)
1699 		return B_BAD_VALUE;
1700 
1701 	// figure out if we need to start at root or at cwd
1702 	if (*path == '/') {
1703 		if (sRoot == NULL) {
1704 			// we're a bit early, aren't we?
1705 			return B_ERROR;
1706 		}
1707 
1708 		while (*++path == '/')
1709 			;
1710 		start = sRoot;
1711 		inc_vnode_ref_count(start);
1712 	} else {
1713 		struct io_context *context = get_current_io_context(kernel);
1714 
1715 		mutex_lock(&context->io_mutex);
1716 		start = context->cwd;
1717 		if (start != NULL)
1718 			inc_vnode_ref_count(start);
1719 		mutex_unlock(&context->io_mutex);
1720 
1721 		if (start == NULL)
1722 			return B_ERROR;
1723 	}
1724 
1725 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1726 }
1727 
1728 
1729 /** Returns the vnode for the next to last segment of the path, and returns
1730  *	the last portion in \a filename.
1731  *	The path buffer must be able to store at least one additional character.
1732  */
1733 
1734 static status_t
1735 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1736 {
1737 	status_t status = get_dir_path_and_leaf(path, filename);
1738 	if (status != B_OK)
1739 		return status;
1740 
1741 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1742 }
1743 
1744 
1745 /**	\brief Retrieves the directory vnode and the leaf name of an entry referred
1746  *		   to by a FD + path pair.
1747  *
1748  *	\a path must be given in either case. \a fd might be omitted, in which
1749  *	case \a path is either an absolute path or one relative to the current
1750  *	directory. If both are supplied and \a path is relative, it is reckoned off
1751  *	of the directory referred to by \a fd. If \a path is absolute \a fd is
1752  *	ignored.
1753  *
1754  *	The caller has the responsibility to call put_vnode() on the returned
1755  *	directory vnode.
1756  *
1757  *	\param fd The FD. May be < 0.
1758  *	\param path The absolute or relative path. Must not be \c NULL. The buffer
1759  *	       is modified by this function. It must have at least room for a
1760  *	       string one character longer than the path it contains.
1761  *	\param _vnode A pointer to a variable the directory vnode shall be written
1762  *		   into.
1763  *	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1764  *		   the leaf name of the specified entry will be written.
1765  *	\param kernel \c true, if invoked from inside the kernel, \c false if
1766  *		   invoked from userland.
1767  *	\return \c B_OK, if everything went fine, another error code otherwise.
1768  */
1769 
1770 static status_t
1771 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1772 	char *filename, bool kernel)
1773 {
1774 	if (!path)
1775 		return B_BAD_VALUE;
1776 	if (fd < 0)
1777 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1778 
1779 	status_t status = get_dir_path_and_leaf(path, filename);
1780 	if (status != B_OK)
1781 		return status;
1782 
1783 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1784 }
1785 
1786 
1787 static status_t
1788 get_vnode_name(struct vnode *vnode, struct vnode *parent,
1789 	char *name, size_t nameSize)
1790 {
1791 	VNodePutter vnodePutter;
1792 
1793 	// See if vnode is the root of a mount and move to the covered
1794 	// vnode so we get the underlying file system
1795 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
1796 		vnode = vnode->mount->covers_vnode;
1797 		inc_vnode_ref_count(vnode);
1798 		vnodePutter.SetTo(vnode);
1799 	}
1800 
1801 	if (FS_CALL(vnode, get_vnode_name)) {
1802 		// The FS supports getting the name of a vnode.
1803 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
1804 			vnode->private_node, name, nameSize);
1805 	}
1806 
1807 	// The FS doesn't support getting the name of a vnode. So we search the
1808 	// parent directory for the vnode, if the caller let us.
1809 
1810 	if (parent == NULL)
1811 		return EOPNOTSUPP;
1812 
1813 	fs_cookie cookie;
1814 
1815 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
1816 		parent->private_node, &cookie);
1817 	if (status >= B_OK) {
1818 		char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1819 		struct dirent *dirent = (struct dirent *)buffer;
1820 		while (true) {
1821 			uint32 num = 1;
1822 			status = dir_read(parent, cookie, dirent, sizeof(buffer), &num);
1823 			if (status < B_OK)
1824 				break;
1825 
1826 			if (vnode->id == dirent->d_ino) {
1827 				// found correct entry!
1828 				if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
1829 					status = B_BUFFER_OVERFLOW;
1830 				break;
1831 			}
1832 		}
1833 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
1834 	}
1835 	return status;
1836 }
1837 
1838 
1839 /**	Gets the full path to a given directory vnode.
1840  *	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
1841  *	file system doesn't support this call, it will fall back to iterating
1842  *	through the parent directory to get the name of the child.
1843  *
1844  *	To protect against circular loops, it supports a maximum tree depth
1845  *	of 256 levels.
1846  *
1847  *	Note that the path may not be correct by the time this function returns!
1848  *	It doesn't use any locking to ensure that the returned path stays
1849  *	correct, as paths aren't safe anyway: the path to a file can change at any time.
1850  *
1851  *	It might be a good idea, though, to check if the returned path exists
1852  *	in the calling function (it's not done here because of efficiency)
1853  */
1854 
1855 static status_t
1856 dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
1857 {
1858 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
1859 
1860 	if (vnode == NULL || buffer == NULL)
1861 		return B_BAD_VALUE;
1862 
1863 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
1864 	KPath pathBuffer;
1865 	if (pathBuffer.InitCheck() != B_OK)
1866 		return B_NO_MEMORY;
1867 
1868 	char *path = pathBuffer.LockBuffer();
1869 	int32 insert = pathBuffer.BufferSize();
1870 	int32 maxLevel = 256;
1871 	int32 length;
1872 	status_t status;
1873 
1874 	// we don't use get_vnode() here because this call is more
1875 	// efficient and does all we need from get_vnode()
1876 	inc_vnode_ref_count(vnode);
1877 
1878 	// resolve a volume root to its mount point
1879 	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
1880 	if (mountPoint) {
1881 		put_vnode(vnode);
1882 		vnode = mountPoint;
1883 	}
1884 
1885 	path[--insert] = '\0';
1886 
1887 	while (true) {
1888 		// the name buffer is also used for fs_read_dir()
1889 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1890 		char *name = &((struct dirent *)nameBuffer)->d_name[0];
1891 		struct vnode *parentVnode;
1892 		vnode_id parentID, id;
1893 		int type;
1894 
1895 		// lookup the parent vnode
1896 		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
1897 			&parentID, &type);
1898 		if (status < B_OK)
1899 			goto out;
1900 
1901 		mutex_lock(&sVnodeMutex);
1902 		parentVnode = lookup_vnode(vnode->device, parentID);
1903 		mutex_unlock(&sVnodeMutex);
1904 
1905 		if (parentVnode == NULL) {
1906 			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
1907 				vnode->device, parentID);
1908 			status = B_ENTRY_NOT_FOUND;
1909 			goto out;
1910 		}
1911 
1912 		// resolve a volume root to its mount point
1913 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
1914 		if (mountPoint) {
1915 			put_vnode(parentVnode);
1916 			parentVnode = mountPoint;
1917 			parentID = parentVnode->id;
1918 		}
1919 
1920 		bool hitRoot = (parentVnode == vnode);
1921 
1922 		// Does the file system support getting the name of a vnode?
1923 		// If so, get it here...
1924 		if (status == B_OK && FS_CALL(vnode, get_vnode_name)) {
1925 			status = FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie, vnode->private_node,
1926 				name, B_FILE_NAME_LENGTH);
1927 		}
1928 
1929 		// ... if not, find it out later (by iterating through
1930 		// the parent directory, searching for the id)
1931 		id = vnode->id;
1932 
1933 		// release the current vnode, we only need its parent from now on
1934 		put_vnode(vnode);
1935 		vnode = parentVnode;
1936 
1937 		if (status < B_OK)
1938 			goto out;
1939 
1940 		// ToDo: add an explicit check for loops in about 10 levels to do
1941 		// real loop detection
1942 
1943 		// don't go deeper than 'maxLevel' to guard against loops
1944 		if (maxLevel-- < 0) {
1945 			status = ELOOP;
1946 			goto out;
1947 		}
1948 
1949 		if (hitRoot) {
1950 			// we have reached "/", which means we have constructed the full
1951 			// path
1952 			break;
1953 		}
1954 
1955 		if (!FS_CALL(vnode, get_vnode_name)) {
1956 			// If we haven't got the vnode's name yet, we have to search for it
1957 			// in the parent directory now
1958 			fs_cookie cookie;
1959 
1960 			status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node,
1961 				&cookie);
1962 			if (status >= B_OK) {
1963 				struct dirent *dirent = (struct dirent *)nameBuffer;
1964 				while (true) {
1965 					uint32 num = 1;
1966 					status = dir_read(vnode, cookie, dirent, sizeof(nameBuffer),
1967 						&num);
1968 
1969 					if (status < B_OK)
1970 						break;
1971 
1972 					if (id == dirent->d_ino)
1973 						// found correct entry!
1974 						break;
1975 				}
1976 				FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
1977 			}
1978 
1979 			if (status < B_OK)
1980 				goto out;
1981 		}
1982 
1983 		// add the name in front of the current path
1984 		name[B_FILE_NAME_LENGTH - 1] = '\0';
1985 		length = strlen(name);
1986 		insert -= length;
1987 		if (insert <= 0) {
1988 			status = ENOBUFS;
1989 			goto out;
1990 		}
1991 		memcpy(path + insert, name, length);
1992 		path[--insert] = '/';
1993 	}
1994 
1995 	// the root dir will result in an empty path: fix it
1996 	if (path[insert] == '\0')
1997 		path[--insert] = '/';
1998 
1999 	TRACE(("  path is: %s\n", path + insert));
2000 
2001 	// copy the path to the output buffer
2002 	length = pathBuffer.BufferSize() - insert;
2003 	if (length <= (int)bufferSize)
2004 		memcpy(buffer, path + insert, length);
2005 	else
2006 		status = ENOBUFS;
2007 
2008 out:
2009 	put_vnode(vnode);
2010 	return status;
2011 }
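
/* A minimal sketch of the buffer technique used above: the path is built
 * right to left, so every component found while walking ".." upwards can be
 * prepended without moving the bytes that were already written:
 *
 *	char buffer[64];
 *	int32 insert = sizeof(buffer);
 *	buffer[--insert] = '\0';
 *	const char *name = "home";		// component of the current level
 *	int32 length = strlen(name);
 *	insert -= length;
 *	memcpy(buffer + insert, name, length);
 *	buffer[--insert] = '/';
 *	// after handling "boot" the same way, buffer + insert reads "/boot/home"
 */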
2012 
2013 
2014 /**	Checks the length of every path component, and adds a '.'
2015  *	if the path ends in a slash.
2016  *	The given path buffer must be able to store at least one
2017  *	additional character.
2018  */
2019 
2020 static status_t
2021 check_path(char *to)
2022 {
2023 	int32 length = 0;
2024 
2025 	// check length of every path component
2026 
2027 	while (*to) {
2028 		char *begin;
2029 		if (*to == '/')
2030 			to++, length++;
2031 
2032 		begin = to;
2033 		while (*to != '/' && *to)
2034 			to++, length++;
2035 
2036 		if (to - begin > B_FILE_NAME_LENGTH)
2037 			return B_NAME_TOO_LONG;
2038 	}
2039 
2040 	if (length == 0)
2041 		return B_ENTRY_NOT_FOUND;
2042 
2043 	// complete path if there is a slash at the end
2044 
2045 	if (*(to - 1) == '/') {
2046 		if (length > B_PATH_NAME_LENGTH - 2)
2047 			return B_NAME_TOO_LONG;
2048 
2049 		to[0] = '.';
2050 		to[1] = '\0';
2051 	}
2052 
2053 	return B_OK;
2054 }
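
/* Examples of what check_path() does to its argument (in place):
 *
 *	"/boot/home/"	-> "/boot/home/."	(trailing slash completed with '.')
 *	"/boot/home"	-> unchanged, returns B_OK
 *	""				-> returns B_ENTRY_NOT_FOUND
 *	any component longer than B_FILE_NAME_LENGTH -> returns B_NAME_TOO_LONG
 */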
2055 
2056 
2057 static struct file_descriptor *
2058 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2059 {
2060 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2061 	if (descriptor == NULL)
2062 		return NULL;
2063 
2064 	if (fd_vnode(descriptor) == NULL) {
2065 		put_fd(descriptor);
2066 		return NULL;
2067 	}
2068 
2069 	// ToDo: when we can close a file descriptor at any point, investigate
2070 	//	if this is still valid to do (accessing the vnode without ref_count
2071 	//	or locking)
2072 	*_vnode = descriptor->u.vnode;
2073 	return descriptor;
2074 }
2075 
2076 
2077 static struct vnode *
2078 get_vnode_from_fd(int fd, bool kernel)
2079 {
2080 	struct file_descriptor *descriptor;
2081 	struct vnode *vnode;
2082 
2083 	descriptor = get_fd(get_current_io_context(kernel), fd);
2084 	if (descriptor == NULL)
2085 		return NULL;
2086 
2087 	vnode = fd_vnode(descriptor);
2088 	if (vnode != NULL)
2089 		inc_vnode_ref_count(vnode);
2090 
2091 	put_fd(descriptor);
2092 	return vnode;
2093 }
2094 
2095 
2096 /**	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2097  *	only the path will be considered. In this case, the \a path must not be
2098  *	NULL.
2099  *	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2100  *	and should be NULL for files.
2101  */
2102 
2103 static status_t
2104 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2105 	struct vnode **_vnode, vnode_id *_parentID, bool kernel)
2106 {
2107 	if (fd < 0 && !path)
2108 		return B_BAD_VALUE;
2109 
2110 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2111 		// no FD or absolute path
2112 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2113 	}
2114 
2115 	// FD only, or FD + relative path
2116 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2117 	if (!vnode)
2118 		return B_FILE_ERROR;
2119 
2120 	if (path != NULL) {
2121 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2122 			_vnode, _parentID, NULL);
2123 	}
2124 
2125 	// there is no relative path to take into account
2126 
2127 	*_vnode = vnode;
2128 	if (_parentID)
2129 		*_parentID = -1;
2130 
2131 	return B_OK;
2132 }
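
/* Example (sketch): the resolution rules above mirror POSIX openat()-style
 * semantics. dirFD stands for a descriptor of an open directory, and real
 * callers pass mutable path buffers, since the path is modified in place:
 *
 *	fd_and_path_to_vnode(-1, path, true, &vnode, NULL, kernel);
 *		// absolute path (or fd < 0): the FD is ignored
 *	fd_and_path_to_vnode(dirFD, relativePath, true, &vnode, NULL, kernel);
 *		// the path is resolved relative to dirFD
 *	fd_and_path_to_vnode(dirFD, NULL, true, &vnode, NULL, kernel);
 *		// no path: returns the vnode of dirFD itself
 */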
2133 
2134 
2135 static int
2136 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2137 	fs_cookie cookie, int openMode, bool kernel)
2138 {
2139 	struct file_descriptor *descriptor;
2140 	int fd;
2141 
2142 	descriptor = alloc_fd();
2143 	if (!descriptor)
2144 		return B_NO_MEMORY;
2145 
2146 	if (vnode)
2147 		descriptor->u.vnode = vnode;
2148 	else
2149 		descriptor->u.mount = mount;
2150 	descriptor->cookie = cookie;
2151 
2152 	switch (type) {
2153 		// vnode types
2154 		case FDTYPE_FILE:
2155 			descriptor->ops = &sFileOps;
2156 			break;
2157 		case FDTYPE_DIR:
2158 			descriptor->ops = &sDirectoryOps;
2159 			break;
2160 		case FDTYPE_ATTR:
2161 			descriptor->ops = &sAttributeOps;
2162 			break;
2163 		case FDTYPE_ATTR_DIR:
2164 			descriptor->ops = &sAttributeDirectoryOps;
2165 			break;
2166 
2167 		// mount types
2168 		case FDTYPE_INDEX_DIR:
2169 			descriptor->ops = &sIndexDirectoryOps;
2170 			break;
2171 		case FDTYPE_QUERY:
2172 			descriptor->ops = &sQueryOps;
2173 			break;
2174 
2175 		default:
2176 			panic("get_new_fd() called with unknown type %d\n", type);
2177 			break;
2178 	}
2179 	descriptor->type = type;
2180 	descriptor->open_mode = openMode;
2181 
2182 	fd = new_fd(get_current_io_context(kernel), descriptor);
2183 	if (fd < 0) {
2184 		free(descriptor);
2185 		return B_NO_MORE_FDS;
2186 	}
2187 
2188 	return fd;
2189 }
2190 
2191 #ifdef ADD_DEBUGGER_COMMANDS
2192 
2193 
2194 static void
2195 _dump_advisory_locking(advisory_locking *locking)
2196 {
2197 	if (locking == NULL)
2198 		return;
2199 
2200 	kprintf("   lock:        %ld", locking->lock);
2201 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2202 
2203 	struct advisory_lock *lock = NULL;
2204 	int32 index = 0;
2205 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
2206 		kprintf("   [%2ld] team:   %ld\n", index, lock->team);
2207 		kprintf("        offset: %Ld\n", lock->offset);
2208 		kprintf("        length: %Ld\n", lock->length);
2209 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2210 	}
2211 }
2212 
2213 
2214 static void
2215 _dump_mount(struct fs_mount *mount)
2216 {
2217 	kprintf("MOUNT: %p\n", mount);
2218 	kprintf(" id:            %ld\n", mount->id);
2219 	kprintf(" device_name:   %s\n", mount->device_name);
2220 	kprintf(" fs_name:       %s\n", mount->fs_name);
2221 	kprintf(" cookie:        %p\n", mount->cookie);
2222 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2223 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2224 	kprintf(" partition:     %p\n", mount->partition);
2225 	kprintf(" lock:          %ld\n", mount->rlock.sem);
2226 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2227 		mount->owns_file_device ? " owns_file_device" : "");
2228 }
2229 
2230 
2231 static void
2232 _dump_vnode(struct vnode *vnode)
2233 {
2234 	kprintf("VNODE: %p\n", vnode);
2235 	kprintf(" device:        %ld\n", vnode->device);
2236 	kprintf(" id:            %Ld\n", vnode->id);
2237 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
2238 	kprintf(" private_node:  %p\n", vnode->private_node);
2239 	kprintf(" mount:         %p\n", vnode->mount);
2240 	kprintf(" covered_by:    %p\n", vnode->covered_by);
2241 	kprintf(" cache_ref:     %p\n", vnode->cache);
2242 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
2243 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2244 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
2245 
2246 	_dump_advisory_locking(vnode->advisory_locking);
2247 }
2248 
2249 
2250 static int
2251 dump_mount(int argc, char **argv)
2252 {
2253 	if (argc != 2) {
2254 		kprintf("usage: mount [id/address]\n");
2255 		return 0;
2256 	}
2257 
2258 	struct fs_mount *mount = NULL;
2259 
2260 	// if the argument looks like a hex number, treat it as such
2261 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2262 		mount = (fs_mount *)strtoul(argv[1], NULL, 16);
2263 		if (IS_USER_ADDRESS(mount)) {
2264 			kprintf("invalid fs_mount address\n");
2265 			return 0;
2266 		}
2267 	} else {
2268 		mount_id id = atoll(argv[1]);
2269 		mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2270 		if (mount == NULL) {
2271 			kprintf("fs_mount not found\n");
2272 			return 0;
2273 		}
2274 	}
2275 
2276 	_dump_mount(mount);
2277 	return 0;
2278 }
2279 
2280 
2281 static int
2282 dump_mounts(int argc, char **argv)
2283 {
2284 	struct hash_iterator iterator;
2285 	struct fs_mount *mount;
2286 
2287 	kprintf("address     id root       covers     fs_name\n");
2288 
2289 	hash_open(sMountsTable, &iterator);
2290 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2291 		kprintf("%p%4ld %p %p %s\n", mount, mount->id, mount->root_vnode,
2292 			mount->covers_vnode, mount->fs_name);
2293 	}
2294 
2295 	hash_close(sMountsTable, &iterator, false);
2296 	return 0;
2297 }
2298 
2299 
2300 static int
2301 dump_vnode(int argc, char **argv)
2302 {
2303 	if (argc < 2) {
2304 		kprintf("usage: vnode [id/device id/address]\n");
2305 		return 0;
2306 	}
2307 
2308 	struct vnode *vnode = NULL;
2309 
2310 	// if the argument looks like a hex number, treat it as such
2311 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2312 		vnode = (struct vnode *)strtoul(argv[1], NULL, 16);
2313 		if (IS_USER_ADDRESS(vnode)) {
2314 			kprintf("invalid vnode address\n");
2315 			return 0;
2316 		}
2317 		_dump_vnode(vnode);
2318 		return 0;
2319 	}
2320 
2321 	struct hash_iterator iterator;
2322 	mount_id device = -1;
2323 	vnode_id id;
2324 	if (argc > 2) {
2325 		device = atoi(argv[1]);
2326 		id = atoll(argv[2]);
2327 	} else
2328 		id = atoll(argv[1]);
2329 
2330 	hash_open(sVnodeTable, &iterator);
2331 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2332 		if (vnode->id != id || (device != -1 && vnode->device != device))
2333 			continue;
2334 
2335 		_dump_vnode(vnode);
2336 	}
2337 
2338 	hash_close(sVnodeTable, &iterator, false);
2339 	return 0;
2340 }
2341 
2342 
2343 static int
2344 dump_vnodes(int argc, char **argv)
2345 {
2346 	// restrict dumped nodes to a certain device if requested
2347 	mount_id device = -1;
2348 	if (argc > 1)
2349 		device = atoi(argv[1]);
2350 
2351 	struct hash_iterator iterator;
2352 	struct vnode *vnode;
2353 
2354 	kprintf("address    dev     inode  ref cache      locking    flags\n");
2355 
2356 	hash_open(sVnodeTable, &iterator);
2357 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2358 		if (device != -1 && vnode->device != device)
2359 			continue;
2360 
2361 		kprintf("%p%4ld%10Ld%5ld %p %p %s%s%s\n", vnode, vnode->device, vnode->id,
2362 			vnode->ref_count, vnode->cache, vnode->advisory_locking,
2363 			vnode->remove ? "r" : "-", vnode->busy ? "b" : "-",
2364 			vnode->unpublished ? "u" : "-");
2365 	}
2366 
2367 	hash_close(sVnodeTable, &iterator, false);
2368 	return 0;
2369 }
2370 
2371 
2372 static int
2373 dump_vnode_caches(int argc, char **argv)
2374 {
2375 	struct hash_iterator iterator;
2376 	struct vnode *vnode;
2377 
2378 	kprintf("address    dev     inode cache          size   pages\n");
2379 
2380 	hash_open(sVnodeTable, &iterator);
2381 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2382 		if (vnode->cache == NULL)
2383 			continue;
2384 
2385 		// count pages in cache
2386 		size_t numPages = 0;
2387 		for (struct vm_page *page = vnode->cache->cache->page_list;
2388 				page != NULL; page = page->cache_next) {
2389 			numPages++;
2390 		}
2391 
2392 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id, vnode->cache,
2393 			(vnode->cache->cache->virtual_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE, numPages);
2394 	}
2395 
2396 	hash_close(sVnodeTable, &iterator, false);
2397 	return 0;
2398 }
2399 
2400 
2401 int
2402 dump_io_context(int argc, char **argv)
2403 {
2404 	if (argc > 2) {
2405 		kprintf("usage: io_context [team id/address]\n");
2406 		return 0;
2407 	}
2408 
2409 	struct io_context *context = NULL;
2410 
2411 	if (argc > 1) {
2412 		uint32 num = strtoul(argv[1], NULL, 0);
2413 		if (IS_KERNEL_ADDRESS(num))
2414 			context = (struct io_context *)num;
2415 		else {
2416 			struct team *team = team_get_team_struct_locked(num);
2417 			if (team == NULL) {
2418 				kprintf("could not find team with ID %ld\n", num);
2419 				return 0;
2420 			}
2421 			context = (struct io_context *)team->io_context;
2422 		}
2423 	} else
2424 		context = get_current_io_context(true);
2425 
2426 	kprintf("I/O CONTEXT: %p\n", context);
2427 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2428 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2429 	kprintf(" max fds:\t%lu\n", context->table_size);
2430 
2431 	if (context->num_used_fds)
2432 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2433 
2434 	for (uint32 i = 0; i < context->table_size; i++) {
2435 		struct file_descriptor *fd = context->fds[i];
2436 		if (fd == NULL)
2437 			continue;
2438 
2439 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2440 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2441 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2442 			fd->u.vnode);
2443 	}
2444 
2445 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2446 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2447 
2448 	return 0;
2449 }
2450 
2451 
2452 int
2453 dump_vnode_usage(int argc, char **argv)
2454 {
2455 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes, kMaxUnusedVnodes);
2456 
2457 	struct hash_iterator iterator;
2458 	hash_open(sVnodeTable, &iterator);
2459 
2460 	uint32 count = 0;
2461 	struct vnode *vnode;
2462 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2463 		count++;
2464 	}
2465 
2466 	hash_close(sVnodeTable, &iterator, false);
2467 
2468 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2469 	return 0;
2470 }
2471 
2472 #endif	// ADD_DEBUGGER_COMMANDS
2473 
2474 
2475 //	#pragma mark - public VFS API
2476 
2477 
2478 extern "C" status_t
2479 new_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2480 {
2481 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2482 		mountID, vnodeID, privateNode));
2483 
2484 	if (privateNode == NULL)
2485 		return B_BAD_VALUE;
2486 
2487 	mutex_lock(&sVnodeMutex);
2488 
2489 	// file system integrity check:
2490 	// test if the vnode already exists and bail out if this is the case!
2491 
2492 	// ToDo: the R5 implementation obviously checks for a different cookie
2493 	//	and doesn't panic if they are equal
2494 
2495 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2496 	if (vnode != NULL)
2497 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2498 
2499 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2500 	if (status == B_OK) {
2501 		vnode->private_node = privateNode;
2502 		vnode->busy = true;
2503 		vnode->unpublished = true;
2504 	}
2505 
2506 	TRACE(("returns: %s\n", strerror(status)));
2507 
2508 	mutex_unlock(&sVnodeMutex);
2509 	return status;
2510 }
2511 
2512 
2513 extern "C" status_t
2514 publish_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2515 {
2516 	FUNCTION(("publish_vnode()\n"));
2517 
2518 	mutex_lock(&sVnodeMutex);
2519 
2520 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2521 	status_t status = B_OK;
2522 
2523 	if (vnode != NULL && vnode->busy && vnode->unpublished
2524 		&& vnode->private_node == privateNode) {
2525 		vnode->busy = false;
2526 		vnode->unpublished = false;
2527 	} else if (vnode == NULL && privateNode != NULL) {
2528 		status = create_new_vnode(&vnode, mountID, vnodeID);
2529 		if (status == B_OK)
2530 			vnode->private_node = privateNode;
2531 	} else
2532 		status = B_BAD_VALUE;
2533 
2534 	TRACE(("returns: %s\n", strerror(status)));
2535 
2536 	mutex_unlock(&sVnodeMutex);
2537 	return status;
2538 }
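
/* Example (sketch): the typical two-step pattern a file system uses when it
 * creates a node. new_vnode() announces the node to the VFS but leaves it
 * busy and unpublished; publish_vnode() makes it generally accessible once
 * the FS has set it up completely. myfs_make_node() is hypothetical:
 *
 *	static status_t
 *	myfs_make_node(mount_id mountID, vnode_id newID, fs_vnode privateNode)
 *	{
 *		status_t status = new_vnode(mountID, newID, privateNode);
 *		if (status != B_OK)
 *			return status;
 *
 *		// ... initialize the on-disk and in-memory structures ...
 *
 *		return publish_vnode(mountID, newID, privateNode);
 *	}
 */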
2539 
2540 
2541 extern "C" status_t
2542 get_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode *_fsNode)
2543 {
2544 	struct vnode *vnode;
2545 
2546 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2547 	if (status < B_OK)
2548 		return status;
2549 
2550 	*_fsNode = vnode->private_node;
2551 	return B_OK;
2552 }
2553 
2554 
2555 extern "C" status_t
2556 put_vnode(mount_id mountID, vnode_id vnodeID)
2557 {
2558 	struct vnode *vnode;
2559 
2560 	mutex_lock(&sVnodeMutex);
2561 	vnode = lookup_vnode(mountID, vnodeID);
2562 	mutex_unlock(&sVnodeMutex);
2563 
2564 	if (vnode)
2565 		dec_vnode_ref_count(vnode, true);
2566 
2567 	return B_OK;
2568 }
2569 
2570 
2571 extern "C" status_t
2572 remove_vnode(mount_id mountID, vnode_id vnodeID)
2573 {
2574 	struct vnode *vnode;
2575 	bool remove = false;
2576 
2577 	mutex_lock(&sVnodeMutex);
2578 
2579 	vnode = lookup_vnode(mountID, vnodeID);
2580 	if (vnode != NULL) {
2581 		if (vnode->covered_by != NULL) {
2582 			// this vnode is in use
2583 			mutex_unlock(&sVnodeMutex);
2584 			return B_BUSY;
2585 		}
2586 
2587 		vnode->remove = true;
2588 		if (vnode->unpublished) {
2589 			// prepare the vnode for deletion
2590 			vnode->busy = true;
2591 			remove = true;
2592 		}
2593 	}
2594 
2595 	mutex_unlock(&sVnodeMutex);
2596 
2597 	if (remove) {
2598 		// if the vnode hasn't been published yet, we delete it here
2599 		atomic_add(&vnode->ref_count, -1);
2600 		free_vnode(vnode, true);
2601 	}
2602 
2603 	return B_OK;
2604 }
2605 
2606 
2607 extern "C" status_t
2608 unremove_vnode(mount_id mountID, vnode_id vnodeID)
2609 {
2610 	struct vnode *vnode;
2611 
2612 	mutex_lock(&sVnodeMutex);
2613 
2614 	vnode = lookup_vnode(mountID, vnodeID);
2615 	if (vnode)
2616 		vnode->remove = false;
2617 
2618 	mutex_unlock(&sVnodeMutex);
2619 	return B_OK;
2620 }
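
/* Example (sketch): an FS unlink hook would typically call remove_vnode() to
 * mark the node for deletion and let the VFS destroy it when the last
 * reference is put; unremove_vnode() reverses the mark, e.g. when a
 * transaction is rolled back. mountID/nodeID stand for the affected node:
 *
 *	status_t status = remove_vnode(mountID, nodeID);
 *	if (status == B_BUSY)
 *		return B_BUSY;	// the node is covered, i.e. used as a mount point
 */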
2621 
2622 
2623 //	#pragma mark - private VFS API
2624 //	Functions the VFS exports for other parts of the kernel
2625 
2626 
2627 /** Acquires another reference to the vnode that has to be released
2628  *	by calling vfs_put_vnode().
2629  */
2630 
2631 void
2632 vfs_acquire_vnode(void *_vnode)
2633 {
2634 	inc_vnode_ref_count((struct vnode *)_vnode);
2635 }
2636 
2637 
2638 /** This is currently called from file_cache_create() only.
2639  *	It's probably a temporary solution as long as devfs requires that
2640  *	fs_read_pages()/fs_write_pages() are called with the standard
2641  *	open cookie and not with a device cookie.
2642  *	If that's done differently, remove this call; it has no other
2643  *	purpose.
2644  */
2645 
2646 extern "C" status_t
2647 vfs_get_cookie_from_fd(int fd, void **_cookie)
2648 {
2649 	struct file_descriptor *descriptor;
2650 
2651 	descriptor = get_fd(get_current_io_context(true), fd);
2652 	if (descriptor == NULL)
2653 		return B_FILE_ERROR;
2654 
2655 	*_cookie = descriptor->cookie;
2656 	return B_OK;
2657 }
2658 
2659 
2660 extern "C" int
2661 vfs_get_vnode_from_fd(int fd, bool kernel, void **vnode)
2662 {
2663 	*vnode = get_vnode_from_fd(fd, kernel);
2664 
2665 	if (*vnode == NULL)
2666 		return B_FILE_ERROR;
2667 
2668 	return B_NO_ERROR;
2669 }
2670 
2671 
2672 extern "C" status_t
2673 vfs_get_vnode_from_path(const char *path, bool kernel, void **_vnode)
2674 {
2675 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n", path, kernel));
2676 
2677 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2678 	if (pathBuffer.InitCheck() != B_OK)
2679 		return B_NO_MEMORY;
2680 
2681 	char *buffer = pathBuffer.LockBuffer();
2682 	strlcpy(buffer, path, pathBuffer.BufferSize());
2683 
2684 	struct vnode *vnode;
2685 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2686 	if (status < B_OK)
2687 		return status;
2688 
2689 	*_vnode = vnode;
2690 	return B_OK;
2691 }
2692 
2693 
2694 extern "C" status_t
2695 vfs_get_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2696 {
2697 	struct vnode *vnode;
2698 
2699 	status_t status = get_vnode(mountID, vnodeID, &vnode, false);
2700 	if (status < B_OK)
2701 		return status;
2702 
2703 	*_vnode = vnode;
2704 	return B_OK;
2705 }
2706 
2707 
2708 extern "C" status_t
2709 vfs_entry_ref_to_vnode(mount_id mountID, vnode_id directoryID,
2710 	const char *name, void **_vnode)
2711 {
2712 	return entry_ref_to_vnode(mountID, directoryID, name, (struct vnode **)_vnode);
2713 }
2714 
2715 
2716 extern "C" void
2717 vfs_vnode_to_node_ref(void *_vnode, mount_id *_mountID, vnode_id *_vnodeID)
2718 {
2719 	struct vnode *vnode = (struct vnode *)_vnode;
2720 
2721 	*_mountID = vnode->device;
2722 	*_vnodeID = vnode->id;
2723 }
2724 
2725 
2726 /**	Looks up a vnode with the given mount and vnode ID.
2727  *	Must only be used with "in-use" vnodes as it doesn't grab a reference
2728  *	to the node.
2729  *	It's currently only used by file_cache_create().
2730  */
2731 
2732 extern "C" status_t
2733 vfs_lookup_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2734 {
2735 	mutex_lock(&sVnodeMutex);
2736 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2737 	mutex_unlock(&sVnodeMutex);
2738 
2739 	if (vnode == NULL)
2740 		return B_ERROR;
2741 
2742 	*_vnode = vnode;
2743 	return B_OK;
2744 }
2745 
2746 
2747 extern "C" status_t
2748 vfs_get_fs_node_from_path(mount_id mountID, const char *path, bool kernel, void **_node)
2749 {
2750 	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
2751 		mountID, path, kernel));
2752 
2753 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
2754 	if (pathBuffer.InitCheck() != B_OK)
2755 		return B_NO_MEMORY;
2756 
2757 	fs_mount *mount;
2758 	status_t status = get_mount(mountID, &mount);
2759 	if (status < B_OK)
2760 		return status;
2761 
2762 	char *buffer = pathBuffer.LockBuffer();
2763 	strlcpy(buffer, path, pathBuffer.BufferSize());
2764 
2765 	struct vnode *vnode = mount->root_vnode;
2766 
2767 	if (buffer[0] == '/')
2768 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
2769 	else {
2770 		inc_vnode_ref_count(vnode);
2771 			// vnode_path_to_vnode() releases a reference to the starting vnode
2772 		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
2773 	}
2774 
2775 	put_mount(mount);
2776 
2777 	if (status < B_OK)
2778 		return status;
2779 
2780 	if (vnode->device != mountID) {
2781 		// wrong mount ID - must not gain access on foreign file system nodes
2782 		put_vnode(vnode);
2783 		return B_BAD_VALUE;
2784 	}
2785 
2786 	*_node = vnode->private_node;
2787 	return B_OK;
2788 }
2789 
2790 
2791 /**	Finds the full path to the file that contains the module \a moduleName,
2792  *	puts it into \a pathBuffer, and returns B_OK for success.
2793  *	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
2794  *	\c B_ENTRY_NOT_FOUND if no file could be found.
2795  *	\a pathBuffer is clobbered in any case and must not be relied on if this
2796  *	function returns unsuccessfully.
2797  */
2798 
2799 status_t
2800 vfs_get_module_path(const char *basePath, const char *moduleName, char *pathBuffer,
2801 	size_t bufferSize)
2802 {
2803 	struct vnode *dir, *file;
2804 	status_t status;
2805 	size_t length;
2806 	char *path;
2807 
2808 	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
2809 		return B_BUFFER_OVERFLOW;
2810 
2811 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
2812 	if (status < B_OK)
2813 		return status;
2814 
2815 	// the path buffer has been clobbered by the above call
2816 	length = strlcpy(pathBuffer, basePath, bufferSize);
2817 	if (pathBuffer[length - 1] != '/')
2818 		pathBuffer[length++] = '/';
2819 
2820 	path = pathBuffer + length;
2821 	bufferSize -= length;
2822 
2823 	while (moduleName) {
2824 		int type;
2825 
2826 		char *nextPath = strchr(moduleName, '/');
2827 		if (nextPath == NULL)
2828 			length = strlen(moduleName);
2829 		else {
2830 			length = nextPath - moduleName;
2831 			nextPath++;
2832 		}
2833 
2834 		if (length + 1 >= bufferSize) {
2835 			status = B_BUFFER_OVERFLOW;
2836 			goto err;
2837 		}
2838 
2839 		memcpy(path, moduleName, length);
2840 		path[length] = '\0';
2841 		moduleName = nextPath;
2842 
2843 		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
2844 		if (status < B_OK) {
2845 			// vnode_path_to_vnode() has already released the reference to dir
2846 			return status;
2847 		}
2848 
2849 		if (S_ISDIR(type)) {
2850 			// go to the next directory
2851 			path[length] = '/';
2852 			path[length + 1] = '\0';
2853 			path += length + 1;
2854 			bufferSize -= length + 1;
2855 
2856 			dir = file;
2857 		} else if (S_ISREG(type)) {
2858 			// it's a file so it should be what we've searched for
2859 			put_vnode(file);
2860 
2861 			return B_OK;
2862 		} else {
2863 			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
2864 			status = B_ERROR;
2865 			dir = file;
2866 			goto err;
2867 		}
2868 	}
2869 
2870 	// if we got here, the moduleName just pointed to a directory, not to
2871 	// a real module - what should we do in this case?
2872 	status = B_ENTRY_NOT_FOUND;
2873 
2874 err:
2875 	put_vnode(dir);
2876 	return status;
2877 }
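
/* Example (sketch): resolving a module name below a base path. Assuming a
 * hypothetical layout in which "/boot/addons/kernel/busses/scsi" is a
 * regular file containing the module, the lookup stops at that file:
 *
 *	char path[B_PATH_NAME_LENGTH];
 *	status_t status = vfs_get_module_path("/boot/addons/kernel",
 *		"busses/scsi/ahci/v1", path, sizeof(path));
 *	// on success, path contains "/boot/addons/kernel/busses/scsi"
 */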
2878 
2879 
2880 /**	\brief Normalizes a given path.
2881  *
2882  *	The path must refer to an existing or non-existing entry in an existing
2883  *	directory; that is, after chopping off the leaf component, the
2884  *	remaining path must refer to an existing directory.
2885  *
2886  *	The returned path will be canonical in that it will be absolute, will
2887  *	not contain any "." or ".." components or duplicate occurrences of
2888  *	'/'s, and none of the directory components will be symbolic links.
2889  *
2890  *	Any two paths referring to the same entry will result in the same
2891  *	normalized path (well, that is pretty much the definition of `normalized',
2892  *	isn't it :-).
2893  *
2894  *	\param path The path to be normalized.
2895  *	\param buffer The buffer into which the normalized path will be written.
2896  *	\param bufferSize The size of \a buffer.
2897  *	\param kernel \c true, if the IO context of the kernel shall be used,
2898  *		   otherwise that of the team this thread belongs to. Only relevant,
2899  *		   if the path is relative (to get the CWD).
2900  *	\return \c B_OK if everything went fine, another error code otherwise.
2901  */
2902 
2903 status_t
2904 vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
2905 	bool kernel)
2906 {
2907 	if (!path || !buffer || bufferSize < 1)
2908 		return B_BAD_VALUE;
2909 
2910 	TRACE(("vfs_normalize_path(`%s')\n", path));
2911 
2912 	// copy the supplied path into a buffer we can modify
2913 	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
2914 	if (mutablePathBuffer.InitCheck() != B_OK)
2915 		return B_NO_MEMORY;
2916 
2917 	char *mutablePath = mutablePathBuffer.LockBuffer();
2918 	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
2919 		return B_NAME_TOO_LONG;
2920 
2921 	// get the dir vnode and the leaf name
2922 	struct vnode *dirNode;
2923 	char leaf[B_FILE_NAME_LENGTH];
2924 	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
2925 	if (error != B_OK) {
2926 		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
2927 		return error;
2928 	}
2929 
2930 	// if the leaf is "." or "..", we directly get the correct directory
2931 	// vnode and ignore the leaf later
2932 	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
2933 	if (isDir)
2934 		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
2935 	if (error != B_OK) {
2936 		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
2937 			strerror(error)));
2938 		return error;
2939 	}
2940 
2941 	// get the directory path
2942 	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
2943 	put_vnode(dirNode);
2944 	if (error < B_OK) {
2945 		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
2946 		return error;
2947 	}
2948 
2949 	// append the leaf name
2950 	if (!isDir) {
2951 		// insert a directory separator only if this is not the file system root
2952 		if ((strcmp(buffer, "/") != 0
2953 			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
2954 			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
2955 			return B_NAME_TOO_LONG;
2956 		}
2957 	}
2958 
2959 	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
2960 	return B_OK;
2961 }
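
/* Example (sketch): assuming "/boot/home" exists, normalization removes "."
 * and ".." components, duplicate slashes, and intermediate symbolic links:
 *
 *	char normalized[B_PATH_NAME_LENGTH];
 *	if (vfs_normalize_path("/boot/home/..//home/./Desktop", normalized,
 *			sizeof(normalized), true) == B_OK)
 *		dprintf("-> %s\n", normalized);	// prints "/boot/home/Desktop"
 */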
2962 
2963 
2964 extern "C" void
2965 vfs_put_vnode(void *_vnode)
2966 {
2967 	put_vnode((struct vnode *)_vnode);
2968 }
2969 
2970 
2971 extern "C" status_t
2972 vfs_get_cwd(mount_id *_mountID, vnode_id *_vnodeID)
2973 {
2974 	// Get current working directory from io context
2975 	struct io_context *context = get_current_io_context(false);
2976 	status_t status = B_OK;
2977 
2978 	mutex_lock(&context->io_mutex);
2979 
2980 	if (context->cwd != NULL) {
2981 		*_mountID = context->cwd->device;
2982 		*_vnodeID = context->cwd->id;
2983 	} else
2984 		status = B_ERROR;
2985 
2986 	mutex_unlock(&context->io_mutex);
2987 	return status;
2988 }
2989 
2990 
2991 extern "C" status_t
2992 vfs_disconnect_vnode(mount_id mountID, vnode_id vnodeID)
2993 {
2994 	struct vnode *vnode;
2995 
2996 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2997 	if (status < B_OK)
2998 		return status;
2999 
3000 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3001 	return B_OK;
3002 }
3003 
3004 
3005 extern "C" void
3006 vfs_free_unused_vnodes(int32 level)
3007 {
3008 	vnode_low_memory_handler(NULL, level);
3009 }
3010 
3011 
3012 extern "C" bool
3013 vfs_can_page(void *_vnode, void *cookie)
3014 {
3015 	struct vnode *vnode = (struct vnode *)_vnode;
3016 
3017 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
3018 
3019 	if (FS_CALL(vnode, can_page))
3020 		return FS_CALL(vnode, can_page)(vnode->mount->cookie, vnode->private_node, cookie);
3021 
3022 	return false;
3023 }
3024 
3025 
3026 extern "C" status_t
3027 vfs_read_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
3028 	size_t *_numBytes, bool fsReenter)
3029 {
3030 	struct vnode *vnode = (struct vnode *)_vnode;
3031 
3032 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3033 
3034 	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
3035 		cookie, pos, vecs, count, _numBytes, fsReenter);
3036 }
3037 
3038 
3039 extern "C" status_t
3040 vfs_write_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count,
3041 	size_t *_numBytes, bool fsReenter)
3042 {
3043 	struct vnode *vnode = (struct vnode *)_vnode;
3044 
3045 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3046 
3047 	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
3048 		cookie, pos, vecs, count, _numBytes, fsReenter);
3049 }
3050 
3051 
3052 /** Gets the vnode's vm_cache object. If the vnode doesn't have one yet, it
3053  *	will be created if \a allocate is \c true.
3054  *	On success, this function also acquires a reference to the cache it
3055  *	returns.
3056  */
3057 
3058 extern "C" status_t
3059 vfs_get_vnode_cache(void *_vnode, vm_cache_ref **_cache, bool allocate)
3060 {
3061 	struct vnode *vnode = (struct vnode *)_vnode;
3062 
3063 	if (vnode->cache != NULL) {
3064 		vm_cache_acquire_ref(vnode->cache);
3065 		*_cache = vnode->cache;
3066 		return B_OK;
3067 	}
3068 
3069 	mutex_lock(&sVnodeMutex);
3070 
3071 	status_t status = B_OK;
3072 
3073 	// The cache could have been created in the meantime
3074 	if (vnode->cache == NULL) {
3075 		if (allocate) {
3076 			// TODO: actually the vnode needs to be busy already here, or
3077 			//	else this won't work...
3078 			bool wasBusy = vnode->busy;
3079 			vnode->busy = true;
3080 			mutex_unlock(&sVnodeMutex);
3081 
3082 			status = vm_create_vnode_cache(vnode, &vnode->cache);
3083 
3084 			mutex_lock(&sVnodeMutex);
3085 			vnode->busy = wasBusy;
3086 		} else
3087 			status = B_BAD_VALUE;
3088 	} else
3089 		vm_cache_acquire_ref(vnode->cache);
3090 
3091 	if (status == B_OK)
3092 		*_cache = vnode->cache;
3093 
3094 	mutex_unlock(&sVnodeMutex);
3095 	return status;
3096 }
3097 
3098 
3099 status_t
3100 vfs_get_file_map(void *_vnode, off_t offset, size_t size, file_io_vec *vecs, size_t *_count)
3101 {
3102 	struct vnode *vnode = (struct vnode *)_vnode;
3103 
3104 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
3105 
3106 	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie, vnode->private_node, offset, size, vecs, _count);
3107 }
3108 
3109 
3110 status_t
3111 vfs_stat_vnode(void *_vnode, struct stat *stat)
3112 {
3113 	struct vnode *vnode = (struct vnode *)_vnode;
3114 
3115 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3116 		vnode->private_node, stat);
3117 
3118 	// fill in the st_dev and st_ino fields
3119 	if (status == B_OK) {
3120 		stat->st_dev = vnode->device;
3121 		stat->st_ino = vnode->id;
3122 	}
3123 
3124 	return status;
3125 }
3126 
3127 
3128 status_t
3129 vfs_get_vnode_name(void *_vnode, char *name, size_t nameSize)
3130 {
3131 	return get_vnode_name((struct vnode *)_vnode, NULL, name, nameSize);
3132 }
3133 
3134 
3135 /**	Closes all file descriptors of the specified I/O context that
3136  *	have the O_CLOEXEC flag set.
3137  */
3138 
3139 void
3140 vfs_exec_io_context(void *_context)
3141 {
3142 	struct io_context *context = (struct io_context *)_context;
3143 	uint32 i;
3144 
3145 	for (i = 0; i < context->table_size; i++) {
3146 		mutex_lock(&context->io_mutex);
3147 
3148 		struct file_descriptor *descriptor = context->fds[i];
3149 		bool remove = false;
3150 
3151 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
3152 			context->fds[i] = NULL;
3153 			context->num_used_fds--;
3154 
3155 			remove = true;
3156 		}
3157 
3158 		mutex_unlock(&context->io_mutex);
3159 
3160 		if (remove) {
3161 			close_fd(descriptor);
3162 			put_fd(descriptor);
3163 		}
3164 	}
3165 }
3166 
3167 
3168 /** Sets up a new io_context structure, and inherits the properties
3169  *	of the parent io_context if it is given.
3170  */
3171 
3172 void *
3173 vfs_new_io_context(void *_parentContext)
3174 {
3175 	size_t tableSize;
3176 	struct io_context *context;
3177 	struct io_context *parentContext;
3178 
3179 	context = (io_context *)malloc(sizeof(struct io_context));
3180 	if (context == NULL)
3181 		return NULL;
3182 
3183 	memset(context, 0, sizeof(struct io_context));
3184 
3185 	parentContext = (struct io_context *)_parentContext;
3186 	if (parentContext)
3187 		tableSize = parentContext->table_size;
3188 	else
3189 		tableSize = DEFAULT_FD_TABLE_SIZE;
3190 
3191 	// allocate space for FDs and their close-on-exec flag
3192 	context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize
3193 		+ tableSize / 8);
3194 	if (context->fds == NULL) {
3195 		free(context);
3196 		return NULL;
3197 	}
3198 
3199 	memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize
3200 		+ tableSize / 8);
3201 	context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);
3202 
3203 	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
3204 		free(context->fds);
3205 		free(context);
3206 		return NULL;
3207 	}
3208 
3209 	// Copy all parent files which don't have the O_CLOEXEC flag set
3210 
3211 	if (parentContext) {
3212 		size_t i;
3213 
3214 		mutex_lock(&parentContext->io_mutex);
3215 
3216 		context->cwd = parentContext->cwd;
3217 		if (context->cwd)
3218 			inc_vnode_ref_count(context->cwd);
3219 
3220 		for (i = 0; i < tableSize; i++) {
3221 			struct file_descriptor *descriptor = parentContext->fds[i];
3222 
3223 			if (descriptor != NULL && !fd_close_on_exec(parentContext, i)) {
3224 				context->fds[i] = descriptor;
3225 				context->num_used_fds++;
3226 				atomic_add(&descriptor->ref_count, 1);
3227 				atomic_add(&descriptor->open_count, 1);
3228 			}
3229 		}
3230 
3231 		mutex_unlock(&parentContext->io_mutex);
3232 	} else {
3233 		context->cwd = sRoot;
3234 
3235 		if (context->cwd)
3236 			inc_vnode_ref_count(context->cwd);
3237 	}
3238 
3239 	context->table_size = tableSize;
3240 
3241 	list_init(&context->node_monitors);
3242 	context->max_monitors = MAX_NODE_MONITORS;
3243 
3244 	return context;
3245 }
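
/* A sketch of the close-on-exec bookkeeping set up above: the flags live in
 * the same allocation as the FD table, one bit per slot. The actual
 * accessors are fd_close_on_exec()/fd_set_close_on_exec() in the FD code;
 * a typical encoding of the lookup would be:
 *
 *	static inline bool
 *	example_close_on_exec(struct io_context *context, int fd)
 *	{
 *		return (context->fds_close_on_exec[fd / 8] & (1 << (fd % 8))) != 0;
 *	}
 */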
3246 
3247 
3248 status_t
3249 vfs_free_io_context(void *_ioContext)
3250 {
3251 	struct io_context *context = (struct io_context *)_ioContext;
3252 	uint32 i;
3253 
3254 	if (context->cwd)
3255 		dec_vnode_ref_count(context->cwd, false);
3256 
3257 	mutex_lock(&context->io_mutex);
3258 
3259 	for (i = 0; i < context->table_size; i++) {
3260 		if (struct file_descriptor *descriptor = context->fds[i]) {
3261 			close_fd(descriptor);
3262 			put_fd(descriptor);
3263 		}
3264 	}
3265 
3266 	mutex_destroy(&context->io_mutex);
3267 
3268 	remove_node_monitors(context);
3269 	free(context->fds);
3270 	free(context);
3271 
3272 	return B_OK;
3273 }
3274 
3275 
3276 static status_t
3277 vfs_resize_fd_table(struct io_context *context, const int newSize)
3278 {
3279 	void *fds;
3280 	int	status = B_OK;
3281 
3282 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3283 		return EINVAL;
3284 
3285 	mutex_lock(&context->io_mutex);
3286 
3287 	if ((size_t)newSize < context->table_size) {
3288 		// shrink the fd table
3289 		int i;
3290 
3291 		// Make sure none of the fds being dropped are in use
3292 		for (i = context->table_size; i-- > newSize;) {
3293 			if (context->fds[i]) {
3294 				status = EBUSY;
3295 				goto out;
3296 			}
3297 		}
3298 
3299 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3300 		if (fds == NULL) {
3301 			status = ENOMEM;
3302 			goto out;
3303 		}
3304 
3305 		memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize);
3306 	} else {
3307 		// enlarge the fd table
3308 
3309 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3310 		if (fds == NULL) {
3311 			status = ENOMEM;
3312 			goto out;
3313 		}
3314 
3315 		// copy the fd array, and zero the additional slots
3316 		memcpy(fds, context->fds, sizeof(void *) * context->table_size);
3317 		memset((char *)fds + (sizeof(void *) * context->table_size), 0,
3318 			sizeof(void *) * (newSize - context->table_size));
3319 	}
3320 
3321 	free(context->fds);
3322 	context->fds = (file_descriptor **)fds;
3323 	context->table_size = newSize;
3324 
3325 out:
3326 	mutex_unlock(&context->io_mutex);
3327 	return status;
3328 }
3329 
3330 
3331 int
3332 vfs_getrlimit(int resource, struct rlimit * rlp)
3333 {
3334 	if (!rlp)
3335 		return -1;
3336 
3337 	switch (resource) {
3338 		case RLIMIT_NOFILE:
3339 		{
3340 			struct io_context *ioctx = get_current_io_context(false);
3341 
3342 			mutex_lock(&ioctx->io_mutex);
3343 
3344 			rlp->rlim_cur = ioctx->table_size;
3345 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3346 
3347 			mutex_unlock(&ioctx->io_mutex);
3348 
3349 			return 0;
3350 		}
3351 
3352 		default:
3353 			return -1;
3354 	}
3355 }
3356 
3357 
3358 int
3359 vfs_setrlimit(int resource, const struct rlimit * rlp)
3360 {
3361 	if (!rlp)
3362 		return -1;
3363 
3364 	switch (resource) {
3365 		case RLIMIT_NOFILE:
3366 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3367 
3368 		default:
3369 			return -1;
3370 	}
3371 }
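
/* Example (sketch): how the POSIX layer could grow a team's FD table through
 * this interface (the value 512 is illustrative; the syscall plumbing lives
 * elsewhere):
 *
 *	struct rlimit rl;
 *	if (vfs_getrlimit(RLIMIT_NOFILE, &rl) == 0) {
 *		rl.rlim_cur = 512;	// must stay within MAX_FD_TABLE_SIZE
 *		vfs_setrlimit(RLIMIT_NOFILE, &rl);
 *			// shrinking below a slot that is still in use fails with EBUSY
 *	}
 */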
3372 
3373 
3374 status_t
3375 vfs_init(kernel_args *args)
3376 {
3377 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
3378 		&vnode_compare, &vnode_hash);
3379 	if (sVnodeTable == NULL)
3380 		panic("vfs_init: error creating vnode hash table\n");
3381 
3382 	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));
3383 
3384 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
3385 		&mount_compare, &mount_hash);
3386 	if (sMountsTable == NULL)
3387 		panic("vfs_init: error creating mounts hash table\n");
3388 
3389 	node_monitor_init();
3390 
3391 	sRoot = NULL;
3392 
3393 	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
3394 		panic("vfs_init: error allocating file systems lock\n");
3395 
3396 	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
3397 		panic("vfs_init: error allocating mount op lock\n");
3398 
3399 	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
3400 		panic("vfs_init: error allocating mount lock\n");
3401 
3402 	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
3403 		panic("vfs_init: error allocating vnode lock\n");
3404 
3405 	if (block_cache_init() != B_OK)
3406 		return B_ERROR;
3407 
3408 #ifdef ADD_DEBUGGER_COMMANDS
3409 	// add some debugger commands
3410 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
3411 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
3412 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
3413 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
3414 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
3415 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
3416 	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
3417 #endif
3418 
3419 	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);
3420 
3421 	return file_cache_init();
3422 }
3423 
3424 
3425 //	#pragma mark -
3426 //	The filetype-dependent implementations (fd_ops + open/create/rename/remove, ...)
3427 
3428 
3429 /** Calls fs create() on the given directory, which creates and opens
3430  *	the node, and returns a new file descriptor for it
3431  */
3432 
3433 static int
3434 create_vnode(struct vnode *directory, const char *name, int openMode, int perms, bool kernel)
3435 {
3436 	struct vnode *vnode;
3437 	fs_cookie cookie;
3438 	vnode_id newID;
3439 	int status;
3440 
3441 	if (FS_CALL(directory, create) == NULL)
3442 		return EROFS;
3443 
3444 	status = FS_CALL(directory, create)(directory->mount->cookie, directory->private_node, name, openMode, perms, &cookie, &newID);
3445 	if (status < B_OK)
3446 		return status;
3447 
3448 	mutex_lock(&sVnodeMutex);
3449 	vnode = lookup_vnode(directory->device, newID);
3450 	mutex_unlock(&sVnodeMutex);
3451 
3452 	if (vnode == NULL) {
3453 		dprintf("vfs: fs_create() returned success but there is no vnode!");
3454 		return EINVAL;
3455 	}
3456 
3457 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
3458 		return status;
3459 
3460 	// something went wrong, clean up
3461 
3462 	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3463 	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3464 	put_vnode(vnode);
3465 
3466 	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);
3467 
3468 	return status;
3469 }
3470 
3471 
3472 /** Calls fs_open() on the given vnode and returns a new
3473  *	file descriptor for it
3474  */
3475 
3476 static int
3477 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3478 {
3479 	fs_cookie cookie;
3480 	int status;
3481 
3482 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3483 	if (status < 0)
3484 		return status;
3485 
3486 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3487 	if (status < 0) {
3488 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3489 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3490 	}
3491 	return status;
3492 }
3493 
3494 
3495 /** Calls fs open_dir() on the given vnode and returns a new
3496  *	file descriptor for it
3497  */
3498 
3499 static int
3500 open_dir_vnode(struct vnode *vnode, bool kernel)
3501 {
3502 	fs_cookie cookie;
3503 	int status;
3504 
3505 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3506 	if (status < B_OK)
3507 		return status;
3508 
3509 	// the directory is opened, create a fd for it
3510 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3511 	if (status >= 0)
3512 		return status;
3513 
3514 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3515 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3516 
3517 	return status;
3518 }
3519 
3520 
3521 /** Calls fs open_attr_dir() on the given vnode and returns a new
3522  *	file descriptor for it.
3523  *	Used by attr_dir_open(), and attr_dir_open_fd().
3524  */
3525 
3526 static int
3527 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3528 {
3529 	fs_cookie cookie;
3530 	int status;
3531 
3532 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3533 		return EOPNOTSUPP;
3534 
3535 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3536 	if (status < 0)
3537 		return status;
3538 
3539 	// file is opened, create a fd
3540 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3541 	if (status >= 0)
3542 		return status;
3543 
3544 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3545 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3546 
3547 	return status;
3548 }
3549 
3550 
3551 static int
3552 file_create_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, int perms, bool kernel)
3553 {
3554 	struct vnode *directory;
3555 	int status;
3556 
3557 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
3558 
3559 	// get directory to put the new file in
3560 	status = get_vnode(mountID, directoryID, &directory, false);
3561 	if (status < B_OK)
3562 		return status;
3563 
3564 	status = create_vnode(directory, name, openMode, perms, kernel);
3565 	put_vnode(directory);
3566 
3567 	return status;
3568 }
3569 
3570 
3571 static int
3572 file_create(int fd, char *path, int openMode, int perms, bool kernel)
3573 {
3574 	char name[B_FILE_NAME_LENGTH];
3575 	struct vnode *directory;
3576 	int status;
3577 
3578 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
3579 
3580 	// get directory to put the new file in
3581 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3582 	if (status < 0)
3583 		return status;
3584 
3585 	status = create_vnode(directory, name, openMode, perms, kernel);
3586 
3587 	put_vnode(directory);
3588 	return status;
3589 }
3590 
3591 
3592 static int
3593 file_open_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, bool kernel)
3594 {
3595 	struct vnode *vnode;
3596 	int status;
3597 
3598 	if (name == NULL || *name == '\0')
3599 		return B_BAD_VALUE;
3600 
3601 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
3602 		mountID, directoryID, name, openMode));
3603 
3604 	// get the vnode matching the entry_ref
3605 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
3606 	if (status < B_OK)
3607 		return status;
3608 
3609 	status = open_vnode(vnode, openMode, kernel);
3610 	if (status >= B_OK)
3611 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
3612 	else
3613 		put_vnode(vnode);
3614 	return status;
3615 }
3616 
3617 
3618 static int
3619 file_open(int fd, char *path, int openMode, bool kernel)
3620 {
3621 	int status = B_OK;
3622 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
3623 
3624 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
3625 		fd, path, openMode, kernel));
3626 
3627 	// get the vnode matching the vnode + path combination
3628 	struct vnode *vnode = NULL;
3629 	vnode_id parentID;
3630 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
3631 	if (status != B_OK)
3632 		return status;
3633 
3634 	// open the vnode
3635 	status = open_vnode(vnode, openMode, kernel);
3636 	// put only on error -- otherwise our reference was transferred to the FD
3637 	if (status >= B_OK) {
3638 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
3639 			vnode->device, parentID, vnode->id, NULL);
3640 	} else
3641 		put_vnode(vnode);
3642 
3643 	return status;
3644 }
3645 
3646 
3647 static status_t
3648 file_close(struct file_descriptor *descriptor)
3649 {
3650 	struct vnode *vnode = descriptor->u.vnode;
3651 	status_t status = B_OK;
3652 
3653 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
3654 
3655 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
3656 	if (FS_CALL(vnode, close))
3657 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3658 
3659 	if (status == B_OK) {
3660 		// remove all outstanding locks for this team
3661 		release_advisory_lock(vnode, NULL);
3662 	}
3663 	return status;
3664 }
3665 
3666 
3667 static void
3668 file_free_fd(struct file_descriptor *descriptor)
3669 {
3670 	struct vnode *vnode = descriptor->u.vnode;
3671 
3672 	if (vnode != NULL) {
3673 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3674 		put_vnode(vnode);
3675 	}
3676 }
3677 
3678 
3679 static status_t
3680 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
3681 {
3682 	struct vnode *vnode = descriptor->u.vnode;
3683 
3684 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
3685 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3686 }
3687 
3688 
3689 static status_t
3690 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
3691 {
3692 	struct vnode *vnode = descriptor->u.vnode;
3693 
3694 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
3695 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3696 }
3697 
3698 
3699 static off_t
3700 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
3701 {
3702 	off_t offset;
3703 
3704 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
3705 	// ToDo: seek should fail for pipes and FIFOs...
3706 
3707 	switch (seekType) {
3708 		case SEEK_SET:
3709 			offset = 0;
3710 			break;
3711 		case SEEK_CUR:
3712 			offset = descriptor->pos;
3713 			break;
3714 		case SEEK_END:
3715 		{
3716 			struct vnode *vnode = descriptor->u.vnode;
3717 			struct stat stat;
3718 			status_t status;
3719 
3720 			if (FS_CALL(vnode, read_stat) == NULL)
3721 				return EOPNOTSUPP;
3722 
3723 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
3724 			if (status < B_OK)
3725 				return status;
3726 
3727 			offset = stat.st_size;
3728 			break;
3729 		}
3730 		default:
3731 			return B_BAD_VALUE;
3732 	}
3733 
3734 	// assumes off_t is 64 bits wide
3735 	if (offset > 0 && LONGLONG_MAX - offset < pos)
3736 		return EOVERFLOW;
3737 
3738 	pos += offset;
3739 	if (pos < 0)
3740 		return B_BAD_VALUE;
3741 
3742 	return descriptor->pos = pos;
3743 }
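
/* Example: the overflow guard in file_seek() in numbers, for SEEK_END with
 * offset = stat.st_size:
 *
 *	offset = 100, pos = LONGLONG_MAX - 50
 *	LONGLONG_MAX - offset == LONGLONG_MAX - 100 < pos	-> EOVERFLOW
 *
 * i.e. the addition "pos += offset" would wrap around, so the seek is
 * rejected before that can happen.
 */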
3744 
3745 
3746 static status_t
3747 file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
3748 	struct select_sync *sync)
3749 {
3750 	FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
3751 
3752 	struct vnode *vnode = descriptor->u.vnode;
3753 
3754 	// If the FS has no select() hook, notify select() now.
3755 	if (FS_CALL(vnode, select) == NULL)
3756 		return notify_select_event((selectsync*)sync, ref, event);
3757 
3758 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
3759 		descriptor->cookie, event, ref, (selectsync*)sync);
3760 }
3761 
3762 
3763 static status_t
3764 file_deselect(struct file_descriptor *descriptor, uint8 event,
3765 	struct select_sync *sync)
3766 {
3767 	struct vnode *vnode = descriptor->u.vnode;
3768 
3769 	if (FS_CALL(vnode, deselect) == NULL)
3770 		return B_OK;
3771 
3772 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
3773 		descriptor->cookie, event, (selectsync*)sync);
3774 }
3775 
3776 
3777 static status_t
3778 dir_create_entry_ref(mount_id mountID, vnode_id parentID, const char *name, int perms, bool kernel)
3779 {
3780 	struct vnode *vnode;
3781 	vnode_id newID;
3782 	status_t status;
3783 
3784 	if (name == NULL || *name == '\0')
3785 		return B_BAD_VALUE;
3786 
3787 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
3788 
3789 	status = get_vnode(mountID, parentID, &vnode, kernel);
3790 	if (status < B_OK)
3791 		return status;
3792 
3793 	if (FS_CALL(vnode, create_dir))
3794 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
3795 	else
3796 		status = EROFS;
3797 
3798 	put_vnode(vnode);
3799 	return status;
3800 }
3801 
3802 
3803 static status_t
3804 dir_create(int fd, char *path, int perms, bool kernel)
3805 {
3806 	char filename[B_FILE_NAME_LENGTH];
3807 	struct vnode *vnode;
3808 	vnode_id newID;
3809 	status_t status;
3810 
3811 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
3812 
3813 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3814 	if (status < 0)
3815 		return status;
3816 
3817 	if (FS_CALL(vnode, create_dir))
3818 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
3819 	else
3820 		status = EROFS;
3821 
3822 	put_vnode(vnode);
3823 	return status;
3824 }
3825 
3826 
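// Editorial sketch, not part of the original source: dir_create() above is
// where a userland mkdir() call ends up; mkdir() is standard POSIX, the
// path is made up.
#if 0
	if (mkdir("/boot/home/new-folder", 0755) != 0)
		fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
#endif

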
3827 static int
3828 dir_open_entry_ref(mount_id mountID, vnode_id parentID, const char *name, bool kernel)
3829 {
3830 	struct vnode *vnode;
3831 	int status;
3832 
3833 	FUNCTION(("dir_open_entry_ref()\n"));
3834 
3835 	if (name && *name == '\0')
3836 		return B_BAD_VALUE;
3837 
3838 	// get the vnode matching the entry_ref/node_ref
3839 	if (name)
3840 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
3841 	else
3842 		status = get_vnode(mountID, parentID, &vnode, false);
3843 	if (status < B_OK)
3844 		return status;
3845 
3846 	status = open_dir_vnode(vnode, kernel);
3847 	if (status < B_OK) {
3848 		put_vnode(vnode);
3849 		return status;
3850 	}
3851 
3852 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
3853 	return status;
3852 }
3853 
3854 
3855 static int
3856 dir_open(int fd, char *path, bool kernel)
3857 {
3858 	int status = B_OK;
3859 
3860 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
3861 
3862 	// get the vnode matching the vnode + path combination
3863 	struct vnode *vnode = NULL;
3864 	vnode_id parentID;
3865 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
3866 	if (status != B_OK)
3867 		return status;
3868 
3869 	// open the dir
3870 	status = open_dir_vnode(vnode, kernel);
3871 	if (status < B_OK) {
3872 		put_vnode(vnode);
3873 		return status;
3874 	}
3875 
3876 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
3877 	return status;
3876 }
3877 
3878 
3879 static status_t
3880 dir_close(struct file_descriptor *descriptor)
3881 {
3882 	struct vnode *vnode = descriptor->u.vnode;
3883 
3884 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
3885 
3886 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
3887 	if (FS_CALL(vnode, close_dir))
3888 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3889 
3890 	return B_OK;
3891 }
3892 
3893 
3894 static void
3895 dir_free_fd(struct file_descriptor *descriptor)
3896 {
3897 	struct vnode *vnode = descriptor->u.vnode;
3898 
3899 	if (vnode != NULL) {
3900 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3901 		put_vnode(vnode);
3902 	}
3903 }
3904 
3905 
3906 static status_t
3907 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3908 {
3909 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
3910 }
3911 
3912 
3913 static void
3914 fix_dirent(struct vnode *parent, struct dirent *entry)
3915 {
3916 	// set d_pdev and d_pino
3917 	entry->d_pdev = parent->device;
3918 	entry->d_pino = parent->id;
3919 
3920 	// If this is the ".." entry and the directory is the root of a FS,
3921 	// we need to replace d_dev and d_ino with the actual values.
3922 	if (strcmp(entry->d_name, "..") == 0
3923 		&& parent->mount->root_vnode == parent
3924 		&& parent->mount->covers_vnode) {
3925 		inc_vnode_ref_count(parent);
3926 			// vnode_path_to_vnode() puts the node
3927 
3928 		struct vnode *vnode;
3929 		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
3930 			NULL, NULL);
3931 
3932 		if (status == B_OK) {
3933 			entry->d_dev = vnode->device;
3934 			entry->d_ino = vnode->id;
3935 		}
3936 	} else {
3937 		// resolve mount points
3938 		struct vnode *vnode = NULL;
3939 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, false);
3940 		if (status != B_OK)
3941 			return;
3942 
3943 		recursive_lock_lock(&sMountOpLock);
3944 		if (vnode->covered_by) {
3945 			entry->d_dev = vnode->covered_by->device;
3946 			entry->d_ino = vnode->covered_by->id;
3947 		}
3948 		recursive_lock_unlock(&sMountOpLock);
3949 
3950 		put_vnode(vnode);
3951 	}
3952 }
3953 
3954 
3955 static status_t
3956 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3957 {
3958 	if (!FS_CALL(vnode, read_dir))
3959 		return EOPNOTSUPP;
3960 
3961 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie, vnode->private_node, cookie, buffer, bufferSize, _count);
3962 	if (error != B_OK)
3963 		return error;
3964 
3965 	// we need to adjust the read dirents
3966 	if (*_count > 0) {
3967 		// XXX: Currently reading only one dirent is supported. Make this a loop (see the sketch after this function)!
3968 		fix_dirent(vnode, buffer);
3969 	}
3970 
3971 	return error;
3972 }
3973 
3974 
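// Editorial sketch, not part of the original source: the loop that the XXX
// comment in dir_read() asks for, assuming the file system packs
// consecutive entries into the buffer using d_reclen.
#if 0
static void
fix_dirents(struct vnode *vnode, struct dirent *buffer, uint32 count)
{
	struct dirent *entry = buffer;
	for (uint32 i = 0; i < count; i++) {
		fix_dirent(vnode, entry);
		// advance to the next packed entry
		entry = (struct dirent *)((uint8 *)entry + entry->d_reclen);
	}
}
#endif

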
3975 static status_t
3976 dir_rewind(struct file_descriptor *descriptor)
3977 {
3978 	struct vnode *vnode = descriptor->u.vnode;
3979 
3980 	if (FS_CALL(vnode, rewind_dir))
3981 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3982 
3983 	return EOPNOTSUPP;
3984 }
3985 
3986 
3987 static status_t
3988 dir_remove(int fd, char *path, bool kernel)
3989 {
3990 	char name[B_FILE_NAME_LENGTH];
3991 	struct vnode *directory;
3992 	status_t status;
3993 
3994 	if (path != NULL) {
3995 		// we need to make sure the path name doesn't end in "/", ".", or ".."
3996 		char *lastSlash = strrchr(path, '/');
3997 		if (lastSlash != NULL) {
3998 			char *leaf = lastSlash + 1;
3999 			if (!strcmp(leaf, ".."))
4000 				return B_NOT_ALLOWED;
4001 
4002 			// omit multiple slashes
4003 			while (lastSlash > path && lastSlash[-1] == '/') {
4004 				lastSlash--;
4005 			}
4006 
4007 			if (!leaf[0]
4008 				|| !strcmp(leaf, ".")) {
4009 				// "name/" -> "name", or "name/." -> "name"
4010 				lastSlash[0] = '\0';
4011 			}
4012 		} else if (!strcmp(path, ".."))
4013 			return B_NOT_ALLOWED;
4014 	}
4015 
4016 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4017 	if (status < B_OK)
4018 		return status;
4019 
4020 	if (FS_CALL(directory, remove_dir)) {
4021 		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
4022 			directory->private_node, name);
4023 	} else
4024 		status = EROFS;
4025 
4026 	put_vnode(directory);
4027 	return status;
4028 }
4029 
4030 
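// Editorial examples (hypothetical calls) of the leaf normalization in
// dir_remove() above; note that the path buffer is modified in place, so
// real callers pass a writable copy.
#if 0
	dir_remove(fd, "foo/bar/", kernel);		// "/" stripped, removes "bar"
	dir_remove(fd, "foo/bar/.", kernel);	// "/." stripped, removes "bar"
	dir_remove(fd, "foo/bar/..", kernel);	// fails with B_NOT_ALLOWED
#endif

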
4031 static status_t
4032 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
4033 {
4034 	struct vnode *vnode = descriptor->u.vnode;
4035 
4036 	if (FS_CALL(vnode, ioctl)) {
4037 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4038 			descriptor->cookie, op, buffer, length);
4039 	}
4040 
4041 	return EOPNOTSUPP;
4042 }
4043 
4044 
4045 static status_t
4046 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4047 {
4048 	struct file_descriptor *descriptor;
4049 	struct vnode *vnode;
4050 	struct flock flock;
4051 	status_t status;
4052 
4053 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4054 		fd, op, argument, kernel ? "kernel" : "user"));
4055 
4056 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4057 	if (descriptor == NULL)
4058 		return B_FILE_ERROR;
4059 
4060 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4061 		if (descriptor->type != FDTYPE_FILE) {
4062 			put_fd(descriptor);
4063 			return B_BAD_VALUE;
4064 		}
4065 		if (user_memcpy(&flock, (struct flock *)argument, sizeof(struct flock)) < B_OK) {
4066 			put_fd(descriptor);
4067 			return B_BAD_ADDRESS;
4068 		}
4069 	}
4066 
4067 	switch (op) {
4068 		case F_SETFD:
4069 		{
4070 			struct io_context *context = get_current_io_context(kernel);
4071 
4072 			// Set file descriptor flags;
4073 			// O_CLOEXEC is the only flag available at this time
4074 			mutex_lock(&context->io_mutex);
4075 			fd_set_close_on_exec(context, fd, argument == FD_CLOEXEC);
4076 			mutex_unlock(&context->io_mutex);
4077 
4078 			status = B_OK;
4079 			break;
4080 		}
4081 
4082 		case F_GETFD:
4083 		{
4084 			struct io_context *context = get_current_io_context(kernel);
4085 
4086 			// Get file descriptor flags
4087 			mutex_lock(&context->io_mutex);
4088 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4089 			mutex_unlock(&context->io_mutex);
4090 			break;
4091 		}
4092 
4093 		case F_SETFL:
4094 			// Set file descriptor open mode
4095 			if (FS_CALL(vnode, set_flags)) {
4096 				// we only accept changes to O_APPEND and O_NONBLOCK
4097 				argument &= O_APPEND | O_NONBLOCK;
4098 
4099 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4100 					vnode->private_node, descriptor->cookie, (int)argument);
4101 				if (status == B_OK) {
4102 					// update this descriptor's open_mode field
4103 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK))
4104 						| argument;
4105 				}
4106 			} else
4107 				status = EOPNOTSUPP;
4108 			break;
4109 
4110 		case F_GETFL:
4111 			// Get file descriptor open mode
4112 			status = descriptor->open_mode;
4113 			break;
4114 
4115 		case F_DUPFD:
4116 		{
4117 			struct io_context *context = get_current_io_context(kernel);
4118 
4119 			status = new_fd_etc(context, descriptor, (int)argument);
4120 			if (status >= 0) {
4121 				mutex_lock(&context->io_mutex);
4122 				fd_set_close_on_exec(context, fd, false);
4123 				mutex_unlock(&context->io_mutex);
4124 
4125 				atomic_add(&descriptor->ref_count, 1);
4126 			}
4127 			break;
4128 		}
4129 
4130 		case F_GETLK:
4131 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4132 			if (status == B_OK) {
4133 				// copy back flock structure
4134 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
4135 			}
4136 			break;
4137 
4138 		case F_SETLK:
4139 		case F_SETLKW:
4140 			status = normalize_flock(descriptor, &flock);
4141 			if (status < B_OK)
4142 				break;
4143 
4144 			if (flock.l_type == F_UNLCK)
4145 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4146 			else {
4147 				// the open mode must match the lock type
4148 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK
4149 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK)
4150 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK))
4151 				else
4152 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
4153 			}
4154 			break;
4155 
4156 		// ToDo: add support for more ops?
4157 
4158 		default:
4159 			status = B_BAD_VALUE;
4160 	}
4161 
4162 	put_fd(descriptor);
4163 	return status;
4164 }
4165 
4166 
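// Editorial sketch of the F_SETFL and F_SETLK paths of common_fcntl()
// above, as seen from userland; fcntl() and the flags are standard POSIX,
// the file name is made up.
#if 0
	int fd = open("/boot/home/example", O_RDWR);

	// toggle non-blocking mode; common_fcntl() masks everything but
	// O_APPEND and O_NONBLOCK before calling the FS set_flags() hook
	int flags = fcntl(fd, F_GETFL);
	fcntl(fd, F_SETFL, flags | O_NONBLOCK);

	// take a shared advisory lock on the first 100 bytes, waiting if needed
	struct flock lock = {};
	lock.l_type = F_RDLCK;
	lock.l_whence = SEEK_SET;
	lock.l_start = 0;
	lock.l_len = 100;
	fcntl(fd, F_SETLKW, &lock);
#endif

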
4167 static status_t
4168 common_sync(int fd, bool kernel)
4169 {
4170 	struct file_descriptor *descriptor;
4171 	struct vnode *vnode;
4172 	status_t status;
4173 
4174 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
4175 
4176 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4177 	if (descriptor == NULL)
4178 		return B_FILE_ERROR;
4179 
4180 	if (FS_CALL(vnode, fsync) != NULL)
4181 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4182 	else
4183 		status = EOPNOTSUPP;
4184 
4185 	put_fd(descriptor);
4186 	return status;
4187 }
4188 
4189 
4190 static status_t
4191 common_lock_node(int fd, bool kernel)
4192 {
4193 	// TODO: Implement!
4194 	//return EOPNOTSUPP;
4195 	return B_OK;
4196 }
4197 
4198 
4199 static status_t
4200 common_unlock_node(int fd, bool kernel)
4201 {
4202 	// TODO: Implement!
4203 	//return EOPNOTSUPP;
4204 	return B_OK;
4205 }
4206 
4207 
4208 static status_t
4209 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4210 	bool kernel)
4211 {
4212 	struct vnode *vnode;
4213 	status_t status;
4214 
4215 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4216 	if (status < B_OK)
4217 		return status;
4218 
4219 	if (FS_CALL(vnode, read_link) != NULL) {
4220 		status = FS_CALL(vnode, read_link)(vnode->mount->cookie,
4221 			vnode->private_node, buffer, _bufferSize);
4222 	} else
4223 		status = B_BAD_VALUE;
4224 
4225 	put_vnode(vnode);
4226 	return status;
4227 }
4228 
4229 
4230 static status_t
4231 common_write_link(char *path, char *toPath, bool kernel)
4232 {
4233 	struct vnode *vnode;
4234 	status_t status;
4235 
4236 	status = path_to_vnode(path, false, &vnode, NULL, kernel);
4237 	if (status < B_OK)
4238 		return status;
4239 
4240 	if (FS_CALL(vnode, write_link) != NULL)
4241 		status = FS_CALL(vnode, write_link)(vnode->mount->cookie, vnode->private_node, toPath);
4242 	else
4243 		status = EOPNOTSUPP;
4244 
4245 	put_vnode(vnode);
4246 
4247 	return status;
4248 }
4249 
4250 
4251 static status_t
4252 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4253 	bool kernel)
4254 {
4255 	// path validity checks have to be in the calling function!
4256 	char name[B_FILE_NAME_LENGTH];
4257 	struct vnode *vnode;
4258 	status_t status;
4259 
4260 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4261 
4262 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4263 	if (status < B_OK)
4264 		return status;
4265 
4266 	if (FS_CALL(vnode, create_symlink) != NULL)
4267 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4268 	else
4269 		status = EROFS;
4270 
4271 	put_vnode(vnode);
4272 
4273 	return status;
4274 }
4275 
4276 
4277 static status_t
4278 common_create_link(char *path, char *toPath, bool kernel)
4279 {
4280 	// path validity checks have to be in the calling function!
4281 	char name[B_FILE_NAME_LENGTH];
4282 	struct vnode *directory, *vnode;
4283 	status_t status;
4284 
4285 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4286 
4287 	status = path_to_dir_vnode(path, &directory, name, kernel);
4288 	if (status < B_OK)
4289 		return status;
4290 
4291 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4292 	if (status < B_OK)
4293 		goto err;
4294 
4295 	if (directory->mount != vnode->mount) {
4296 		status = B_CROSS_DEVICE_LINK;
4297 		goto err1;
4298 	}
4299 
4300 	if (FS_CALL(vnode, link) != NULL)
4301 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4302 	else
4303 		status = EROFS;
4304 
4305 err1:
4306 	put_vnode(vnode);
4307 err:
4308 	put_vnode(directory);
4309 
4310 	return status;
4311 }
4312 
4313 
4314 static status_t
4315 common_unlink(int fd, char *path, bool kernel)
4316 {
4317 	char filename[B_FILE_NAME_LENGTH];
4318 	struct vnode *vnode;
4319 	status_t status;
4320 
4321 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4322 
4323 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4324 	if (status < 0)
4325 		return status;
4326 
4327 	if (FS_CALL(vnode, unlink) != NULL)
4328 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4329 	else
4330 		status = EROFS;
4331 
4332 	put_vnode(vnode);
4333 
4334 	return status;
4335 }
4336 
4337 
4338 static status_t
4339 common_access(char *path, int mode, bool kernel)
4340 {
4341 	struct vnode *vnode;
4342 	status_t status;
4343 
4344 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4345 	if (status < B_OK)
4346 		return status;
4347 
4348 	if (FS_CALL(vnode, access) != NULL)
4349 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4350 	else
4351 		status = B_OK;
4352 
4353 	put_vnode(vnode);
4354 
4355 	return status;
4356 }
4357 
4358 
4359 static status_t
4360 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4361 {
4362 	struct vnode *fromVnode, *toVnode;
4363 	char fromName[B_FILE_NAME_LENGTH];
4364 	char toName[B_FILE_NAME_LENGTH];
4365 	status_t status;
4366 
4367 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4368 
4369 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4370 	if (status < 0)
4371 		return status;
4372 
4373 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4374 	if (status < 0)
4375 		goto err;
4376 
4377 	if (fromVnode->device != toVnode->device) {
4378 		status = B_CROSS_DEVICE_LINK;
4379 		goto err1;
4380 	}
4381 
4382 	if (FS_CALL(fromVnode, rename) != NULL)
4383 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4384 	else
4385 		status = EROFS;
4386 
4387 err1:
4388 	put_vnode(toVnode);
4389 err:
4390 	put_vnode(fromVnode);
4391 
4392 	return status;
4393 }
4394 
4395 
4396 static status_t
4397 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4398 {
4399 	struct vnode *vnode = descriptor->u.vnode;
4400 
4401 	FUNCTION(("common_read_stat: stat %p\n", stat));
4402 
4403 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4404 		vnode->private_node, stat);
4405 
4406 	// fill in the st_dev and st_ino fields
4407 	if (status == B_OK) {
4408 		stat->st_dev = vnode->device;
4409 		stat->st_ino = vnode->id;
4410 	}
4411 
4412 	return status;
4413 }
4414 
4415 
4416 static status_t
4417 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4418 {
4419 	struct vnode *vnode = descriptor->u.vnode;
4420 
4421 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4422 	if (!FS_CALL(vnode, write_stat))
4423 		return EROFS;
4424 
4425 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4426 }
4427 
4428 
4429 static status_t
4430 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4431 	struct stat *stat, bool kernel)
4432 {
4433 	struct vnode *vnode;
4434 	status_t status;
4435 
4436 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4437 
4438 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4439 	if (status < 0)
4440 		return status;
4441 
4442 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4443 
4444 	// fill in the st_dev and st_ino fields
4445 	if (status == B_OK) {
4446 		stat->st_dev = vnode->device;
4447 		stat->st_ino = vnode->id;
4448 	}
4449 
4450 	put_vnode(vnode);
4451 	return status;
4452 }
4453 
4454 
4455 static status_t
4456 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4457 	const struct stat *stat, int statMask, bool kernel)
4458 {
4459 	struct vnode *vnode;
4460 	status_t status;
4461 
4462 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4463 
4464 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4465 	if (status < 0)
4466 		return status;
4467 
4468 	if (FS_CALL(vnode, write_stat))
4469 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4470 	else
4471 		status = EROFS;
4472 
4473 	put_vnode(vnode);
4474 
4475 	return status;
4476 }
4477 
4478 
4479 static int
4480 attr_dir_open(int fd, char *path, bool kernel)
4481 {
4482 	struct vnode *vnode;
4483 	int status;
4484 
4485 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4486 
4487 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4488 	if (status < B_OK)
4489 		return status;
4490 
4491 	status = open_attr_dir_vnode(vnode, kernel);
4492 	if (status < 0)
4493 		put_vnode(vnode);
4494 
4495 	return status;
4496 }
4497 
4498 
4499 static status_t
4500 attr_dir_close(struct file_descriptor *descriptor)
4501 {
4502 	struct vnode *vnode = descriptor->u.vnode;
4503 
4504 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4505 
4506 	if (FS_CALL(vnode, close_attr_dir))
4507 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4508 
4509 	return B_OK;
4510 }
4511 
4512 
4513 static void
4514 attr_dir_free_fd(struct file_descriptor *descriptor)
4515 {
4516 	struct vnode *vnode = descriptor->u.vnode;
4517 
4518 	if (vnode != NULL) {
4519 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4520 		put_vnode(vnode);
4521 	}
4522 }
4523 
4524 
4525 static status_t
4526 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4527 {
4528 	struct vnode *vnode = descriptor->u.vnode;
4529 
4530 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
4531 
4532 	if (FS_CALL(vnode, read_attr_dir))
4533 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
4534 
4535 	return EOPNOTSUPP;
4536 }
4537 
4538 
4539 static status_t
4540 attr_dir_rewind(struct file_descriptor *descriptor)
4541 {
4542 	struct vnode *vnode = descriptor->u.vnode;
4543 
4544 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
4545 
4546 	if (FS_CALL(vnode, rewind_attr_dir))
4547 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4548 
4549 	return EOPNOTSUPP;
4550 }
4551 
4552 
4553 static int
4554 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
4555 {
4556 	struct vnode *vnode;
4557 	fs_cookie cookie;
4558 	int status;
4559 
4560 	if (name == NULL || *name == '\0')
4561 		return B_BAD_VALUE;
4562 
4563 	vnode = get_vnode_from_fd(fd, kernel);
4564 	if (vnode == NULL)
4565 		return B_FILE_ERROR;
4566 
4567 	if (FS_CALL(vnode, create_attr) == NULL) {
4568 		status = EROFS;
4569 		goto err;
4570 	}
4571 
4572 	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
4573 	if (status < B_OK)
4574 		goto err;
4575 
4576 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4577 		return status;
4578 
4579 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4580 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4581 
4582 	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4583 
4584 err:
4585 	put_vnode(vnode);
4586 
4587 	return status;
4588 }
4589 
4590 
4591 static int
4592 attr_open(int fd, const char *name, int openMode, bool kernel)
4593 {
4594 	struct vnode *vnode;
4595 	fs_cookie cookie;
4596 	int status;
4597 
4598 	if (name == NULL || *name == '\0')
4599 		return B_BAD_VALUE;
4600 
4601 	vnode = get_vnode_from_fd(fd, kernel);
4602 	if (vnode == NULL)
4603 		return B_FILE_ERROR;
4604 
4605 	if (FS_CALL(vnode, open_attr) == NULL) {
4606 		status = EOPNOTSUPP;
4607 		goto err;
4608 	}
4609 
4610 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
4611 	if (status < B_OK)
4612 		goto err;
4613 
4614 	// now we only need a file descriptor for this attribute and we're done
4615 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4616 		return status;
4617 
4618 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4619 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4620 
4621 err:
4622 	put_vnode(vnode);
4623 
4624 	return status;
4625 }
4626 
4627 
4628 static status_t
4629 attr_close(struct file_descriptor *descriptor)
4630 {
4631 	struct vnode *vnode = descriptor->u.vnode;
4632 
4633 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
4634 
4635 	if (FS_CALL(vnode, close_attr))
4636 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4637 
4638 	return B_OK;
4639 }
4640 
4641 
4642 static void
4643 attr_free_fd(struct file_descriptor *descriptor)
4644 {
4645 	struct vnode *vnode = descriptor->u.vnode;
4646 
4647 	if (vnode != NULL) {
4648 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4649 		put_vnode(vnode);
4650 	}
4651 }
4652 
4653 
4654 static status_t
4655 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4656 {
4657 	struct vnode *vnode = descriptor->u.vnode;
4658 
4659 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4660 	if (!FS_CALL(vnode, read_attr))
4661 		return EOPNOTSUPP;
4662 
4663 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4664 }
4665 
4666 
4667 static status_t
4668 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4669 {
4670 	struct vnode *vnode = descriptor->u.vnode;
4671 
4672 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4673 	if (!FS_CALL(vnode, write_attr))
4674 		return EOPNOTSUPP;
4675 
4676 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4677 }
4678 
4679 
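// Editorial sketch, not part of the original source: the userland side of
// attr_read()/attr_write() above, using the BeOS fs_attr API; the file and
// attribute name are made up.
#if 0
#include <fs_attr.h>

	int fd = open("/boot/home/example", O_RDWR);

	const char *text = "hello";
	fs_write_attr(fd, "demo:note", B_STRING_TYPE, 0, text, strlen(text) + 1);

	char buffer[32];
	fs_read_attr(fd, "demo:note", B_STRING_TYPE, 0, buffer, sizeof(buffer));
#endif

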
4680 static off_t
4681 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4682 {
4683 	off_t offset;
4684 
4685 	switch (seekType) {
4686 		case SEEK_SET:
4687 			offset = 0;
4688 			break;
4689 		case SEEK_CUR:
4690 			offset = descriptor->pos;
4691 			break;
4692 		case SEEK_END:
4693 		{
4694 			struct vnode *vnode = descriptor->u.vnode;
4695 			struct stat stat;
4696 			status_t status;
4697 
4698 			if (FS_CALL(vnode, read_attr_stat) == NULL)
4699 				return EOPNOTSUPP;
4700 
4701 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
4702 			if (status < B_OK)
4703 				return status;
4704 
4705 			offset = stat.st_size;
4706 			break;
4707 		}
4708 		default:
4709 			return B_BAD_VALUE;
4710 	}
4711 
4712 	// assumes off_t is 64 bits wide
4713 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4714 		return EOVERFLOW;
4715 
4716 	pos += offset;
4717 	if (pos < 0)
4718 		return B_BAD_VALUE;
4719 
4720 	return descriptor->pos = pos;
4721 }
4722 
4723 
4724 static status_t
4725 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4726 {
4727 	struct vnode *vnode = descriptor->u.vnode;
4728 
4729 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
4730 
4731 	if (!FS_CALL(vnode, read_attr_stat))
4732 		return EOPNOTSUPP;
4733 
4734 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4735 }
4736 
4737 
4738 static status_t
4739 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4740 {
4741 	struct vnode *vnode = descriptor->u.vnode;
4742 
4743 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
4744 
4745 	if (!FS_CALL(vnode, write_attr_stat))
4746 		return EROFS;
4747 
4748 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
4749 }
4750 
4751 
4752 static status_t
4753 attr_remove(int fd, const char *name, bool kernel)
4754 {
4755 	struct file_descriptor *descriptor;
4756 	struct vnode *vnode;
4757 	status_t status;
4758 
4759 	if (name == NULL || *name == '\0')
4760 		return B_BAD_VALUE;
4761 
4762 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
4763 
4764 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4765 	if (descriptor == NULL)
4766 		return B_FILE_ERROR;
4767 
4768 	if (FS_CALL(vnode, remove_attr))
4769 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4770 	else
4771 		status = EROFS;
4772 
4773 	put_fd(descriptor);
4774 
4775 	return status;
4776 }
4777 
4778 
4779 static status_t
4780 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
4781 {
4782 	struct file_descriptor *fromDescriptor, *toDescriptor;
4783 	struct vnode *fromVnode, *toVnode;
4784 	status_t status;
4785 
4786 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
4787 		return B_BAD_VALUE;
4788 
4789 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
4790 
4791 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
4792 	if (fromDescriptor == NULL)
4793 		return B_FILE_ERROR;
4794 
4795 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
4796 	if (toDescriptor == NULL) {
4797 		status = B_FILE_ERROR;
4798 		goto err;
4799 	}
4800 
4801 	// are the files on the same volume?
4802 	if (fromVnode->device != toVnode->device) {
4803 		status = B_CROSS_DEVICE_LINK;
4804 		goto err1;
4805 	}
4806 
4807 	if (FS_CALL(fromVnode, rename_attr))
4808 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4809 	else
4810 		status = EROFS;
4811 
4812 err1:
4813 	put_fd(toDescriptor);
4814 err:
4815 	put_fd(fromDescriptor);
4816 
4817 	return status;
4818 }
4819 
4820 
4821 static status_t
4822 index_dir_open(mount_id mountID, bool kernel)
4823 {
4824 	struct fs_mount *mount;
4825 	fs_cookie cookie;
4826 
4827 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
4828 
4829 	status_t status = get_mount(mountID, &mount);
4830 	if (status < B_OK)
4831 		return status;
4832 
4833 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
4834 		status = EOPNOTSUPP;
4835 		goto out;
4836 	}
4837 
4838 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
4839 	if (status < B_OK)
4840 		goto out;
4841 
4842 	// get fd for the index directory
4843 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
4844 	if (status >= 0)
4845 		goto out;
4846 
4847 	// something went wrong
4848 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
4849 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
4850 
4851 out:
4852 	put_mount(mount);
4853 	return status;
4854 }
4855 
4856 
4857 static status_t
4858 index_dir_close(struct file_descriptor *descriptor)
4859 {
4860 	struct fs_mount *mount = descriptor->u.mount;
4861 
4862 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
4863 
4864 	if (FS_MOUNT_CALL(mount, close_index_dir))
4865 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
4866 
4867 	return B_OK;
4868 }
4869 
4870 
4871 static void
4872 index_dir_free_fd(struct file_descriptor *descriptor)
4873 {
4874 	struct fs_mount *mount = descriptor->u.mount;
4875 
4876 	if (mount != NULL) {
4877 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
4878 		// ToDo: find a replacement ref_count object - perhaps the root dir?
4879 		//put_vnode(vnode);
4880 	}
4881 }
4882 
4883 
4884 static status_t
4885 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4886 {
4887 	struct fs_mount *mount = descriptor->u.mount;
4888 
4889 	if (FS_MOUNT_CALL(mount, read_index_dir))
4890 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
4891 
4892 	return EOPNOTSUPP;
4893 }
4894 
4895 
4896 static status_t
4897 index_dir_rewind(struct file_descriptor *descriptor)
4898 {
4899 	struct fs_mount *mount = descriptor->u.mount;
4900 
4901 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
4902 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
4903 
4904 	return EOPNOTSUPP;
4905 }
4906 
4907 
4908 static status_t
4909 index_create(mount_id mountID, const char *name, uint32 type, uint32 flags, bool kernel)
4910 {
4911 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4912 
4913 	struct fs_mount *mount;
4914 	status_t status = get_mount(mountID, &mount);
4915 	if (status < B_OK)
4916 		return status;
4917 
4918 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
4919 		status = EROFS;
4920 		goto out;
4921 	}
4922 
4923 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
4924 
4925 out:
4926 	put_mount(mount);
4927 	return status;
4928 }
4929 
4930 
4931 #if 0
4932 static status_t
4933 index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4934 {
4935 	struct vnode *vnode = descriptor->u.vnode;
4936 
4937 	// ToDo: currently unused!
4938 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
4939 	if (!FS_CALL(vnode, read_index_stat))
4940 		return EOPNOTSUPP;
4941 
4942 	return EOPNOTSUPP;
4943 	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4944 }
4945 
4946 
4947 static void
4948 index_free_fd(struct file_descriptor *descriptor)
4949 {
4950 	struct vnode *vnode = descriptor->u.vnode;
4951 
4952 	if (vnode != NULL) {
4953 		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4954 		put_vnode(vnode);
4955 	}
4956 }
4957 #endif
4958 
4959 
4960 static status_t
4961 index_name_read_stat(mount_id mountID, const char *name, struct stat *stat, bool kernel)
4962 {
4963 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4964 
4965 	struct fs_mount *mount;
4966 	status_t status = get_mount(mountID, &mount);
4967 	if (status < B_OK)
4968 		return status;
4969 
4970 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
4971 		status = EOPNOTSUPP;
4972 		goto out;
4973 	}
4974 
4975 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
4976 
4977 out:
4978 	put_mount(mount);
4979 	return status;
4980 }
4981 
4982 
4983 static status_t
4984 index_remove(mount_id mountID, const char *name, bool kernel)
4985 {
4986 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4987 
4988 	struct fs_mount *mount;
4989 	status_t status = get_mount(mountID, &mount);
4990 	if (status < B_OK)
4991 		return status;
4992 
4993 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
4994 		status = EROFS;
4995 		goto out;
4996 	}
4997 
4998 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
4999 
5000 out:
5001 	put_mount(mount);
5002 	return status;
5003 }
5004 
5005 
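// Editorial sketch, not part of the original source: creating and removing
// an index from userland with the BeOS fs_index API, which ends up in
// index_create()/index_remove() above; the index name is made up.
#if 0
#include <fs_index.h>

	dev_t device = dev_for_path("/boot/home");
	fs_create_index(device, "demo:keyword", B_STRING_TYPE, 0);
	fs_remove_index(device, "demo:keyword");
#endif

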
5006 /**	ToDo: the query FS API is still pretty much the same as in R5.
5007  *		It would be nice if file systems got some more kernel support
5008  *		for queries.
5009  *		For example, query parsing should be moved into the kernel.
5010  */
5011 
5012 static int
5013 query_open(dev_t device, const char *query, uint32 flags,
5014 	port_id port, int32 token, bool kernel)
5015 {
5016 	struct fs_mount *mount;
5017 	fs_cookie cookie;
5018 
5019 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
5020 
5021 	status_t status = get_mount(device, &mount);
5022 	if (status < B_OK)
5023 		return status;
5024 
5025 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
5026 		status = EOPNOTSUPP;
5027 		goto out;
5028 	}
5029 
5030 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
5031 	if (status < B_OK)
5032 		goto out;
5033 
5034 	// get fd for the query
5035 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
5036 	if (status >= 0)
5037 		goto out;
5038 
5039 	// something went wrong
5040 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
5041 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
5042 
5043 out:
5044 	put_mount(mount);
5045 	return status;
5046 }
5047 
5048 
5049 static status_t
5050 query_close(struct file_descriptor *descriptor)
5051 {
5052 	struct fs_mount *mount = descriptor->u.mount;
5053 
5054 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5055 
5056 	if (FS_MOUNT_CALL(mount, close_query))
5057 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5058 
5059 	return B_OK;
5060 }
5061 
5062 
5063 static void
5064 query_free_fd(struct file_descriptor *descriptor)
5065 {
5066 	struct fs_mount *mount = descriptor->u.mount;
5067 
5068 	if (mount != NULL) {
5069 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5070 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5071 		//put_vnode(vnode);
5072 	}
5073 }
5074 
5075 
5076 static status_t
5077 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5078 {
5079 	struct fs_mount *mount = descriptor->u.mount;
5080 
5081 	if (FS_MOUNT_CALL(mount, read_query))
5082 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5083 
5084 	return EOPNOTSUPP;
5085 }
5086 
5087 
5088 static status_t
5089 query_rewind(struct file_descriptor *descriptor)
5090 {
5091 	struct fs_mount *mount = descriptor->u.mount;
5092 
5093 	if (FS_MOUNT_CALL(mount, rewind_query))
5094 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5095 
5096 	return EOPNOTSUPP;
5097 }
5098 
5099 
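// Editorial sketch, not part of the original source: running a live query
// from userland with the BeOS fs_query API, which is served by
// query_open()/query_read() above; the query string is made up.
#if 0
#include <fs_query.h>

	DIR *query = fs_open_query(dev_for_path("/boot"), "name == \"example\"", 0);
	if (query != NULL) {
		while (struct dirent *entry = fs_read_query(query))
			printf("found: %s\n", entry->d_name);
		fs_close_query(query);
	}
#endif

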
5100 //	#pragma mark -
5101 //	General File System functions
5102 
5103 
5104 static dev_t
5105 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5106 	const char *args, bool kernel)
5107 {
5108 	struct fs_mount *mount;
5109 	status_t status = 0;
5110 
5111 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5112 
5113 	// The path is always safe, we just have to make sure that fsName is
5114 	// almost valid - we can't make any assumptions about args, though.
5115 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5116 	// We'll get it from the DDM later.
5117 	if (fsName == NULL) {
5118 		if (!device || (flags & B_MOUNT_VIRTUAL_DEVICE) != 0)
5119 			return B_BAD_VALUE;
5120 	} else if (fsName[0] == '\0')
5121 		return B_BAD_VALUE;
5122 
5123 	RecursiveLocker mountOpLocker(sMountOpLock);
5124 
5125 	// Helper to delete a newly created file device on failure.
5126 	// Not exactly beautiful, but helps to keep the code below cleaner.
5127 	struct FileDeviceDeleter {
5128 		FileDeviceDeleter() : id(-1) {}
5129 		~FileDeviceDeleter()
5130 		{
5131 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5132 		}
5133 
5134 		partition_id id;
5135 	} fileDeviceDeleter;
5136 
5137 	// If the file system is not a "virtual" one, the device argument should
5138 	// point to a real file/device (if given at all).
5139 	// get the partition
5140 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5141 	KPartition *partition = NULL;
5142 	KPath normalizedDevice;
5143 	bool newlyCreatedFileDevice = false;
5144 
5145 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5146 		// normalize the device path
5147 		status = normalizedDevice.SetTo(device, true);
5148 		if (status != B_OK)
5149 			return status;
5150 
5151 		// get a corresponding partition from the DDM
5152 		partition = ddm->RegisterPartition(normalizedDevice.Path(), true);
5153 
5154 		if (!partition) {
5155 			// Partition not found: This either means that the user supplied
5156 			// an invalid path, or that the path refers to an image file. We
5157 			// try to let the DDM create a file device for the path.
5158 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5159 				&newlyCreatedFileDevice, false);
5160 			if (deviceID >= 0) {
5161 				partition = ddm->RegisterPartition(deviceID, true);
5162 				if (newlyCreatedFileDevice)
5163 					fileDeviceDeleter.id = deviceID;
5164 			}
5165 		}
5166 
5167 		if (!partition) {
5168 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5169 				normalizedDevice.Path()));
5170 			return B_ENTRY_NOT_FOUND;
5171 		}
5172 
5173 		device = normalizedDevice.Path();
5174 			// correct path to file device
5175 	}
5176 	PartitionRegistrar partitionRegistrar(partition, true);
5177 
5178 	// Write lock the partition's device. For the time being, we keep the lock
5179 	// until we're done mounting -- not nice, but it ensures that no one
5180 	// is interfering.
5181 	// TODO: Find a better solution.
5182 	KDiskDevice *diskDevice = NULL;
5183 	if (partition) {
5184 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5185 		if (!diskDevice) {
5186 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5187 			return B_ERROR;
5188 		}
5189 	}
5190 
5191 	DeviceWriteLocker writeLocker(diskDevice, true);
5192 		// this takes over the write lock acquired before
5193 
5194 	if (partition) {
5195 		// make sure that the partition is not busy
5196 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5197 			TRACE(("fs_mount(): Partition is busy.\n"));
5198 			return B_BUSY;
5199 		}
5200 
5201 		// if no FS name had been supplied, we get it from the partition
5202 		if (!fsName) {
5203 			KDiskSystem *diskSystem = partition->DiskSystem();
5204 			if (!diskSystem) {
5205 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5206 					"recognize it.\n"));
5207 				return B_BAD_VALUE;
5208 			}
5209 
5210 			if (!diskSystem->IsFileSystem()) {
5211 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5212 					"partitioning system.\n"));
5213 				return B_BAD_VALUE;
5214 			}
5215 
5216 			// The disk system name will not change, and the KDiskSystem
5217 			// object will not go away while the disk device is locked (and
5218 			// the partition has a reference to it), so this is safe.
5219 			fsName = diskSystem->Name();
5220 		}
5221 	}
5222 
5223 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5224 	if (mount == NULL)
5225 		return B_NO_MEMORY;
5226 
5227 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5228 
5229 	mount->fs_name = get_file_system_name(fsName);
5230 	if (mount->fs_name == NULL) {
5231 		status = B_NO_MEMORY;
5232 		goto err1;
5233 	}
5234 
5235 	mount->device_name = strdup(device);
5236 		// "device" can be NULL
5237 
5238 	mount->fs = get_file_system(fsName);
5239 	if (mount->fs == NULL) {
5240 		status = ENODEV;
5241 		goto err3;
5242 	}
5243 
5244 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5245 	if (status < B_OK)
5246 		goto err4;
5247 
5248 	// initialize structure
5249 	mount->id = sNextMountID++;
5250 	mount->partition = NULL;
5251 	mount->root_vnode = NULL;
5252 	mount->covers_vnode = NULL;
5253 	mount->cookie = NULL;
5254 	mount->unmounting = false;
5255 	mount->owns_file_device = false;
5256 
5257 	// insert mount struct into list before we call FS's mount() function
5258 	// so that vnodes can be created for this mount
5259 	mutex_lock(&sMountMutex);
5260 	hash_insert(sMountsTable, mount);
5261 	mutex_unlock(&sMountMutex);
5262 
5263 	vnode_id rootID;
5264 
5265 	if (!sRoot) {
5266 		// we haven't mounted anything yet
5267 		if (strcmp(path, "/") != 0) {
5268 			status = B_ERROR;
5269 			goto err5;
5270 		}
5271 
5272 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5273 		if (status < 0) {
5274 			// ToDo: why should we hide the error code from the file system here?
5275 			//status = ERR_VFS_GENERAL;
5276 			goto err5;
5277 		}
5278 	} else {
5279 		struct vnode *coveredVnode;
5280 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5281 		if (status < B_OK)
5282 			goto err5;
5283 
5284 		mount->covers_vnode = coveredVnode;
5285 			// set early so that err6 releases the reference on failure
5286 
5287 		// make sure covered_vnode is a DIR
5288 		struct stat coveredNodeStat;
5289 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5290 			coveredVnode->private_node, &coveredNodeStat);
5291 		if (status < B_OK)
5292 			goto err6;
5293 
5294 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5295 			status = B_NOT_A_DIRECTORY;
5296 			goto err6;
5297 		}
5298 
5299 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5300 			// this is already a mount point
5301 			status = B_BUSY;
5302 			goto err6;
5303 		}
5303 
5304 		// mount it
5305 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5306 		if (status < B_OK)
5307 			goto err6;
5308 	}
5309 
5310 	// the root node is supposed to be owned by the file system - it must
5311 	// exist at this point
5312 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5313 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5314 		panic("fs_mount: file system does not own its root node!\n");
5315 		status = B_ERROR;
5316 		goto err7;
5317 	}
5318 
5319 	// No race here, since fs_mount() is the only function changing
5320 	// covers_vnode (and holds sMountOpLock at that time).
5321 	if (mount->covers_vnode)
5322 		mount->covers_vnode->covered_by = mount->root_vnode;
5323 
5324 	if (!sRoot)
5325 		sRoot = mount->root_vnode;
5326 
5327 	// supply the partition (if any) with the mount cookie and mark it mounted
5328 	if (partition) {
5329 		partition->SetMountCookie(mount->cookie);
5330 		partition->SetVolumeID(mount->id);
5331 
5332 		// keep a partition reference as long as the partition is mounted
5333 		partitionRegistrar.Detach();
5334 		mount->partition = partition;
5335 		mount->owns_file_device = newlyCreatedFileDevice;
5336 		fileDeviceDeleter.id = -1;
5337 	}
5338 
5339 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
5340 		mount->covers_vnode ? mount->covers_vnode->id : -1);
5341 
5342 	return mount->id;
5343 
5344 err7:
5345 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5346 err6:
5347 	if (mount->covers_vnode)
5348 		put_vnode(mount->covers_vnode);
5349 err5:
5350 	mutex_lock(&sMountMutex);
5351 	hash_remove(sMountsTable, mount);
5352 	mutex_unlock(&sMountMutex);
5353 
5354 	recursive_lock_destroy(&mount->rlock);
5355 err4:
5356 	put_file_system(mount->fs);
5357 err3:
5358 	free(mount->device_name);
5359 	free(mount->fs_name);
5360 err1:
5361 	free(mount);
5362 
5363 	return status;
5364 }
5365 
5366 
5367 static status_t
5368 fs_unmount(char *path, uint32 flags, bool kernel)
5369 {
5370 	struct fs_mount *mount;
5371 	struct vnode *vnode;
5372 	status_t err;
5373 
5374 	FUNCTION(("fs_unmount: entry. path = '%s', kernel %d\n", path, kernel));
5375 
5376 	err = path_to_vnode(path, true, &vnode, NULL, kernel);
5377 	if (err < 0)
5378 		return B_ENTRY_NOT_FOUND;
5379 
5380 	RecursiveLocker mountOpLocker(sMountOpLock);
5381 
5382 	mount = find_mount(vnode->device);
5383 	if (!mount)
5384 		panic("vfs_unmount: find_mount() failed on root vnode @%p of mount\n", vnode);
5385 
5386 	if (mount->root_vnode != vnode) {
5387 		// not a mount point
5388 		put_vnode(vnode);
5389 		return B_BAD_VALUE;
5390 	}
5391 
5392 	// if the volume is associated with a partition, lock the device of the
5393 	// partition as long as we are unmounting
5394 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
5395 	KPartition *partition = mount->partition;
5396 	KDiskDevice *diskDevice = NULL;
5397 	if (partition) {
5398 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5399 		if (!diskDevice) {
5400 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
5401 			put_vnode(vnode);
5402 			return B_ERROR;
5403 		}
5403 	}
5404 	DeviceWriteLocker writeLocker(diskDevice, true);
5405 
5406 	// make sure that the partition is not busy
5407 	if (partition) {
5408 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5409 			TRACE(("fs_unmount(): Partition is busy.\n"));
5410 			put_vnode(vnode);
5411 			return B_BUSY;
5411 		}
5412 	}
5413 
5414 	// grab the vnode master mutex to keep someone from creating
5415 	// a vnode while we're figuring out if we can continue
5416 	mutex_lock(&sVnodeMutex);
5417 
5419 	bool disconnectedDescriptors = false;
5420 
5421 	while (true) {
5422 		bool busy = false;
5423 
5424 		// cycle through the list of vnodes associated with this mount and
5425 		// make sure none of them is busy or still referenced
5426 		vnode = NULL;
5427 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5428 			// The root vnode ref_count needs to be 2 here: one for the file
5429 			// system, one from the path_to_vnode() call above
5430 			if (vnode->busy
5431 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
5432 					|| (vnode->ref_count != 2 && mount->root_vnode == vnode))) {
5433 				// there are still vnodes in use on this mount, so we cannot
5434 				// unmount yet
5435 				busy = true;
5436 				break;
5437 			}
5438 		}
5439 
5440 		if (!busy)
5441 			break;
5442 
5443 		if ((flags & B_FORCE_UNMOUNT) == 0) {
5444 			mutex_unlock(&sVnodeMutex);
5445 			put_vnode(mount->root_vnode);
5446 
5447 			return B_BUSY;
5448 		}
5449 
5450 		if (disconnectedDescriptors) {
5451 			// wait a bit until the last access is finished, and then try again
5452 			mutex_unlock(&sVnodeMutex);
5453 			snooze(100000);
5454 			mutex_lock(&sVnodeMutex);
5455 			continue;
5456 		}
5457 
5458 		// the file system is still busy - but we're forced to unmount it,
5459 		// so let's disconnect all open file descriptors
5460 
5461 		mount->unmounting = true;
5462 			// prevent new vnodes from being created
5463 
5464 		mutex_unlock(&sVnodeMutex);
5465 
5466 		disconnect_mount_or_vnode_fds(mount, NULL);
5467 		disconnectedDescriptors = true;
5468 
5469 		mutex_lock(&sVnodeMutex);
5470 	}
5471 
5472 	// we can safely continue, mark all of the vnodes busy and this mount
5473 	// structure in unmounting state
5474 	mount->unmounting = true;
5475 
5476 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5477 		vnode->busy = true;
5478 
5479 		if (vnode->ref_count == 0) {
5480 			// this vnode has been unused before
5481 			list_remove_item(&sUnusedVnodeList, vnode);
5482 			sUnusedVnodes--;
5483 		}
5484 	}
5485 
5486 	// The ref_count of the root node is 2 at this point, see above why this is
5487 	mount->root_vnode->ref_count -= 2;
5488 
5489 	mutex_unlock(&sVnodeMutex);
5490 
5491 	mount->covers_vnode->covered_by = NULL;
5492 	put_vnode(mount->covers_vnode);
5493 
5494 	// Free all vnodes associated with this mount.
5495 	// They will be removed from the mount list by free_vnode(), so
5496 	// we don't have to do it ourselves.
5497 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes)) != NULL) {
5498 		free_vnode(vnode, false);
5499 	}
5500 
5501 	// remove the mount structure from the hash table
5502 	mutex_lock(&sMountMutex);
5503 	hash_remove(sMountsTable, mount);
5504 	mutex_unlock(&sMountMutex);
5505 
5506 	mountOpLocker.Unlock();
5507 
5508 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5509 	notify_unmount(mount->id);
5510 
5511 	// release the file system
5512 	put_file_system(mount->fs);
5513 
5514 	// dereference the partition and mark it unmounted
5515 	if (partition) {
5516 		partition->SetVolumeID(-1);
5517 		partition->SetMountCookie(NULL);
5518 
5519 		if (mount->owns_file_device)
5520 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
5521 		partition->Unregister();
5522 	}
5523 
5524 	free(mount->device_name);
5525 	free(mount->fs_name);
5526 	free(mount);
5527 
5528 	return B_OK;
5529 }
5530 
5531 
5532 static status_t
5533 fs_sync(dev_t device)
5534 {
5535 	struct fs_mount *mount;
5536 	status_t status = get_mount(device, &mount);
5537 	if (status < B_OK)
5538 		return status;
5539 
5540 	mutex_lock(&sMountMutex);
5541 
5542 	if (FS_MOUNT_CALL(mount, sync))
5543 		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);
5544 
5545 	mutex_unlock(&sMountMutex);
5546 
5547 	struct vnode *previousVnode = NULL;
5548 	while (true) {
5549 		// synchronize access to vnode list
5550 		recursive_lock_lock(&mount->rlock);
5551 
5552 		struct vnode *vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
5553 			previousVnode);
5554 
5555 		vnode_id id = -1;
5556 		if (vnode != NULL)
5557 			id = vnode->id;
5558 
5559 		recursive_lock_unlock(&mount->rlock);
5560 
5561 		if (vnode == NULL)
5562 			break;
5563 
5564 		// acquire a reference to the vnode
5565 
5566 		if (get_vnode(mount->id, id, &vnode, true) == B_OK) {
5567 			if (previousVnode != NULL)
5568 				put_vnode(previousVnode);
5569 
5570 			if (FS_CALL(vnode, fsync) != NULL)
5571 				FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
5572 
5573 			// the next vnode might change until we lock the vnode list again,
5574 			// but this vnode won't go away since we keep a reference to it.
5575 			previousVnode = vnode;
5576 		} else {
5577 			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n", mount->id, id);
5578 			break;
5579 		}
5580 	}
5581 
5582 	if (previousVnode != NULL)
5583 		put_vnode(previousVnode);
5584 
5585 	put_mount(mount);
5586 	return status;
5587 }
5588 
5589 
5590 static status_t
5591 fs_read_info(dev_t device, struct fs_info *info)
5592 {
5593 	struct fs_mount *mount;
5594 	status_t status = get_mount(device, &mount);
5595 	if (status < B_OK)
5596 		return status;
5597 
5598 	// fill in info the file system doesn't (have to) know about
5599 	memset(info, 0, sizeof(struct fs_info));
5600 	info->dev = mount->id;
5601 	info->root = mount->root_vnode->id;
5602 	strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
5603 	if (mount->device_name != NULL)
5604 		strlcpy(info->device_name, mount->device_name, sizeof(info->device_name));
5605 
5606 	if (FS_MOUNT_CALL(mount, read_fs_info))
5607 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
5608 
5609 	// if the call is not supported by the file system, the caller still
5610 	// gets the parts that we filled in ourselves above
5611 
5612 	put_mount(mount);
5613 	return status;
5614 }
5615 
5616 
5617 static status_t
5618 fs_write_info(dev_t device, const struct fs_info *info, int mask)
5619 {
5620 	struct fs_mount *mount;
5621 	status_t status = get_mount(device, &mount);
5622 	if (status < B_OK)
5623 		return status;
5624 
5625 	if (FS_MOUNT_CALL(mount, write_fs_info))
5626 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
5627 	else
5628 		status = EROFS;
5629 
5630 	put_mount(mount);
5631 	return status;
5632 }
5633 
5634 
5635 static dev_t
5636 fs_next_device(int32 *_cookie)
5637 {
5638 	struct fs_mount *mount = NULL;
5639 	dev_t device = *_cookie;
5640 
5641 	mutex_lock(&sMountMutex);
5642 
5643 	// Since device IDs are assigned sequentially, this algorithm
5644 	// works well enough. It makes sure that the device list
5645 	// returned is sorted, and that no device is skipped when an
5646 	// already visited device has been unmounted.
5647 
5648 	while (device < sNextMountID) {
5649 		mount = find_mount(device++);
5650 		if (mount != NULL && mount->cookie != NULL)
5651 			break;
5652 	}
5653 
5654 	*_cookie = device;
5655 
5656 	if (mount != NULL)
5657 		device = mount->id;
5658 	else
5659 		device = B_BAD_VALUE;
5660 
5661 	mutex_unlock(&sMountMutex);
5662 
5663 	return device;
5664 }
5665 
5666 
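// Editorial sketch, not part of the original source: how the cookie based
// iteration of fs_next_device() is consumed from userland; next_dev() and
// fs_stat_dev() are the public wrappers.
#if 0
	int32 cookie = 0;
	dev_t device;
	while ((device = next_dev(&cookie)) >= 0) {
		fs_info info;
		if (fs_stat_dev(device, &info) == B_OK)
			printf("volume %ld: %s\n", device, info.volume_name);
	}
#endif

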
5667 static status_t
5668 get_cwd(char *buffer, size_t size, bool kernel)
5669 {
5670 	// Get current working directory from io context
5671 	struct io_context *context = get_current_io_context(kernel);
5672 	status_t status;
5673 
5674 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
5675 
5676 	mutex_lock(&context->io_mutex);
5677 
5678 	if (context->cwd)
5679 		status = dir_vnode_to_path(context->cwd, buffer, size);
5680 	else
5681 		status = B_ERROR;
5682 
5683 	mutex_unlock(&context->io_mutex);
5684 	return status;
5685 }
5686 
5687 
5688 static status_t
5689 set_cwd(int fd, char *path, bool kernel)
5690 {
5691 	struct io_context *context;
5692 	struct vnode *vnode = NULL;
5693 	struct vnode *oldDirectory;
5694 	struct stat stat;
5695 	status_t status;
5696 
5697 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
5698 
5699 	// Get vnode for passed path, and bail if it failed
5700 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5701 	if (status < 0)
5702 		return status;
5703 
5704 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
5705 	if (status < 0)
5706 		goto err;
5707 
5708 	if (!S_ISDIR(stat.st_mode)) {
5709 		// nope, can't cwd to here
5710 		status = B_NOT_A_DIRECTORY;
5711 		goto err;
5712 	}
5713 
5714 	// Get current io context and lock
5715 	context = get_current_io_context(kernel);
5716 	mutex_lock(&context->io_mutex);
5717 
5718 	// save the old current working directory first
5719 	oldDirectory = context->cwd;
5720 	context->cwd = vnode;
5721 
5722 	mutex_unlock(&context->io_mutex);
5723 
5724 	if (oldDirectory)
5725 		put_vnode(oldDirectory);
5726 
5727 	return B_NO_ERROR;
5728 
5729 err:
5730 	put_vnode(vnode);
5731 	return status;
5732 }
5733 
5734 
5735 //	#pragma mark -
5736 //	Calls from within the kernel
5737 
5738 
5739 dev_t
5740 _kern_mount(const char *path, const char *device, const char *fsName,
5741 	uint32 flags, const char *args, size_t argsLength)
5742 {
5743 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5744 	if (pathBuffer.InitCheck() != B_OK)
5745 		return B_NO_MEMORY;
5746 
5747 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
5748 }
5749 
5750 
5751 status_t
5752 _kern_unmount(const char *path, uint32 flags)
5753 {
5754 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5755 	if (pathBuffer.InitCheck() != B_OK)
5756 		return B_NO_MEMORY;
5757 
5758 	return fs_unmount(pathBuffer.LockBuffer(), flags, true);
5759 }
5760 
5761 
5762 status_t
5763 _kern_read_fs_info(dev_t device, struct fs_info *info)
5764 {
5765 	if (info == NULL)
5766 		return B_BAD_VALUE;
5767 
5768 	return fs_read_info(device, info);
5769 }
5770 
5771 
5772 status_t
5773 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
5774 {
5775 	if (info == NULL)
5776 		return B_BAD_VALUE;
5777 
5778 	return fs_write_info(device, info, mask);
5779 }
5780 
5781 
5782 status_t
5783 _kern_sync(void)
5784 {
5785 	// Note: _kern_sync() is also called from _user_sync()
5786 	int32 cookie = 0;
5787 	dev_t device;
5788 	while ((device = next_dev(&cookie)) >= 0) {
5789 		status_t status = fs_sync(device);
5790 		if (status != B_OK && status != B_BAD_VALUE)
5791 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
5792 	}
5793 
5794 	return B_OK;
5795 }
5796 
5797 
5798 dev_t
5799 _kern_next_device(int32 *_cookie)
5800 {
5801 	return fs_next_device(_cookie);
5802 }
5803 
5804 
5805 status_t
5806 _kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
5807 	size_t infoSize)
5808 {
5809 	if (infoSize != sizeof(fd_info))
5810 		return B_BAD_VALUE;
5811 
5812 	struct io_context *context = NULL;
5813 	sem_id contextMutex = -1;
5814 	struct team *team = NULL;
5815 
5816 	cpu_status state = disable_interrupts();
5817 	GRAB_TEAM_LOCK();
5818 
5819 	team = team_get_team_struct_locked(teamID);
5820 	if (team) {
5821 		context = (io_context *)team->io_context;
5822 		contextMutex = context->io_mutex.sem;
5823 	}
5824 
5825 	RELEASE_TEAM_LOCK();
5826 	restore_interrupts(state);
5827 
5828 	// we now have a context - since we couldn't lock it while having
5829 	// safe access to the team structure, we now need to lock the mutex
5830 	// manually
5831 
5832 	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
5833 		// team doesn't exist or seems to be gone
5834 		return B_BAD_TEAM_ID;
5835 	}
5836 
5837 	// the team cannot be deleted completely while we're owning its
5838 	// io_context mutex, so we can safely play with it now
5839 
5840 	context->io_mutex.holder = thread_get_current_thread_id();
5841 
5842 	uint32 slot = *_cookie;
5843 
5844 	struct file_descriptor *descriptor;
5845 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
5846 		slot++;
5847 
5848 	if (slot >= context->table_size) {
5849 		mutex_unlock(&context->io_mutex);
5850 		return B_ENTRY_NOT_FOUND;
5851 	}
5852 
5853 	info->number = slot;
5854 	info->open_mode = descriptor->open_mode;
5855 
5856 	struct vnode *vnode = fd_vnode(descriptor);
5857 	if (vnode != NULL) {
5858 		info->device = vnode->device;
5859 		info->node = vnode->id;
5860 	} else if (descriptor->u.mount != NULL) {
5861 		info->device = descriptor->u.mount->id;
5862 		info->node = -1;
5863 	}
5864 
5865 	mutex_unlock(&context->io_mutex);
5866 
5867 	*_cookie = slot + 1;
5868 	return B_OK;
5869 }
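
// Example (illustrative only): enumerating the open FDs of a team with the
// cookie protocol above. Each successful call fills in one fd_info and
// advances the cookie past the returned slot; B_ENTRY_NOT_FOUND marks the
// end of the FD table.
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
//			== B_OK) {
//		// info.number is the FD slot; info.device/info.node identify
//		// the vnode (node is -1 for plain mount FDs, see above)
//	}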
5870 
5871 
5872 int
5873 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
5874 {
5875 	if (openMode & O_CREAT)
5876 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
5877 
5878 	return file_open_entry_ref(device, inode, name, openMode, true);
5879 }
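
// Example (illustrative only): opening the entry "data.txt" in the
// directory identified by (device, inode), creating it if necessary. The
// entry_ref variant avoids having to build a full path first.
//
//	int fd = _kern_open_entry_ref(device, inode, "data.txt",
//		O_RDWR | O_CREAT, 0644);
//	if (fd < 0)
//		;	// fd holds the error code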
5880 
5881 
5882 /**	\brief Opens a node specified by a FD + path pair.
5883  *
5884  *	At least one of \a fd and \a path must be specified.
5885  *	If only \a fd is given, the function opens the node identified by this
5886  *	FD. If only a path is given, this path is opened. If both are given and
5887  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5888  *	of the directory (!) identified by \a fd.
5889  *
5890  *	\param fd The FD. May be < 0.
5891  *	\param path The absolute or relative path. May be \c NULL.
5892  *	\param openMode The open mode.
 *	\param perms The access permissions for the file, used only if
 *		   \c O_CREAT is set in \a openMode.
5893  *	\return A FD referring to the newly opened node, or an error code,
5894  *			if an error occurs.
5895  */
5896 
5897 int
5898 _kern_open(int fd, const char *path, int openMode, int perms)
5899 {
5900 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5901 	if (pathBuffer.InitCheck() != B_OK)
5902 		return B_NO_MEMORY;
5903 
5904 	if (openMode & O_CREAT)
5905 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
5906 
5907 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
5908 }
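
// Example (illustrative only) of the FD + path semantics described above:
// an absolute path ignores the FD, while a relative path is resolved
// against the directory the FD refers to.
//
//	int fd1 = _kern_open(dirFD, "/boot/home/test", O_RDONLY, 0);
//		// dirFD is ignored
//	int fd2 = _kern_open(dirFD, "test", O_RDONLY, 0);
//		// "test" is looked up in the directory dirFD refers to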
5909 
5910 
5911 /**	\brief Opens a directory specified by entry_ref or node_ref.
5912  *
5913  *	The supplied name may be \c NULL, in which case directory identified
5914  *	by \a device and \a inode will be opened. Otherwise \a device and
5915  *	\a inode identify the parent directory of the directory to be opened
5916  *	and \a name its entry name.
5917  *
5918  *	\param device If \a name is specified the ID of the device the parent
5919  *		   directory of the directory to be opened resides on, otherwise
5920  *		   the device of the directory itself.
5921  *	\param inode If \a name is specified the node ID of the parent
5922  *		   directory of the directory to be opened, otherwise node ID of the
5923  *		   directory itself.
5924  *	\param name The entry name of the directory to be opened. If \c NULL,
5925  *		   the \a device + \a inode pair identify the node to be opened.
5926  *	\return The FD of the newly opened directory or an error code, if
5927  *			something went wrong.
5928  */
5929 
5930 int
5931 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
5932 {
5933 	return dir_open_entry_ref(device, inode, name, true);
5934 }
5935 
5936 
5937 /**	\brief Opens a directory specified by a FD + path pair.
5938  *
5939  *	At least one of \a fd and \a path must be specified.
5940  *	If only \a fd is given, the function opens the directory identified by this
5941  *	FD. If only a path is given, this path is opened. If both are given and
5942  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5943  *	of the directory (!) identified by \a fd.
5944  *
5945  *	\param fd The FD. May be < 0.
5946  *	\param path The absolute or relative path. May be \c NULL.
5947  *	\return A FD referring to the newly opened directory, or an error code,
5948  *			if an error occurs.
5949  */
5950 
5951 int
5952 _kern_open_dir(int fd, const char *path)
5953 {
5954 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5955 	if (pathBuffer.InitCheck() != B_OK)
5956 		return B_NO_MEMORY;
5957 
5958 	return dir_open(fd, pathBuffer.LockBuffer(), true);
5959 }
5960 
5961 
5962 status_t
5963 _kern_fcntl(int fd, int op, uint32 argument)
5964 {
5965 	return common_fcntl(fd, op, argument, true);
5966 }
5967 
5968 
5969 status_t
5970 _kern_fsync(int fd)
5971 {
5972 	return common_sync(fd, true);
5973 }
5974 
5975 
5976 status_t
5977 _kern_lock_node(int fd)
5978 {
5979 	return common_lock_node(fd, true);
5980 }
5981 
5982 
5983 status_t
5984 _kern_unlock_node(int fd)
5985 {
5986 	return common_unlock_node(fd, true);
5987 }
5988 
5989 
5990 status_t
5991 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
5992 {
5993 	return dir_create_entry_ref(device, inode, name, perms, true);
5994 }
5995 
5996 
5997 /**	\brief Creates a directory specified by a FD + path pair.
5998  *
5999  *	\a path must always be specified (it contains the name of the new directory
6000  *	at least). If only a path is given, this path identifies the location at
6001  *	which the directory shall be created. If both \a fd and \a path are given and
6002  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6003  *	of the directory (!) identified by \a fd.
6004  *
6005  *	\param fd The FD. May be < 0.
6006  *	\param path The absolute or relative path. Must not be \c NULL.
6007  *	\param perms The access permissions the new directory shall have.
6008  *	\return \c B_OK, if the directory has been created successfully, another
6009  *			error code otherwise.
6010  */
6011 
6012 status_t
6013 _kern_create_dir(int fd, const char *path, int perms)
6014 {
6015 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6016 	if (pathBuffer.InitCheck() != B_OK)
6017 		return B_NO_MEMORY;
6018 
6019 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6020 }
6021 
6022 
6023 status_t
6024 _kern_remove_dir(int fd, const char *path)
6025 {
6026 	if (path) {
6027 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6028 		if (pathBuffer.InitCheck() != B_OK)
6029 			return B_NO_MEMORY;
6030 
6031 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6032 	}
6033 
6034 	return dir_remove(fd, NULL, true);
6035 }
6036 
6037 
6038 /**	\brief Reads the contents of a symlink referred to by a FD + path pair.
6039  *
6040  *	At least one of \a fd and \a path must be specified.
6041  *	If only \a fd is given, the symlink to be read is the node
6042  *	identified by this FD. If only a path is given, this path identifies the
6043  *	symlink to be read. If both are given and the path is absolute, \a fd is
6044  *	ignored; a relative path is reckoned off of the directory (!) identified
6045  *	by \a fd.
6046  *	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6047  *	will still be updated to reflect the required buffer size.
6048  *
6049  *	\param fd The FD. May be < 0.
6050  *	\param path The absolute or relative path. May be \c NULL.
6051  *	\param buffer The buffer into which the contents of the symlink shall be
6052  *		   written.
6053  *	\param _bufferSize A pointer to the size of the supplied buffer.
6054  *	\return The length of the link on success or an appropriate error code.
6055  */
6056 
6057 status_t
6058 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6059 {
6060 	status_t status;
6061 
6062 	if (path) {
6063 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6064 		if (pathBuffer.InitCheck() != B_OK)
6065 			return B_NO_MEMORY;
6066 
6067 		return common_read_link(fd, pathBuffer.LockBuffer(),
6068 			buffer, _bufferSize, true);
6069 	}
6070 
6071 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6072 }
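
// Example (illustrative only): reading a symlink. As documented above,
// common_read_link() reports the required size back through \a _bufferSize
// even on B_BUFFER_OVERFLOW, so a caller can detect truncation.
//
//	char buffer[B_PATH_NAME_LENGTH];
//	size_t size = sizeof(buffer);
//	status_t status = _kern_read_link(-1, "/boot/home/link", buffer, &size);
//	if (status == B_BUFFER_OVERFLOW)
//		;	// size now holds the length actually needed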
6073 
6074 
6075 status_t
6076 _kern_write_link(const char *path, const char *toPath)
6077 {
6078 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6079 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6080 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6081 		return B_NO_MEMORY;
6082 
6083 	char *toBuffer = toPathBuffer.LockBuffer();
6084 
6085 	status_t status = check_path(toBuffer);
6086 	if (status < B_OK)
6087 		return status;
6088 
6089 	return common_write_link(pathBuffer.LockBuffer(), toBuffer, true);
6090 }
6091 
6092 
6093 /**	\brief Creates a symlink specified by a FD + path pair.
6094  *
6095  *	\a path must always be specified (it contains the name of the new symlink
6096  *	at least). If only a path is given, this path identifies the location at
6097  *	which the symlink shall be created. If both \a fd and \a path are given and
6098  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
6099  *	of the directory (!) identified by \a fd.
6100  *
6101  *	\param fd The FD. May be < 0.
6102  *	\param path The absolute or relative path. Must not be \c NULL.
 *	\param toPath The path the symlink shall point to. Must not be \c NULL.
6103  *	\param mode The access permissions the new symlink shall have.
6104  *	\return \c B_OK, if the symlink has been created successfully, another
6105  *			error code otherwise.
6106  */
6107 
6108 status_t
6109 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6110 {
6111 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6112 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6113 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6114 		return B_NO_MEMORY;
6115 
6116 	char *toBuffer = toPathBuffer.LockBuffer();
6117 
6118 	status_t status = check_path(toBuffer);
6119 	if (status < B_OK)
6120 		return status;
6121 
6122 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6123 		toBuffer, mode, true);
6124 }
6125 
6126 
6127 status_t
6128 _kern_create_link(const char *path, const char *toPath)
6129 {
6130 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6131 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6132 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6133 		return B_NO_MEMORY;
6134 
6135 	return common_create_link(pathBuffer.LockBuffer(),
6136 		toPathBuffer.LockBuffer(), true);
6137 }
6138 
6139 
6140 /**	\brief Removes an entry specified by a FD + path pair from its directory.
6141  *
6142  *	\a path must always be specified (it contains at least the name of the entry
6143  *	to be deleted). If only a path is given, this path identifies the entry
6144  *	directly. If both \a fd and \a path are given and the path is absolute,
6145  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6146  *	identified by \a fd.
6147  *
6148  *	\param fd The FD. May be < 0.
6149  *	\param path The absolute or relative path. Must not be \c NULL.
6150  *	\return \c B_OK, if the entry has been removed successfully, another
6151  *			error code otherwise.
6152  */
6153 
6154 status_t
6155 _kern_unlink(int fd, const char *path)
6156 {
6157 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6158 	if (pathBuffer.InitCheck() != B_OK)
6159 		return B_NO_MEMORY;
6160 
6161 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6162 }
6163 
6164 
6165 /**	\brief Moves an entry specified by a FD + path pair to an entry specified
6166  *		   by another FD + path pair.
6167  *
6168  *	\a oldPath and \a newPath must always be specified (they contain at least
6169  *	the name of the entry). If only a path is given, this path identifies the
6170  *	entry directly. If both a FD and a path are given and the path is absolute,
6171  *	the FD is ignored; a relative path is reckoned off of the directory (!)
6172  *	identified by the respective FD.
6173  *
6174  *	\param oldFD The FD of the old location. May be < 0.
6175  *	\param oldPath The absolute or relative path of the old location. Must not
6176  *		   be \c NULL.
6177  *	\param newFD The FD of the new location. May be < 0.
6178  *	\param newPath The absolute or relative path of the new location. Must not
6179  *		   be \c NULL.
6180  *	\return \c B_OK, if the entry has been moved successfully, another
6181  *			error code otherwise.
6182  */
6183 
6184 status_t
6185 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6186 {
6187 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6188 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6189 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6190 		return B_NO_MEMORY;
6191 
6192 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6193 		newFD, newPathBuffer.LockBuffer(), true);
6194 }
6195 
6196 
6197 status_t
6198 _kern_access(const char *path, int mode)
6199 {
6200 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6201 	if (pathBuffer.InitCheck() != B_OK)
6202 		return B_NO_MEMORY;
6203 
6204 	return common_access(pathBuffer.LockBuffer(), mode, true);
6205 }
6206 
6207 
6208 /**	\brief Reads stat data of an entity specified by a FD + path pair.
6209  *
6210  *	If only \a fd is given, the stat operation associated with the type
6211  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6212  *	given, this path identifies the entry for whose node to retrieve the
6213  *	stat data. If both \a fd and \a path are given and the path is absolute,
6214  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6215  *	identified by \a fd and specifies the entry whose stat data shall be
6216  *	retrieved.
6217  *
6218  *	\param fd The FD. May be < 0.
6219  *	\param path The absolute or relative path. Must not be \c NULL.
6220  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6221  *		   function shall not stick to symlinks, but traverse them.
6222  *	\param stat The buffer the stat data shall be written into.
6223  *	\param statSize The size of the supplied stat buffer.
6224  *	\return \c B_OK, if the stat data have been read successfully, another
6225  *			error code otherwise.
6226  */
6227 
6228 status_t
6229 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
6230 	struct stat *stat, size_t statSize)
6231 {
6232 	struct stat completeStat;
6233 	struct stat *originalStat = NULL;
6234 	status_t status;
6235 
6236 	if (statSize > sizeof(struct stat))
6237 		return B_BAD_VALUE;
6238 
6239 	// this supports different stat extensions
6240 	if (statSize < sizeof(struct stat)) {
6241 		originalStat = stat;
6242 		stat = &completeStat;
6243 	}
6244 
6245 	if (path) {
6246 		// path given: get the stat of the node referred to by (fd, path)
6247 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6248 		if (pathBuffer.InitCheck() != B_OK)
6249 			return B_NO_MEMORY;
6250 
6251 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
6252 			traverseLeafLink, stat, true);
6253 	} else {
6254 		// no path given: get the FD and use the FD operation
6255 		struct file_descriptor *descriptor
6256 			= get_fd(get_current_io_context(true), fd);
6257 		if (descriptor == NULL)
6258 			return B_FILE_ERROR;
6259 
6260 		if (descriptor->ops->fd_read_stat)
6261 			status = descriptor->ops->fd_read_stat(descriptor, stat);
6262 		else
6263 			status = EOPNOTSUPP;
6264 
6265 		put_fd(descriptor);
6266 	}
6267 
6268 	if (status == B_OK && originalStat != NULL)
6269 		memcpy(originalStat, stat, statSize);
6270 
6271 	return status;
6272 }
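
// Example (illustrative only) of the stat extension mechanism above: a
// caller compiled against an older, smaller stat layout passes its own
// sizeof, and only that prefix of the freshly read data is copied back.
// "legacy_stat" is hypothetical and stands for any prefix-compatible
// layout.
//
//	struct legacy_stat oldStat;
//	status_t status = _kern_read_stat(fd, NULL, false,
//		(struct stat *)&oldStat, sizeof(oldStat));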
6273 
6274 
6275 /**	\brief Writes stat data of an entity specified by a FD + path pair.
6276  *
6277  *	If only \a fd is given, the stat operation associated with the type
6278  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6279  *	given, this path identifies the entry for whose node to write the
6280  *	stat data. If both \a fd and \a path are given and the path is absolute,
6281  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6282  *	identified by \a fd and specifies the entry whose stat data shall be
6283  *	written.
6284  *
6285  *	\param fd The FD. May be < 0.
6286  *	\param path The absolute or relative path. Must not be \c NULL.
6287  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6288  *		   function shall not stick to symlinks, but traverse them.
6289  *	\param stat The buffer containing the stat data to be written.
6290  *	\param statSize The size of the supplied stat buffer.
6291  *	\param statMask A mask specifying which parts of the stat data shall be
6292  *		   written.
6293  *	\return \c B_OK, if the stat data have been written successfully,
6294  *			another error code otherwise.
6295  */
6296 
6297 status_t
6298 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
6299 	const struct stat *stat, size_t statSize, int statMask)
6300 {
6301 	struct stat completeStat;
6302 
6303 	if (statSize > sizeof(struct stat))
6304 		return B_BAD_VALUE;
6305 
6306 	// this supports different stat extensions
6307 	if (statSize < sizeof(struct stat)) {
6308 		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
6309 		memcpy(&completeStat, stat, statSize);
6310 		stat = &completeStat;
6311 	}
6312 
6313 	status_t status;
6314 
6315 	if (path) {
6316 		// path given: write the stat of the node referred to by (fd, path)
6317 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6318 		if (pathBuffer.InitCheck() != B_OK)
6319 			return B_NO_MEMORY;
6320 
6321 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
6322 			traverseLeafLink, stat, statMask, true);
6323 	} else {
6324 		// no path given: get the FD and use the FD operation
6325 		struct file_descriptor *descriptor
6326 			= get_fd(get_current_io_context(true), fd);
6327 		if (descriptor == NULL)
6328 			return B_FILE_ERROR;
6329 
6330 		if (descriptor->ops->fd_write_stat)
6331 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
6332 		else
6333 			status = EOPNOTSUPP;
6334 
6335 		put_fd(descriptor);
6336 	}
6337 
6338 	return status;
6339 }
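
// Example (illustrative only): a chmod()-style update that commits only the
// mode bits. The mask constant shown (B_STAT_MODE, from the node monitor
// API) selects which stat fields the file system shall actually write.
//
//	struct stat stat;
//	stat.st_mode = 0644;
//	status_t status = _kern_write_stat(-1, "/boot/home/test", true,
//		&stat, sizeof(stat), B_STAT_MODE);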
6340 
6341 
6342 int
6343 _kern_open_attr_dir(int fd, const char *path)
6344 {
6345 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6346 	if (pathBuffer.InitCheck() != B_OK)
6347 		return B_NO_MEMORY;
6348 
6349 	if (path != NULL)
6350 		pathBuffer.SetTo(path);
6351 
6352 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6353 }
6354 
6355 
6356 int
6357 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
6358 {
6359 	return attr_create(fd, name, type, openMode, true);
6360 }
6361 
6362 
6363 int
6364 _kern_open_attr(int fd, const char *name, int openMode)
6365 {
6366 	return attr_open(fd, name, openMode, true);
6367 }
6368 
6369 
6370 status_t
6371 _kern_remove_attr(int fd, const char *name)
6372 {
6373 	return attr_remove(fd, name, true);
6374 }
6375 
6376 
6377 status_t
6378 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
6379 {
6380 	return attr_rename(fromFile, fromName, toFile, toName, true);
6381 }
6382 
6383 
6384 int
6385 _kern_open_index_dir(dev_t device)
6386 {
6387 	return index_dir_open(device, true);
6388 }
6389 
6390 
6391 status_t
6392 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
6393 {
6394 	return index_create(device, name, type, flags, true);
6395 }
6396 
6397 
6398 status_t
6399 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
6400 {
6401 	return index_name_read_stat(device, name, stat, true);
6402 }
6403 
6404 
6405 status_t
6406 _kern_remove_index(dev_t device, const char *name)
6407 {
6408 	return index_remove(device, name, true);
6409 }
6410 
6411 
6412 status_t
6413 _kern_getcwd(char *buffer, size_t size)
6414 {
6415 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
6416 
6417 	// Call vfs to get current working directory
6418 	return get_cwd(buffer, size, true);
6419 }
6420 
6421 
6422 status_t
6423 _kern_setcwd(int fd, const char *path)
6424 {
6425 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6426 	if (pathBuffer.InitCheck() != B_OK)
6427 		return B_NO_MEMORY;
6428 
6429 	if (path != NULL)
6430 		pathBuffer.SetTo(path);
6431 
6432 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
6433 }
6434 
6435 
6436 //	#pragma mark -
6437 //	Calls from userland (with extra address checks)
6438 
6439 
6440 dev_t
6441 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
6442 	uint32 flags, const char *userArgs, size_t argsLength)
6443 {
6444 	char fileSystem[B_OS_NAME_LENGTH];
6445 	KPath path, device;
6446 	char *args = NULL;
6447 	status_t status;
6448 
6449 	if (!IS_USER_ADDRESS(userPath)
6450 		|| !IS_USER_ADDRESS(userFileSystem)
6451 		|| !IS_USER_ADDRESS(userDevice))
6452 		return B_BAD_ADDRESS;
6453 
6454 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
6455 		return B_NO_MEMORY;
6456 
6457 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
6458 		return B_BAD_ADDRESS;
6459 
6460 	if (userFileSystem != NULL
6461 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
6462 		return B_BAD_ADDRESS;
6463 
6464 	if (userDevice != NULL
6465 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
6466 		return B_BAD_ADDRESS;
6467 
6468 	if (userArgs != NULL && argsLength > 0) {
6469 		// this is a safety restriction
6470 		if (argsLength >= 65536)
6471 			return B_NAME_TOO_LONG;
6472 
6473 		args = (char *)malloc(argsLength + 1);
6474 		if (args == NULL)
6475 			return B_NO_MEMORY;
6476 
6477 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
6478 			free(args);
6479 			return B_BAD_ADDRESS;
6480 		}
6481 	}
6482 	path.UnlockBuffer();
6483 	device.UnlockBuffer();
6484 
6485 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
6486 		userFileSystem ? fileSystem : NULL, flags, args, false);
6487 
6488 	free(args);
6489 	return status;
6490 }
6491 
6492 
6493 status_t
6494 _user_unmount(const char *userPath, uint32 flags)
6495 {
6496 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6497 	if (pathBuffer.InitCheck() != B_OK)
6498 		return B_NO_MEMORY;
6499 
6500 	char *path = pathBuffer.LockBuffer();
6501 
6502 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6503 		return B_BAD_ADDRESS;
6504 
6505 	return fs_unmount(path, flags, false);
6506 }
6507 
6508 
6509 status_t
6510 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6511 {
6512 	struct fs_info info;
6513 	status_t status;
6514 
6515 	if (userInfo == NULL)
6516 		return B_BAD_VALUE;
6517 
6518 	if (!IS_USER_ADDRESS(userInfo))
6519 		return B_BAD_ADDRESS;
6520 
6521 	status = fs_read_info(device, &info);
6522 	if (status != B_OK)
6523 		return status;
6524 
6525 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6526 		return B_BAD_ADDRESS;
6527 
6528 	return B_OK;
6529 }
6530 
6531 
6532 status_t
6533 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6534 {
6535 	struct fs_info info;
6536 
6537 	if (userInfo == NULL)
6538 		return B_BAD_VALUE;
6539 
6540 	if (!IS_USER_ADDRESS(userInfo)
6541 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
6542 		return B_BAD_ADDRESS;
6543 
6544 	return fs_write_info(device, &info, mask);
6545 }
6546 
6547 
6548 dev_t
6549 _user_next_device(int32 *_userCookie)
6550 {
6551 	int32 cookie;
6552 	dev_t device;
6553 
6554 	if (!IS_USER_ADDRESS(_userCookie)
6555 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
6556 		return B_BAD_ADDRESS;
6557 
6558 	device = fs_next_device(&cookie);
6559 
6560 	if (device >= B_OK) {
6561 		// update user cookie
6562 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
6563 			return B_BAD_ADDRESS;
6564 	}
6565 
6566 	return device;
6567 }
6568 
6569 
6570 status_t
6571 _user_sync(void)
6572 {
6573 	return _kern_sync();
6574 }
6575 
6576 
6577 status_t
6578 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
6579 	size_t infoSize)
6580 {
6581 	struct fd_info info;
6582 	uint32 cookie;
6583 
6584 	// only root can do this (or should root's group be enough?)
6585 	if (geteuid() != 0)
6586 		return B_NOT_ALLOWED;
6587 
6588 	if (infoSize != sizeof(fd_info))
6589 		return B_BAD_VALUE;
6590 
6591 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
6592 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
6593 		return B_BAD_ADDRESS;
6594 
6595 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
6596 	if (status < B_OK)
6597 		return status;
6598 
6599 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
6600 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
6601 		return B_BAD_ADDRESS;
6602 
6603 	return status;
6604 }
6605 
6606 
6607 status_t
6608 _user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
6609 	char *userPath, size_t pathLength)
6610 {
6611 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6612 	if (pathBuffer.InitCheck() != B_OK)
6613 		return B_NO_MEMORY;
6614 
6615 	struct vnode *vnode;
6616 	status_t status;
6617 
6618 	if (!IS_USER_ADDRESS(userPath))
6619 		return B_BAD_ADDRESS;
6620 
6621 	// copy the leaf name onto the stack
6622 	char stackLeaf[B_FILE_NAME_LENGTH];
6623 	if (leaf) {
6624 		if (!IS_USER_ADDRESS(leaf))
6625 			return B_BAD_ADDRESS;
6626 
6627 		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
6628 		if (len < 0)
6629 			return len;
6630 		if (len >= B_FILE_NAME_LENGTH)
6631 			return B_NAME_TOO_LONG;
6632 		leaf = stackLeaf;
6633 
6634 		// filter invalid leaf names
6635 		if (leaf[0] == '\0' || strchr(leaf, '/'))
6636 			return B_BAD_VALUE;
6637 	}
6638 
6639 	// get the vnode matching the dir's node_ref
6640 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
6641 		// special cases "." and "..": we can directly get the vnode of the
6642 		// referenced directory
6643 		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
6644 		leaf = NULL;
6645 	} else
6646 		status = get_vnode(device, inode, &vnode, false);
6647 	if (status < B_OK)
6648 		return status;
6649 
6650 	char *path = pathBuffer.LockBuffer();
6651 
6652 	// get the directory path
6653 	status = dir_vnode_to_path(vnode, path, pathBuffer.BufferSize());
6654 	put_vnode(vnode);
6655 		// we don't need the vnode anymore
6656 	if (status < B_OK)
6657 		return status;
6658 
6659 	// append the leaf name
6660 	if (leaf) {
6661 		// insert a directory separator if this is not the file system root
6662 		if ((strcmp(path, "/") && strlcat(path, "/", pathBuffer.BufferSize())
6663 				>= pathBuffer.BufferSize())
6664 			|| strlcat(path, leaf, pathBuffer.BufferSize()) >= pathBuffer.BufferSize()) {
6665 			return B_NAME_TOO_LONG;
6666 		}
6667 	}
6668 
6669 	int len = user_strlcpy(userPath, path, pathLength);
6670 	if (len < 0)
6671 		return len;
6672 	if (len >= (int)pathLength)
6673 		return B_BUFFER_OVERFLOW;
6674 
6675 	return B_OK;
6676 }
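
// Example (illustrative only, as invoked from userland through the syscall
// mechanism): turning an entry_ref-style (device, inode, leaf) triple back
// into an absolute path, the way BPath and friends do.
//
//	char path[B_PATH_NAME_LENGTH];
//	status_t status = _user_entry_ref_to_path(device, inode, "test",
//		path, sizeof(path));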
6677 
6678 
6679 int
6680 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
6681 	int openMode, int perms)
6682 {
6683 	char name[B_FILE_NAME_LENGTH];
6684 
6685 	if (userName == NULL || device < 0 || inode < 0)
6686 		return B_BAD_VALUE;
6687 	if (!IS_USER_ADDRESS(userName)
6688 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6689 		return B_BAD_ADDRESS;
6690 
6691 	if (openMode & O_CREAT)
6692 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
6693 
6694 	return file_open_entry_ref(device, inode, name, openMode, false);
6695 }
6696 
6697 
6698 int
6699 _user_open(int fd, const char *userPath, int openMode, int perms)
6700 {
6701 	KPath path(B_PATH_NAME_LENGTH + 1);
6702 	if (path.InitCheck() != B_OK)
6703 		return B_NO_MEMORY;
6704 
6705 	char *buffer = path.LockBuffer();
6706 
6707 	if (!IS_USER_ADDRESS(userPath)
6708 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6709 		return B_BAD_ADDRESS;
6710 
6711 	if (openMode & O_CREAT)
6712 		return file_create(fd, buffer, openMode, perms, false);
6713 
6714 	return file_open(fd, buffer, openMode, false);
6715 }
6716 
6717 
6718 int
6719 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
6720 {
6721 	if (userName != NULL) {
6722 		char name[B_FILE_NAME_LENGTH];
6723 
6724 		if (!IS_USER_ADDRESS(userName)
6725 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
6726 			return B_BAD_ADDRESS;
6727 
6728 		return dir_open_entry_ref(device, inode, name, false);
6729 	}
6730 	return dir_open_entry_ref(device, inode, NULL, false);
6731 }
6732 
6733 
6734 int
6735 _user_open_dir(int fd, const char *userPath)
6736 {
6737 	KPath path(B_PATH_NAME_LENGTH + 1);
6738 	if (path.InitCheck() != B_OK)
6739 		return B_NO_MEMORY;
6740 
6741 	char *buffer = path.LockBuffer();
6742 
6743 	if (!IS_USER_ADDRESS(userPath)
6744 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6745 		return B_BAD_ADDRESS;
6746 
6747 	return dir_open(fd, buffer, false);
6748 }
6749 
6750 
6751 /**	\brief Opens a directory's parent directory and returns the entry name
6752  *		   of the former.
6753  *
6754  *	Aside from that it returns the directory's entry name, this method is
6755  *	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
6756  *	equivalent if \a userName is \c NULL.
6757  *
6758  *	If a name buffer is supplied and the name does not fit the buffer, the
6759  *	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
6760  *
6761  *	\param fd A FD referring to a directory.
6762  *	\param userName Buffer the directory's entry name shall be written into.
6763  *		   May be \c NULL.
6764  *	\param nameLength Size of the name buffer.
6765  *	\return The file descriptor of the opened parent directory, if everything
6766  *			went fine, an error code otherwise.
6767  */
6768 
6769 int
6770 _user_open_parent_dir(int fd, char *userName, size_t nameLength)
6771 {
6772 	bool kernel = false;
6773 
6774 	if (userName && !IS_USER_ADDRESS(userName))
6775 		return B_BAD_ADDRESS;
6776 
6777 	// open the parent dir
6778 	int parentFD = dir_open(fd, "..", kernel);
6779 	if (parentFD < 0)
6780 		return parentFD;
6781 	FDCloser fdCloser(parentFD, kernel);
6782 
6783 	if (userName) {
6784 		// get the vnodes
6785 		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
6786 		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
6787 		VNodePutter parentVNodePutter(parentVNode);
6788 		VNodePutter dirVNodePutter(dirVNode);
6789 		if (!parentVNode || !dirVNode)
6790 			return B_FILE_ERROR;
6791 
6792 		// get the vnode name
6793 		char name[B_FILE_NAME_LENGTH];
6794 		status_t status = get_vnode_name(dirVNode, parentVNode,
6795 			name, sizeof(name));
6796 		if (status != B_OK)
6797 			return status;
6798 
6799 		// copy the name to the userland buffer
6800 		int len = user_strlcpy(userName, name, nameLength);
6801 		if (len < 0)
6802 			return len;
6803 		if (len >= (int)nameLength)
6804 			return B_BUFFER_OVERFLOW;
6805 	}
6806 
6807 	return fdCloser.Detach();
6808 }
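
// Example (illustrative only, with userland buffers): opening the parent of
// a directory FD while retrieving the entry name of the child, e.g. to walk
// a path upwards to the root.
//
//	char name[B_FILE_NAME_LENGTH];
//	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
//	if (parentFD >= 0)
//		;	// name now holds dirFD's entry name within parentFD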
6809 
6810 
6811 status_t
6812 _user_fcntl(int fd, int op, uint32 argument)
6813 {
6814 	return common_fcntl(fd, op, argument, false);
6815 }
6816 
6817 
6818 status_t
6819 _user_fsync(int fd)
6820 {
6821 	return common_sync(fd, false);
6822 }
6823 
6824 
6825 status_t
6826 _user_lock_node(int fd)
6827 {
6828 	return common_lock_node(fd, false);
6829 }
6830 
6831 
6832 status_t
6833 _user_unlock_node(int fd)
6834 {
6835 	return common_unlock_node(fd, false);
6836 }
6837 
6838 
6839 status_t
6840 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
6841 {
6842 	char name[B_FILE_NAME_LENGTH];
6843 	status_t status;
6844 
6845 	if (!IS_USER_ADDRESS(userName))
6846 		return B_BAD_ADDRESS;
6847 
6848 	status = user_strlcpy(name, userName, sizeof(name));
6849 	if (status < 0)
6850 		return status;
6851 
6852 	return dir_create_entry_ref(device, inode, name, perms, false);
6853 }
6854 
6855 
6856 status_t
6857 _user_create_dir(int fd, const char *userPath, int perms)
6858 {
6859 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6860 	if (pathBuffer.InitCheck() != B_OK)
6861 		return B_NO_MEMORY;
6862 
6863 	char *path = pathBuffer.LockBuffer();
6864 
6865 	if (!IS_USER_ADDRESS(userPath)
6866 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6867 		return B_BAD_ADDRESS;
6868 
6869 	return dir_create(fd, path, perms, false);
6870 }
6871 
6872 
6873 status_t
6874 _user_remove_dir(int fd, const char *userPath)
6875 {
6876 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6877 	if (pathBuffer.InitCheck() != B_OK)
6878 		return B_NO_MEMORY;
6879 
6880 	char *path = pathBuffer.LockBuffer();
6881 
6882 	if (userPath != NULL) {
6883 		if (!IS_USER_ADDRESS(userPath)
6884 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6885 			return B_BAD_ADDRESS;
6886 	}
6887 
6888 	return dir_remove(fd, userPath ? path : NULL, false);
6889 }
6890 
6891 
6892 status_t
6893 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
6894 {
6895 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
6896 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
6897 		return B_NO_MEMORY;
6898 
6899 	size_t bufferSize;
6900 
6901 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
6902 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
6903 		return B_BAD_ADDRESS;
6904 
6905 	char *path = pathBuffer.LockBuffer();
6906 	char *buffer = linkBuffer.LockBuffer();
6907 
6908 	if (userPath) {
6909 		if (!IS_USER_ADDRESS(userPath)
6910 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6911 			return B_BAD_ADDRESS;
6912 
6913 		if (bufferSize > B_PATH_NAME_LENGTH)
6914 			bufferSize = B_PATH_NAME_LENGTH;
6915 	}
6916 
6917 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
6918 		&bufferSize, false);
6919 
6920 	// we also update the bufferSize in case of errors
6921 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
6922 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
6923 		return B_BAD_ADDRESS;
6924 
6925 	if (status < B_OK)
6926 		return status;
6927 
6928 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
6929 		return B_BAD_ADDRESS;
6930 
6931 	return B_OK;
6932 }
6933 
6934 
6935 status_t
6936 _user_write_link(const char *userPath, const char *userToPath)
6937 {
6938 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6939 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
6940 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6941 		return B_NO_MEMORY;
6942 
6943 	char *path = pathBuffer.LockBuffer();
6944 	char *toPath = toPathBuffer.LockBuffer();
6945 
6946 	if (!IS_USER_ADDRESS(userPath)
6947 		|| !IS_USER_ADDRESS(userToPath)
6948 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
6949 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
6950 		return B_BAD_ADDRESS;
6951 
6952 	status_t status = check_path(toPath);
6953 	if (status < B_OK)
6954 		return status;
6955 
6956 	return common_write_link(path, toPath, false);
6957 }
6958 
6959 
6960 status_t
6961 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
6962 	int mode)
6963 {
6964 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6965 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
6966 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6967 		return B_NO_MEMORY;
6968 
6969 	char *path = pathBuffer.LockBuffer();
6970 	char *toPath = toPathBuffer.LockBuffer();
6971 
6972 	if (!IS_USER_ADDRESS(userPath)
6973 		|| !IS_USER_ADDRESS(userToPath)
6974 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
6975 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
6976 		return B_BAD_ADDRESS;
6977 
6978 	status_t status = check_path(toPath);
6979 	if (status < B_OK)
6980 		return status;
6981 
6982 	return common_create_symlink(fd, path, toPath, mode, false);
6983 }
6984 
6985 
6986 status_t
6987 _user_create_link(const char *userPath, const char *userToPath)
6988 {
6989 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6990 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
6991 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6992 		return B_NO_MEMORY;
6993 
6994 	char *path = pathBuffer.LockBuffer();
6995 	char *toPath = toPathBuffer.LockBuffer();
6996 
6997 	if (!IS_USER_ADDRESS(userPath)
6998 		|| !IS_USER_ADDRESS(userToPath)
6999 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7000 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7001 		return B_BAD_ADDRESS;
7002 
7003 	status_t status = check_path(toPath);
7004 	if (status < B_OK)
7005 		return status;
7006 
7007 	return common_create_link(path, toPath, false);
7008 }
7009 
7010 
7011 status_t
7012 _user_unlink(int fd, const char *userPath)
7013 {
7014 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7015 	if (pathBuffer.InitCheck() != B_OK)
7016 		return B_NO_MEMORY;
7017 
7018 	char *path = pathBuffer.LockBuffer();
7019 
7020 	if (!IS_USER_ADDRESS(userPath)
7021 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7022 		return B_BAD_ADDRESS;
7023 
7024 	return common_unlink(fd, path, false);
7025 }
7026 
7027 
7028 status_t
7029 _user_rename(int oldFD, const char *userOldPath, int newFD,
7030 	const char *userNewPath)
7031 {
7032 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7033 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7034 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7035 		return B_NO_MEMORY;
7036 
7037 	char *oldPath = oldPathBuffer.LockBuffer();
7038 	char *newPath = newPathBuffer.LockBuffer();
7039 
7040 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7041 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7042 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7043 		return B_BAD_ADDRESS;
7044 
7045 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7046 }
7047 
7048 
7049 status_t
7050 _user_access(const char *userPath, int mode)
7051 {
7052 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7053 	if (pathBuffer.InitCheck() != B_OK)
7054 		return B_NO_MEMORY;
7055 
7056 	char *path = pathBuffer.LockBuffer();
7057 
7058 	if (!IS_USER_ADDRESS(userPath)
7059 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7060 		return B_BAD_ADDRESS;
7061 
7062 	return common_access(path, mode, false);
7063 }
7064 
7065 
7066 status_t
7067 _user_read_stat(int fd, const char *userPath, bool traverseLink,
7068 	struct stat *userStat, size_t statSize)
7069 {
7070 	struct stat stat;
7071 	status_t status;
7072 
7073 	if (statSize > sizeof(struct stat))
7074 		return B_BAD_VALUE;
7075 
7076 	if (!IS_USER_ADDRESS(userStat))
7077 		return B_BAD_ADDRESS;
7078 
7079 	if (userPath) {
7080 		// path given: get the stat of the node referred to by (fd, path)
7081 		if (!IS_USER_ADDRESS(userPath))
7082 			return B_BAD_ADDRESS;
7083 
7084 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7085 		if (pathBuffer.InitCheck() != B_OK)
7086 			return B_NO_MEMORY;
7087 
7088 		char *path = pathBuffer.LockBuffer();
7089 
7090 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7091 		if (length < B_OK)
7092 			return length;
7093 		if (length >= B_PATH_NAME_LENGTH)
7094 			return B_NAME_TOO_LONG;
7095 
7096 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
7097 	} else {
7098 		// no path given: get the FD and use the FD operation
7099 		struct file_descriptor *descriptor
7100 			= get_fd(get_current_io_context(false), fd);
7101 		if (descriptor == NULL)
7102 			return B_FILE_ERROR;
7103 
7104 		if (descriptor->ops->fd_read_stat)
7105 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
7106 		else
7107 			status = EOPNOTSUPP;
7108 
7109 		put_fd(descriptor);
7110 	}
7111 
7112 	if (status < B_OK)
7113 		return status;
7114 
7115 	return user_memcpy(userStat, &stat, statSize);
7116 }
7117 
7118 
7119 status_t
7120 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
7121 	const struct stat *userStat, size_t statSize, int statMask)
7122 {
7123 	if (statSize > sizeof(struct stat))
7124 		return B_BAD_VALUE;
7125 
7126 	struct stat stat;
7127 
7128 	if (!IS_USER_ADDRESS(userStat)
7129 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
7130 		return B_BAD_ADDRESS;
7131 
7132 	// clear additional stat fields
7133 	if (statSize < sizeof(struct stat))
7134 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
7135 
7136 	status_t status;
7137 
7138 	if (userPath) {
7139 		// path given: write the stat of the node referred to by (fd, path)
7140 		if (!IS_USER_ADDRESS(userPath))
7141 			return B_BAD_ADDRESS;
7142 
7143 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7144 		if (pathBuffer.InitCheck() != B_OK)
7145 			return B_NO_MEMORY;
7146 
7147 		char *path = pathBuffer.LockBuffer();
7148 
7149 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7150 		if (length < B_OK)
7151 			return length;
7152 		if (length >= B_PATH_NAME_LENGTH)
7153 			return B_NAME_TOO_LONG;
7154 
7155 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
7156 			statMask, false);
7157 	} else {
7158 		// no path given: get the FD and use the FD operation
7159 		struct file_descriptor *descriptor
7160 			= get_fd(get_current_io_context(false), fd);
7161 		if (descriptor == NULL)
7162 			return B_FILE_ERROR;
7163 
7164 		if (descriptor->ops->fd_write_stat)
7165 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
7166 		else
7167 			status = EOPNOTSUPP;
7168 
7169 		put_fd(descriptor);
7170 	}
7171 
7172 	return status;
7173 }
7174 
7175 
7176 int
7177 _user_open_attr_dir(int fd, const char *userPath)
7178 {
7179 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7180 	if (pathBuffer.InitCheck() != B_OK)
7181 		return B_NO_MEMORY;
7182 
7183 	char *path = pathBuffer.LockBuffer();
7184 
7185 	if (userPath != NULL) {
7186 		if (!IS_USER_ADDRESS(userPath)
7187 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7188 			return B_BAD_ADDRESS;
7189 	}
7190 
7191 	return attr_dir_open(fd, userPath ? path : NULL, false);
7192 }
7193 
7194 
7195 int
7196 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7197 {
7198 	char name[B_FILE_NAME_LENGTH];
7199 
7200 	if (!IS_USER_ADDRESS(userName)
7201 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7202 		return B_BAD_ADDRESS;
7203 
7204 	return attr_create(fd, name, type, openMode, false);
7205 }
7206 
7207 
7208 int
7209 _user_open_attr(int fd, const char *userName, int openMode)
7210 {
7211 	char name[B_FILE_NAME_LENGTH];
7212 
7213 	if (!IS_USER_ADDRESS(userName)
7214 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7215 		return B_BAD_ADDRESS;
7216 
7217 	return attr_open(fd, name, openMode, false);
7218 }
7219 
7220 
7221 status_t
7222 _user_remove_attr(int fd, const char *userName)
7223 {
7224 	char name[B_FILE_NAME_LENGTH];
7225 
7226 	if (!IS_USER_ADDRESS(userName)
7227 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7228 		return B_BAD_ADDRESS;
7229 
7230 	return attr_remove(fd, name, false);
7231 }
7232 
7233 
7234 status_t
7235 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
7236 {
7237 	if (!IS_USER_ADDRESS(userFromName)
7238 		|| !IS_USER_ADDRESS(userToName))
7239 		return B_BAD_ADDRESS;
7240 
7241 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
7242 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
7243 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
7244 		return B_NO_MEMORY;
7245 
7246 	char *fromName = fromNameBuffer.LockBuffer();
7247 	char *toName = toNameBuffer.LockBuffer();
7248 
7249 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
7250 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
7251 		return B_BAD_ADDRESS;
7252 
7253 	return attr_rename(fromFile, fromName, toFile, toName, false);
7254 }
7255 
7256 
7257 int
7258 _user_open_index_dir(dev_t device)
7259 {
7260 	return index_dir_open(device, false);
7261 }
7262 
7263 
7264 status_t
7265 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7266 {
7267 	char name[B_FILE_NAME_LENGTH];
7268 
7269 	if (!IS_USER_ADDRESS(userName)
7270 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7271 		return B_BAD_ADDRESS;
7272 
7273 	return index_create(device, name, type, flags, false);
7274 }
7275 
7276 
7277 status_t
7278 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
7279 {
7280 	char name[B_FILE_NAME_LENGTH];
7281 	struct stat stat;
7282 	status_t status;
7283 
7284 	if (!IS_USER_ADDRESS(userName)
7285 		|| !IS_USER_ADDRESS(userStat)
7286 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7287 		return B_BAD_ADDRESS;
7288 
7289 	status = index_name_read_stat(device, name, &stat, false);
7290 	if (status == B_OK) {
7291 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
7292 			return B_BAD_ADDRESS;
7293 	}
7294 
7295 	return status;
7296 }
7297 
7298 
7299 status_t
7300 _user_remove_index(dev_t device, const char *userName)
7301 {
7302 	char name[B_FILE_NAME_LENGTH];
7303 
7304 	if (!IS_USER_ADDRESS(userName)
7305 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7306 		return B_BAD_ADDRESS;
7307 
7308 	return index_remove(device, name, false);
7309 }
7310 
7311 
7312 status_t
7313 _user_getcwd(char *userBuffer, size_t size)
7314 {
7315 	if (!IS_USER_ADDRESS(userBuffer))
7316 		return B_BAD_ADDRESS;
7317 
7318 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7319 	if (pathBuffer.InitCheck() != B_OK)
7320 		return B_NO_MEMORY;
7321 
7322 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
7323 
7324 	if (size > B_PATH_NAME_LENGTH)
7325 		size = B_PATH_NAME_LENGTH;
7326 
7327 	char *path = pathBuffer.LockBuffer();
7328 
7329 	status_t status = get_cwd(path, size, false);
7330 	if (status < B_OK)
7331 		return status;
7332 
7333 	// Copy back the result
7334 	if (user_strlcpy(userBuffer, path, size) < B_OK)
7335 		return B_BAD_ADDRESS;
7336 
7337 	return status;
7338 }
7339 
7340 
7341 status_t
7342 _user_setcwd(int fd, const char *userPath)
7343 {
7344 	TRACE(("user_setcwd: path = %p\n", userPath));
7345 
7346 	KPath pathBuffer(B_PATH_NAME_LENGTH);
7347 	if (pathBuffer.InitCheck() != B_OK)
7348 		return B_NO_MEMORY;
7349 
7350 	char *path = pathBuffer.LockBuffer();
7351 
7352 	if (userPath != NULL) {
7353 		if (!IS_USER_ADDRESS(userPath)
7354 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7355 			return B_BAD_ADDRESS;
7356 	}
7357 
7358 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
7359 }
7360 
7361 
7362 int
7363 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
7364 	uint32 flags, port_id port, int32 token)
7365 {
7366 	char *query;
7367 
7368 	if (device < 0 || userQuery == NULL || queryLength == 0)
7369 		return B_BAD_VALUE;
7370 
7371 	// this is a safety restriction
7372 	if (queryLength >= 65536)
7373 		return B_NAME_TOO_LONG;
7374 
7375 	query = (char *)malloc(queryLength + 1);
7376 	if (query == NULL)
7377 		return B_NO_MEMORY;
7378 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
7379 		free(query);
7380 		return B_BAD_ADDRESS;
7381 	}
7382 
7383 	int fd = query_open(device, query, flags, port, token, false);
7384 
7385 	free(query);
7386 	return fd;
7387 }
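
// Example (illustrative only, with userland buffers): opening a live query.
// The predicate shown is only an illustration of the BFS-style query
// syntax; with B_LIVE_QUERY set, later updates are delivered to the given
// port/token pair.
//
//	const char *predicate = "name==\"*.cpp\"";
//	int fd = _user_open_query(device, predicate, strlen(predicate),
//		B_LIVE_QUERY, port, token);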
7388