/*
 * Copyright 2002-2006, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

/* Virtual File System and File System Interface Layer */

#include <OS.h>
#include <StorageDefs.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>

#include <block_cache.h>
#include <fd.h>
#include <file_cache.h>
#include <khash.h>
#include <KPath.h>
#include <lock.h>
#include <syscalls.h>
#include <vfs.h>
#include <vm.h>
#include <vm_cache.h>
#include <vm_low_memory.h>

#include <boot/kernel_args.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fs/node_monitor.h>
#include <util/kernel_cpp.h>

#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/resource.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>

//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS

#define MAX_SYM_LINKS SYMLINKS_MAX

const static uint32 kMaxUnusedVnodes = 8192;
	// This is the maximum number of unused vnodes that the system
	// will keep around.
	// It may be chosen with respect to the available memory or enhanced
	// by some timestamp/frequency heuristic.

struct vnode {
	struct vnode	*next;
	vm_cache_ref	*cache;
	mount_id		device;
	list_link		mount_link;
	list_link		unused_link;
	vnode_id		id;
	fs_vnode		private_node;
	struct fs_mount	*mount;
	struct vnode	*covered_by;
	int32			ref_count;
	uint8			remove : 1;
	uint8			busy : 1;
	uint8			unpublished : 1;
	struct advisory_locking	*advisory_locking;
};

struct vnode_hash_key {
	mount_id	device;
	vnode_id	vnode;
};

#define FS_CALL(vnode, op) (vnode->mount->fs->op)
#define FS_MOUNT_CALL(mount, op) (mount->fs->op)
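
// A call through one of these macros looks like the lookup invocation in
// vnode_path_to_vnode() below:
//
//	status = FS_CALL(vnode, lookup)(vnode->mount->cookie,
//		vnode->private_node, path, &vnodeID, &type);
//
// The macro only selects the hook; the caller still passes the volume
// cookie and the FS-private node handle explicitly.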

struct fs_mount {
	struct fs_mount	*next;
	file_system_module_info *fs;
	mount_id		id;
	void			*cookie;
	char			*device_name;
	char			*fs_name;
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;
	struct vnode	*covers_vnode;
	KPartition		*partition;
	struct list		vnodes;
	bool			unmounting;
	bool			owns_file_device;
};

struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	struct list		locks;
};

struct advisory_lock {
	list_link		link;
	team_id			team;
	off_t			offset;
	off_t			length;
	bool			shared;
};
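
// An advisory_lock entry mirrors a normalized POSIX struct flock:
// acquire_advisory_lock() below copies l_start/l_len into offset/length
// and derives "shared" from F_RDLCK.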

static mutex sFileSystemsMutex;

/**	\brief Guards sMountsTable.
 *
 *	The holder is allowed read/write access to sMountsTable.
 *	Manipulation of the fs_mount structures themselves
 *	(and their destruction) requires different locks though.
 */
static mutex sMountMutex;

/**	\brief Guards mount/unmount operations.
 *
 *	fs_mount() and fs_unmount() hold the lock during their whole operation.
 *	That is, locking the lock ensures that no FS is mounted/unmounted. In
 *	particular this means that
 *	- sMountsTable will not be modified,
 *	- the fields immutable after initialization of the fs_mount structures in
 *	  sMountsTable will not be modified,
 *	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
 *
 *	The thread trying to lock the lock must not hold sVnodeMutex or
 *	sMountMutex.
 */
static recursive_lock sMountOpLock;

/**	\brief Guards sVnodeTable.
 *
 *	The holder is allowed read/write access to sVnodeTable and to
 *	any unbusy vnode in that table, save
 *	for the immutable fields (device, id, private_node, mount), to which
 *	only read-only access is allowed, and for the field covered_by, which is
 *	guarded by sMountOpLock.
 *
 *	The thread trying to lock the mutex must not hold sMountMutex.
 */
static mutex sVnodeMutex;

#define VNODE_HASH_TABLE_SIZE 1024
static hash_table *sVnodeTable;
static list sUnusedVnodeList;
static uint32 sUnusedVnodes = 0;
static struct vnode *sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static hash_table *sMountsTable;
static mount_id sNextMountID = 1;

mode_t __gUmask = 022;

/* function declarations */

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
static void file_free_fd(struct file_descriptor *);
static status_t file_close(struct file_descriptor *);
static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
	struct select_sync *sync);
static status_t file_deselect(struct file_descriptor *, uint8 event,
	struct select_sync *sync);
static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t dir_rewind(struct file_descriptor *);
static void dir_free_fd(struct file_descriptor *);
static status_t dir_close(struct file_descriptor *);
static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t attr_dir_rewind(struct file_descriptor *);
static void attr_dir_free_fd(struct file_descriptor *);
static status_t attr_dir_close(struct file_descriptor *);
static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
static void attr_free_fd(struct file_descriptor *);
static status_t attr_close(struct file_descriptor *);
static status_t attr_read_stat(struct file_descriptor *, struct stat *);
static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t index_dir_rewind(struct file_descriptor *);
static void index_dir_free_fd(struct file_descriptor *);
static status_t index_dir_close(struct file_descriptor *);
static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t query_rewind(struct file_descriptor *);
static void query_free_fd(struct file_descriptor *);
static status_t query_close(struct file_descriptor *);

static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
static status_t common_read_stat(struct file_descriptor *, struct stat *);
static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);

static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
	bool traverseLeafLink, int count, struct vnode **_vnode, vnode_id *_parentID, int *_type);
static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
	struct vnode **_vnode, vnode_id *_parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode *vnode);
static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
static inline void put_vnode(struct vnode *vnode);

static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


// VNodePutter
class VNodePutter {
public:
	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode *vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode *Detach()
	{
		struct vnode *vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode *fVNode;
};
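
// A minimal VNodePutter usage sketch (do_something() is a hypothetical
// helper; the real uses further down in this file follow the same shape):
//
//	VNodePutter putter(vnode);		// putter now owns one reference
//	if (status < B_OK)
//		return status;				// ~VNodePutter() calls put_vnode()
//	return do_something(putter.Detach());	// or keep the reference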


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
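
// FDCloser does the same for file descriptors and routes the close through
// either the kernel or the userland FD table. A sketch (open_some_fd() is
// a hypothetical helper):
//
//	FDCloser fdCloser(open_some_fd(), kernel);
//	if (status < B_OK)
//		return status;			// ~FDCloser() closes the FD
//	return fdCloser.Detach();	// hand the FD to the caller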


static int
mount_compare(void *_m, const void *_key)
{
	struct fs_mount *mount = (fs_mount *)_m;
	const mount_id *id = (mount_id *)_key;

	if (mount->id == *id)
		return 0;

	return -1;
}


static uint32
mount_hash(void *_m, const void *_key, uint32 range)
{
	struct fs_mount *mount = (fs_mount *)_m;
	const mount_id *id = (mount_id *)_key;

	if (mount)
		return mount->id % range;

	return (uint32)*id % range;
}
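
// Both hash callbacks follow the kernel hash_table convention: they are
// handed either an element (the key parameter is then ignored) or a bare
// key (the element is NULL), and must hash/compare consistently in both
// cases so that lookups find the stored elements.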


/** Finds the mounted device (the fs_mount structure) with the given ID.
 *	Note, you must hold the sMountMutex lock when you call this function.
 */

static struct fs_mount *
find_mount(mount_id id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}


static status_t
get_mount(mount_id id, struct fs_mount **_mount)
{
	struct fs_mount *mount;

	mutex_lock(&sMountMutex);

	mount = find_mount(id);
	if (mount) {
		// ToDo: the volume is locked (against removal) by locking
		//	its root node - investigate if that's a good idea
		if (mount->root_vnode)
			inc_vnode_ref_count(mount->root_vnode);
		else {
			// might have been called during a mount operation in which
			// case the root node may still be NULL
			mount = NULL;
		}
	}

	mutex_unlock(&sMountMutex);

	if (mount == NULL)
		return B_BUSY;

	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount *mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}


/**	Tries to open the specified file system module.
 *	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
 *	Returns a pointer to the file system module interface, or NULL if it
 *	could not open the module.
 */

static file_system_module_info *
get_file_system(const char *fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info *info;
	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
		return NULL;

	return info;
}


/**	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
 *	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
 *	The name is allocated for you, and you have to free() it when you're
 *	done with it.
 *	Returns NULL if the required memory is not available.
 */

static char *
get_file_system_name(const char *fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name doesn't have the "file_systems/" prefix, so it
		// already is the plain file system name
		return strdup(fsName);
	}

	fsName += length;
	const char *end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char *name = (char *)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
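
// For example, both "bfs" and "file_systems/bfs/v1" yield a malloc()ed
// "bfs" here; a name with an unexpected layout is duplicated unchanged.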


static int
vnode_compare(void *_vnode, const void *_key)
{
	struct vnode *vnode = (struct vnode *)_vnode;
	const struct vnode_hash_key *key = (vnode_hash_key *)_key;

	if (vnode->device == key->device && vnode->id == key->vnode)
		return 0;

	return -1;
}


static uint32
vnode_hash(void *_vnode, const void *_key, uint32 range)
{
	struct vnode *vnode = (struct vnode *)_vnode;
	const struct vnode_hash_key *key = (vnode_hash_key *)_key;

#define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	if (vnode != NULL)
		return VHASH(vnode->device, vnode->id) % range;

	return VHASH(key->device, key->vnode) % range;

#undef VHASH
}


static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}


static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}


static status_t
create_new_vnode(struct vnode **_vnode, mount_id mountID, vnode_id vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}


/**	Frees the vnode and all resources it has acquired, and removes
 *	it from the vnode hash as well as from its mount structure.
 *	Will also make sure that any cache modifications are written back.
 */

static void
free_vnode(struct vnode *vnode, bool reenter)
{
	ASSERT(vnode->ref_count == 0 && vnode->busy);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (vnode->cache && !vnode->remove)
		vm_cache_write_modified(vnode->cache);

	if (!vnode->unpublished) {
		if (vnode->remove)
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
		else
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
	}

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/**	\brief Decrements the reference counter of the given vnode and deletes it,
 *	if the counter dropped to 0.
 *
 *	The caller must, of course, own a reference to the vnode to call this
 *	function.
 *	The caller must not hold the sVnodeMutex or the sMountMutex.
 *
 *	\param vnode the vnode.
 *	\param reenter \c true, if this function is called (indirectly) from within
 *		   a file system.
 *	\return \c B_OK, if everything went fine, an error code otherwise.
 */

static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	int32 oldRefCount;

	mutex_lock(&sVnodeMutex);

	if (vnode->busy)
		panic("dec_vnode_ref_count called on vnode that was busy! vnode %p\n", vnode);

	oldRefCount = atomic_add(&vnode->ref_count, -1);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes
				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		mutex_unlock(&sVnodeMutex);

		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}


/**	\brief Increments the reference counter of the given vnode.
 *
 *	The caller must either already have a reference to the vnode or hold
 *	the sVnodeMutex.
 *
 *	\param vnode the vnode.
 */

static void
inc_vnode_ref_count(struct vnode *vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}


/**	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
 *
 *	The caller must hold the sVnodeMutex.
 *
 *	\param mountID the mount ID.
 *	\param vnodeID the node ID.
 *
 *	\return The vnode structure, if it was found in the hash table, \c NULL
 *			otherwise.
 */

static struct vnode *
lookup_vnode(mount_id mountID, vnode_id vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode *)hash_lookup(sVnodeTable, &key);
}


/**	\brief Retrieves a vnode for a given mount ID, node ID pair.
 *
 *	If the node is not yet in memory, it will be loaded.
 *
 *	The caller must not hold the sVnodeMutex or the sMountMutex.
 *
 *	\param mountID the mount ID.
 *	\param vnodeID the node ID.
 *	\param _vnode Pointer to a vnode* variable into which the pointer to the
 *		   retrieved vnode structure shall be written.
 *	\param reenter \c true, if this function is called (indirectly) from within
 *		   a file system.
 *	\return \c B_OK, if everything went fine, an error code otherwise.
 */

static status_t
get_vnode(mount_id mountID, vnode_id vnodeID, struct vnode **_vnode, int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

restart:
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		// ToDo: this is an endless loop if the vnode is not
		//	becoming unbusy anymore (for whatever reason)
		mutex_unlock(&sVnodeMutex);
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
				// keep the unused vnodes counter in sync with the list
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID, &vnode->private_node, reenter);
		if (status < B_OK || vnode->private_node == NULL) {
			if (status == B_NO_ERROR)
				status = B_BAD_VALUE;
		}
		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}


/**	\brief Decrements the reference counter of the given vnode and deletes it,
 *	if the counter dropped to 0.
 *
 *	The caller must, of course, own a reference to the vnode to call this
 *	function.
 *	The caller must not hold the sVnodeMutex or the sMountMutex.
 *
 *	\param vnode the vnode.
 */

static inline void
put_vnode(struct vnode *vnode)
{
	dec_vnode_ref_count(vnode, false);
}


static void
vnode_low_memory_handler(void */*data*/, int32 level)
{
	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));

	int32 count = 1;
	switch (level) {
		case B_NO_LOW_MEMORY:
			return;
		case B_LOW_MEMORY_NOTE:
			count = sUnusedVnodes / 100;
			break;
		case B_LOW_MEMORY_WARNING:
			count = sUnusedVnodes / 10;
			break;
		case B_LOW_MEMORY_CRITICAL:
			count = sUnusedVnodes;
			break;
	}

	for (int32 i = 0; i < count; i++) {
		mutex_lock(&sVnodeMutex);

		struct vnode *vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}
		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));

		vnode->busy = true;
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		free_vnode(vnode, false);
	}
}


static status_t
create_advisory_locking(struct vnode *vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	struct advisory_locking *locking = (struct advisory_locking *)malloc(sizeof(struct advisory_locking));
	if (locking == NULL)
		return B_NO_MEMORY;

	status_t status;

	locking->wait_sem = create_sem(0, "advisory lock");
	if (locking->wait_sem < B_OK) {
		status = locking->wait_sem;
		goto err1;
	}

	locking->lock = create_sem(1, "advisory locking");
	if (locking->lock < B_OK) {
		status = locking->lock;
		goto err2;
	}

	list_init(&locking->locks);
	vnode->advisory_locking = locking;
	return B_OK;

err2:
	delete_sem(locking->wait_sem);
err1:
	free(locking);
	return status;
}


static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}


static struct advisory_locking *
get_advisory_locking(struct vnode *vnode)
{
	mutex_lock(&sVnodeMutex);

	struct advisory_locking *locking = vnode->advisory_locking;
	if (locking != NULL)
		acquire_sem(locking->lock);

	mutex_unlock(&sVnodeMutex);
	return locking;
}


static status_t
get_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	return B_ERROR;
}


/**	Removes the specified lock, or all locks of the calling team
 *	if \a flock is NULL.
 */

static status_t
release_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return flock != NULL ? B_BAD_VALUE : B_OK;

	team_id team = team_get_current_team_id();

	// find the matching lock entry

	status_t status = B_BAD_VALUE;
	struct advisory_lock *lock = NULL;
	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
		if (lock->team == team && (flock == NULL
			|| (lock->offset == flock->l_start
				&& lock->length == flock->l_len))) {
			// we found our lock, free it
			list_remove_item(&locking->locks, lock);
			free(lock);
			status = B_OK;
			break;
		}
	}

	bool removeLocking = list_is_empty(&locking->locks);
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (status < B_OK)
		return status;

	if (removeLocking) {
		// we can remove the whole advisory locking structure; it's no longer used
		mutex_lock(&sVnodeMutex);
		locking = vnode->advisory_locking;
		if (locking != NULL) {
			acquire_sem(locking->lock);

			// the locking could have been changed in the meantime
			if (list_is_empty(&locking->locks))
				vnode->advisory_locking = NULL;
			else {
				removeLocking = false;
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		} else {
			// someone else detached the structure in the meantime
			removeLocking = false;
		}

		mutex_unlock(&sVnodeMutex);
	}
	if (removeLocking) {
		// we've detached the locking from the vnode, so we can safely delete it
		delete_sem(locking->lock);
		delete_sem(locking->wait_sem);
		free(locking);
	}

	return B_OK;
}


static status_t
acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	sem_id waitForLock = -1;

	if (locking != NULL) {
		// test for collisions
		struct advisory_lock *lock = NULL;
		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
			if (lock->offset <= flock->l_start + flock->l_len
				&& lock->offset + lock->length > flock->l_start) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = B_PERMISSION_DENIED;
		else {
			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install the new lock

	mutex_lock(&sVnodeMutex);

	locking = vnode->advisory_locking;
	if (locking == NULL) {
		status = create_advisory_locking(vnode);
		locking = vnode->advisory_locking;
	}

	if (locking != NULL)
		acquire_sem(locking->lock);

	mutex_unlock(&sVnodeMutex);

	if (status < B_OK)
		return status;

	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
	if (lock == NULL) {
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	// values must already be normalized when getting here
	lock->offset = flock->l_start;
	lock->length = flock->l_len;
	lock->shared = shared;

	list_add_item(&locking->locks, lock);
	release_sem(locking->lock);

	return status;
}


static status_t
normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode *vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (FS_CALL(vnode, read_stat) == NULL)
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
			if (status < B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
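
// A worked example: with SEEK_CUR, descriptor->pos == 100, l_start == -150
// and l_len == 100, l_start first becomes -50 and is then clamped to 0, so
// the normalized lock covers [0, 100). An l_len of 0 means "to the end of
// the file" and becomes OFF_MAX.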


/**	\brief Resolves a mount point vnode to the volume root vnode it is covered
 *		   by.
 *
 *	Given an arbitrary vnode, the function checks whether the node is covered
 *	by the root of a volume. If it is, the function obtains a reference to the
 *	volume root node and returns it.
 *
 *	\param vnode The vnode in question.
 *	\return The volume root vnode the vnode is covered by, if it is indeed
 *			a mount point, or \c NULL otherwise.
 */

static struct vnode *
resolve_mount_point_to_volume_root(struct vnode *vnode)
{
	if (!vnode)
		return NULL;

	struct vnode *volumeRoot = NULL;

	recursive_lock_lock(&sMountOpLock);
	if (vnode->covered_by) {
		volumeRoot = vnode->covered_by;
		inc_vnode_ref_count(volumeRoot);
	}
	recursive_lock_unlock(&sMountOpLock);

	return volumeRoot;
}


/**	\brief Resolves a mount point vnode to the volume root vnode it is covered
 *		   by.
 *
 *	Given an arbitrary vnode (identified by mount and node ID), the function
 *	checks whether the node is covered by the root of a volume. If it is, the
 *	function returns the mount and node ID of the volume root node. Otherwise
 *	it simply returns the supplied mount and node ID.
 *
 *	In case of error (e.g. the supplied node could not be found) the variables
 *	for storing the resolved mount and node ID remain untouched and an error
 *	code is returned.
 *
 *	\param mountID The mount ID of the vnode in question.
 *	\param nodeID The node ID of the vnode in question.
 *	\param resolvedMountID Pointer to storage for the resolved mount ID.
 *	\param resolvedNodeID Pointer to storage for the resolved node ID.
 *	\return
 *	- \c B_OK, if everything went fine,
 *	- another error code, if something went wrong.
 */

status_t
resolve_mount_point_to_volume_root(mount_id mountID, vnode_id nodeID,
	mount_id *resolvedMountID, vnode_id *resolvedNodeID)
{
	// get the node
	struct vnode *node;
	status_t error = get_vnode(mountID, nodeID, &node, false);
	if (error != B_OK)
		return error;

	// resolve the node
	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
	if (resolvedNode) {
		put_vnode(node);
		node = resolvedNode;
	}

	// set the return values
	*resolvedMountID = node->device;
	*resolvedNodeID = node->id;

	put_vnode(node);

	return B_OK;
}


/**	\brief Resolves a volume root vnode to the underlying mount point vnode.
 *
 *	Given an arbitrary vnode, the function checks whether the node is the
 *	root of a volume. If it is (and if it is not "/"), the function obtains
 *	a reference to the underlying mount point node and returns it.
 *
 *	\param vnode The vnode in question.
 *	\return The mount point vnode the vnode covers, if it is indeed a volume
 *			root and not "/", or \c NULL otherwise.
 */

static struct vnode *
resolve_volume_root_to_mount_point(struct vnode *vnode)
{
	if (!vnode)
		return NULL;

	struct vnode *mountPoint = NULL;

	recursive_lock_lock(&sMountOpLock);
	struct fs_mount *mount = vnode->mount;
	if (vnode == mount->root_vnode && mount->covers_vnode) {
		mountPoint = mount->covers_vnode;
		inc_vnode_ref_count(mountPoint);
	}
	recursive_lock_unlock(&sMountOpLock);

	return mountPoint;
}


/**	\brief Gets the directory path and leaf name for a given path.
 *
 *	The supplied \a path is transformed to refer to the directory part of
 *	the entry identified by the original path, and into the buffer \a filename
 *	the leaf name of the original entry is written.
 *	Neither the returned path nor the leaf name can be expected to be
 *	canonical.
 *
 *	\param path The path to be analyzed. Must be able to store at least one
 *		   additional character.
 *	\param filename The buffer into which the leaf name will be written.
 *		   Must be of size B_FILE_NAME_LENGTH at least.
 *	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
 *		   name is longer than \c B_FILE_NAME_LENGTH.
 */

static status_t
get_dir_path_and_leaf(char *path, char *filename)
{
	char *p = strrchr(path, '/');
		// '/' are not allowed in file names!

	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));

	if (!p) {
		// this path is single segment with no '/' in it
		// ex. "foo"
		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
		strcpy(path, ".");
	} else {
		p++;
		if (*p == '\0') {
			// special case: the path ends in '/'
			strcpy(filename, ".");
		} else {
			// normal leaf: replace the leaf portion of the path with a '.'
			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
				>= B_FILE_NAME_LENGTH) {
				return B_NAME_TOO_LONG;
			}
		}
		p[0] = '.';
		p[1] = '\0';
	}
	return B_OK;
}
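
// Examples of the transformation performed above:
//	"a/b/c" -> path "a/b/.", filename "c"
//	"a/b/"  -> path "a/b/.", filename "."
//	"foo"   -> path ".",     filename "foo"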


static status_t
entry_ref_to_vnode(mount_id mountID, vnode_id directoryID, const char *name, struct vnode **_vnode)
{
	char clonedName[B_FILE_NAME_LENGTH + 1];
	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the directory vnode and let vnode_path_to_vnode() do the rest
	struct vnode *directory;

	status_t status = get_vnode(mountID, directoryID, &directory, false);
	if (status < 0)
		return status;

	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL, NULL);
}


/**	Returns the vnode for the relative path starting at the specified \a vnode.
 *	\a path must not be NULL.
 *	If it returns successfully, \a path contains the name of the last path
 *	component.
 */
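
// The loop below consumes one path component per iteration: it splits the
// component off at the next '/', collapses runs of slashes, gives ".." at
// a mount root special treatment, asks the file system to look up the
// component, and follows symlinks and mount points along the way. For
// instance, "a//b" results in the two lookups "a" and "b".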

static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, vnode_id *_parentID, int *_type)
{
	status_t status = 0;
	vnode_id lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL)
		return B_BAD_VALUE;

	while (true) {
		struct vnode *nextVnode;
		vnode_id vnodeID;
		char *nextPath;

		TRACE(("vnode_path_to_vnode: top of loop. path = %p, '%s'\n", path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying file system
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the file system to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Look up the vnode: the call to fs_lookup() should have caused a get_vnode()
		// to be called from inside the file system, thus the vnode would have to be in
		// the list and its ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			TRACE(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			if (count + 1 > MAX_SYM_LINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			status = FS_CALL(nextVnode, read_link)(nextVnode->mount->cookie,
				nextVnode->private_node, buffer, &bufferSize);
			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);
			}
			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the ref_count
				// of the vnode, no matter if we succeeded or not

			status = vnode_path_to_vnode(vnode, path, traverseLeafLink, count + 1,
				&nextVnode, &lastParentID, _type);

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}


static status_t
path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
	vnode_id *_parentID, bool kernel)
{
	struct vnode *start = NULL;

	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));

	if (!path)
		return B_BAD_VALUE;

	// figure out if we need to start at root or at cwd
	if (*path == '/') {
		if (sRoot == NULL) {
			// we're a bit early, aren't we?
			return B_ERROR;
		}

		while (*++path == '/')
			;
		start = sRoot;
		inc_vnode_ref_count(start);
	} else {
		struct io_context *context = get_current_io_context(kernel);

		mutex_lock(&context->io_mutex);
		start = context->cwd;
		if (start != NULL)
			inc_vnode_ref_count(start);
		mutex_unlock(&context->io_mutex);

		if (start == NULL)
			return B_ERROR;
	}

	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
}


/** Returns the vnode in the next to last segment of the path, and returns
 *	the last portion in filename.
 *	The path buffer must be able to store at least one additional character.
 */

static status_t
path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
{
	status_t status = get_dir_path_and_leaf(path, filename);
	if (status != B_OK)
		return status;

	return path_to_vnode(path, true, _vnode, NULL, kernel);
}


/**	\brief Retrieves the directory vnode and the leaf name of an entry referred
 *		   to by a FD + path pair.
 *
 *	\a path must be given in either case. \a fd might be omitted, in which
 *	case \a path is either an absolute path or one relative to the current
 *	directory. If both are supplied and \a path is relative, it is reckoned off
 *	of the directory referred to by \a fd. If \a path is absolute \a fd is
 *	ignored.
 *
 *	The caller has the responsibility to call put_vnode() on the returned
 *	directory vnode.
 *
 *	\param fd The FD. May be < 0.
 *	\param path The absolute or relative path. Must not be \c NULL. The buffer
 *	       is modified by this function. It must have at least room for a
 *	       string one character longer than the path it contains.
 *	\param _vnode A pointer to a variable the directory vnode shall be written
 *		   into.
 *	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
 *		   the leaf name of the specified entry will be written.
 *	\param kernel \c true, if invoked from inside the kernel, \c false if
 *		   invoked from userland.
 *	\return \c B_OK, if everything went fine, another error code otherwise.
 */

static status_t
fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
	char *filename, bool kernel)
{
	if (!path)
		return B_BAD_VALUE;
	if (fd < 0)
		return path_to_dir_vnode(path, _vnode, filename, kernel);

	status_t status = get_dir_path_and_leaf(path, filename);
	if (status != B_OK)
		return status;

	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
}


static status_t
get_vnode_name(struct vnode *vnode, struct vnode *parent,
	char *name, size_t nameSize)
{
	VNodePutter vnodePutter;

	// See if vnode is the root of a mount and move to the covered
	// vnode so we get the underlying file system
	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
		vnode = vnode->mount->covers_vnode;
		inc_vnode_ref_count(vnode);
		vnodePutter.SetTo(vnode);
	}

	if (FS_CALL(vnode, get_vnode_name)) {
		// The FS supports getting the name of a vnode.
		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
			vnode->private_node, name, nameSize);
	}

	// The FS doesn't support getting the name of a vnode. So we search the
	// parent directory for the vnode, if the caller let us.

	if (parent == NULL)
		return EOPNOTSUPP;

	fs_cookie cookie;

	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
		parent->private_node, &cookie);
	if (status >= B_OK) {
		char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		struct dirent *dirent = (struct dirent *)buffer;
		while (true) {
			uint32 num = 1;
			status = dir_read(parent, cookie, dirent, sizeof(buffer), &num);
			if (status < B_OK)
				break;

			if (vnode->id == dirent->d_ino) {
				// found correct entry!
				if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
					status = B_BUFFER_OVERFLOW;
				break;
			}
		}
		// the cookie belongs to the parent directory we iterated over
		FS_CALL(parent, close_dir)(parent->mount->cookie, parent->private_node, cookie);
	}
	return status;
}


/**	Gets the full path to a given directory vnode.
 *	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
 *	file system doesn't support this call, it will fall back to iterating
 *	through the parent directory to get the name of the child.
 *
 *	To protect against circular loops, it supports a maximum tree depth
 *	of 256 levels.
 *
 *	Note that the path may no longer be valid by the time this function
 *	returns! It doesn't use any locking to keep the returned path correct,
 *	as paths aren't safe anyway: the path to a file can change at any time.
 *
 *	It might be a good idea, though, to check if the returned path exists
 *	in the calling function (it's not done here because of efficiency)
 */

static status_t
dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
{
	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));

	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
	char path[B_PATH_NAME_LENGTH];
	int32 insert = sizeof(path);
	int32 maxLevel = 256;
	int32 length;
	status_t status;

	if (vnode == NULL || buffer == NULL)
		return EINVAL;

	// we don't use get_vnode() here because this call is more
	// efficient and does all we need from get_vnode()
	inc_vnode_ref_count(vnode);

	// resolve a volume root to its mount point
	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
	if (mountPoint) {
		put_vnode(vnode);
		vnode = mountPoint;
	}

	path[--insert] = '\0';

	while (true) {
		// the name buffer is also used for fs_read_dir()
		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		char *name = &((struct dirent *)nameBuffer)->d_name[0];
		struct vnode *parentVnode;
		vnode_id parentID, id;
		int type;

		// lookup the parent vnode
		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..", &parentID, &type);
		if (status < B_OK)
			goto out;

		mutex_lock(&sVnodeMutex);
		parentVnode = lookup_vnode(vnode->device, parentID);
		mutex_unlock(&sVnodeMutex);

		if (parentVnode == NULL) {
			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n", vnode->device, parentID);
			status = B_ENTRY_NOT_FOUND;
			goto out;
		}

		// resolve a volume root to its mount point
		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
		if (mountPoint) {
			put_vnode(parentVnode);
			parentVnode = mountPoint;
			parentID = parentVnode->id;
		}

		bool hitRoot = (parentVnode == vnode);

		// Does the file system support getting the name of a vnode?
		// If so, get it here...
		if (status == B_OK && FS_CALL(vnode, get_vnode_name))
			status = FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie, vnode->private_node, name, B_FILE_NAME_LENGTH);

		// ... if not, find it out later (by iterating through
		// the parent directory, searching for the id)
		id = vnode->id;

		// release the current vnode, we only need its parent from now on
		put_vnode(vnode);
		vnode = parentVnode;

		if (status < B_OK)
			goto out;

		// ToDo: add an explicit check for loops in about 10 levels to do
		// real loop detection

		// don't go deeper than 'maxLevel' to prevent circular loops
		if (maxLevel-- < 0) {
			status = ELOOP;
			goto out;
		}

		if (hitRoot) {
			// we have reached "/", which means we have constructed the full
			// path
			break;
		}

		if (!FS_CALL(vnode, get_vnode_name)) {
			// If we haven't got the vnode's name yet, we have to search for it
			// in the parent directory now
			fs_cookie cookie;

			status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
			if (status >= B_OK) {
				struct dirent *dirent = (struct dirent *)nameBuffer;
				while (true) {
					uint32 num = 1;
					status = dir_read(vnode, cookie, dirent, sizeof(nameBuffer),
						&num);

					if (status < B_OK)
						break;

					if (id == dirent->d_ino)
						// found correct entry!
						break;
				}
				FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
			}

			if (status < B_OK)
				goto out;
		}

		// add the name in front of the current path
		name[B_FILE_NAME_LENGTH - 1] = '\0';
		length = strlen(name);
		insert -= length;
		if (insert <= 0) {
			status = ENOBUFS;
			goto out;
		}
		memcpy(path + insert, name, length);
		path[--insert] = '/';
	}

	// the root dir will result in an empty path: fix it
	if (path[insert] == '\0')
		path[--insert] = '/';

	TRACE(("  path is: %s\n", path + insert));

	// copy the path to the output buffer
	length = sizeof(path) - insert;
	if (length <= (int)bufferSize)
		memcpy(buffer, path + insert, length);
	else
		status = ENOBUFS;

out:
	put_vnode(vnode);
	return status;
}


/**	Checks the length of every path component, and adds a '.'
 *	if the path ends in a slash.
 *	The given path buffer must be able to store at least one
 *	additional character.
 */

static status_t
check_path(char *to)
{
	int32 length = 0;

	// check length of every path component

	while (*to) {
		char *begin;
		if (*to == '/')
			to++, length++;

		begin = to;
		while (*to != '/' && *to)
			to++, length++;

		if (to - begin > B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
	}

	if (length == 0)
		return B_ENTRY_NOT_FOUND;

	// complete path if there is a slash at the end

	if (*(to - 1) == '/') {
		if (length > B_PATH_NAME_LENGTH - 2)
			return B_NAME_TOO_LONG;

		to[0] = '.';
		to[1] = '\0';
	}

	return B_OK;
}
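
// For example, check_path() turns "/boot/home/" into "/boot/home/." and
// leaves "/boot/home" alone; an empty path yields B_ENTRY_NOT_FOUND.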


static struct file_descriptor *
get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
{
	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return NULL;

	if (fd_vnode(descriptor) == NULL) {
		put_fd(descriptor);
		return NULL;
	}

	// ToDo: when we can close a file descriptor at any point, investigate
	//	if this is still valid to do (accessing the vnode without ref_count
	//	or locking)
	*_vnode = descriptor->u.vnode;
	return descriptor;
}


static struct vnode *
get_vnode_from_fd(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd(get_current_io_context(kernel), fd);
	if (descriptor == NULL)
		return NULL;

	vnode = fd_vnode(descriptor);
	if (vnode != NULL)
		inc_vnode_ref_count(vnode);

	put_fd(descriptor);
	return vnode;
}

1932 
1933 /**	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
1934  *	only the path will be considered. In this case, the \a path must not be
1935  *	NULL.
1936  *	If \a fd is a valid file descriptor, \a path may be NULL for directories,
1937  *	and should be NULL for files.
1938  */
1939 
1940 static status_t
1941 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
1942 	struct vnode **_vnode, vnode_id *_parentID, bool kernel)
1943 {
1944 	if (fd < 0 && !path)
1945 		return B_BAD_VALUE;
1946 
1947 	if (fd < 0 || (path != NULL && path[0] == '/')) {
1948 		// no FD or absolute path
1949 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
1950 	}
1951 
1952 	// FD only, or FD + relative path
1953 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
1954 	if (!vnode)
1955 		return B_FILE_ERROR;
1956 
1957 	if (path != NULL) {
1958 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
1959 			_vnode, _parentID, NULL);
1960 	}
1961 
1962 	// there is no relative path to take into account
1963 
1964 	*_vnode = vnode;
1965 	if (_parentID)
1966 		*_parentID = -1;
1967 
1968 	return B_OK;
1969 }
1970 
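// Illustrative call patterns (hypothetical variables, error handling
// omitted) for the three cases handled by fd_and_path_to_vnode():
//
//	struct vnode *vnode;
//	// absolute path -- the FD is ignored
//	fd_and_path_to_vnode(-1, absolutePath, true, &vnode, NULL, kernel);
//	// FD only -- resolves to the vnode behind the descriptor
//	fd_and_path_to_vnode(fd, NULL, true, &vnode, NULL, kernel);
//	// FD plus relative path -- resolved relative to the FD's directory
//	fd_and_path_to_vnode(fd, relativePath, true, &vnode, NULL, kernel);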
1971 
1972 static int
1973 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
1974 	fs_cookie cookie, int openMode, bool kernel)
1975 {
1976 	struct file_descriptor *descriptor;
1977 	int fd;
1978 
1979 	descriptor = alloc_fd();
1980 	if (!descriptor)
1981 		return B_NO_MEMORY;
1982 
1983 	if (vnode)
1984 		descriptor->u.vnode = vnode;
1985 	else
1986 		descriptor->u.mount = mount;
1987 	descriptor->cookie = cookie;
1988 
1989 	switch (type) {
1990 		// vnode types
1991 		case FDTYPE_FILE:
1992 			descriptor->ops = &sFileOps;
1993 			break;
1994 		case FDTYPE_DIR:
1995 			descriptor->ops = &sDirectoryOps;
1996 			break;
1997 		case FDTYPE_ATTR:
1998 			descriptor->ops = &sAttributeOps;
1999 			break;
2000 		case FDTYPE_ATTR_DIR:
2001 			descriptor->ops = &sAttributeDirectoryOps;
2002 			break;
2003 
2004 		// mount types
2005 		case FDTYPE_INDEX_DIR:
2006 			descriptor->ops = &sIndexDirectoryOps;
2007 			break;
2008 		case FDTYPE_QUERY:
2009 			descriptor->ops = &sQueryOps;
2010 			break;
2011 
2012 		default:
2013 			panic("get_new_fd() called with unknown type %d\n", type);
2014 			break;
2015 	}
2016 	descriptor->type = type;
2017 	descriptor->open_mode = openMode;
2018 
2019 	fd = new_fd(get_current_io_context(kernel), descriptor);
2020 	if (fd < 0) {
2021 		free(descriptor);
2022 		return B_NO_MORE_FDS;
2023 	}
2024 
2025 	return fd;
2026 }
2027 
2028 #ifdef ADD_DEBUGGER_COMMANDS
2029 
2030 
2031 static void
2032 _dump_advisory_locking(advisory_locking *locking)
2033 {
2034 	if (locking == NULL)
2035 		return;
2036 
2037 	kprintf("   lock:        %ld\n", locking->lock);
2038 	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2039 
2040 	struct advisory_lock *lock = NULL;
2041 	int32 index = 0;
2042 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
2043 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2044 		kprintf("        offset: %Ld\n", lock->offset);
2045 		kprintf("        length: %Ld\n", lock->length);
2046 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2047 	}
2048 }
2049 
2050 
2051 static void
2052 _dump_mount(struct fs_mount *mount)
2053 {
2054 	kprintf("MOUNT: %p\n", mount);
2055 	kprintf(" id:            %ld\n", mount->id);
2056 	kprintf(" device_name:   %s\n", mount->device_name);
2057 	kprintf(" fs_name:       %s\n", mount->fs_name);
2058 	kprintf(" cookie:        %p\n", mount->cookie);
2059 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2060 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2061 	kprintf(" partition:     %p\n", mount->partition);
2062 	kprintf(" lock:          %ld\n", mount->rlock.sem);
2063 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2064 		mount->owns_file_device ? " owns_file_device" : "");
2065 }
2066 
2067 
2068 static void
2069 _dump_vnode(struct vnode *vnode)
2070 {
2071 	kprintf("VNODE: %p\n", vnode);
2072 	kprintf(" device:        %ld\n", vnode->device);
2073 	kprintf(" id:            %Ld\n", vnode->id);
2074 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
2075 	kprintf(" private_node:  %p\n", vnode->private_node);
2076 	kprintf(" mount:         %p\n", vnode->mount);
2077 	kprintf(" covered_by:    %p\n", vnode->covered_by);
2078 	kprintf(" cache_ref:     %p\n", vnode->cache);
2079 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
2080 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2081 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
2082 
2083 	_dump_advisory_locking(vnode->advisory_locking);
2084 }
2085 
2086 
2087 static int
2088 dump_mount(int argc, char **argv)
2089 {
2090 	if (argc != 2) {
2091 		kprintf("usage: mount [id/address]\n");
2092 		return 0;
2093 	}
2094 
2095 	struct fs_mount *mount = NULL;
2096 
2097 	// if the argument looks like a hex number, treat it as such
2098 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2099 		mount = (fs_mount *)strtoul(argv[1], NULL, 16);
2100 		if (IS_USER_ADDRESS(mount)) {
2101 			kprintf("invalid fs_mount address\n");
2102 			return 0;
2103 		}
2104 	} else {
2105 		mount_id id = atoll(argv[1]);
2106 		mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2107 		if (mount == NULL) {
2108 			kprintf("fs_mount not found\n");
2109 			return 0;
2110 		}
2111 	}
2112 
2113 	_dump_mount(mount);
2114 	return 0;
2115 }
2116 
2117 
2118 static int
2119 dump_mounts(int argc, char **argv)
2120 {
2121 	struct hash_iterator iterator;
2122 	struct fs_mount *mount;
2123 
2124 	kprintf("address     id root       covers     fs_name\n");
2125 
2126 	hash_open(sMountsTable, &iterator);
2127 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2128 		kprintf("%p%4ld %p %p %s\n", mount, mount->id, mount->root_vnode,
2129 			mount->covers_vnode, mount->fs_name);
2130 	}
2131 
2132 	hash_close(sMountsTable, &iterator, false);
2133 	return 0;
2134 }
2135 
2136 
2137 static int
2138 dump_vnode(int argc, char **argv)
2139 {
2140 	if (argc < 2) {
2141 		kprintf("usage: vnode [id/device id/address]\n");
2142 		return 0;
2143 	}
2144 
2145 	struct vnode *vnode = NULL;
2146 
2147 	// if the argument looks like a hex number, treat it as such
2148 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2149 		vnode = (struct vnode *)strtoul(argv[1], NULL, 16);
2150 		if (IS_USER_ADDRESS(vnode)) {
2151 			kprintf("invalid vnode address\n");
2152 			return 0;
2153 		}
2154 		_dump_vnode(vnode);
2155 		return 0;
2156 	}
2157 
2158 	struct hash_iterator iterator;
2159 	mount_id device = -1;
2160 	vnode_id id;
2161 	if (argc > 2) {
2162 		device = atoi(argv[1]);
2163 		id = atoll(argv[2]);
2164 	} else
2165 		id = atoll(argv[1]);
2166 
2167 	hash_open(sVnodeTable, &iterator);
2168 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2169 		if (vnode->id != id || (device != -1 && vnode->device != device))
2170 			continue;
2171 
2172 		_dump_vnode(vnode);
2173 	}
2174 
2175 	hash_close(sVnodeTable, &iterator, false);
2176 	return 0;
2177 }
2178 
2179 
2180 static int
2181 dump_vnodes(int argc, char **argv)
2182 {
2183 	// restrict dumped nodes to a certain device if requested
2184 	mount_id device = -1;
2185 	if (argc > 1)
2186 		device = atoi(argv[1]);
2187 
2188 	struct hash_iterator iterator;
2189 	struct vnode *vnode;
2190 
2191 	kprintf("address    dev     inode  ref cache      locking    flags\n");
2192 
2193 	hash_open(sVnodeTable, &iterator);
2194 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2195 		if (device != -1 && vnode->device != device)
2196 			continue;
2197 
2198 		kprintf("%p%4ld%10Ld%5ld %p %p %s%s%s\n", vnode, vnode->device, vnode->id,
2199 			vnode->ref_count, vnode->cache, vnode->advisory_locking,
2200 			vnode->remove ? "r" : "-", vnode->busy ? "b" : "-",
2201 			vnode->unpublished ? "u" : "-");
2202 	}
2203 
2204 	hash_close(sVnodeTable, &iterator, false);
2205 	return 0;
2206 }
2207 
2208 
2209 static int
2210 dump_vnode_caches(int argc, char **argv)
2211 {
2212 	struct hash_iterator iterator;
2213 	struct vnode *vnode;
2214 
2215 	kprintf("address    dev     inode cache          size   pages\n");
2216 
2217 	hash_open(sVnodeTable, &iterator);
2218 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2219 		if (vnode->cache == NULL)
2220 			continue;
2221 
2222 		// count pages in cache
2223 		size_t numPages = 0;
2224 		for (struct vm_page *page = vnode->cache->cache->page_list;
2225 				page != NULL; page = page->cache_next) {
2226 			numPages++;
2227 		}
2228 
2229 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id, vnode->cache,
2230 			(vnode->cache->cache->virtual_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE, numPages);
2231 	}
2232 
2233 	hash_close(sVnodeTable, &iterator, false);
2234 	return 0;
2235 }
2236 
2237 
2238 int
2239 dump_io_context(int argc, char **argv)
2240 {
2241 	if (argc > 2) {
2242 		kprintf("usage: io_context [team id/address]\n");
2243 		return 0;
2244 	}
2245 
2246 	struct io_context *context = NULL;
2247 
2248 	if (argc > 1) {
2249 		uint32 num = strtoul(argv[1], NULL, 0);
2250 		if (IS_KERNEL_ADDRESS(num))
2251 			context = (struct io_context *)num;
2252 		else {
2253 			struct team *team = team_get_team_struct_locked(num);
2254 			if (team == NULL) {
2255 				kprintf("could not find team with ID %ld\n", num);
2256 				return 0;
2257 			}
2258 			context = (struct io_context *)team->io_context;
2259 		}
2260 	} else
2261 		context = get_current_io_context(true);
2262 
2263 	kprintf("I/O CONTEXT: %p\n", context);
2264 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2265 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2266 	kprintf(" max fds:\t%lu\n", context->table_size);
2267 
2268 	if (context->num_used_fds)
2269 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2270 
2271 	for (uint32 i = 0; i < context->table_size; i++) {
2272 		struct file_descriptor *fd = context->fds[i];
2273 		if (fd == NULL)
2274 			continue;
2275 
2276 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2277 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2278 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2279 			fd->u.vnode);
2280 	}
2281 
2282 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2283 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2284 
2285 	return 0;
2286 }
2287 
2288 #endif	// ADD_DEBUGGER_COMMANDS
2289 
2290 
2291 //	#pragma mark - public VFS API
2292 
2293 
2294 extern "C" status_t
2295 new_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2296 {
2297 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2298 		mountID, vnodeID, privateNode));
2299 
2300 	if (privateNode == NULL)
2301 		return B_BAD_VALUE;
2302 
2303 	mutex_lock(&sVnodeMutex);
2304 
2305 	// file system integrity check:
2306 	// test if the vnode already exists and bail out if this is the case!
2307 
2308 	// ToDo: the R5 implementation obviously checks for a different cookie
2309 	//	and doesn't panic if they are equal
2310 
2311 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2312 	if (vnode != NULL)
2313 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2314 
2315 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2316 	if (status == B_OK) {
2317 		vnode->private_node = privateNode;
2318 		vnode->busy = true;
2319 		vnode->unpublished = true;
2320 	}
2321 
2322 	TRACE(("returns: %s\n", strerror(status)));
2323 
2324 	mutex_unlock(&sVnodeMutex);
2325 	return status;
2326 }
2327 
2328 
2329 extern "C" status_t
2330 publish_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2331 {
2332 	FUNCTION(("publish_vnode()\n"));
2333 
2334 	mutex_lock(&sVnodeMutex);
2335 
2336 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2337 	status_t status = B_OK;
2338 
2339 	if (vnode != NULL && vnode->busy && vnode->unpublished
2340 		&& vnode->private_node == privateNode) {
2341 		vnode->busy = false;
2342 		vnode->unpublished = false;
2343 	} else if (vnode == NULL && privateNode != NULL) {
2344 		status = create_new_vnode(&vnode, mountID, vnodeID);
2345 		if (status == B_OK)
2346 			vnode->private_node = privateNode;
2347 	} else
2348 		status = B_BAD_VALUE;
2349 
2350 	TRACE(("returns: %s\n", strerror(status)));
2351 
2352 	mutex_unlock(&sVnodeMutex);
2353 	return status;
2354 }
2355 
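// A minimal file system usage sketch (hypothetical `volume' and `inode'
// variables): a node announced via new_vnode() stays busy and unpublished
// until the same private node is handed to publish_vnode():
//
//	status_t status = new_vnode(volume->id, inode->ID(), inode);
//	if (status == B_OK)
//		status = publish_vnode(volume->id, inode->ID(), inode);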
2356 
2357 extern "C" status_t
2358 get_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode *_fsNode)
2359 {
2360 	struct vnode *vnode;
2361 
2362 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2363 	if (status < B_OK)
2364 		return status;
2365 
2366 	*_fsNode = vnode->private_node;
2367 	return B_OK;
2368 }
2369 
2370 
2371 extern "C" status_t
2372 put_vnode(mount_id mountID, vnode_id vnodeID)
2373 {
2374 	struct vnode *vnode;
2375 
2376 	mutex_lock(&sVnodeMutex);
2377 	vnode = lookup_vnode(mountID, vnodeID);
2378 	mutex_unlock(&sVnodeMutex);
2379 
2380 	if (vnode)
2381 		dec_vnode_ref_count(vnode, true);
2382 
2383 	return B_OK;
2384 }
2385 
2386 
2387 extern "C" status_t
2388 remove_vnode(mount_id mountID, vnode_id vnodeID)
2389 {
2390 	struct vnode *vnode;
2391 	bool remove = false;
2392 
2393 	mutex_lock(&sVnodeMutex);
2394 
2395 	vnode = lookup_vnode(mountID, vnodeID);
2396 	if (vnode != NULL) {
2397 		if (vnode->covered_by != NULL) {
2398 			// this vnode is in use
2399 			mutex_unlock(&sVnodeMutex);
2400 			return B_BUSY;
2401 		}
2402 
2403 		vnode->remove = true;
2404 		if (vnode->unpublished) {
2405 			// prepare the vnode for deletion
2406 			vnode->busy = true;
2407 			remove = true;
2408 		}
2409 	}
2410 
2411 	mutex_unlock(&sVnodeMutex);
2412 
2413 	if (remove) {
2414 		// if the vnode hasn't been published yet, we delete it here
2415 		atomic_add(&vnode->ref_count, -1);
2416 		free_vnode(vnode, true);
2417 	}
2418 
2419 	return B_OK;
2420 }
2421 
2422 
2423 extern "C" status_t
2424 unremove_vnode(mount_id mountID, vnode_id vnodeID)
2425 {
2426 	struct vnode *vnode;
2427 
2428 	mutex_lock(&sVnodeMutex);
2429 
2430 	vnode = lookup_vnode(mountID, vnodeID);
2431 	if (vnode)
2432 		vnode->remove = false;
2433 
2434 	mutex_unlock(&sVnodeMutex);
2435 	return B_OK;
2436 }
2437 
2438 
2439 //	#pragma mark - private VFS API
2440 //	Functions the VFS exports for other parts of the kernel
2441 
2442 
2443 /** Acquires another reference to the vnode that has to be released
2444  *	by calling vfs_put_vnode().
2445  */
2446 
2447 void
2448 vfs_acquire_vnode(void *_vnode)
2449 {
2450 	inc_vnode_ref_count((struct vnode *)_vnode);
2451 }
2452 
2453 
2454 /** This is currently called from file_cache_create() only.
2455  *	It's probably a temporary solution as long as devfs requires that
2456  *	fs_read_pages()/fs_write_pages() are called with the standard
2457  *	open cookie and not with a device cookie.
2458  *	If that's done differently, remove this call; it has no other
2459  *	purpose.
2460  */
2461 
2462 extern "C" status_t
2463 vfs_get_cookie_from_fd(int fd, void **_cookie)
2464 {
2465 	struct file_descriptor *descriptor;
2466 
2467 	descriptor = get_fd(get_current_io_context(true), fd);
2468 	if (descriptor == NULL)
2469 		return B_FILE_ERROR;
2470 
2471 	*_cookie = descriptor->cookie;
2472 	return B_OK;
2473 }
2474 
2475 
2476 extern "C" int
2477 vfs_get_vnode_from_fd(int fd, bool kernel, void **vnode)
2478 {
2479 	*vnode = get_vnode_from_fd(fd, kernel);
2480 
2481 	if (*vnode == NULL)
2482 		return B_FILE_ERROR;
2483 
2484 	return B_NO_ERROR;
2485 }
2486 
2487 
2488 extern "C" status_t
2489 vfs_get_vnode_from_path(const char *path, bool kernel, void **_vnode)
2490 {
2491 	struct vnode *vnode;
2492 	status_t status;
2493 	char buffer[B_PATH_NAME_LENGTH + 1];
2494 
2495 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n", path, kernel));
2496 
2497 	strlcpy(buffer, path, sizeof(buffer));
2498 
2499 	status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2500 	if (status < B_OK)
2501 		return status;
2502 
2503 	*_vnode = vnode;
2504 	return B_OK;
2505 }
2506 
2507 
2508 extern "C" status_t
2509 vfs_get_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2510 {
2511 	struct vnode *vnode;
2512 
2513 	status_t status = get_vnode(mountID, vnodeID, &vnode, false);
2514 	if (status < B_OK)
2515 		return status;
2516 
2517 	*_vnode = vnode;
2518 	return B_OK;
2519 }
2520 
2521 
2522 extern "C" status_t
2523 vfs_entry_ref_to_vnode(mount_id mountID, vnode_id directoryID,
2524 	const char *name, void **_vnode)
2525 {
2526 	return entry_ref_to_vnode(mountID, directoryID, name, (struct vnode **)_vnode);
2527 }
2528 
2529 
2530 extern "C" void
2531 vfs_vnode_to_node_ref(void *_vnode, mount_id *_mountID, vnode_id *_vnodeID)
2532 {
2533 	struct vnode *vnode = (struct vnode *)_vnode;
2534 
2535 	*_mountID = vnode->device;
2536 	*_vnodeID = vnode->id;
2537 }
2538 
2539 
2540 /**	Looks up a vnode with the given mount and vnode ID.
2541  *	Must only be used with "in-use" vnodes as it doesn't grab a reference
2542  *	to the node.
2543  *	It's currently only used by file_cache_create().
2544  */
2545 
2546 extern "C" status_t
2547 vfs_lookup_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2548 {
2549 	mutex_lock(&sVnodeMutex);
2550 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2551 	mutex_unlock(&sVnodeMutex);
2552 
2553 	if (vnode == NULL)
2554 		return B_ERROR;
2555 
2556 	*_vnode = vnode;
2557 	return B_OK;
2558 }
2559 
2560 
2561 extern "C" status_t
2562 vfs_get_fs_node_from_path(mount_id mountID, const char *path, bool kernel, void **_node)
2563 {
2564 	char buffer[B_PATH_NAME_LENGTH + 1];
2565 	struct vnode *vnode;
2566 	status_t status;
2567 
2568 	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n", mountID, path, kernel));
2569 
2570 	strlcpy(buffer, path, sizeof(buffer));
2571 	status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2572 	if (status < B_OK)
2573 		return status;
2574 
2575 	if (vnode->device != mountID) {
2576 		// wrong mount ID - must not gain access on foreign file system nodes
2577 		put_vnode(vnode);
2578 		return B_BAD_VALUE;
2579 	}
2580 
2581 	*_node = vnode->private_node;
2582 	return B_OK;
2583 }
2584 
2585 
2586 /**	Finds the full path to the file that contains the module \a moduleName,
2587  *	puts it into \a pathBuffer, and returns B_OK for success.
2588  *	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
2589  *	or \c B_ENTRY_NOT_FOUND if no file could be found.
2590  *	\a pathBuffer is clobbered in any case and must not be relied on if this
2591  *	function returns unsuccessfully.
2592  */
2593 
2594 status_t
2595 vfs_get_module_path(const char *basePath, const char *moduleName, char *pathBuffer,
2596 	size_t bufferSize)
2597 {
2598 	struct vnode *dir, *file;
2599 	status_t status;
2600 	size_t length;
2601 	char *path;
2602 
2603 	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
2604 		return B_BUFFER_OVERFLOW;
2605 
2606 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
2607 	if (status < B_OK)
2608 		return status;
2609 
2610 	// the path buffer has been clobbered by the above call
2611 	length = strlcpy(pathBuffer, basePath, bufferSize);
2612 	if (pathBuffer[length - 1] != '/')
2613 		pathBuffer[length++] = '/';
2614 
2615 	path = pathBuffer + length;
2616 	bufferSize -= length;
2617 
2618 	while (moduleName) {
2619 		int type;
2620 
2621 		char *nextPath = strchr(moduleName, '/');
2622 		if (nextPath == NULL)
2623 			length = strlen(moduleName);
2624 		else {
2625 			length = nextPath - moduleName;
2626 			nextPath++;
2627 		}
2628 
2629 		if (length + 1 >= bufferSize) {
2630 			status = B_BUFFER_OVERFLOW;
2631 			goto err;
2632 		}
2633 
2634 		memcpy(path, moduleName, length);
2635 		path[length] = '\0';
2636 		moduleName = nextPath;
2637 
2638 		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
2639 		if (status < B_OK)
2640 			goto err;
2641 
2642 		put_vnode(dir);
2643 
2644 		if (S_ISDIR(type)) {
2645 			// go to the next directory
2646 			path[length] = '/';
2647 			path[length + 1] = '\0';
2648 			path += length + 1;
2649 			bufferSize -= length + 1;
2650 
2651 			dir = file;
2652 		} else if (S_ISREG(type)) {
2653 			// it's a file so it should be what we've searched for
2654 			put_vnode(file);
2655 
2656 			return B_OK;
2657 		} else {
2658 			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
2659 			status = B_ERROR;
2660 			goto err;
2661 		}
2662 	}
2663 
2664 	// if we got here, the moduleName just pointed to a directory, not to
2665 	// a real module - what should we do in this case?
2666 	status = B_ENTRY_NOT_FOUND;
2667 
2668 err:
2669 	put_vnode(dir);
2670 	return status;
2671 }
2672 
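// An illustrative call (hypothetical base path and module name): the module
// hierarchy below basePath is walked component by component until a regular
// file is found:
//
//	char pathBuffer[B_PATH_NAME_LENGTH];
//	if (vfs_get_module_path("/boot/home/config/add-ons/kernel",
//			"bus_managers/pci/v1", pathBuffer, sizeof(pathBuffer)) == B_OK) {
//		// pathBuffer holds the full path of the file providing the module
//	}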
2673 
2674 /**	\brief Normalizes a given path.
2675  *
2676  *	The path must refer to an existing or non-existing entry in an existing
2677  *	directory, that is, after chopping off the leaf component, the remaining
2678  *	path must refer to an existing directory.
2679  *
2680  *	The returned path will be canonical in that it will be absolute, will not
2681  *	contain any "." or ".." components or duplicate occurrences of '/'s,
2682  *	and none of the directory components will be symbolic links.
2683  *
2684  *	Any two paths referring to the same entry will result in the same
2685  *	normalized path (well, that is pretty much the definition of `normalized',
2686  *	isn't it :-).
2687  *
2688  *	\param path The path to be normalized.
2689  *	\param buffer The buffer into which the normalized path will be written.
2690  *	\param bufferSize The size of \a buffer.
2691  *	\param kernel \c true, if the IO context of the kernel shall be used,
2692  *		   otherwise that of the team this thread belongs to. Only relevant,
2693  *		   if the path is relative (to get the CWD).
2694  *	\return \c B_OK if everything went fine, another error code otherwise.
2695  */
2696 
2697 status_t
2698 vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
2699 	bool kernel)
2700 {
2701 	if (!path || !buffer || bufferSize < 1)
2702 		return B_BAD_VALUE;
2703 
2704 	TRACE(("vfs_normalize_path(`%s')\n", path));
2705 
2706 	// copy the supplied path to the stack, so it can be modified
2707 	char mutablePath[B_PATH_NAME_LENGTH + 1];
2708 	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
2709 		return B_NAME_TOO_LONG;
2710 
2711 	// get the dir vnode and the leaf name
2712 	struct vnode *dirNode;
2713 	char leaf[B_FILE_NAME_LENGTH];
2714 	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
2715 	if (error != B_OK) {
2716 		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
2717 		return error;
2718 	}
2719 
2720 	// if the leaf is "." or "..", we directly get the correct directory
2721 	// vnode and ignore the leaf later
2722 	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
2723 	if (isDir)
2724 		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
2725 	if (error != B_OK) {
2726 		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n", strerror(error)));
2727 		return error;
2728 	}
2729 
2730 	// get the directory path
2731 	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
2732 	put_vnode(dirNode);
2733 	if (error < B_OK) {
2734 		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
2735 		return error;
2736 	}
2737 
2738 	// append the leaf name
2739 	if (!isDir) {
2740 		// insert a directory separator only if this is not the file system root
2741 		if ((strcmp(buffer, "/") != 0
2742 			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
2743 			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
2744 			return B_NAME_TOO_LONG;
2745 		}
2746 	}
2747 
2748 	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
2749 	return B_OK;
2750 }
2751 
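// Illustrative usage (hypothetical input): any spelling of a path referring
// to the same entry normalizes to the same canonical result:
//
//	char normalized[B_PATH_NAME_LENGTH];
//	if (vfs_normalize_path("/boot/./home/../home/Desktop", normalized,
//			sizeof(normalized), true) == B_OK) {
//		// yields "/boot/home/Desktop", provided that no directory
//		// component is a symbolic link
//	}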
2752 
2753 extern "C" void
2754 vfs_put_vnode(void *_vnode)
2755 {
2756 	put_vnode((struct vnode *)_vnode);
2757 }
2758 
2759 
2760 extern "C" status_t
2761 vfs_get_cwd(mount_id *_mountID, vnode_id *_vnodeID)
2762 {
2763 	// Get current working directory from io context
2764 	struct io_context *context = get_current_io_context(false);
2765 	status_t status = B_OK;
2766 
2767 	mutex_lock(&context->io_mutex);
2768 
2769 	if (context->cwd != NULL) {
2770 		*_mountID = context->cwd->device;
2771 		*_vnodeID = context->cwd->id;
2772 	} else
2773 		status = B_ERROR;
2774 
2775 	mutex_unlock(&context->io_mutex);
2776 	return status;
2777 }
2778 
2779 
2780 extern "C" bool
2781 vfs_can_page(void *_vnode, void *cookie)
2782 {
2783 	struct vnode *vnode = (struct vnode *)_vnode;
2784 
2785 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
2786 
2787 	if (FS_CALL(vnode, can_page))
2788 		return FS_CALL(vnode, can_page)(vnode->mount->cookie, vnode->private_node, cookie);
2789 
2790 	return false;
2791 }
2792 
2793 
2794 extern "C" status_t
2795 vfs_read_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count, size_t *_numBytes)
2796 {
2797 	struct vnode *vnode = (struct vnode *)_vnode;
2798 
2799 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
2800 
2801 	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node, cookie, pos, vecs, count, _numBytes);
2802 }
2803 
2804 
2805 extern "C" status_t
2806 vfs_write_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count, size_t *_numBytes)
2807 {
2808 	struct vnode *vnode = (struct vnode *)_vnode;
2809 
2810 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
2811 
2812 	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node, cookie, pos, vecs, count, _numBytes);
2813 }
2814 
2815 
2816 /** Gets the vnode's vm_cache object. If it doesn't have one yet, it will
2817  *	be created if \a allocate is \c true.
2818  *	On success, it also grabs a reference to the cache
2819  *	it returns.
2820  */
2821 
2822 extern "C" status_t
2823 vfs_get_vnode_cache(void *_vnode, vm_cache_ref **_cache, bool allocate)
2824 {
2825 	struct vnode *vnode = (struct vnode *)_vnode;
2826 
2827 	if (vnode->cache != NULL) {
2828 		vm_cache_acquire_ref(vnode->cache);
2829 		*_cache = vnode->cache;
2830 		return B_OK;
2831 	}
2832 
2833 	mutex_lock(&sVnodeMutex);
2834 
2835 	status_t status = B_OK;
2836 
2837 	// The cache could have been created in the meantime
2838 	if (vnode->cache == NULL) {
2839 		if (allocate)
2840 			status = vm_create_vnode_cache(vnode, &vnode->cache);
2841 		else
2842 			status = B_BAD_VALUE;
2843 	} else
2844 		vm_cache_acquire_ref(vnode->cache);
2845 
2846 	if (status == B_OK)
2847 		*_cache = vnode->cache;
2848 
2849 	mutex_unlock(&sVnodeMutex);
2850 	return status;
2851 }
2852 
2853 
2854 status_t
2855 vfs_get_file_map(void *_vnode, off_t offset, size_t size, file_io_vec *vecs, size_t *_count)
2856 {
2857 	struct vnode *vnode = (struct vnode *)_vnode;
2858 
2859 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
2860 
2861 	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie, vnode->private_node, offset, size, vecs, _count);
2862 }
2863 
2864 
2865 status_t
2866 vfs_stat_vnode(void *_vnode, struct stat *stat)
2867 {
2868 	struct vnode *vnode = (struct vnode *)_vnode;
2869 
2870 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
2871 		vnode->private_node, stat);
2872 
2873 	// fill in the st_dev and st_ino fields
2874 	if (status == B_OK) {
2875 		stat->st_dev = vnode->device;
2876 		stat->st_ino = vnode->id;
2877 	}
2878 
2879 	return status;
2880 }
2881 
2882 
2883 status_t
2884 vfs_get_vnode_name(void *_vnode, char *name, size_t nameSize)
2885 {
2886 	return get_vnode_name((struct vnode *)_vnode, NULL, name, nameSize);
2887 }
2888 
2889 
2890 /**	Closes all file descriptors of the specified I/O context that
2891  *	have the O_CLOEXEC flag set.
2892  */
2893 
2894 void
2895 vfs_exec_io_context(void *_context)
2896 {
2897 	struct io_context *context = (struct io_context *)_context;
2898 	uint32 i;
2899 
2900 	for (i = 0; i < context->table_size; i++) {
2901 		mutex_lock(&context->io_mutex);
2902 
2903 		struct file_descriptor *descriptor = context->fds[i];
2904 		bool remove = false;
2905 
2906 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
2907 			context->fds[i] = NULL;
2908 			context->num_used_fds--;
2909 
2910 			remove = true;
2911 		}
2912 
2913 		mutex_unlock(&context->io_mutex);
2914 
2915 		if (remove) {
2916 			close_fd(descriptor);
2917 			put_fd(descriptor);
2918 		}
2919 	}
2920 }
2921 
2922 
2923 /** Sets up a new io_context structure, and inherits the properties
2924  *	of the parent io_context if it is given.
2925  */
2926 
2927 void *
2928 vfs_new_io_context(void *_parentContext)
2929 {
2930 	size_t tableSize;
2931 	struct io_context *context;
2932 	struct io_context *parentContext;
2933 
2934 	context = (io_context *)malloc(sizeof(struct io_context));
2935 	if (context == NULL)
2936 		return NULL;
2937 
2938 	memset(context, 0, sizeof(struct io_context));
2939 
2940 	parentContext = (struct io_context *)_parentContext;
2941 	if (parentContext)
2942 		tableSize = parentContext->table_size;
2943 	else
2944 		tableSize = DEFAULT_FD_TABLE_SIZE;
2945 
2946 	// allocate space for FDs and their close-on-exec flag
2947 	context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize
2948 		+ tableSize / 8);
2949 	if (context->fds == NULL) {
2950 		free(context);
2951 		return NULL;
2952 	}
2953 
2954 	memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize
2955 		+ tableSize / 8);
2956 	context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);
2957 
2958 	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
2959 		free(context->fds);
2960 		free(context);
2961 		return NULL;
2962 	}
2963 
2964 	// Copy all parent files which don't have the O_CLOEXEC flag set
2965 
2966 	if (parentContext) {
2967 		size_t i;
2968 
2969 		mutex_lock(&parentContext->io_mutex);
2970 
2971 		context->cwd = parentContext->cwd;
2972 		if (context->cwd)
2973 			inc_vnode_ref_count(context->cwd);
2974 
2975 		for (i = 0; i < tableSize; i++) {
2976 			struct file_descriptor *descriptor = parentContext->fds[i];
2977 
2978 			if (descriptor != NULL && !fd_close_on_exec(parentContext, i)) {
2979 				context->fds[i] = descriptor;
2980 				context->num_used_fds++;
2981 				atomic_add(&descriptor->ref_count, 1);
2982 				atomic_add(&descriptor->open_count, 1);
2983 			}
2984 		}
2985 
2986 		mutex_unlock(&parentContext->io_mutex);
2987 	} else {
2988 		context->cwd = sRoot;
2989 
2990 		if (context->cwd)
2991 			inc_vnode_ref_count(context->cwd);
2992 	}
2993 
2994 	context->table_size = tableSize;
2995 
2996 	list_init(&context->node_monitors);
2997 	context->max_monitors = MAX_NODE_MONITORS;
2998 
2999 	return context;
3000 }
3001 
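// Note on the layout used above: the FD table and its close-on-exec flags
// share a single allocation -- `table_size' descriptor pointers followed by
// a bitmap with one bit per descriptor. A plausible sketch of how such a
// bit is addressed (the authoritative accessors are fd_close_on_exec() and
// fd_set_close_on_exec()):
//
//	bool closeOnExec
//		= (context->fds_close_on_exec[fd / 8] & (1 << (fd % 8))) != 0;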
3002 
3003 status_t
3004 vfs_free_io_context(void *_ioContext)
3005 {
3006 	struct io_context *context = (struct io_context *)_ioContext;
3007 	uint32 i;
3008 
3009 	if (context->cwd)
3010 		dec_vnode_ref_count(context->cwd, false);
3011 
3012 	mutex_lock(&context->io_mutex);
3013 
3014 	for (i = 0; i < context->table_size; i++) {
3015 		if (struct file_descriptor *descriptor = context->fds[i]) {
3016 			close_fd(descriptor);
3017 			put_fd(descriptor);
3018 		}
3019 	}
3020 
3021 	mutex_destroy(&context->io_mutex);
3022 
3023 	remove_node_monitors(context);
3024 	free(context->fds);
3025 	free(context);
3026 
3027 	return B_OK;
3028 }
3029 
3030 
3031 static status_t
3032 vfs_resize_fd_table(struct io_context *context, const int newSize)
3033 {
3034 	void *fds;
3035 	int	status = B_OK;
3036 
3037 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3038 		return EINVAL;
3039 
3040 	mutex_lock(&context->io_mutex);
3041 
3042 	if ((size_t)newSize < context->table_size) {
3043 		// shrink the fd table
3044 		int i;
3045 
3046 		// Make sure none of the fds being dropped are in use
3047 		for (i = context->table_size; i-- > newSize;) {
3048 			if (context->fds[i]) {
3049 				status = EBUSY;
3050 				goto out;
3051 			}
3052 		}
3053 
3054 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3055 		if (fds == NULL) {
3056 			status = ENOMEM;
3057 			goto out;
3058 		}
3059 
3060 		memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize);
3061 	} else {
3062 		// enlarge the fd table
3063 
3064 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3065 		if (fds == NULL) {
3066 			status = ENOMEM;
3067 			goto out;
3068 		}
3069 
3070 		// copy the fd array, and zero the additional slots
3071 		memcpy(fds, context->fds, sizeof(void *) * context->table_size);
3072 		memset((char *)fds + (sizeof(void *) * context->table_size), 0,
3073 			sizeof(void *) * (newSize - context->table_size));
3074 	}
3075 
3076 	free(context->fds);
3077 	context->fds = (file_descriptor **)fds;
3078 	context->table_size = newSize;
3079 
3080 out:
3081 	mutex_unlock(&context->io_mutex);
3082 	return status;
3083 }
3084 
3085 
3086 int
3087 vfs_getrlimit(int resource, struct rlimit * rlp)
3088 {
3089 	if (!rlp)
3090 		return -1;
3091 
3092 	switch (resource) {
3093 		case RLIMIT_NOFILE:
3094 		{
3095 			struct io_context *ioctx = get_current_io_context(false);
3096 
3097 			mutex_lock(&ioctx->io_mutex);
3098 
3099 			rlp->rlim_cur = ioctx->table_size;
3100 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3101 
3102 			mutex_unlock(&ioctx->io_mutex);
3103 
3104 			return 0;
3105 		}
3106 
3107 		default:
3108 			return -1;
3109 	}
3110 }
3111 
3112 
3113 int
3114 vfs_setrlimit(int resource, const struct rlimit * rlp)
3115 {
3116 	if (!rlp)
3117 		return -1;
3118 
3119 	switch (resource) {
3120 		case RLIMIT_NOFILE:
3121 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3122 
3123 		default:
3124 			return -1;
3125 	}
3126 }
3127 
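// The user-level counterpart (standard POSIX API) that ends up in
// vfs_resize_fd_table() above:
//
//	struct rlimit rl;
//	getrlimit(RLIMIT_NOFILE, &rl);
//	rl.rlim_cur = 512;	// request a larger FD table
//	setrlimit(RLIMIT_NOFILE, &rl);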
3128 
3129 status_t
3130 vfs_init(kernel_args *args)
3131 {
3132 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
3133 		&vnode_compare, &vnode_hash);
3134 	if (sVnodeTable == NULL)
3135 		panic("vfs_init: error creating vnode hash table\n");
3136 
3137 	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));
3138 
3139 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
3140 		&mount_compare, &mount_hash);
3141 	if (sMountsTable == NULL)
3142 		panic("vfs_init: error creating mounts hash table\n");
3143 
3144 	node_monitor_init();
3145 
3146 	sRoot = NULL;
3147 
3148 	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
3149 		panic("vfs_init: error allocating file systems lock\n");
3150 
3151 	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
3152 		panic("vfs_init: error allocating mount op lock\n");
3153 
3154 	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
3155 		panic("vfs_init: error allocating mount lock\n");
3156 
3157 	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
3158 		panic("vfs_init: error allocating vnode lock\n");
3159 
3160 	if (block_cache_init() != B_OK)
3161 		return B_ERROR;
3162 
3163 #ifdef ADD_DEBUGGER_COMMANDS
3164 	// add some debugger commands
3165 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
3166 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
3167 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
3168 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
3169 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
3170 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
3171 #endif
3172 
3173 	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);
3174 
3175 	return file_cache_init();
3176 }
3177 
3178 
3179 //	#pragma mark -
3180 //	The filetype-dependent implementations (fd_ops + open/create/rename/remove, ...)
3181 
3182 
3183 /** Calls fs create() in the given directory and returns a new
3184  *	file descriptor for the newly created file
3185  */
3186 
3187 static int
3188 create_vnode(struct vnode *directory, const char *name, int openMode, int perms, bool kernel)
3189 {
3190 	struct vnode *vnode;
3191 	fs_cookie cookie;
3192 	vnode_id newID;
3193 	int status;
3194 
3195 	if (FS_CALL(directory, create) == NULL)
3196 		return EROFS;
3197 
3198 	status = FS_CALL(directory, create)(directory->mount->cookie, directory->private_node, name, openMode, perms, &cookie, &newID);
3199 	if (status < B_OK)
3200 		return status;
3201 
3202 	mutex_lock(&sVnodeMutex);
3203 	vnode = lookup_vnode(directory->device, newID);
3204 	mutex_unlock(&sVnodeMutex);
3205 
3206 	if (vnode == NULL) {
3207 		dprintf("vfs: fs_create() returned success but there is no vnode!\n");
3208 		return EINVAL;
3209 	}
3210 
3211 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
3212 		return status;
3213 
3214 	// something went wrong, clean up
3215 
3216 	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3217 	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3218 	put_vnode(vnode);
3219 
3220 	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);
3221 
3222 	return status;
3223 }
3224 
3225 
3226 /** Calls fs_open() on the given vnode and returns a new
3227  *	file descriptor for it
3228  */
3229 
3230 static int
3231 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3232 {
3233 	fs_cookie cookie;
3234 	int status;
3235 
3236 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3237 	if (status < 0)
3238 		return status;
3239 
3240 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3241 	if (status < 0) {
3242 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3243 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3244 	}
3245 	return status;
3246 }
3247 
3248 
3249 /** Calls fs open_dir() on the given vnode and returns a new
3250  *	file descriptor for it
3251  */
3252 
3253 static int
3254 open_dir_vnode(struct vnode *vnode, bool kernel)
3255 {
3256 	fs_cookie cookie;
3257 	int status;
3258 
3259 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3260 	if (status < B_OK)
3261 		return status;
3262 
3263 	// file is opened, create a fd
3264 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3265 	if (status >= 0)
3266 		return status;
3267 
3268 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3269 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3270 
3271 	return status;
3272 }
3273 
3274 
3275 /** Calls fs open_attr_dir() on the given vnode and returns a new
3276  *	file descriptor for it.
3277  *	Used by attr_dir_open() and attr_dir_open_fd().
3278  */
3279 
3280 static int
3281 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3282 {
3283 	fs_cookie cookie;
3284 	int status;
3285 
3286 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3287 		return EOPNOTSUPP;
3288 
3289 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3290 	if (status < 0)
3291 		return status;
3292 
3293 	// file is opened, create a fd
3294 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3295 	if (status >= 0)
3296 		return status;
3297 
3298 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3299 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3300 
3301 	return status;
3302 }
3303 
3304 
3305 static int
3306 file_create_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, int perms, bool kernel)
3307 {
3308 	struct vnode *directory;
3309 	int status;
3310 
3311 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
3312 
3313 	// get directory to put the new file in
3314 	status = get_vnode(mountID, directoryID, &directory, false);
3315 	if (status < B_OK)
3316 		return status;
3317 
3318 	status = create_vnode(directory, name, openMode, perms, kernel);
3319 	put_vnode(directory);
3320 
3321 	return status;
3322 }
3323 
3324 
3325 static int
3326 file_create(int fd, char *path, int openMode, int perms, bool kernel)
3327 {
3328 	char name[B_FILE_NAME_LENGTH];
3329 	struct vnode *directory;
3330 	int status;
3331 
3332 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
3333 
3334 	// get directory to put the new file in
3335 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3336 	if (status < 0)
3337 		return status;
3338 
3339 	status = create_vnode(directory, name, openMode, perms, kernel);
3340 
3341 	put_vnode(directory);
3342 	return status;
3343 }
3344 
3345 
3346 static int
3347 file_open_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, bool kernel)
3348 {
3349 	struct vnode *vnode;
3350 	int status;
3351 
3352 	if (name == NULL || *name == '\0')
3353 		return B_BAD_VALUE;
3354 
3355 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
3356 		mountID, directoryID, name, openMode));
3357 
3358 	// get the vnode matching the entry_ref
3359 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
3360 	if (status < B_OK)
3361 		return status;
3362 
3363 	status = open_vnode(vnode, openMode, kernel);
3364 	if (status < B_OK)
3365 		put_vnode(vnode);
3366 	else
3367 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
3368 	return status;
3369 }
3370 
3371 
3372 static int
3373 file_open(int fd, char *path, int openMode, bool kernel)
3374 {
3375 	int status = B_OK;
3376 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
3377 
3378 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
3379 		fd, path, openMode, kernel));
3380 
3381 	// get the vnode matching the vnode + path combination
3382 	struct vnode *vnode = NULL;
3383 	vnode_id parentID;
3384 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
3385 	if (status != B_OK)
3386 		return status;
3387 
3388 	// open the vnode
3389 	status = open_vnode(vnode, openMode, kernel);
3390 	// put only on error -- otherwise our reference was transferred to the FD
3391 	if (status < B_OK)
3392 		put_vnode(vnode);
3393 	else {
3394 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
3395 			vnode->device, parentID, vnode->id, NULL);
3396 	}
3397 	return status;
3398 }
3399 
3400 
3401 static status_t
3402 file_close(struct file_descriptor *descriptor)
3403 {
3404 	struct vnode *vnode = descriptor->u.vnode;
3405 	status_t status = B_OK;
3406 
3407 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
3408 
3409 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
3410 	if (FS_CALL(vnode, close))
3411 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3412 
3413 	if (status == B_OK) {
3414 		// remove all outstanding locks for this team
3415 		release_advisory_lock(vnode, NULL);
3416 	}
3417 	return status;
3418 }
3419 
3420 
3421 static void
3422 file_free_fd(struct file_descriptor *descriptor)
3423 {
3424 	struct vnode *vnode = descriptor->u.vnode;
3425 
3426 	if (vnode != NULL) {
3427 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3428 		put_vnode(vnode);
3429 	}
3430 }
3431 
3432 
3433 static status_t
3434 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
3435 {
3436 	struct vnode *vnode = descriptor->u.vnode;
3437 
3438 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
3439 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3440 }
3441 
3442 
3443 static status_t
3444 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
3445 {
3446 	struct vnode *vnode = descriptor->u.vnode;
3447 
3448 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
3449 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3450 }
3451 
3452 
3453 static off_t
3454 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
3455 {
3456 	off_t offset;
3457 
3458 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
3459 	// ToDo: seek should fail for pipes and FIFOs...
3460 
3461 	switch (seekType) {
3462 		case SEEK_SET:
3463 			offset = 0;
3464 			break;
3465 		case SEEK_CUR:
3466 			offset = descriptor->pos;
3467 			break;
3468 		case SEEK_END:
3469 		{
3470 			struct vnode *vnode = descriptor->u.vnode;
3471 			struct stat stat;
3472 			status_t status;
3473 
3474 			if (FS_CALL(vnode, read_stat) == NULL)
3475 				return EOPNOTSUPP;
3476 
3477 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
3478 			if (status < B_OK)
3479 				return status;
3480 
3481 			offset = stat.st_size;
3482 			break;
3483 		}
3484 		default:
3485 			return B_BAD_VALUE;
3486 	}
3487 
3488 	// assumes off_t is 64 bits wide
3489 	if (offset > 0 && LONGLONG_MAX - offset < pos)
3490 		return EOVERFLOW;
3491 
3492 	pos += offset;
3493 	if (pos < 0)
3494 		return B_BAD_VALUE;
3495 
3496 	return descriptor->pos = pos;
3497 }
3498 
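// An illustrative run of the arithmetic above (hypothetical descriptor with
// pos == 100 and a 1000 byte file):
//
//	file_seek(descriptor, -40, SEEK_CUR);	// returns 60 and sets pos = 60
//	file_seek(descriptor, -200, SEEK_CUR);	// fails with B_BAD_VALUE
//	file_seek(descriptor, 10, SEEK_END);	// returns 1010; EOVERFLOW would
//											// only occur near LONGLONG_MAX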
3499 
3500 static status_t
3501 file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
3502 	struct select_sync *sync)
3503 {
3504 	FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
3505 
3506 	struct vnode *vnode = descriptor->u.vnode;
3507 
3508 	// If the FS has no select() hook, notify select() now.
3509 	if (FS_CALL(vnode, select) == NULL)
3510 		return notify_select_event((selectsync*)sync, ref, event);
3511 
3512 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
3513 		descriptor->cookie, event, ref, (selectsync*)sync);
3514 }
3515 
3516 
3517 static status_t
3518 file_deselect(struct file_descriptor *descriptor, uint8 event,
3519 	struct select_sync *sync)
3520 {
3521 	struct vnode *vnode = descriptor->u.vnode;
3522 
3523 	if (FS_CALL(vnode, deselect) == NULL)
3524 		return B_OK;
3525 
3526 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
3527 		descriptor->cookie, event, (selectsync*)sync);
3528 }
3529 
3530 
3531 static status_t
3532 dir_create_entry_ref(mount_id mountID, vnode_id parentID, const char *name, int perms, bool kernel)
3533 {
3534 	struct vnode *vnode;
3535 	vnode_id newID;
3536 	status_t status;
3537 
3538 	if (name == NULL || *name == '\0')
3539 		return B_BAD_VALUE;
3540 
3541 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
3542 
3543 	status = get_vnode(mountID, parentID, &vnode, kernel);
3544 	if (status < B_OK)
3545 		return status;
3546 
3547 	if (FS_CALL(vnode, create_dir))
3548 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
3549 	else
3550 		status = EROFS;
3551 
3552 	put_vnode(vnode);
3553 	return status;
3554 }
3555 
3556 
3557 static status_t
3558 dir_create(int fd, char *path, int perms, bool kernel)
3559 {
3560 	char filename[B_FILE_NAME_LENGTH];
3561 	struct vnode *vnode;
3562 	vnode_id newID;
3563 	status_t status;
3564 
3565 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
3566 
3567 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3568 	if (status < 0)
3569 		return status;
3570 
3571 	if (FS_CALL(vnode, create_dir))
3572 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
3573 	else
3574 		status = EROFS;
3575 
3576 	put_vnode(vnode);
3577 	return status;
3578 }
3579 
3580 
3581 static int
3582 dir_open_entry_ref(mount_id mountID, vnode_id parentID, const char *name, bool kernel)
3583 {
3584 	struct vnode *vnode;
3585 	int status;
3586 
3587 	FUNCTION(("dir_open_entry_ref()\n"));
3588 
3589 	if (name && *name == '\0')
3590 		return B_BAD_VALUE;
3591 
3592 	// get the vnode matching the entry_ref/node_ref
3593 	if (name)
3594 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
3595 	else
3596 		status = get_vnode(mountID, parentID, &vnode, false);
3597 	if (status < B_OK)
3598 		return status;
3599 
3600 	status = open_dir_vnode(vnode, kernel);
3601 	if (status < B_OK)
3602 		put_vnode(vnode);
3603 	else
3604 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
3605 	return status;
3606 }
3607 
3608 
3609 static int
3610 dir_open(int fd, char *path, bool kernel)
3611 {
3612 	int status = B_OK;
3613 
3614 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
3615 
3616 	// get the vnode matching the vnode + path combination
3617 	struct vnode *vnode = NULL;
3618 	vnode_id parentID;
3619 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
3620 	if (status != B_OK)
3621 		return status;
3622 
3623 	// open the dir
3624 	status = open_dir_vnode(vnode, kernel);
3625 	if (status < B_OK)
3626 		put_vnode(vnode);
3627 	else
3628 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
3629 	return status;
3630 }
3631 
3632 
3633 static status_t
3634 dir_close(struct file_descriptor *descriptor)
3635 {
3636 	struct vnode *vnode = descriptor->u.vnode;
3637 
3638 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
3639 
3640 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
3641 	if (FS_CALL(vnode, close_dir))
3642 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3643 
3644 	return B_OK;
3645 }
3646 
3647 
3648 static void
3649 dir_free_fd(struct file_descriptor *descriptor)
3650 {
3651 	struct vnode *vnode = descriptor->u.vnode;
3652 
3653 	if (vnode != NULL) {
3654 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3655 		put_vnode(vnode);
3656 	}
3657 }
3658 
3659 
3660 static status_t
3661 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3662 {
3663 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
3664 }
3665 
3666 
3667 static void
3668 fix_dirent(struct vnode *parent, struct dirent *entry)
3669 {
3670 	// set d_pdev and d_pino
3671 	entry->d_pdev = parent->device;
3672 	entry->d_pino = parent->id;
3673 
3674 	// If this is the ".." entry and the directory is the root of a FS,
3675 	// we need to replace d_dev and d_ino with the actual values.
3676 	if (strcmp(entry->d_name, "..") == 0
3677 		&& parent->mount->root_vnode == parent
3678 		&& parent->mount->covers_vnode) {
3679 
3680 		inc_vnode_ref_count(parent);	// vnode_path_to_vnode() puts the node
3681 
3682 		struct vnode *vnode;
3683 		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
3684 			NULL, NULL);
3685 
3686 		if (status == B_OK) {
3687 			entry->d_dev = vnode->device;
3688 			entry->d_ino = vnode->id;
3689 		}
3690 	} else {
3691 		// resolve mount points
3692 		struct vnode *vnode = NULL;
3693 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, false);
3694 		if (status != B_OK)
3695 			return;
3696 
3697 		recursive_lock_lock(&sMountOpLock);
3698 		if (vnode->covered_by) {
3699 			entry->d_dev = vnode->covered_by->device;
3700 			entry->d_ino = vnode->covered_by->id;
3701 		}
3702 		recursive_lock_unlock(&sMountOpLock);
3703 
3704 		put_vnode(vnode);
3705 	}
3706 }
3707 
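// Illustrative effect (hypothetical mount layout): when reading the ".."
// entry of "/boot" -- the root of a mounted volume -- fix_dirent() replaces
// d_dev/d_ino, which would otherwise name the volume's own root, with the
// values of the parent directory on the underlying file system. Conversely,
// an entry whose vnode is covered by a mount is rewritten to the root vnode
// of the volume mounted on top of it.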
3708 
3709 static status_t
3710 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3711 {
3712 	if (!FS_CALL(vnode, read_dir))
3713 		return EOPNOTSUPP;
3714 
3715 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie, vnode->private_node, cookie, buffer, bufferSize, _count);
3716 	if (error != B_OK)
3717 		return error;
3718 
3719 	// we need to adjust the read dirents
3720 	if (*_count > 0) {
3721 		// XXX: Currently reading only one dirent is supported. Make this a loop!
3722 		fix_dirent(vnode, buffer);
3723 	}
3724 
3725 	return error;
3726 }
3727 
3728 
3729 static status_t
3730 dir_rewind(struct file_descriptor *descriptor)
3731 {
3732 	struct vnode *vnode = descriptor->u.vnode;
3733 
3734 	if (FS_CALL(vnode, rewind_dir))
3735 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3736 
3737 	return EOPNOTSUPP;
3738 }
3739 
3740 
3741 static status_t
3742 dir_remove(int fd, char *path, bool kernel)
3743 {
3744 	char name[B_FILE_NAME_LENGTH];
3745 	struct vnode *directory;
3746 	status_t status;
3747 
3748 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3749 	if (status < 0)
3750 		return status;
3751 
3752 	if (FS_CALL(directory, remove_dir)) {
3753 		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
3754 			directory->private_node, name);
3755 	} else
3756 		status = EROFS;
3757 
3758 	put_vnode(directory);
3759 	return status;
3760 }
3761 
3762 
3763 static status_t
3764 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
3765 {
3766 	struct vnode *vnode = descriptor->u.vnode;
3767 
3768 	if (FS_CALL(vnode, ioctl)) {
3769 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
3770 			descriptor->cookie, op, buffer, length);
3771 	}
3772 
3773 	return EOPNOTSUPP;
3774 }
3775 
3776 
3777 static status_t
3778 common_fcntl(int fd, int op, uint32 argument, bool kernel)
3779 {
3780 	struct file_descriptor *descriptor;
3781 	struct vnode *vnode;
3782 	struct flock flock;
3783 	status_t status;
3784 
3785 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
3786 		fd, op, argument, kernel ? "kernel" : "user"));
3787 
3788 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
3789 	if (descriptor == NULL)
3790 		return B_FILE_ERROR;
3791 
	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
		// put the descriptor reference before bailing out early
		if (descriptor->type != FDTYPE_FILE) {
			put_fd(descriptor);
			return B_BAD_VALUE;
		}
		if (user_memcpy(&flock, (struct flock *)argument,
				sizeof(struct flock)) < B_OK) {
			put_fd(descriptor);
			return B_BAD_ADDRESS;
		}
	}
3798 
3799 	switch (op) {
3800 		case F_SETFD:
3801 		{
3802 			struct io_context *context = get_current_io_context(kernel);
3803 			// Set file descriptor flags
3804 
3805 			// O_CLOEXEC is the only flag available at this time
3806 			mutex_lock(&context->io_mutex);
3807 			fd_set_close_on_exec(context, fd, argument == FD_CLOEXEC);
3808 			mutex_unlock(&context->io_mutex);
3809 
3810 			status = B_OK;
3811 			break;
3812 		}
3813 
3814 		case F_GETFD:
3815 		{
3816 			struct io_context *context = get_current_io_context(kernel);
3817 
3818 			// Get file descriptor flags
3819 			mutex_lock(&context->io_mutex);
3820 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
3821 			mutex_unlock(&context->io_mutex);
3822 			break;
3823 		}
3824 
3825 		case F_SETFL:
3826 			// Set file descriptor open mode
3827 			if (FS_CALL(vnode, set_flags)) {
3828 				// we only accept changes to O_APPEND and O_NONBLOCK
3829 				argument &= O_APPEND | O_NONBLOCK;
3830 
3831 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
3832 					vnode->private_node, descriptor->cookie, (int)argument);
3833 				if (status == B_OK) {
3834 					// update this descriptor's open_mode field
3835 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK))
3836 						| argument;
3837 				}
3838 			} else
3839 				status = EOPNOTSUPP;
3840 			break;
3841 
3842 		case F_GETFL:
3843 			// Get file descriptor open mode
3844 			status = descriptor->open_mode;
3845 			break;
3846 
3847 		case F_DUPFD:
3848 		{
3849 			struct io_context *context = get_current_io_context(kernel);
3850 
3851 			status = new_fd_etc(context, descriptor, (int)argument);
3852 			if (status >= 0) {
3853 				mutex_lock(&context->io_mutex);
				fd_set_close_on_exec(context, status, false);
					// POSIX requires the close-on-exec flag to be cleared
					// on the duplicated descriptor, not on the original one
3855 				mutex_unlock(&context->io_mutex);
3856 
3857 				atomic_add(&descriptor->ref_count, 1);
3858 			}
3859 			break;
3860 		}
3861 
3862 		case F_GETLK:
3863 			status = get_advisory_lock(descriptor->u.vnode, &flock);
3864 			if (status == B_OK) {
3865 				// copy back flock structure
3866 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
3867 			}
3868 			break;
3869 
3870 		case F_SETLK:
3871 		case F_SETLKW:
3872 			status = normalize_flock(descriptor, &flock);
3873 			if (status < B_OK)
3874 				break;
3875 
3876 			if (flock.l_type == F_UNLCK)
3877 				status = release_advisory_lock(descriptor->u.vnode, &flock);
3878 			else {
3879 				// the open mode must match the lock type
				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK)
					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK))
3882 					status = B_FILE_ERROR;
3883 				else
3884 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
3885 			}
3886 			break;
3887 
3888 		// ToDo: add support for more ops?
3889 
3890 		default:
3891 			status = B_BAD_VALUE;
3892 	}
3893 
3894 	put_fd(descriptor);
3895 	return status;
3896 }
3897 
3898 
3899 static status_t
3900 common_sync(int fd, bool kernel)
3901 {
3902 	struct file_descriptor *descriptor;
3903 	struct vnode *vnode;
3904 	status_t status;
3905 
3906 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
3907 
3908 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
3909 	if (descriptor == NULL)
3910 		return B_FILE_ERROR;
3911 
3912 	if (FS_CALL(vnode, fsync) != NULL)
3913 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
3914 	else
3915 		status = EOPNOTSUPP;
3916 
3917 	put_fd(descriptor);
3918 	return status;
3919 }
3920 
3921 
3922 static status_t
3923 common_lock_node(int fd, bool kernel)
3924 {
3925 	// TODO: Implement!
3926 	return EOPNOTSUPP;
3927 }
3928 
3929 
3930 static status_t
3931 common_unlock_node(int fd, bool kernel)
3932 {
3933 	// TODO: Implement!
3934 	return EOPNOTSUPP;
3935 }
3936 
3937 
3938 static status_t
3939 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
3940 	bool kernel)
3941 {
3942 	struct vnode *vnode;
3943 	status_t status;
3944 
3945 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
3946 	if (status < B_OK)
3947 		return status;
3948 
3949 	if (FS_CALL(vnode, read_link) != NULL) {
3950 		status = FS_CALL(vnode, read_link)(vnode->mount->cookie,
3951 			vnode->private_node, buffer, _bufferSize);
3952 	} else
3953 		status = B_BAD_VALUE;
3954 
3955 	put_vnode(vnode);
3956 	return status;
3957 }
3958 
3959 
3960 static status_t
3961 common_write_link(char *path, char *toPath, bool kernel)
3962 {
3963 	struct vnode *vnode;
3964 	status_t status;
3965 
3966 	status = path_to_vnode(path, false, &vnode, NULL, kernel);
3967 	if (status < B_OK)
3968 		return status;
3969 
3970 	if (FS_CALL(vnode, write_link) != NULL)
3971 		status = FS_CALL(vnode, write_link)(vnode->mount->cookie, vnode->private_node, toPath);
3972 	else
3973 		status = EOPNOTSUPP;
3974 
3975 	put_vnode(vnode);
3976 
3977 	return status;
3978 }
3979 
3980 
3981 static status_t
3982 common_create_symlink(int fd, char *path, const char *toPath, int mode,
3983 	bool kernel)
3984 {
3985 	// path validity checks have to be in the calling function!
3986 	char name[B_FILE_NAME_LENGTH];
3987 	struct vnode *vnode;
3988 	status_t status;
3989 
3990 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
3991 
3992 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
3993 	if (status < B_OK)
3994 		return status;
3995 
3996 	if (FS_CALL(vnode, create_symlink) != NULL)
3997 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
3998 	else
3999 		status = EROFS;
4000 
4001 	put_vnode(vnode);
4002 
4003 	return status;
4004 }
4005 
4006 
4007 static status_t
4008 common_create_link(char *path, char *toPath, bool kernel)
4009 {
4010 	// path validity checks have to be in the calling function!
4011 	char name[B_FILE_NAME_LENGTH];
4012 	struct vnode *directory, *vnode;
4013 	status_t status;
4014 
4015 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4016 
4017 	status = path_to_dir_vnode(path, &directory, name, kernel);
4018 	if (status < B_OK)
4019 		return status;
4020 
4021 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4022 	if (status < B_OK)
4023 		goto err;
4024 
4025 	if (directory->mount != vnode->mount) {
4026 		status = B_CROSS_DEVICE_LINK;
4027 		goto err1;
4028 	}
4029 
4030 	if (FS_CALL(vnode, link) != NULL)
4031 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4032 	else
4033 		status = EROFS;
4034 
4035 err1:
4036 	put_vnode(vnode);
4037 err:
4038 	put_vnode(directory);
4039 
4040 	return status;
4041 }
4042 
4043 
4044 static status_t
4045 common_unlink(int fd, char *path, bool kernel)
4046 {
4047 	char filename[B_FILE_NAME_LENGTH];
4048 	struct vnode *vnode;
4049 	status_t status;
4050 
4051 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4052 
4053 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4054 	if (status < 0)
4055 		return status;
4056 
4057 	if (FS_CALL(vnode, unlink) != NULL)
4058 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4059 	else
4060 		status = EROFS;
4061 
4062 	put_vnode(vnode);
4063 
4064 	return status;
4065 }
4066 
4067 
4068 static status_t
4069 common_access(char *path, int mode, bool kernel)
4070 {
4071 	struct vnode *vnode;
4072 	status_t status;
4073 
4074 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4075 	if (status < B_OK)
4076 		return status;
4077 
4078 	if (FS_CALL(vnode, access) != NULL)
4079 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4080 	else
4081 		status = B_OK;
4082 
4083 	put_vnode(vnode);
4084 
4085 	return status;
4086 }
4087 
4088 
4089 static status_t
4090 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4091 {
4092 	struct vnode *fromVnode, *toVnode;
4093 	char fromName[B_FILE_NAME_LENGTH];
4094 	char toName[B_FILE_NAME_LENGTH];
4095 	status_t status;
4096 
4097 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4098 
4099 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4100 	if (status < 0)
4101 		return status;
4102 
4103 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4104 	if (status < 0)
4105 		goto err;
4106 
4107 	if (fromVnode->device != toVnode->device) {
4108 		status = B_CROSS_DEVICE_LINK;
4109 		goto err1;
4110 	}
4111 
4112 	if (FS_CALL(fromVnode, rename) != NULL)
4113 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4114 	else
4115 		status = EROFS;
4116 
4117 err1:
4118 	put_vnode(toVnode);
4119 err:
4120 	put_vnode(fromVnode);
4121 
4122 	return status;
4123 }
4124 
4125 
4126 static status_t
4127 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4128 {
4129 	struct vnode *vnode = descriptor->u.vnode;
4130 
4131 	FUNCTION(("common_read_stat: stat %p\n", stat));
4132 
4133 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4134 		vnode->private_node, stat);
4135 
4136 	// fill in the st_dev and st_ino fields
4137 	if (status == B_OK) {
4138 		stat->st_dev = vnode->device;
4139 		stat->st_ino = vnode->id;
4140 	}
4141 
4142 	return status;
4143 }
4144 
4145 
4146 static status_t
4147 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4148 {
4149 	struct vnode *vnode = descriptor->u.vnode;
4150 
4151 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4152 	if (!FS_CALL(vnode, write_stat))
4153 		return EROFS;
4154 
4155 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4156 }
4157 
4158 
4159 static status_t
4160 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4161 	struct stat *stat, bool kernel)
4162 {
4163 	struct vnode *vnode;
4164 	status_t status;
4165 
4166 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4167 
4168 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4169 	if (status < 0)
4170 		return status;
4171 
4172 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4173 
4174 	// fill in the st_dev and st_ino fields
4175 	if (status == B_OK) {
4176 		stat->st_dev = vnode->device;
4177 		stat->st_ino = vnode->id;
4178 	}
4179 
4180 	put_vnode(vnode);
4181 	return status;
4182 }
4183 
4184 
4185 static status_t
4186 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4187 	const struct stat *stat, int statMask, bool kernel)
4188 {
4189 	struct vnode *vnode;
4190 	status_t status;
4191 
4192 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4193 
4194 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4195 	if (status < 0)
4196 		return status;
4197 
4198 	if (FS_CALL(vnode, write_stat))
4199 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4200 	else
4201 		status = EROFS;
4202 
4203 	put_vnode(vnode);
4204 
4205 	return status;
4206 }
4207 
4208 
4209 static int
4210 attr_dir_open(int fd, char *path, bool kernel)
4211 {
4212 	struct vnode *vnode;
4213 	int status;
4214 
4215 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4216 
4217 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4218 	if (status < B_OK)
4219 		return status;
4220 
4221 	status = open_attr_dir_vnode(vnode, kernel);
4222 	if (status < 0)
4223 		put_vnode(vnode);
4224 
4225 	return status;
4226 }
4227 
4228 
4229 static status_t
4230 attr_dir_close(struct file_descriptor *descriptor)
4231 {
4232 	struct vnode *vnode = descriptor->u.vnode;
4233 
4234 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4235 
4236 	if (FS_CALL(vnode, close_attr_dir))
4237 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4238 
4239 	return B_OK;
4240 }
4241 
4242 
4243 static void
4244 attr_dir_free_fd(struct file_descriptor *descriptor)
4245 {
4246 	struct vnode *vnode = descriptor->u.vnode;
4247 
4248 	if (vnode != NULL) {
4249 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4250 		put_vnode(vnode);
4251 	}
4252 }
4253 
4254 
4255 static status_t
4256 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4257 {
4258 	struct vnode *vnode = descriptor->u.vnode;
4259 
4260 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
4261 
4262 	if (FS_CALL(vnode, read_attr_dir))
4263 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
4264 
4265 	return EOPNOTSUPP;
4266 }
4267 
4268 
4269 static status_t
4270 attr_dir_rewind(struct file_descriptor *descriptor)
4271 {
4272 	struct vnode *vnode = descriptor->u.vnode;
4273 
4274 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
4275 
4276 	if (FS_CALL(vnode, rewind_attr_dir))
4277 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4278 
4279 	return EOPNOTSUPP;
4280 }
4281 
4282 
4283 static int
4284 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
4285 {
4286 	struct vnode *vnode;
4287 	fs_cookie cookie;
4288 	int status;
4289 
4290 	if (name == NULL || *name == '\0')
4291 		return B_BAD_VALUE;
4292 
4293 	vnode = get_vnode_from_fd(fd, kernel);
4294 	if (vnode == NULL)
4295 		return B_FILE_ERROR;
4296 
4297 	if (FS_CALL(vnode, create_attr) == NULL) {
4298 		status = EROFS;
4299 		goto err;
4300 	}
4301 
4302 	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
4303 	if (status < B_OK)
4304 		goto err;
4305 
4306 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4307 		return status;
4308 
4309 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4310 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4311 
4312 	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4313 
4314 err:
4315 	put_vnode(vnode);
4316 
4317 	return status;
4318 }
4319 
4320 
4321 static int
4322 attr_open(int fd, const char *name, int openMode, bool kernel)
4323 {
4324 	struct vnode *vnode;
4325 	fs_cookie cookie;
4326 	int status;
4327 
4328 	if (name == NULL || *name == '\0')
4329 		return B_BAD_VALUE;
4330 
4331 	vnode = get_vnode_from_fd(fd, kernel);
4332 	if (vnode == NULL)
4333 		return B_FILE_ERROR;
4334 
4335 	if (FS_CALL(vnode, open_attr) == NULL) {
4336 		status = EOPNOTSUPP;
4337 		goto err;
4338 	}
4339 
4340 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
4341 	if (status < B_OK)
4342 		goto err;
4343 
4344 	// now we only need a file descriptor for this attribute and we're done
4345 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
4346 		return status;
4347 
4348 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
4349 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4350 
4351 err:
4352 	put_vnode(vnode);
4353 
4354 	return status;
4355 }
4356 
4357 
4358 static status_t
4359 attr_close(struct file_descriptor *descriptor)
4360 {
4361 	struct vnode *vnode = descriptor->u.vnode;
4362 
4363 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
4364 
4365 	if (FS_CALL(vnode, close_attr))
4366 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4367 
4368 	return B_OK;
4369 }
4370 
4371 
4372 static void
4373 attr_free_fd(struct file_descriptor *descriptor)
4374 {
4375 	struct vnode *vnode = descriptor->u.vnode;
4376 
4377 	if (vnode != NULL) {
4378 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4379 		put_vnode(vnode);
4380 	}
4381 }
4382 
4383 
4384 static status_t
4385 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4386 {
4387 	struct vnode *vnode = descriptor->u.vnode;
4388 
4389 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4390 	if (!FS_CALL(vnode, read_attr))
4391 		return EOPNOTSUPP;
4392 
4393 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4394 }
4395 
4396 
4397 static status_t
4398 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4399 {
4400 	struct vnode *vnode = descriptor->u.vnode;
4401 
4402 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4403 	if (!FS_CALL(vnode, write_attr))
4404 		return EOPNOTSUPP;
4405 
4406 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4407 }
4408 
4409 
4410 static off_t
4411 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4412 {
4413 	off_t offset;
4414 
4415 	switch (seekType) {
4416 		case SEEK_SET:
4417 			offset = 0;
4418 			break;
4419 		case SEEK_CUR:
4420 			offset = descriptor->pos;
4421 			break;
4422 		case SEEK_END:
4423 		{
4424 			struct vnode *vnode = descriptor->u.vnode;
4425 			struct stat stat;
4426 			status_t status;
4427 
			if (FS_CALL(vnode, read_attr_stat) == NULL)
4429 				return EOPNOTSUPP;
4430 
4431 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
4432 			if (status < B_OK)
4433 				return status;
4434 
4435 			offset = stat.st_size;
4436 			break;
4437 		}
4438 		default:
4439 			return B_BAD_VALUE;
4440 	}
4441 
4442 	// assumes off_t is 64 bits wide
4443 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4444 		return EOVERFLOW;
4445 
4446 	pos += offset;
4447 	if (pos < 0)
4448 		return B_BAD_VALUE;
4449 
4450 	return descriptor->pos = pos;
4451 }
4452 
4453 
4454 static status_t
4455 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4456 {
4457 	struct vnode *vnode = descriptor->u.vnode;
4458 
4459 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
4460 
4461 	if (!FS_CALL(vnode, read_attr_stat))
4462 		return EOPNOTSUPP;
4463 
4464 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4465 }
4466 
4467 
4468 static status_t
4469 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4470 {
4471 	struct vnode *vnode = descriptor->u.vnode;
4472 
4473 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
4474 
4475 	if (!FS_CALL(vnode, write_attr_stat))
4476 		return EROFS;
4477 
4478 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
4479 }
4480 
4481 
4482 static status_t
4483 attr_remove(int fd, const char *name, bool kernel)
4484 {
4485 	struct file_descriptor *descriptor;
4486 	struct vnode *vnode;
4487 	status_t status;
4488 
4489 	if (name == NULL || *name == '\0')
4490 		return B_BAD_VALUE;
4491 
4492 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
4493 
4494 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4495 	if (descriptor == NULL)
4496 		return B_FILE_ERROR;
4497 
4498 	if (FS_CALL(vnode, remove_attr))
4499 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4500 	else
4501 		status = EROFS;
4502 
4503 	put_fd(descriptor);
4504 
4505 	return status;
4506 }
4507 
4508 
4509 static status_t
4510 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
4511 {
4512 	struct file_descriptor *fromDescriptor, *toDescriptor;
4513 	struct vnode *fromVnode, *toVnode;
4514 	status_t status;
4515 
4516 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
4517 		return B_BAD_VALUE;
4518 
4519 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
4520 
4521 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
4522 	if (fromDescriptor == NULL)
4523 		return B_FILE_ERROR;
4524 
4525 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
4526 	if (toDescriptor == NULL) {
4527 		status = B_FILE_ERROR;
4528 		goto err;
4529 	}
4530 
4531 	// are the files on the same volume?
4532 	if (fromVnode->device != toVnode->device) {
4533 		status = B_CROSS_DEVICE_LINK;
4534 		goto err1;
4535 	}
4536 
4537 	if (FS_CALL(fromVnode, rename_attr))
4538 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4539 	else
4540 		status = EROFS;
4541 
4542 err1:
4543 	put_fd(toDescriptor);
4544 err:
4545 	put_fd(fromDescriptor);
4546 
4547 	return status;
4548 }
4549 
4550 
4551 static status_t
4552 index_dir_open(mount_id mountID, bool kernel)
4553 {
4554 	struct fs_mount *mount;
4555 	fs_cookie cookie;
4556 
4557 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
4558 
4559 	status_t status = get_mount(mountID, &mount);
4560 	if (status < B_OK)
4561 		return status;
4562 
4563 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
4564 		status = EOPNOTSUPP;
4565 		goto out;
4566 	}
4567 
4568 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
4569 	if (status < B_OK)
4570 		goto out;
4571 
4572 	// get fd for the index directory
4573 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
4574 	if (status >= 0)
4575 		goto out;
4576 
4577 	// something went wrong
4578 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
4579 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
4580 
4581 out:
4582 	put_mount(mount);
4583 	return status;
4584 }
4585 
4586 
4587 static status_t
4588 index_dir_close(struct file_descriptor *descriptor)
4589 {
4590 	struct fs_mount *mount = descriptor->u.mount;
4591 
4592 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
4593 
4594 	if (FS_MOUNT_CALL(mount, close_index_dir))
4595 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
4596 
4597 	return B_OK;
4598 }
4599 
4600 
4601 static void
4602 index_dir_free_fd(struct file_descriptor *descriptor)
4603 {
4604 	struct fs_mount *mount = descriptor->u.mount;
4605 
4606 	if (mount != NULL) {
4607 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
4608 		// ToDo: find a replacement ref_count object - perhaps the root dir?
4609 		//put_vnode(vnode);
4610 	}
4611 }
4612 
4613 
4614 static status_t
4615 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4616 {
4617 	struct fs_mount *mount = descriptor->u.mount;
4618 
4619 	if (FS_MOUNT_CALL(mount, read_index_dir))
4620 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
4621 
4622 	return EOPNOTSUPP;
4623 }
4624 
4625 
4626 static status_t
4627 index_dir_rewind(struct file_descriptor *descriptor)
4628 {
4629 	struct fs_mount *mount = descriptor->u.mount;
4630 
4631 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
4632 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
4633 
4634 	return EOPNOTSUPP;
4635 }
4636 
4637 
4638 static status_t
4639 index_create(mount_id mountID, const char *name, uint32 type, uint32 flags, bool kernel)
4640 {
4641 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4642 
4643 	struct fs_mount *mount;
4644 	status_t status = get_mount(mountID, &mount);
4645 	if (status < B_OK)
4646 		return status;
4647 
4648 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
4649 		status = EROFS;
4650 		goto out;
4651 	}
4652 
4653 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
4654 
4655 out:
4656 	put_mount(mount);
4657 	return status;
4658 }
4659 
4660 
4661 #if 0
4662 static status_t
4663 index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4664 {
4665 	struct vnode *vnode = descriptor->u.vnode;
4666 
4667 	// ToDo: currently unused!
4668 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
4669 	if (!FS_CALL(vnode, read_index_stat))
4670 		return EOPNOTSUPP;
4671 
4672 	return EOPNOTSUPP;
4673 	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4674 }
4675 
4676 
4677 static void
4678 index_free_fd(struct file_descriptor *descriptor)
4679 {
4680 	struct vnode *vnode = descriptor->u.vnode;
4681 
4682 	if (vnode != NULL) {
4683 		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4684 		put_vnode(vnode);
4685 	}
4686 }
4687 #endif
4688 
4689 
4690 static status_t
4691 index_name_read_stat(mount_id mountID, const char *name, struct stat *stat, bool kernel)
4692 {
4693 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4694 
4695 	struct fs_mount *mount;
4696 	status_t status = get_mount(mountID, &mount);
4697 	if (status < B_OK)
4698 		return status;
4699 
4700 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
4701 		status = EOPNOTSUPP;
4702 		goto out;
4703 	}
4704 
4705 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
4706 
4707 out:
4708 	put_mount(mount);
4709 	return status;
4710 }
4711 
4712 
4713 static status_t
4714 index_remove(mount_id mountID, const char *name, bool kernel)
4715 {
4716 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4717 
4718 	struct fs_mount *mount;
4719 	status_t status = get_mount(mountID, &mount);
4720 	if (status < B_OK)
4721 		return status;
4722 
4723 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
4724 		status = EROFS;
4725 		goto out;
4726 	}
4727 
4728 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
4729 
4730 out:
4731 	put_mount(mount);
4732 	return status;
4733 }
4734 
4735 
/**	ToDo: the query FS API is still pretty much the same as in R5.
 *		It would be nice if queries would get some more kernel support;
 *		for example, query parsing should be moved into the kernel.
 */
4741 
4742 static int
4743 query_open(dev_t device, const char *query, uint32 flags,
4744 	port_id port, int32 token, bool kernel)
4745 {
4746 	struct fs_mount *mount;
4747 	fs_cookie cookie;
4748 
4749 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
4750 
4751 	status_t status = get_mount(device, &mount);
4752 	if (status < B_OK)
4753 		return status;
4754 
4755 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
4756 		status = EOPNOTSUPP;
4757 		goto out;
4758 	}
4759 
4760 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
4761 	if (status < B_OK)
4762 		goto out;
4763 
	// get fd for the query
4765 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
4766 	if (status >= 0)
4767 		goto out;
4768 
4769 	// something went wrong
4770 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
4771 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
4772 
4773 out:
4774 	put_mount(mount);
4775 	return status;
4776 }
4777 
4778 
4779 static status_t
4780 query_close(struct file_descriptor *descriptor)
4781 {
4782 	struct fs_mount *mount = descriptor->u.mount;
4783 
4784 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
4785 
4786 	if (FS_MOUNT_CALL(mount, close_query))
4787 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
4788 
4789 	return B_OK;
4790 }
4791 
4792 
4793 static void
4794 query_free_fd(struct file_descriptor *descriptor)
4795 {
4796 	struct fs_mount *mount = descriptor->u.mount;
4797 
4798 	if (mount != NULL) {
4799 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
4800 		// ToDo: find a replacement ref_count object - perhaps the root dir?
4801 		//put_vnode(vnode);
4802 	}
4803 }
4804 
4805 
4806 static status_t
4807 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4808 {
4809 	struct fs_mount *mount = descriptor->u.mount;
4810 
4811 	if (FS_MOUNT_CALL(mount, read_query))
4812 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
4813 
4814 	return EOPNOTSUPP;
4815 }
4816 
4817 
4818 static status_t
4819 query_rewind(struct file_descriptor *descriptor)
4820 {
4821 	struct fs_mount *mount = descriptor->u.mount;
4822 
4823 	if (FS_MOUNT_CALL(mount, rewind_query))
4824 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
4825 
4826 	return EOPNOTSUPP;
4827 }
4828 
4829 
4830 //	#pragma mark -
4831 //	General File System functions
4832 
4833 
4834 static dev_t
4835 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
4836 	const char *args, bool kernel)
4837 {
4838 	struct fs_mount *mount;
4839 	status_t status = 0;
4840 
4841 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
4842 
	// The path is always safe, we just have to make sure that fsName is
	// at least superficially valid - we can't make any assumptions about
	// args, though.
4845 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
4846 	// We'll get it from the DDM later.
4847 	if (fsName == NULL) {
4848 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
4849 			return B_BAD_VALUE;
4850 	} else if (fsName[0] == '\0')
4851 		return B_BAD_VALUE;
4852 
4853 	RecursiveLocker mountOpLocker(sMountOpLock);
4854 
4855 	// Helper to delete a newly created file device on failure.
4856 	// Not exactly beautiful, but helps to keep the code below cleaner.
4857 	struct FileDeviceDeleter {
4858 		FileDeviceDeleter() : id(-1) {}
4859 		~FileDeviceDeleter()
4860 		{
4861 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
4862 		}
4863 
4864 		partition_id id;
4865 	} fileDeviceDeleter;
4866 
4867 	// If the file system is not a "virtual" one, the device argument should
4868 	// point to a real file/device (if given at all).
4869 	// get the partition
4870 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
4871 	KPartition *partition = NULL;
4872 	bool newlyCreatedFileDevice = false;
4873 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
4874 		// normalize the device path
4875 		KPath normalizedDevice;
4876 		status = normalizedDevice.SetTo(device, true);
4877 		if (status != B_OK)
4878 			return status;
4879 
4880 		// get a corresponding partition from the DDM
4881 		partition = ddm->RegisterPartition(normalizedDevice.Path(), true);
4882 
4883 		if (!partition) {
4884 			// Partition not found: This either means, the user supplied
4885 			// an invalid path, or the path refers to an image file. We try
4886 			// to let the DDM create a file device for the path.
4887 			partition_id deviceID = ddm->CreateFileDevice(
4888 				normalizedDevice.Path(), &newlyCreatedFileDevice);
4889 			if (deviceID >= 0) {
4890 				partition = ddm->RegisterPartition(deviceID, true);
4891 				if (newlyCreatedFileDevice)
4892 					fileDeviceDeleter.id = deviceID;
4893 // TODO: We must wait here, until the partition scan job is done.
4894 			}
4895 		}
4896 
4897 		if (!partition) {
4898 			TRACE(("fs_mount(): Partition `%s' not found.\n",
4899 				normalizedDevice.Path()));
4900 			return B_ENTRY_NOT_FOUND;
4901 		}
4902 	}
4903 	PartitionRegistrar partitionRegistrar(partition, true);
4904 
4905 	// Write lock the partition's device. For the time being, we keep the lock
	// until we're done mounting -- not nice, but it ensures that no one is
4907 	// interfering.
4908 	// TODO: Find a better solution.
4909 	KDiskDevice *diskDevice = NULL;
4910 	if (partition) {
4911 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
4912 		if (!diskDevice) {
4913 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
4914 			return B_ERROR;
4915 		}
4916 	}
4917 	DeviceWriteLocker writeLocker(diskDevice, true);
4918 
4919 	if (partition) {
		// make sure that the partition is not busy
4921 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
4922 			TRACE(("fs_mount(): Partition is busy.\n"));
4923 			return B_BUSY;
4924 		}
4925 
4926 		// if no FS name had been supplied, we get it from the partition
4927 		if (!fsName) {
4928 			KDiskSystem *diskSystem = partition->DiskSystem();
4929 			if (!diskSystem) {
4930 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
4931 					"recognize it.\n"));
4932 				return B_BAD_VALUE;
4933 			}
4934 
4935 			if (!diskSystem->IsFileSystem()) {
4936 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
4937 					"partitioning system.\n"));
4938 				return B_BAD_VALUE;
4939 			}
4940 
4941 			// The disk system name will not change, and the KDiskSystem
4942 			// object will not go away while the disk device is locked (and
4943 			// the partition has a reference to it), so this is safe.
4944 			fsName = diskSystem->Name();
4945 		}
4946 	}
4947 
4948 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
4949 	if (mount == NULL)
4950 		return B_NO_MEMORY;
4951 
4952 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
4953 
4954 	mount->fs_name = get_file_system_name(fsName);
4955 	if (mount->fs_name == NULL) {
4956 		status = B_NO_MEMORY;
4957 		goto err1;
4958 	}
4959 
4960 	mount->device_name = strdup(device);
4961 		// "device" can be NULL
4962 
4963 	mount->fs = get_file_system(fsName);
4964 	if (mount->fs == NULL) {
4965 		status = ENODEV;
4966 		goto err3;
4967 	}
4968 
4969 	status = recursive_lock_init(&mount->rlock, "mount rlock");
4970 	if (status < B_OK)
4971 		goto err4;
4972 
4973 	// initialize structure
4974 	mount->id = sNextMountID++;
4975 	mount->partition = NULL;
4976 	mount->root_vnode = NULL;
4977 	mount->covers_vnode = NULL;
4978 	mount->cookie = NULL;
4979 	mount->unmounting = false;
4980 	mount->owns_file_device = false;
4981 
4982 	// insert mount struct into list before we call FS's mount() function
4983 	// so that vnodes can be created for this mount
4984 	mutex_lock(&sMountMutex);
4985 	hash_insert(sMountsTable, mount);
4986 	mutex_unlock(&sMountMutex);
4987 
4988 	vnode_id rootID;
4989 
4990 	if (!sRoot) {
4991 		// we haven't mounted anything yet
4992 		if (strcmp(path, "/") != 0) {
4993 			status = B_ERROR;
4994 			goto err5;
4995 		}
4996 
4997 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
4998 		if (status < 0) {
4999 			// ToDo: why should we hide the error code from the file system here?
5000 			//status = ERR_VFS_GENERAL;
5001 			goto err5;
5002 		}
5003 	} else {
5004 		struct vnode *coveredVnode;
5005 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5006 		if (status < B_OK)
5007 			goto err5;
5008 
		// make sure the covered vnode is a directory
5010 		struct stat coveredNodeStat;
5011 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5012 			coveredVnode->private_node, &coveredNodeStat);
5013 		if (status < B_OK)
5014 			goto err5;
5015 
5016 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5017 			status = B_NOT_A_DIRECTORY;
5018 			goto err5;
5019 		}
5020 
5021 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5022 			// this is already a mount point
5023 			status = B_BUSY;
5024 			goto err5;
5025 		}
5026 
5027 		mount->covers_vnode = coveredVnode;
5028 
5029 		// mount it
5030 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5031 		if (status < B_OK)
5032 			goto err6;
5033 	}
5034 
5035 	// the root node is supposed to be owned by the file system - it must
5036 	// exist at this point
5037 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5038 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5039 		panic("fs_mount: file system does not own its root node!\n");
5040 		status = B_ERROR;
5041 		goto err7;
5042 	}
5043 
5044 	// No race here, since fs_mount() is the only function changing
5045 	// covers_vnode (and holds sMountOpLock at that time).
5046 	if (mount->covers_vnode)
5047 		mount->covers_vnode->covered_by = mount->root_vnode;
5048 
5049 	if (!sRoot)
5050 		sRoot = mount->root_vnode;
5051 
5052 	// supply the partition (if any) with the mount cookie and mark it mounted
5053 	if (partition) {
5054 		partition->SetMountCookie(mount->cookie);
5055 		partition->SetVolumeID(mount->id);
5056 
5057 		// keep a partition reference as long as the partition is mounted
5058 		partitionRegistrar.Detach();
5059 		mount->partition = partition;
5060 		mount->owns_file_device = newlyCreatedFileDevice;
5061 		fileDeviceDeleter.id = -1;
5062 	}
5063 
5064 	return mount->id;
5065 
5066 err7:
5067 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5068 err6:
5069 	if (mount->covers_vnode)
5070 		put_vnode(mount->covers_vnode);
5071 err5:
5072 	mutex_lock(&sMountMutex);
5073 	hash_remove(sMountsTable, mount);
5074 	mutex_unlock(&sMountMutex);
5075 
5076 	recursive_lock_destroy(&mount->rlock);
err4:
	put_file_system(mount->fs);
err3:
	free(mount->device_name);
		// must be freed on this path, too, or it would leak when
		// get_file_system() failed
	free(mount->fs_name);
5082 err1:
5083 	free(mount);
5084 
5085 	return status;
5086 }
5087 
5088 
5089 static status_t
5090 fs_unmount(char *path, uint32 flags, bool kernel)
5091 {
5092 	struct fs_mount *mount;
5093 	struct vnode *vnode;
5094 	status_t err;
5095 
5096 	FUNCTION(("vfs_unmount: entry. path = '%s', kernel %d\n", path, kernel));
5097 
5098 	err = path_to_vnode(path, true, &vnode, NULL, kernel);
5099 	if (err < 0)
5100 		return B_ENTRY_NOT_FOUND;
5101 
5102 	RecursiveLocker mountOpLocker(sMountOpLock);
5103 
5104 	mount = find_mount(vnode->device);
5105 	if (!mount)
5106 		panic("vfs_unmount: find_mount() failed on root vnode @%p of mount\n", vnode);
5107 
5108 	if (mount->root_vnode != vnode) {
		// not a mount point
5110 		put_vnode(vnode);
5111 		return B_BAD_VALUE;
5112 	}
5113 
5114 	// if the volume is associated with a partition, lock the device of the
5115 	// partition as long as we are unmounting
5116 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
5117 	KPartition *partition = mount->partition;
5118 	KDiskDevice *diskDevice = NULL;
5119 	if (partition) {
5120 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5121 		if (!diskDevice) {
5122 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
5123 			return B_ERROR;
5124 		}
5125 	}
5126 	DeviceWriteLocker writeLocker(diskDevice, true);
5127 
	// make sure that the partition is not busy
5129 	if (partition) {
5130 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5131 			TRACE(("fs_unmount(): Partition is busy.\n"));
5132 			return B_BUSY;
5133 		}
5134 	}
5135 
5136 	// grab the vnode master mutex to keep someone from creating
5137 	// a vnode while we're figuring out if we can continue
5138 	mutex_lock(&sVnodeMutex);
5139 
5140 	// simplify the loop below: we decrement the root vnode ref_count
5141 	// by the known number of references: one for the file system, one
5142 	// from the path_to_vnode() call above
5143 	mount->root_vnode->ref_count -= 2;
5144 
5145 	bool disconnectedDescriptors = false;
5146 
5147 	while (true) {
5148 		bool busy = false;
5149 
5150 		// cycle through the list of vnodes associated with this mount and
5151 		// make sure all of them are not busy or have refs on them
5152 		vnode = NULL;
5153 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5154 			if (vnode->busy || vnode->ref_count != 0) {
5155 				// there are still vnodes in use on this mount, so we cannot
5156 				// unmount yet
5157 				busy = true;
5158 				break;
5159 			}
5160 		}
5161 
5162 		if (!busy)
5163 			break;
5164 
5165 		if ((flags & B_FORCE_UNMOUNT) == 0) {
5166 			mount->root_vnode->ref_count += 2;
5167 			mutex_unlock(&sVnodeMutex);
5168 			put_vnode(mount->root_vnode);
5169 
5170 			return B_BUSY;
5171 		}
5172 
5173 		if (disconnectedDescriptors) {
5174 			// wait a bit until the last access is finished, and then try again
5175 			mutex_unlock(&sVnodeMutex);
5176 			snooze(100000);
5177 			mutex_lock(&sVnodeMutex);
5178 			continue;
5179 		}
5180 
5181 		// the file system is still busy - but we're forced to unmount it,
5182 		// so let's disconnect all open file descriptors
5183 
5184 		mount->unmounting = true;
5185 			// prevent new vnodes from being created
5186 
5187 		mutex_unlock(&sVnodeMutex);
5188 
5189 		// iterate over all teams and peek into their file descriptors
5190 
5191 		int32 nextTeamID = 0;
5192 
5193 		while (true) {
5194 			struct io_context *context = NULL;
5195 			sem_id contextMutex = -1;
5196 			struct team *team = NULL;
5197 			team_id lastTeamID;
5198 
5199 			cpu_status state = disable_interrupts();
5200 			GRAB_TEAM_LOCK();
5201 
5202 			lastTeamID = peek_next_thread_id();
5203 			if (nextTeamID < lastTeamID) {
5204 				// get next valid team
5205 				while (nextTeamID < lastTeamID
5206 					&& !(team = team_get_team_struct_locked(nextTeamID))) {
5207 					nextTeamID++;
5208 				}
5209 
5210 				if (team) {
5211 					context = (io_context *)team->io_context;
5212 					contextMutex = context->io_mutex.sem;
5213 					nextTeamID++;
5214 				}
5215 			}
5216 
5217 			RELEASE_TEAM_LOCK();
5218 			restore_interrupts(state);
5219 
5220 			if (context == NULL)
5221 				break;
5222 
5223 			// we now have a context - since we couldn't lock it while having
5224 			// safe access to the team structure, we now need to lock the mutex
5225 			// manually
5226 
5227 			if (acquire_sem(contextMutex) != B_OK) {
5228 				// team seems to be gone, go over to the next team
5229 				continue;
5230 			}
5231 
5232 			// the team cannot be deleted completely while we're owning its
5233 			// io_context mutex, so we can safely play with it now
5234 
5235 			context->io_mutex.holder = thread_get_current_thread_id();
5236 
5237 			if (context->cwd != NULL && context->cwd->mount == mount) {
5238 				put_vnode(context->cwd);
5239 
5240 				if (context->cwd == mount->root_vnode) {
5241 					// redirect the current working directory to the covered vnode
5242 					context->cwd = mount->covers_vnode;
5243 					inc_vnode_ref_count(context->cwd);
5244 				} else
5245 					context->cwd = NULL;
5246 			}
5247 
5248 			for (uint32 i = 0; i < context->table_size; i++) {
5249 				if (struct file_descriptor *descriptor = context->fds[i]) {
5250 					inc_fd_ref_count(descriptor);
5251 
5252 					// if this descriptor points at this mount, we
5253 					// need to disconnect it to be able to unmount
5254 					vnode = fd_vnode(descriptor);
					if ((vnode != NULL && vnode->mount == mount)
						|| (vnode == NULL && descriptor->u.mount == mount))
5257 						disconnect_fd(descriptor);
5258 
5259 					put_fd(descriptor);
5260 				}
5261 			}
5262 
5263 			mutex_unlock(&context->io_mutex);
5264 		}
5265 
5266 		disconnectedDescriptors = true;
5267 		mutex_lock(&sVnodeMutex);
5268 	}
5269 
5270 	// we can safely continue, mark all of the vnodes busy and this mount
5271 	// structure in unmounting state
5272 	mount->unmounting = true;
5273 
5274 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5275 		vnode->busy = true;
5276 	}
5277 
5278 	mutex_unlock(&sVnodeMutex);
5279 
5280 	mount->covers_vnode->covered_by = NULL;
5281 	put_vnode(mount->covers_vnode);
5282 
5283 	// Free all vnodes associated with this mount.
5284 	// They will be removed from the mount list by free_vnode(), so
5285 	// we don't have to do this.
5286 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes)) != NULL) {
5287 		free_vnode(vnode, false);
5288 	}
5289 
5290 	// remove the mount structure from the hash table
5291 	mutex_lock(&sMountMutex);
5292 	hash_remove(sMountsTable, mount);
5293 	mutex_unlock(&sMountMutex);
5294 
5295 	mountOpLocker.Unlock();
5296 
5297 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5298 
5299 	// release the file system
5300 	put_file_system(mount->fs);
5301 
5302 	// dereference the partition and mark it unmounted
5303 	if (partition) {
5304 		partition->SetVolumeID(-1);
5305 		partition->SetMountCookie(NULL);
5306 
5307 		if (mount->owns_file_device)
5308 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
5309 		partition->Unregister();
5310 	}
5311 
5312 	free(mount->device_name);
5313 	free(mount->fs_name);
5314 	free(mount);
5315 
5316 	return B_OK;
5317 }
5318 
5319 
5320 static status_t
5321 fs_sync(dev_t device)
5322 {
5323 	struct fs_mount *mount;
5324 	status_t status = get_mount(device, &mount);
5325 	if (status < B_OK)
5326 		return status;
5327 
5328 	mutex_lock(&sMountMutex);
5329 
5330 	if (FS_MOUNT_CALL(mount, sync))
5331 		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);
5332 
5333 	mutex_unlock(&sMountMutex);
5334 
5335 	// synchronize all vnodes
5336 	recursive_lock_lock(&mount->rlock);
5337 
5338 	struct vnode *vnode = NULL;
5339 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5340 		if (vnode->cache)
5341 			vm_cache_write_modified(vnode->cache);
5342 	}
5343 
5344 	recursive_lock_unlock(&mount->rlock);
5345 	put_mount(mount);
5346 	return status;
5347 }
5348 
5349 
5350 static status_t
5351 fs_read_info(dev_t device, struct fs_info *info)
5352 {
5353 	struct fs_mount *mount;
5354 	status_t status = get_mount(device, &mount);
5355 	if (status < B_OK)
5356 		return status;
5357 
5358 	// fill in info the file system doesn't (have to) know about
5359 	memset(info, 0, sizeof(struct fs_info));
5360 	info->dev = mount->id;
5361 	info->root = mount->root_vnode->id;
5362 	strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
5363 	if (mount->device_name != NULL)
5364 		strlcpy(info->device_name, mount->device_name, sizeof(info->device_name));
5365 
5366 	if (FS_MOUNT_CALL(mount, read_fs_info))
5367 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
5368 
5369 	// if the call is not supported by the file system, there are still
5370 	// the parts that we filled out ourselves
5371 
5372 	put_mount(mount);
5373 	return status;
5374 }
5375 
5376 
5377 static status_t
5378 fs_write_info(dev_t device, const struct fs_info *info, int mask)
5379 {
5380 	struct fs_mount *mount;
5381 	status_t status = get_mount(device, &mount);
5382 	if (status < B_OK)
5383 		return status;
5384 
5385 	if (FS_MOUNT_CALL(mount, write_fs_info))
5386 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
5387 	else
5388 		status = EROFS;
5389 
5390 	put_mount(mount);
5391 	return status;
5392 }
5393 
5394 
5395 static dev_t
5396 fs_next_device(int32 *_cookie)
5397 {
5398 	struct fs_mount *mount = NULL;
5399 	dev_t device = *_cookie;
5400 
5401 	mutex_lock(&sMountMutex);
5402 
	// Since device IDs are assigned sequentially, this algorithm
	// works well enough. It makes sure that the device list
5405 	// returned is sorted, and that no device is skipped when an
5406 	// already visited device got unmounted.
5407 
5408 	while (device < sNextMountID) {
5409 		mount = find_mount(device++);
5410 		if (mount != NULL && mount->cookie != NULL)
5411 			break;
5412 	}
5413 
5414 	*_cookie = device;
5415 
5416 	if (mount != NULL)
5417 		device = mount->id;
5418 	else
5419 		device = B_BAD_VALUE;
5420 
5421 	mutex_unlock(&sMountMutex);
5422 
5423 	return device;
5424 }
5425 
5426 
5427 static status_t
5428 get_cwd(char *buffer, size_t size, bool kernel)
5429 {
5430 	// Get current working directory from io context
5431 	struct io_context *context = get_current_io_context(kernel);
5432 	status_t status;
5433 
5434 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
5435 
5436 	mutex_lock(&context->io_mutex);
5437 
5438 	if (context->cwd)
5439 		status = dir_vnode_to_path(context->cwd, buffer, size);
5440 	else
5441 		status = B_ERROR;
5442 
5443 	mutex_unlock(&context->io_mutex);
5444 	return status;
5445 }
5446 
5447 
5448 static status_t
5449 set_cwd(int fd, char *path, bool kernel)
5450 {
5451 	struct io_context *context;
5452 	struct vnode *vnode = NULL;
5453 	struct vnode *oldDirectory;
5454 	struct stat stat;
5455 	status_t status;
5456 
5457 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
5458 
5459 	// Get vnode for passed path, and bail if it failed
5460 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5461 	if (status < 0)
5462 		return status;
5463 
5464 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
5465 	if (status < 0)
5466 		goto err;
5467 
5468 	if (!S_ISDIR(stat.st_mode)) {
5469 		// nope, can't cwd to here
5470 		status = B_NOT_A_DIRECTORY;
5471 		goto err;
5472 	}
5473 
5474 	// Get current io context and lock
5475 	context = get_current_io_context(kernel);
5476 	mutex_lock(&context->io_mutex);
5477 
5478 	// save the old current working directory first
5479 	oldDirectory = context->cwd;
5480 	context->cwd = vnode;
5481 
5482 	mutex_unlock(&context->io_mutex);
5483 
5484 	if (oldDirectory)
5485 		put_vnode(oldDirectory);
5486 
5487 	return B_NO_ERROR;
5488 
5489 err:
5490 	put_vnode(vnode);
5491 	return status;
5492 }
5493 
5494 
5495 //	#pragma mark -
5496 //	Calls from within the kernel
5497 
5498 
5499 dev_t
5500 _kern_mount(const char *path, const char *device, const char *fsName,
5501 	uint32 flags, const char *args)
5502 {
5503 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5504 	if (pathBuffer.InitCheck() != B_OK)
5505 		return B_NO_MEMORY;
5506 
5507 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
5508 }
5509 
5510 
5511 status_t
5512 _kern_unmount(const char *path, uint32 flags)
5513 {
5514 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5515 	if (pathBuffer.InitCheck() != B_OK)
5516 		return B_NO_MEMORY;
5517 
5518 	return fs_unmount(pathBuffer.LockBuffer(), flags, true);
5519 }
5520 
5521 
5522 status_t
5523 _kern_read_fs_info(dev_t device, struct fs_info *info)
5524 {
5525 	if (info == NULL)
5526 		return B_BAD_VALUE;
5527 
5528 	return fs_read_info(device, info);
5529 }
5530 
5531 
5532 status_t
5533 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
5534 {
5535 	if (info == NULL)
5536 		return B_BAD_VALUE;
5537 
5538 	return fs_write_info(device, info, mask);
5539 }
5540 
5541 
5542 status_t
5543 _kern_sync(void)
5544 {
5545 	// Note: _kern_sync() is also called from _user_sync()
5546 	int32 cookie = 0;
5547 	dev_t device;
5548 	while ((device = next_dev(&cookie)) >= 0) {
5549 		status_t status = fs_sync(device);
5550 		if (status != B_OK && status != B_BAD_VALUE)
5551 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
5552 	}
5553 
5554 	return B_OK;
5555 }
5556 
5557 
5558 dev_t
5559 _kern_next_device(int32 *_cookie)
5560 {
5561 	return fs_next_device(_cookie);
5562 }
5563 
5564 
5565 status_t
5566 _kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
5567 	size_t infoSize)
5568 {
5569 	if (infoSize != sizeof(fd_info))
5570 		return B_BAD_VALUE;
5571 
5572 	struct io_context *context = NULL;
5573 	sem_id contextMutex = -1;
5574 	struct team *team = NULL;
5575 
5576 	cpu_status state = disable_interrupts();
5577 	GRAB_TEAM_LOCK();
5578 
5579 	team = team_get_team_struct_locked(teamID);
5580 	if (team) {
5581 		context = (io_context *)team->io_context;
5582 		contextMutex = context->io_mutex.sem;
5583 	}
5584 
5585 	RELEASE_TEAM_LOCK();
5586 	restore_interrupts(state);
5587 
5588 	// we now have a context - since we couldn't lock it while having
5589 	// safe access to the team structure, we now need to lock the mutex
5590 	// manually
5591 
5592 	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
		// team doesn't exist or seems to be gone
5594 		return B_BAD_TEAM_ID;
5595 	}
5596 
5597 	// the team cannot be deleted completely while we're owning its
5598 	// io_context mutex, so we can safely play with it now
5599 
5600 	context->io_mutex.holder = thread_get_current_thread_id();
5601 
5602 	uint32 slot = *_cookie;
5603 
5604 	struct file_descriptor *descriptor;
5605 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
5606 		slot++;
5607 
5608 	if (slot >= context->table_size) {
5609 		mutex_unlock(&context->io_mutex);
5610 		return B_ENTRY_NOT_FOUND;
5611 	}
5612 
5613 	info->number = slot;
5614 	info->open_mode = descriptor->open_mode;
5615 
5616 	struct vnode *vnode = fd_vnode(descriptor);
5617 	if (vnode != NULL) {
5618 		info->device = vnode->device;
5619 		info->node = vnode->id;
5620 	} else if (descriptor->u.mount != NULL) {
5621 		info->device = descriptor->u.mount->id;
5622 		info->node = -1;
5623 	}
5624 
5625 	mutex_unlock(&context->io_mutex);
5626 
5627 	*_cookie = slot + 1;
5628 	return B_OK;
5629 }
5630 
5631 
5632 int
5633 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
5634 {
5635 	if (openMode & O_CREAT)
5636 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
5637 
5638 	return file_open_entry_ref(device, inode, name, openMode, true);
5639 }
5640 
5641 
5642 /**	\brief Opens a node specified by a FD + path pair.
5643  *
5644  *	At least one of \a fd and \a path must be specified.
5645  *	If only \a fd is given, the function opens the node identified by this
5646  *	FD. If only a path is given, this path is opened. If both are given and
5647  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5648  *	of the directory (!) identified by \a fd.
5649  *
5650  *	\param fd The FD. May be < 0.
5651  *	\param path The absolute or relative path. May be \c NULL.
5652  *	\param openMode The open mode.
5653  *	\return A FD referring to the newly opened node, or an error code,
5654  *			if an error occurs.
5655  */
5656 
5657 int
5658 _kern_open(int fd, const char *path, int openMode, int perms)
5659 {
5660 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5661 	if (pathBuffer.InitCheck() != B_OK)
5662 		return B_NO_MEMORY;
5663 
5664 	if (openMode & O_CREAT)
5665 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
5666 
5667 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
5668 }
5669 
5670 
5671 /**	\brief Opens a directory specified by entry_ref or node_ref.
5672  *
5673  *	The supplied name may be \c NULL, in which case directory identified
5674  *	by \a device and \a inode will be opened. Otherwise \a device and
5675  *	\a inode identify the parent directory of the directory to be opened
5676  *	and \a name its entry name.
5677  *
5678  *	\param device If \a name is specified the ID of the device the parent
5679  *		   directory of the directory to be opened resides on, otherwise
5680  *		   the device of the directory itself.
5681  *	\param inode If \a name is specified the node ID of the parent
5682  *		   directory of the directory to be opened, otherwise node ID of the
5683  *		   directory itself.
5684  *	\param name The entry name of the directory to be opened. If \c NULL,
5685  *		   the \a device + \a inode pair identify the node to be opened.
5686  *	\return The FD of the newly opened directory or an error code, if
5687  *			something went wrong.
5688  */
5689 
5690 int
5691 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
5692 {
5693 	return dir_open_entry_ref(device, inode, name, true);
5694 }
5695 
5696 
5697 /**	\brief Opens a directory specified by a FD + path pair.
5698  *
5699  *	At least one of \a fd and \a path must be specified.
5700  *	If only \a fd is given, the function opens the directory identified by this
5701  *	FD. If only a path is given, this path is opened. If both are given and
5702  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5703  *	of the directory (!) identified by \a fd.
5704  *
5705  *	\param fd The FD. May be < 0.
5706  *	\param path The absolute or relative path. May be \c NULL.
5707  *	\return A FD referring to the newly opened directory, or an error code,
5708  *			if an error occurs.
5709  */
5710 
5711 int
5712 _kern_open_dir(int fd, const char *path)
5713 {
5714 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5715 	if (pathBuffer.InitCheck() != B_OK)
5716 		return B_NO_MEMORY;
5717 
5718 	return dir_open(fd, pathBuffer.LockBuffer(), true);
5719 }
5720 
5721 
5722 status_t
5723 _kern_fcntl(int fd, int op, uint32 argument)
5724 {
5725 	return common_fcntl(fd, op, argument, true);
5726 }
5727 
5728 
5729 status_t
5730 _kern_fsync(int fd)
5731 {
5732 	return common_sync(fd, true);
5733 }
5734 
5735 
5736 status_t
5737 _kern_lock_node(int fd)
5738 {
5739 	return common_lock_node(fd, true);
5740 }
5741 
5742 
5743 status_t
5744 _kern_unlock_node(int fd)
5745 {
5746 	return common_unlock_node(fd, true);
5747 }
5748 
5749 
5750 status_t
5751 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
5752 {
5753 	return dir_create_entry_ref(device, inode, name, perms, true);
5754 }
5755 
5756 
5757 /**	\brief Creates a directory specified by a FD + path pair.
5758  *
5759  *	\a path must always be specified (it contains the name of the new directory
5760  *	at least). If only a path is given, this path identifies the location at
5761  *	which the directory shall be created. If both \a fd and \a path are given and
5762  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5763  *	of the directory (!) identified by \a fd.
5764  *
5765  *	\param fd The FD. May be < 0.
5766  *	\param path The absolute or relative path. Must not be \c NULL.
5767  *	\param perms The access permissions the new directory shall have.
5768  *	\return \c B_OK, if the directory has been created successfully, another
5769  *			error code otherwise.
5770  */
5771 
5772 status_t
5773 _kern_create_dir(int fd, const char *path, int perms)
5774 {
5775 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5776 	if (pathBuffer.InitCheck() != B_OK)
5777 		return B_NO_MEMORY;
5778 
5779 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
5780 }
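

/*	A sketch (hypothetical FD): create the subdirectory "mail" in the
 *	directory the FD refers to, using the relative-path rule documented
 *	above.
 *
 *	\code
 *	status_t error = _kern_create_dir(dirFD, "mail", S_IRWXU);
 *	\endcode
 */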
5781 
5782 
5783 status_t
5784 _kern_remove_dir(int fd, const char *path)
5785 {
5786 	if (path) {
5787 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5788 		if (pathBuffer.InitCheck() != B_OK)
5789 			return B_NO_MEMORY;
5790 
5791 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
5792 	}
5793 
5794 	return dir_remove(fd, NULL, true);
5795 }
5796 
5797 
5798 /**	\brief Reads the contents of a symlink referred to by a FD + path pair.
5799  *
5800  *	At least one of \a fd and \a path must be specified.
 *	If only \a fd is given, the symlink to be read is the node identified
 *	by this FD. If only a path is given, this path identifies the
5803  *	symlink to be read. If both are given and the path is absolute, \a fd is
5804  *	ignored; a relative path is reckoned off of the directory (!) identified
5805  *	by \a fd.
5806  *	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
5807  *	will still be updated to reflect the required buffer size.
5808  *
5809  *	\param fd The FD. May be < 0.
5810  *	\param path The absolute or relative path. May be \c NULL.
5811  *	\param buffer The buffer into which the contents of the symlink shall be
5812  *		   written.
5813  *	\param _bufferSize A pointer to the size of the supplied buffer.
 *	\return The length of the link on success or an appropriate error code.
5815  */
5816 
5817 status_t
5818 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
5819 {
5822 	if (path) {
5823 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5824 		if (pathBuffer.InitCheck() != B_OK)
5825 			return B_NO_MEMORY;
5826 
5827 		return common_read_link(fd, pathBuffer.LockBuffer(),
5828 			buffer, _bufferSize, true);
5829 	}
5830 
5831 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
5832 }
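

/*	The buffer size protocol sketched (hypothetical caller): on
 *	\c B_BUFFER_OVERFLOW the size is updated nonetheless, so the caller can
 *	retry with a buffer of the reported size.
 *
 *	\code
 *	char buffer[B_PATH_NAME_LENGTH];
 *	size_t size = sizeof(buffer);
 *	status_t error = _kern_read_link(fd, NULL, buffer, &size);
 *	if (error == B_BUFFER_OVERFLOW) {
 *		// "size" now holds the size actually required
 *	}
 *	\endcode
 */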
5833 
5834 
5835 status_t
5836 _kern_write_link(const char *path, const char *toPath)
5837 {
5838 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5839 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
5840 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
5841 		return B_NO_MEMORY;
5842 
5843 	char *toBuffer = toPathBuffer.LockBuffer();
5844 
5845 	status_t status = check_path(toBuffer);
5846 	if (status < B_OK)
5847 		return status;
5848 
5849 	return common_write_link(pathBuffer.LockBuffer(), toBuffer, true);
5850 }
5851 
5852 
5853 /**	\brief Creates a symlink specified by a FD + path pair.
5854  *
5855  *	\a path must always be specified (it contains the name of the new symlink
5856  *	at least). If only a path is given, this path identifies the location at
5857  *	which the symlink shall be created. If both \a fd and \a path are given and
5858  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5859  *	of the directory (!) identified by \a fd.
5860  *
 *	\param fd The FD. May be < 0.
 *	\param path The absolute or relative path of the symlink to be created.
 *		   Must not be \c NULL.
 *	\param toPath The path the symlink shall point to.
 *	\param mode The access permissions the new symlink shall have.
5864  *	\return \c B_OK, if the symlink has been created successfully, another
5865  *			error code otherwise.
5866  */
5867 
5868 status_t
5869 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
5870 {
5871 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5872 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
5873 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
5874 		return B_NO_MEMORY;
5875 
5876 	char *toBuffer = toPathBuffer.LockBuffer();
5877 
5878 	status_t status = check_path(toBuffer);
5879 	if (status < B_OK)
5880 		return status;
5881 
5882 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
5883 		toBuffer, mode, true);
5884 }
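

/*	A sketch (hypothetical paths): create a symlink "/boot/home/www"
 *	pointing to "/boot/home/public_html". Only the target's path syntax is
 *	verified (via check_path()); the target itself need not exist.
 *
 *	\code
 *	status_t error = _kern_create_symlink(-1, "/boot/home/www",
 *		"/boot/home/public_html", S_IRWXU | S_IRWXG | S_IRWXO);
 *	\endcode
 */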
5885 
5886 
5887 status_t
5888 _kern_create_link(const char *path, const char *toPath)
5889 {
5890 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5891 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
5892 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
5893 		return B_NO_MEMORY;
5894 
5895 	return common_create_link(pathBuffer.LockBuffer(),
5896 		toPathBuffer.LockBuffer(), true);
5897 }
5898 
5899 
5900 /**	\brief Removes an entry specified by a FD + path pair from its directory.
5901  *
5902  *	\a path must always be specified (it contains at least the name of the entry
5903  *	to be deleted). If only a path is given, this path identifies the entry
5904  *	directly. If both \a fd and \a path are given and the path is absolute,
5905  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
5906  *	identified by \a fd.
5907  *
5908  *	\param fd The FD. May be < 0.
5909  *	\param path The absolute or relative path. Must not be \c NULL.
5910  *	\return \c B_OK, if the entry has been removed successfully, another
5911  *			error code otherwise.
5912  */
5913 
5914 status_t
5915 _kern_unlink(int fd, const char *path)
5916 {
5917 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5918 	if (pathBuffer.InitCheck() != B_OK)
5919 		return B_NO_MEMORY;
5920 
5921 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
5922 }
5923 
5924 
/**	\brief Moves an entry specified by a FD + path pair to an entry specified
5926  *		   by another FD + path pair.
5927  *
5928  *	\a oldPath and \a newPath must always be specified (they contain at least
5929  *	the name of the entry). If only a path is given, this path identifies the
5930  *	entry directly. If both a FD and a path are given and the path is absolute,
5931  *	the FD is ignored; a relative path is reckoned off of the directory (!)
5932  *	identified by the respective FD.
5933  *
5934  *	\param oldFD The FD of the old location. May be < 0.
5935  *	\param oldPath The absolute or relative path of the old location. Must not
5936  *		   be \c NULL.
5937  *	\param newFD The FD of the new location. May be < 0.
5938  *	\param newPath The absolute or relative path of the new location. Must not
5939  *		   be \c NULL.
5940  *	\return \c B_OK, if the entry has been moved successfully, another
5941  *			error code otherwise.
5942  */
5943 
5944 status_t
5945 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
5946 {
5947 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
5948 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
5949 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
5950 		return B_NO_MEMORY;
5951 
5952 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
5953 		newFD, newPathBuffer.LockBuffer(), true);
5954 }
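

/*	A sketch (hypothetical FDs): move "draft.txt" from one directory to
 *	another under a new name, each location given as a FD + relative path
 *	pair.
 *
 *	\code
 *	status_t error = _kern_rename(fromDirFD, "draft.txt", toDirFD,
 *		"final.txt");
 *	\endcode
 */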
5955 
5956 
5957 status_t
5958 _kern_access(const char *path, int mode)
5959 {
5960 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5961 	if (pathBuffer.InitCheck() != B_OK)
5962 		return B_NO_MEMORY;
5963 
5964 	return common_access(pathBuffer.LockBuffer(), mode, true);
5965 }
5966 
5967 
5968 /**	\brief Reads stat data of an entity specified by a FD + path pair.
5969  *
5970  *	If only \a fd is given, the stat operation associated with the type
5971  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
5972  *	given, this path identifies the entry for whose node to retrieve the
5973  *	stat data. If both \a fd and \a path are given and the path is absolute,
5974  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
5975  *	identified by \a fd and specifies the entry whose stat data shall be
5976  *	retrieved.
5977  *
5978  *	\param fd The FD. May be < 0.
 *	\param path The absolute or relative path. May be \c NULL.
5980  *	\param traverseLeafLink If \a path is given, \c true specifies that the
5981  *		   function shall not stick to symlinks, but traverse them.
5982  *	\param stat The buffer the stat data shall be written into.
5983  *	\param statSize The size of the supplied stat buffer.
 *	\return \c B_OK, if the stat data have been read successfully, another
5985  *			error code otherwise.
5986  */
5987 
5988 status_t
5989 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
5990 	struct stat *stat, size_t statSize)
5991 {
5992 	struct stat completeStat;
5993 	struct stat *originalStat = NULL;
5994 	status_t status;
5995 
5996 	if (statSize > sizeof(struct stat))
5997 		return B_BAD_VALUE;
5998 
5999 	// this supports different stat extensions
6000 	if (statSize < sizeof(struct stat)) {
6001 		originalStat = stat;
6002 		stat = &completeStat;
6003 	}
6004 
6005 	if (path) {
6006 		// path given: get the stat of the node referred to by (fd, path)
6007 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6008 		if (pathBuffer.InitCheck() != B_OK)
6009 			return B_NO_MEMORY;
6010 
6011 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
6012 			traverseLeafLink, stat, true);
6013 	} else {
6014 		// no path given: get the FD and use the FD operation
6015 		struct file_descriptor *descriptor
6016 			= get_fd(get_current_io_context(true), fd);
6017 		if (descriptor == NULL)
6018 			return B_FILE_ERROR;
6019 
6020 		if (descriptor->ops->fd_read_stat)
6021 			status = descriptor->ops->fd_read_stat(descriptor, stat);
6022 		else
6023 			status = EOPNOTSUPP;
6024 
6025 		put_fd(descriptor);
6026 	}
6027 
6028 	if (status == B_OK && originalStat != NULL)
6029 		memcpy(originalStat, stat, statSize);
6030 
6031 	return status;
6032 }
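

/*	The \a statSize mechanism sketched (hypothetical caller): a caller
 *	compiled against a smaller (older) struct stat passes that size; the
 *	kernel fills a complete stat internally and copies back only the
 *	leading \a statSize bytes, so memory beyond the caller's buffer is
 *	never touched.
 *
 *	\code
 *	struct stat st;
 *	status_t error = _kern_read_stat(fd, NULL, false, &st, sizeof(st));
 *	\endcode
 */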
6033 
6034 
6035 /**	\brief Writes stat data of an entity specified by a FD + path pair.
6036  *
6037  *	If only \a fd is given, the stat operation associated with the type
6038  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6039  *	given, this path identifies the entry for whose node to write the
6040  *	stat data. If both \a fd and \a path are given and the path is absolute,
6041  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
6042  *	identified by \a fd and specifies the entry whose stat data shall be
6043  *	written.
6044  *
6045  *	\param fd The FD. May be < 0.
 *	\param path The absolute or relative path. May be \c NULL.
6047  *	\param traverseLeafLink If \a path is given, \c true specifies that the
6048  *		   function shall not stick to symlinks, but traverse them.
6049  *	\param stat The buffer containing the stat data to be written.
6050  *	\param statSize The size of the supplied stat buffer.
6051  *	\param statMask A mask specifying which parts of the stat data shall be
6052  *		   written.
 *	\return \c B_OK, if the stat data have been written successfully,
6054  *			another error code otherwise.
6055  */
6056 
6057 status_t
6058 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
6059 	const struct stat *stat, size_t statSize, int statMask)
6060 {
6061 	struct stat completeStat;
6062 
6063 	if (statSize > sizeof(struct stat))
6064 		return B_BAD_VALUE;
6065 
6066 	// this supports different stat extensions
6067 	if (statSize < sizeof(struct stat)) {
		memset((uint8 *)&completeStat + statSize, 0,
			sizeof(struct stat) - statSize);
6069 		memcpy(&completeStat, stat, statSize);
6070 		stat = &completeStat;
6071 	}
6072 
6073 	status_t status;
6074 
6075 	if (path) {
6076 		// path given: write the stat of the node referred to by (fd, path)
6077 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6078 		if (pathBuffer.InitCheck() != B_OK)
6079 			return B_NO_MEMORY;
6080 
6081 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
6082 			traverseLeafLink, stat, statMask, true);
6083 	} else {
6084 		// no path given: get the FD and use the FD operation
6085 		struct file_descriptor *descriptor
6086 			= get_fd(get_current_io_context(true), fd);
6087 		if (descriptor == NULL)
6088 			return B_FILE_ERROR;
6089 
6090 		if (descriptor->ops->fd_write_stat)
6091 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
6092 		else
6093 			status = EOPNOTSUPP;
6094 
6095 		put_fd(descriptor);
6096 	}
6097 
6098 	return status;
6099 }
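

/*	A sketch (hypothetical FD; assumes the \c B_STAT_MODE mask constant
 *	from <NodeMonitor.h>): update only the mode bits of an open node; stat
 *	fields not selected by \a statMask are ignored.
 *
 *	\code
 *	struct stat st;
 *	st.st_mode = S_IRUSR | S_IWUSR;
 *	status_t error = _kern_write_stat(fd, NULL, false, &st, sizeof(st),
 *		B_STAT_MODE);
 *	\endcode
 */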
6100 
6101 
6102 int
6103 _kern_open_attr_dir(int fd, const char *path)
6104 {
6105 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6106 	if (pathBuffer.InitCheck() != B_OK)
6107 		return B_NO_MEMORY;
6108 
	if (path != NULL) {
		status_t status = pathBuffer.SetTo(path);
		if (status != B_OK)
			return status;
	}
6111 
6112 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6113 }
6114 
6115 
6116 int
6117 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
6118 {
6119 	return attr_create(fd, name, type, openMode, true);
6120 }
6121 
6122 
6123 int
6124 _kern_open_attr(int fd, const char *name, int openMode)
6125 {
6126 	return attr_open(fd, name, openMode, true);
6127 }
6128 
6129 
6130 status_t
6131 _kern_remove_attr(int fd, const char *name)
6132 {
6133 	return attr_remove(fd, name, true);
6134 }
6135 
6136 
6137 status_t
6138 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
6139 {
6140 	return attr_rename(fromFile, fromName, toFile, toName, true);
6141 }
6142 
6143 
6144 int
6145 _kern_open_index_dir(dev_t device)
6146 {
6147 	return index_dir_open(device, true);
6148 }
6149 
6150 
6151 status_t
6152 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
6153 {
6154 	return index_create(device, name, type, flags, true);
6155 }
6156 
6157 
6158 status_t
6159 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
6160 {
6161 	return index_name_read_stat(device, name, stat, true);
6162 }
6163 
6164 
6165 status_t
6166 _kern_remove_index(dev_t device, const char *name)
6167 {
6168 	return index_remove(device, name, true);
6169 }
6170 
6171 
6172 status_t
6173 _kern_getcwd(char *buffer, size_t size)
6174 {
6175 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
6176 
6177 	// Call vfs to get current working directory
6178 	return get_cwd(buffer, size, true);
6179 }
6180 
6181 
6182 status_t
6183 _kern_setcwd(int fd, const char *path)
6184 {
6185 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6186 	if (pathBuffer.InitCheck() != B_OK)
6187 		return B_NO_MEMORY;
6188 
	if (path != NULL) {
		status_t status = pathBuffer.SetTo(path);
		if (status != B_OK)
			return status;
	}
6191 
6192 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
6193 }
6194 
6195 
6196 //	#pragma mark -
6197 //	Calls from userland (with extra address checks)
6198 
6199 
6200 dev_t
6201 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
6202 	uint32 flags, const char *userArgs)
6203 {
6204 	char fileSystem[B_OS_NAME_LENGTH];
6205 	KPath path, device;
6206 	char *args = NULL;
6207 	status_t status;
6208 
	if (!IS_USER_ADDRESS(userPath)
		|| (userFileSystem != NULL && !IS_USER_ADDRESS(userFileSystem))
		|| (userDevice != NULL && !IS_USER_ADDRESS(userDevice)))
		return B_BAD_ADDRESS;
6213 
6214 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
6215 		return B_NO_MEMORY;
6216 
6217 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
6218 		return B_BAD_ADDRESS;
6219 
6220 	if (userFileSystem != NULL
6221 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
6222 		return B_BAD_ADDRESS;
6223 
6224 	if (userDevice != NULL
6225 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
6226 		return B_BAD_ADDRESS;
6227 
6228 	if (userArgs != NULL) {
6229 		// We have no real length restriction, so we need to create
6230 		// a buffer large enough to hold the argument string
		// ToDo: we could think about determining the length of the string
6232 		//	in userland :)
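		// args is still NULL at this point; with a size of 0 user_strlcpy()
		// copies nothing and just returns the length of the string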
6233 		ssize_t length = user_strlcpy(args, userArgs, 0);
6234 		if (length < B_OK)
6235 			return B_BAD_ADDRESS;
6236 
6237 		// this is a safety restriction
6238 		if (length > 32 * 1024)
6239 			return B_NAME_TOO_LONG;
6240 
6241 		if (length > 0) {
6242 			args = (char *)malloc(length + 1);
6243 			if (args == NULL)
6244 				return B_NO_MEMORY;
6245 
6246 			if (user_strlcpy(args, userArgs, length + 1) < B_OK) {
6247 				free(args);
6248 				return B_BAD_ADDRESS;
6249 			}
6250 		}
6251 	}
6252 	path.UnlockBuffer();
6253 	device.UnlockBuffer();
6254 
6255 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
6256 		userFileSystem ? fileSystem : NULL, flags, args, false);
6257 
6258 	free(args);
6259 	return status;
6260 }
6261 
6262 
6263 status_t
6264 _user_unmount(const char *userPath, uint32 flags)
6265 {
6266 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6267 	if (pathBuffer.InitCheck() != B_OK)
6268 		return B_NO_MEMORY;
6269 
6270 	char *path = pathBuffer.LockBuffer();
6271 
6272 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6273 		return B_BAD_ADDRESS;
6274 
6275 	return fs_unmount(path, flags, false);
6276 }
6277 
6278 
6279 status_t
6280 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6281 {
6282 	struct fs_info info;
6283 	status_t status;
6284 
6285 	if (userInfo == NULL)
6286 		return B_BAD_VALUE;
6287 
6288 	if (!IS_USER_ADDRESS(userInfo))
6289 		return B_BAD_ADDRESS;
6290 
6291 	status = fs_read_info(device, &info);
6292 	if (status != B_OK)
6293 		return status;
6294 
6295 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6296 		return B_BAD_ADDRESS;
6297 
6298 	return B_OK;
6299 }
6300 
6301 
6302 status_t
6303 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6304 {
6305 	struct fs_info info;
6306 
6307 	if (userInfo == NULL)
6308 		return B_BAD_VALUE;
6309 
6310 	if (!IS_USER_ADDRESS(userInfo)
6311 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
6312 		return B_BAD_ADDRESS;
6313 
6314 	return fs_write_info(device, &info, mask);
6315 }
6316 
6317 
6318 dev_t
6319 _user_next_device(int32 *_userCookie)
6320 {
6321 	int32 cookie;
6322 	dev_t device;
6323 
6324 	if (!IS_USER_ADDRESS(_userCookie)
6325 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
6326 		return B_BAD_ADDRESS;
6327 
6328 	device = fs_next_device(&cookie);
6329 
6330 	if (device >= B_OK) {
6331 		// update user cookie
6332 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
6333 			return B_BAD_ADDRESS;
6334 	}
6335 
6336 	return device;
6337 }
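

/*	The cookie protocol sketched via the public next_dev() wrapper from
 *	<fs_info.h>, which ends up here: iterate over all mounted volumes.
 *
 *	\code
 *	int32 cookie = 0;
 *	dev_t device;
 *	while ((device = next_dev(&cookie)) >= 0) {
 *		// ... use device ...
 *	}
 *	\endcode
 */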
6338 
6339 
6340 status_t
6341 _user_sync(void)
6342 {
6343 	return _kern_sync();
6344 }
6345 
6346 
6347 status_t
6348 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
6349 	size_t infoSize)
6350 {
6351 	struct fd_info info;
6352 	uint32 cookie;
6353 
6354 	// only root can do this (or should root's group be enough?)
6355 	if (geteuid() != 0)
6356 		return B_NOT_ALLOWED;
6357 
6358 	if (infoSize != sizeof(fd_info))
6359 		return B_BAD_VALUE;
6360 
6361 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
6362 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
6363 		return B_BAD_ADDRESS;
6364 
6365 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
6366 	if (status < B_OK)
6367 		return status;
6368 
6369 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
6370 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
6371 		return B_BAD_ADDRESS;
6372 
6373 	return status;
6374 }
6375 
6376 
6377 status_t
6378 _user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
6379 	char *userPath, size_t pathLength)
6380 {
6381 	char path[B_PATH_NAME_LENGTH + 1];
6382 	struct vnode *vnode;
6383 	status_t status;
6384 
6385 	if (!IS_USER_ADDRESS(userPath))
6386 		return B_BAD_ADDRESS;
6387 
6388 	// copy the leaf name onto the stack
6389 	char stackLeaf[B_FILE_NAME_LENGTH];
6390 	if (leaf) {
6391 		if (!IS_USER_ADDRESS(leaf))
6392 			return B_BAD_ADDRESS;
6393 
6394 		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
6395 		if (len < 0)
6396 			return len;
6397 		if (len >= B_FILE_NAME_LENGTH)
6398 			return B_NAME_TOO_LONG;
6399 		leaf = stackLeaf;
6400 
6401 		// filter invalid leaf names
6402 		if (leaf[0] == '\0' || strchr(leaf, '/'))
6403 			return B_BAD_VALUE;
6404 	}
6405 
6406 	// get the vnode matching the dir's node_ref
6407 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
6408 		// special cases "." and "..": we can directly get the vnode of the
6409 		// referenced directory
6410 		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
6411 		leaf = NULL;
6412 	} else
6413 		status = get_vnode(device, inode, &vnode, false);
6414 	if (status < B_OK)
6415 		return status;
6416 
6417 	// get the directory path
6418 	status = dir_vnode_to_path(vnode, path, sizeof(path));
6419 	put_vnode(vnode);
6420 		// we don't need the vnode anymore
6421 	if (status < B_OK)
6422 		return status;
6423 
6424 	// append the leaf name
6425 	if (leaf) {
6426 		// insert a directory separator if this is not the file system root
		if ((strcmp(path, "/") != 0
				&& strlcat(path, "/", sizeof(path)) >= sizeof(path))
			|| strlcat(path, leaf, sizeof(path)) >= sizeof(path)) {
			return B_NAME_TOO_LONG;
		}
6431 	}
6432 
6433 	int len = user_strlcpy(userPath, path, pathLength);
6434 	if (len < 0)
6435 		return len;
6436 	if (len >= (int)pathLength)
6437 		return B_BUFFER_OVERFLOW;
6438 	return B_OK;
6439 }
6440 
6441 
6442 int
6443 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
6444 	int openMode, int perms)
6445 {
6446 	char name[B_FILE_NAME_LENGTH];
6447 	int status;
6448 
6449 	if (!IS_USER_ADDRESS(userName))
6450 		return B_BAD_ADDRESS;
6451 
6452 	status = user_strlcpy(name, userName, sizeof(name));
6453 	if (status < B_OK)
6454 		return status;
6455 
6456 	if (openMode & O_CREAT)
6457 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
6458 
6459 	return file_open_entry_ref(device, inode, name, openMode, false);
6460 }
6461 
6462 
6463 int
6464 _user_open(int fd, const char *userPath, int openMode, int perms)
6465 {
6466 	char path[B_PATH_NAME_LENGTH + 1];
6467 	int status;
6468 
6469 	if (!IS_USER_ADDRESS(userPath))
6470 		return B_BAD_ADDRESS;
6471 
6472 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6473 	if (status < 0)
6474 		return status;
6475 
6476 	if (openMode & O_CREAT)
6477 		return file_create(fd, path, openMode, perms, false);
6478 
6479 	return file_open(fd, path, openMode, false);
6480 }
6481 
6482 
6483 int
6484 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *uname)
6485 {
6486 	if (uname) {
6487 		char name[B_FILE_NAME_LENGTH];
6488 
6489 		if (!IS_USER_ADDRESS(uname))
6490 			return B_BAD_ADDRESS;
6491 
6492 		int status = user_strlcpy(name, uname, sizeof(name));
6493 		if (status < B_OK)
6494 			return status;
6495 
6496 		return dir_open_entry_ref(device, inode, name, false);
6497 	}
6498 	return dir_open_entry_ref(device, inode, NULL, false);
6499 }
6500 
6501 
6502 int
6503 _user_open_dir(int fd, const char *userPath)
6504 {
6505 	char path[B_PATH_NAME_LENGTH + 1];
6506 	int status;
6507 
6508 	if (!IS_USER_ADDRESS(userPath))
6509 		return B_BAD_ADDRESS;
6510 
6511 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6512 	if (status < 0)
6513 		return status;
6514 
6515 	return dir_open(fd, path, false);
6516 }
6517 
6518 
6519 /**	\brief Opens a directory's parent directory and returns the entry name
6520  *		   of the former.
6521  *
 *	Aside from returning the directory's entry name, this method is
 *	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
 *	equivalent if \a userName is \c NULL.
6525  *
6526  *	If a name buffer is supplied and the name does not fit the buffer, the
6527  *	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
6528  *
6529  *	\param fd A FD referring to a directory.
6530  *	\param userName Buffer the directory's entry name shall be written into.
6531  *		   May be \c NULL.
6532  *	\param nameLength Size of the name buffer.
6533  *	\return The file descriptor of the opened parent directory, if everything
6534  *			went fine, an error code otherwise.
6535  */
6536 
6537 int
6538 _user_open_parent_dir(int fd, char *userName, size_t nameLength)
6539 {
6540 	bool kernel = false;
6541 
6542 	if (userName && !IS_USER_ADDRESS(userName))
6543 		return B_BAD_ADDRESS;
6544 
6545 	// open the parent dir
6546 	int parentFD = dir_open(fd, "..", kernel);
6547 	if (parentFD < 0)
6548 		return parentFD;
6549 	FDCloser fdCloser(parentFD, kernel);
6550 
6551 	if (userName) {
6552 		// get the vnodes
6553 		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
6554 		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
6555 		VNodePutter parentVNodePutter(parentVNode);
6556 		VNodePutter dirVNodePutter(dirVNode);
6557 		if (!parentVNode || !dirVNode)
6558 			return B_FILE_ERROR;
6559 
6560 		// get the vnode name
6561 		char name[B_FILE_NAME_LENGTH];
6562 		status_t status = get_vnode_name(dirVNode, parentVNode,
6563 			name, sizeof(name));
6564 		if (status != B_OK)
6565 			return status;
6566 
6567 		// copy the name to the userland buffer
6568 		int len = user_strlcpy(userName, name, nameLength);
6569 		if (len < 0)
6570 			return len;
6571 		if (len >= (int)nameLength)
6572 			return B_BUFFER_OVERFLOW;
6573 	}
6574 
6575 	return fdCloser.Detach();
6576 }
6577 
6578 
6579 status_t
6580 _user_fcntl(int fd, int op, uint32 argument)
6581 {
6582 	return common_fcntl(fd, op, argument, false);
6583 }
6584 
6585 
6586 status_t
6587 _user_fsync(int fd)
6588 {
6589 	return common_sync(fd, false);
6590 }
6591 
6592 
6593 status_t
6594 _user_lock_node(int fd)
6595 {
6596 	return common_lock_node(fd, false);
6597 }
6598 
6599 
6600 status_t
6601 _user_unlock_node(int fd)
6602 {
6603 	return common_unlock_node(fd, false);
6604 }
6605 
6606 
6607 status_t
6608 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
6609 {
6610 	char name[B_FILE_NAME_LENGTH];
6611 	status_t status;
6612 
6613 	if (!IS_USER_ADDRESS(userName))
6614 		return B_BAD_ADDRESS;
6615 
6616 	status = user_strlcpy(name, userName, sizeof(name));
6617 	if (status < 0)
6618 		return status;
6619 
6620 	return dir_create_entry_ref(device, inode, name, perms, false);
6621 }
6622 
6623 
6624 status_t
6625 _user_create_dir(int fd, const char *userPath, int perms)
6626 {
6627 	char path[B_PATH_NAME_LENGTH + 1];
6628 	status_t status;
6629 
6630 	if (!IS_USER_ADDRESS(userPath))
6631 		return B_BAD_ADDRESS;
6632 
6633 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6634 	if (status < 0)
6635 		return status;
6636 
6637 	return dir_create(fd, path, perms, false);
6638 }
6639 
6640 
6641 status_t
6642 _user_remove_dir(int fd, const char *userPath)
6643 {
6644 	char path[B_PATH_NAME_LENGTH + 1];
6645 
	if (userPath != NULL
		&& (!IS_USER_ADDRESS(userPath)
			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK))
		return B_BAD_ADDRESS;
6649 
6650 	return dir_remove(fd, userPath ? path : NULL, false);
6651 }
6652 
6653 
6654 status_t
6655 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
6656 {
6657 	char path[B_PATH_NAME_LENGTH + 1];
6658 	char buffer[B_PATH_NAME_LENGTH];
6659 	size_t bufferSize;
6660 	status_t status;
6661 
6662 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
6663 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
6664 		return B_BAD_ADDRESS;
6665 
6666 	if (userPath) {
6667 		if (!IS_USER_ADDRESS(userPath))
6668 			return B_BAD_ADDRESS;
6669 
6670 		status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6671 		if (status < 0)
6672 			return status;
	}

	// cap the size at the size of our kernel buffer, so that the FS
	// cannot write beyond it
	if (bufferSize > B_PATH_NAME_LENGTH)
		bufferSize = B_PATH_NAME_LENGTH;
6677 
6678 	status = common_read_link(fd, userPath ? path : NULL, buffer, &bufferSize, false);
6679 
6680 	// we also update the bufferSize in case of errors
6681 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
6682 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
6683 		return B_BAD_ADDRESS;
6684 
6685 	if (status < B_OK)
6686 		return status;
6687 
6688 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
6689 		return B_BAD_ADDRESS;
6690 
6691 	return B_OK;
6692 }
6693 
6694 
6695 status_t
6696 _user_write_link(const char *userPath, const char *userToPath)
6697 {
6698 	char path[B_PATH_NAME_LENGTH + 1];
6699 	char toPath[B_PATH_NAME_LENGTH + 1];
6700 	status_t status;
6701 
6702 	if (!IS_USER_ADDRESS(userPath)
6703 		|| !IS_USER_ADDRESS(userToPath))
6704 		return B_BAD_ADDRESS;
6705 
6706 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6707 	if (status < 0)
6708 		return status;
6709 
6710 	status = user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH);
6711 	if (status < 0)
6712 		return status;
6713 
6714 	status = check_path(toPath);
6715 	if (status < B_OK)
6716 		return status;
6717 
6718 	return common_write_link(path, toPath, false);
6719 }
6720 
6721 
6722 status_t
6723 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
6724 	int mode)
6725 {
6726 	char path[B_PATH_NAME_LENGTH + 1];
6727 	char toPath[B_PATH_NAME_LENGTH + 1];
6728 	status_t status;
6729 
6730 	if (!IS_USER_ADDRESS(userPath)
6731 		|| !IS_USER_ADDRESS(userToPath))
6732 		return B_BAD_ADDRESS;
6733 
6734 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6735 	if (status < 0)
6736 		return status;
6737 
6738 	status = user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH);
6739 	if (status < 0)
6740 		return status;
6741 
6742 	status = check_path(toPath);
6743 	if (status < B_OK)
6744 		return status;
6745 
6746 	return common_create_symlink(fd, path, toPath, mode, false);
6747 }
6748 
6749 
6750 status_t
6751 _user_create_link(const char *userPath, const char *userToPath)
6752 {
6753 	char path[B_PATH_NAME_LENGTH + 1];
6754 	char toPath[B_PATH_NAME_LENGTH + 1];
6755 	status_t status;
6756 
6757 	if (!IS_USER_ADDRESS(userPath)
6758 		|| !IS_USER_ADDRESS(userToPath))
6759 		return B_BAD_ADDRESS;
6760 
6761 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6762 	if (status < 0)
6763 		return status;
6764 
6765 	status = user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH);
6766 	if (status < 0)
6767 		return status;
6768 
6769 	status = check_path(toPath);
6770 	if (status < B_OK)
6771 		return status;
6772 
6773 	return common_create_link(path, toPath, false);
6774 }
6775 
6776 
6777 status_t
6778 _user_unlink(int fd, const char *userPath)
6779 {
6780 	char path[B_PATH_NAME_LENGTH + 1];
6781 	status_t status;
6782 
6783 	if (!IS_USER_ADDRESS(userPath))
6784 		return B_BAD_ADDRESS;
6785 
6786 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6787 	if (status < 0)
6788 		return status;
6789 
6790 	return common_unlink(fd, path, false);
6791 }
6792 
6793 
6794 status_t
6795 _user_rename(int oldFD, const char *userOldPath, int newFD,
6796 	const char *userNewPath)
6797 {
6798 	char oldPath[B_PATH_NAME_LENGTH + 1];
6799 	char newPath[B_PATH_NAME_LENGTH + 1];
6800 	status_t status;
6801 
6802 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
6803 		return B_BAD_ADDRESS;
6804 
6805 	status = user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH);
6806 	if (status < 0)
6807 		return status;
6808 
6809 	status = user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH);
6810 	if (status < 0)
6811 		return status;
6812 
6813 	return common_rename(oldFD, oldPath, newFD, newPath, false);
6814 }
6815 
6816 
6817 status_t
6818 _user_access(const char *userPath, int mode)
6819 {
6820 	char path[B_PATH_NAME_LENGTH + 1];
6821 	status_t status;
6822 
6823 	if (!IS_USER_ADDRESS(userPath))
6824 		return B_BAD_ADDRESS;
6825 
6826 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6827 	if (status < 0)
6828 		return status;
6829 
6830 	return common_access(path, mode, false);
6831 }
6832 
6833 
6834 status_t
6835 _user_read_stat(int fd, const char *userPath, bool traverseLink,
6836 	struct stat *userStat, size_t statSize)
6837 {
6838 	struct stat stat;
6839 	status_t status;
6840 
6841 	if (statSize > sizeof(struct stat))
6842 		return B_BAD_VALUE;
6843 
6844 	if (!IS_USER_ADDRESS(userStat))
6845 		return B_BAD_ADDRESS;
6846 
6847 	if (userPath) {
6848 		// path given: get the stat of the node referred to by (fd, path)
6849 		char path[B_PATH_NAME_LENGTH + 1];
6850 		if (!IS_USER_ADDRESS(userPath))
6851 			return B_BAD_ADDRESS;
6852 		int len = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6853 		if (len < 0)
6854 			return len;
6855 		if (len >= B_PATH_NAME_LENGTH)
6856 			return B_NAME_TOO_LONG;
6857 
6858 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
6859 	} else {
6860 		// no path given: get the FD and use the FD operation
6861 		struct file_descriptor *descriptor
6862 			= get_fd(get_current_io_context(false), fd);
6863 		if (descriptor == NULL)
6864 			return B_FILE_ERROR;
6865 
6866 		if (descriptor->ops->fd_read_stat)
6867 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
6868 		else
6869 			status = EOPNOTSUPP;
6870 
6871 		put_fd(descriptor);
6872 	}
6873 
6874 	if (status < B_OK)
6875 		return status;
6876 
6877 	return user_memcpy(userStat, &stat, statSize);
6878 }
6879 
6880 
6881 status_t
6882 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
6883 	const struct stat *userStat, size_t statSize, int statMask)
6884 {
6885 	char path[B_PATH_NAME_LENGTH + 1];
6886 	struct stat stat;
6887 
6888 	if (statSize > sizeof(struct stat))
6889 		return B_BAD_VALUE;
6890 
6891 	if (!IS_USER_ADDRESS(userStat)
6892 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
6893 		return B_BAD_ADDRESS;
6894 
6895 	// clear additional stat fields
6896 	if (statSize < sizeof(struct stat))
6897 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
6898 
6899 	status_t status;
6900 
6901 	if (userPath) {
6902 		// path given: write the stat of the node referred to by (fd, path)
6903 		if (!IS_USER_ADDRESS(userPath))
6904 			return B_BAD_ADDRESS;
6905 		int len = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6906 		if (len < 0)
6907 			return len;
6908 		if (len >= B_PATH_NAME_LENGTH)
6909 			return B_NAME_TOO_LONG;
6910 
6911 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
6912 			statMask, false);
6913 	} else {
6914 		// no path given: get the FD and use the FD operation
6915 		struct file_descriptor *descriptor
6916 			= get_fd(get_current_io_context(false), fd);
6917 		if (descriptor == NULL)
6918 			return B_FILE_ERROR;
6919 
6920 		if (descriptor->ops->fd_write_stat)
6921 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
6922 		else
6923 			status = EOPNOTSUPP;
6924 
6925 		put_fd(descriptor);
6926 	}
6927 
6928 	return status;
6929 }
6930 
6931 
6932 int
6933 _user_open_attr_dir(int fd, const char *userPath)
6934 {
6935 	char pathBuffer[B_PATH_NAME_LENGTH + 1];
6936 
6937 	if (userPath != NULL) {
6938 		if (!IS_USER_ADDRESS(userPath)
6939 			|| user_strlcpy(pathBuffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6940 			return B_BAD_ADDRESS;
6941 	}
6942 
6943 	return attr_dir_open(fd, userPath ? pathBuffer : NULL, false);
6944 }
6945 
6946 
6947 int
6948 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
6949 {
6950 	char name[B_FILE_NAME_LENGTH];
6951 
6952 	if (!IS_USER_ADDRESS(userName)
6953 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6954 		return B_BAD_ADDRESS;
6955 
6956 	return attr_create(fd, name, type, openMode, false);
6957 }
6958 
6959 
6960 int
6961 _user_open_attr(int fd, const char *userName, int openMode)
6962 {
6963 	char name[B_FILE_NAME_LENGTH];
6964 
6965 	if (!IS_USER_ADDRESS(userName)
6966 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6967 		return B_BAD_ADDRESS;
6968 
6969 	return attr_open(fd, name, openMode, false);
6970 }
6971 
6972 
6973 status_t
6974 _user_remove_attr(int fd, const char *userName)
6975 {
6976 	char name[B_FILE_NAME_LENGTH];
6977 
6978 	if (!IS_USER_ADDRESS(userName)
6979 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6980 		return B_BAD_ADDRESS;
6981 
6982 	return attr_remove(fd, name, false);
6983 }
6984 
6985 
6986 status_t
6987 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
6988 {
6989 	char fromName[B_FILE_NAME_LENGTH];
6990 	char toName[B_FILE_NAME_LENGTH];
6991 
6992 	if (!IS_USER_ADDRESS(userFromName)
6993 		|| !IS_USER_ADDRESS(userToName))
6994 		return B_BAD_ADDRESS;
6995 
6996 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
6997 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
6998 		return B_BAD_ADDRESS;
6999 
7000 	return attr_rename(fromFile, fromName, toFile, toName, false);
7001 }
7002 
7003 
7004 int
7005 _user_open_index_dir(dev_t device)
7006 {
7007 	return index_dir_open(device, false);
7008 }
7009 
7010 
7011 status_t
7012 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7013 {
7014 	char name[B_FILE_NAME_LENGTH];
7015 
7016 	if (!IS_USER_ADDRESS(userName)
7017 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7018 		return B_BAD_ADDRESS;
7019 
7020 	return index_create(device, name, type, flags, false);
7021 }
7022 
7023 
7024 status_t
7025 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
7026 {
7027 	char name[B_FILE_NAME_LENGTH];
7028 	struct stat stat;
7029 	status_t status;
7030 
7031 	if (!IS_USER_ADDRESS(userName)
7032 		|| !IS_USER_ADDRESS(userStat)
7033 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7034 		return B_BAD_ADDRESS;
7035 
7036 	status = index_name_read_stat(device, name, &stat, false);
7037 	if (status == B_OK) {
7038 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
7039 			return B_BAD_ADDRESS;
7040 	}
7041 
7042 	return status;
7043 }
7044 
7045 
7046 status_t
7047 _user_remove_index(dev_t device, const char *userName)
7048 {
7049 	char name[B_FILE_NAME_LENGTH];
7050 
7051 	if (!IS_USER_ADDRESS(userName)
7052 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7053 		return B_BAD_ADDRESS;
7054 
7055 	return index_remove(device, name, false);
7056 }
7057 
7058 
7059 status_t
7060 _user_getcwd(char *userBuffer, size_t size)
7061 {
7062 	char buffer[B_PATH_NAME_LENGTH];
7063 	status_t status;
7064 
7065 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
7066 
7067 	if (!IS_USER_ADDRESS(userBuffer))
7068 		return B_BAD_ADDRESS;
7069 
7070 	if (size > B_PATH_NAME_LENGTH)
7071 		size = B_PATH_NAME_LENGTH;
7072 
7073 	status = get_cwd(buffer, size, false);
7074 	if (status < 0)
7075 		return status;
7076 
7077 	// Copy back the result
7078 	if (user_strlcpy(userBuffer, buffer, size) < B_OK)
7079 		return B_BAD_ADDRESS;
7080 
7081 	return status;
7082 }
7083 
7084 
7085 status_t
7086 _user_setcwd(int fd, const char *userPath)
7087 {
7088 	char path[B_PATH_NAME_LENGTH];
7089 
7090 	TRACE(("user_setcwd: path = %p\n", userPath));
7091 
7092 	if (userPath != NULL) {
7093 		if (!IS_USER_ADDRESS(userPath)
7094 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7095 			return B_BAD_ADDRESS;
7096 	}
7097 
7098 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
7099 }
7100 
7101 
7102 int
7103 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
7104 	uint32 flags, port_id port, int32 token)
7105 {
7106 	char *query;
7107 
7108 	if (device < 0 || userQuery == NULL || queryLength == 0 || queryLength >= 65536)
7109 		return B_BAD_VALUE;
7110 
7111 	query = (char *)malloc(queryLength + 1);
7112 	if (query == NULL)
7113 		return B_NO_MEMORY;
7114 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
7115 		free(query);
7116 		return B_BAD_ADDRESS;
7117 	}
7118 
7119 	int fd = query_open(device, query, flags, port, token, false);
7120 
7121 	free(query);
7122 	return fd;
7123 }
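

/*	A sketch via the public fs_open_query() wrapper from <fs_query.h>,
 *	which ends up here (hypothetical query string): find all nodes on the
 *	volume whose name ends in ".cpp".
 *
 *	\code
 *	DIR *query = fs_open_query(device, "name==*.cpp", 0);
 *	\endcode
 */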
7124