xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 89755088d790ff4fe36f8aa77dacb2bd15507108)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /*! Virtual File System and File System Interface Layer */
10 
11 
12 #include <ctype.h>
13 #include <fcntl.h>
14 #include <limits.h>
15 #include <stddef.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/file.h>
19 #include <sys/resource.h>
20 #include <sys/stat.h>
21 #include <unistd.h>
22 
23 #include <fs_info.h>
24 #include <fs_interface.h>
25 #include <fs_volume.h>
26 #include <OS.h>
27 #include <StorageDefs.h>
28 
29 #include <util/AutoLock.h>
30 
31 #include <block_cache.h>
32 #include <fd.h>
33 #include <file_cache.h>
34 #include <khash.h>
35 #include <KPath.h>
36 #include <lock.h>
37 #include <syscalls.h>
38 #include <syscall_restart.h>
39 #include <vfs.h>
40 #include <vm.h>
41 #include <vm_cache.h>
42 #include <vm_low_memory.h>
43 
44 #include <boot/kernel_args.h>
45 #include <disk_device_manager/KDiskDevice.h>
46 #include <disk_device_manager/KDiskDeviceManager.h>
47 #include <disk_device_manager/KDiskDeviceUtils.h>
48 #include <disk_device_manager/KDiskSystem.h>
49 #include <fs/node_monitor.h>
50 
51 
52 //#define TRACE_VFS
53 #ifdef TRACE_VFS
54 #	define TRACE(x) dprintf x
55 #	define FUNCTION(x) dprintf x
56 #else
57 #	define TRACE(x) ;
58 #	define FUNCTION(x) ;
59 #endif
60 
61 #define ADD_DEBUGGER_COMMANDS
62 
63 const static uint32 kMaxUnusedVnodes = 8192;
64 	// This is the maximum number of unused vnodes that the system
65 	// will keep around (weak limit, if there is enough memory left,
66 	// they won't get flushed even when hitting that limit).
67 	// It may be chosen with respect to the available memory or enhanced
68 	// by some timestamp/frequency heurism.
69 
// In-memory representation of a file system node. Reference counted and
// cached in sVnodeTable; private_node carries the file system
// implementation's own per-node data.
struct vnode {
	struct vnode	*next;
	vm_cache		*cache;			// VM cache attached to the node, if any
	dev_t			device;			// ID of the volume the node lives on
	list_link		mount_link;		// entry in fs_mount::vnodes
	list_link		unused_link;	// entry in sUnusedVnodeList (ref_count == 0)
	ino_t			id;				// node ID, unique per volume
	fs_vnode		private_node;	// the file system's node handle
	struct fs_mount	*mount;			// the mount this node belongs to
	struct vnode	*covered_by;	// root of a mount covering this node, or NULL
	int32			ref_count;
	uint8			remove : 1;		// delete node resources once unreferenced
	uint8			busy : 1;		// node is being constructed/destructed
	uint8			unpublished : 1;	// created, but not yet published
	struct advisory_locking	*advisory_locking;	// lazily created, cf. create_advisory_locking()
	struct file_descriptor *mandatory_locked_by;
};

// Lookup key for sVnodeTable: (volume ID, node ID).
struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

// Convenience accessors for the file system hooks of a vnode/mount.
#define FS_CALL(vnode, op) (vnode->mount->fs->op)
#define FS_MOUNT_CALL(mount, op) (mount->fs->op)

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and covers_vnode fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount) (read) access
	to those fields is always safe, even without additional locking. Moreover
	while mounted the mount holds a reference to the covers_vnode, and thus
	making the access path vnode->mount->covers_vnode->mount->... safe if a
	reference to vnode is held (note that for the root mount covers_vnode
	is NULL, though).
*/
struct fs_mount {
	struct fs_mount	*next;
	file_system_module_info *fs;	// the module implementing this mount
	dev_t			id;				// unique volume ID (cf. sNextMountID)
	void			*cookie;		// the file system's volume handle
	char			*device_name;	// underlying device name, if any
	char			*fs_name;		// short file system name (fs_info.fsh_name)
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;	// root node of the mounted volume
	struct vnode	*covers_vnode;	// node this mount is mounted on (NULL for root)
	KPartition		*partition;
	struct list		vnodes;			// all vnodes of this mount
	bool			unmounting;		// set while fs_unmount() is in progress
	bool			owns_file_device;
};

// A single advisory (fcntl()/flock() style) lock on a file.
struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	team_id			team;		// team holding the lock
	pid_t			session;	// session ID of the locker
	off_t			start;		// first byte of the locked range
	off_t			end;		// last byte of the locked range (inclusive)
	bool			shared;		// shared (read) vs. exclusive (write) lock
};

typedef DoublyLinkedList<advisory_lock> LockList;

// Per-vnode advisory lock bookkeeping; created lazily by
// create_advisory_locking() and attached to vnode::advisory_locking.
struct advisory_locking {
	sem_id			lock;		// serializes access to this structure
	sem_id			wait_sem;	// blocked lock requests wait on this
	LockList		locks;
};
140 
static mutex sFileSystemsMutex;

/*!	\brief Guards sMountsTable.

	The holder is allowed to read/write access the sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex;

/*!	\brief Guards mount/unmount operations.

	The fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is locking the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields immutable after initialization of the fs_mount structures in
	  sMountsTable will not be modified,
	- vnode::covered_by of any vnode in sVnodeTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeMutex or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards the vnode::covered_by field of any vnode

	The holder is allowed to read access the vnode::covered_by field of any
	vnode. Additionally holding sMountOpLock allows for write access.

	The thread trying to lock the mutex must not hold sVnodeMutex.
*/
static mutex sVnodeCoveredByMutex;

/*!	\brief Guards sVnodeTable.

	The holder is allowed to read/write access sVnodeTable and to
	any unbusy vnode in that table, save to the immutable fields (device, id,
	private_node, mount) to which
	only read-only access is allowed, and to the field covered_by, which is
	guarded by sMountOpLock and sVnodeCoveredByMutex.

	The thread trying to lock the mutex must not hold sMountMutex.
	You must not have this mutex held when calling create_sem(), as this
	might call vfs_free_unused_vnodes().
*/
static mutex sVnodeMutex;

// The global vnode cache: a hash table mapping (dev_t, ino_t) to vnodes,
// plus a list of vnodes that are currently unreferenced but kept in memory.
// All of these are guarded by sVnodeMutex.
#define VNODE_HASH_TABLE_SIZE 1024
static hash_table *sVnodeTable;
static list sUnusedVnodeList;
static uint32 sUnusedVnodes = 0;
static struct vnode *sRoot;
	// the system's root vnode

// All mounted volumes, indexed by dev_t; guarded by sMountMutex.
#define MOUNTS_HASH_TABLE_SIZE 16
static hash_table *sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

mode_t __gUmask = 022;
	// file mode creation mask, cf. umask()
202 
203 /* function declarations */
204 
205 // file descriptor operation prototypes
206 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
207 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
208 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
209 static void file_free_fd(struct file_descriptor *);
210 static status_t file_close(struct file_descriptor *);
211 static status_t file_select(struct file_descriptor *, uint8 event,
212 	struct selectsync *sync);
213 static status_t file_deselect(struct file_descriptor *, uint8 event,
214 	struct selectsync *sync);
215 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
216 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
217 static status_t dir_rewind(struct file_descriptor *);
218 static void dir_free_fd(struct file_descriptor *);
219 static status_t dir_close(struct file_descriptor *);
220 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
221 static status_t attr_dir_rewind(struct file_descriptor *);
222 static void attr_dir_free_fd(struct file_descriptor *);
223 static status_t attr_dir_close(struct file_descriptor *);
224 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
225 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
226 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
227 static void attr_free_fd(struct file_descriptor *);
228 static status_t attr_close(struct file_descriptor *);
229 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
230 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
231 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
232 static status_t index_dir_rewind(struct file_descriptor *);
233 static void index_dir_free_fd(struct file_descriptor *);
234 static status_t index_dir_close(struct file_descriptor *);
235 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
236 static status_t query_rewind(struct file_descriptor *);
237 static void query_free_fd(struct file_descriptor *);
238 static status_t query_close(struct file_descriptor *);
239 
240 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
241 static status_t common_read_stat(struct file_descriptor *, struct stat *);
242 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
243 
244 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
245 	bool traverseLeafLink, int count, struct vnode **_vnode, ino_t *_parentID, int *_type);
246 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
247 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
248 	struct vnode **_vnode, ino_t *_parentID, bool kernel);
249 static void inc_vnode_ref_count(struct vnode *vnode);
250 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
251 static inline void put_vnode(struct vnode *vnode);
252 static status_t fs_unmount(char *path, dev_t mountID, uint32 flags,
253 	bool kernel);
254 
255 
// FD operations for regular files.
static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

// FD operations for directories.
static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

// FD operations for attribute directories.
static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

// FD operations for attributes.
static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

// FD operations for index directories.
static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
// FD operations for indices (currently unused).
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

// FD operations for queries.
static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};
362 
363 
// VNodePutter -- RAII helper that puts (releases the reference of) a vnode
// when it goes out of scope, unless Detach() was called before.
class VNodePutter {
public:
	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	// Put the current vnode (if any) and take over \a vnode instead.
	void SetTo(struct vnode *vnode)
	{
		Put();
		fVNode = vnode;
	}

	// Release the reference now; safe to call more than once.
	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	// Give up ownership without putting the vnode.
	struct vnode *Detach()
	{
		struct vnode *vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode *fVNode;
};
398 
399 
// FDCloser -- RAII helper that closes a file descriptor when it goes out of
// scope, unless Detach() was called before. Remembers whether the descriptor
// is a kernel or a userland one, since the two are closed via different
// paths (_kern_close() vs. _user_close()).
class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	// Close the current FD (if any) and take over \a fd instead.
	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	// Close the FD now; safe to call more than once.
	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	// Give up ownership without closing the FD.
	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
440 
441 
442 static int
443 mount_compare(void *_m, const void *_key)
444 {
445 	struct fs_mount *mount = (fs_mount *)_m;
446 	const dev_t *id = (dev_t *)_key;
447 
448 	if (mount->id == *id)
449 		return 0;
450 
451 	return -1;
452 }
453 
454 
455 static uint32
456 mount_hash(void *_m, const void *_key, uint32 range)
457 {
458 	struct fs_mount *mount = (fs_mount *)_m;
459 	const dev_t *id = (dev_t *)_key;
460 
461 	if (mount)
462 		return mount->id % range;
463 
464 	return (uint32)*id % range;
465 }
466 
467 
/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
	Returns NULL if no mount with that ID exists.
*/
static struct fs_mount *
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}
478 
479 
480 static status_t
481 get_mount(dev_t id, struct fs_mount **_mount)
482 {
483 	struct fs_mount *mount;
484 	status_t status;
485 
486 	MutexLocker nodeLocker(sVnodeMutex);
487 	MutexLocker mountLocker(sMountMutex);
488 
489 	mount = find_mount(id);
490 	if (mount == NULL)
491 		return B_BAD_VALUE;
492 
493 	struct vnode* rootNode = mount->root_vnode;
494 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
495 		// might have been called during a mount/unmount operation
496 		return B_BUSY;
497 	}
498 
499 	inc_vnode_ref_count(mount->root_vnode);
500 	*_mount = mount;
501 	return B_OK;
502 }
503 
504 
505 static void
506 put_mount(struct fs_mount *mount)
507 {
508 	if (mount)
509 		put_vnode(mount->root_vnode);
510 }
511 
512 
/*!	Releases the reference to a file system module that get_file_system()
	acquired via get_module().
*/
static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}
518 
519 
520 /*!	Tries to open the specified file system module.
521 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
522 	Returns a pointer to file system module interface, or NULL if it
523 	could not open the module.
524 */
525 static file_system_module_info *
526 get_file_system(const char *fsName)
527 {
528 	char name[B_FILE_NAME_LENGTH];
529 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
530 		// construct module name if we didn't get one
531 		// (we currently support only one API)
532 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
533 		fsName = NULL;
534 	}
535 
536 	file_system_module_info *info;
537 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
538 		return NULL;
539 
540 	return info;
541 }
542 
543 
/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated on the heap; the caller must free() it when done.
	Returns NULL if the required memory is not available.
*/
static char *
get_file_system_name(const char *fsName)
{
	static const char kModulePrefix[] = "file_systems/";
	const size_t prefixLength = sizeof(kModulePrefix) - 1;

	if (strncmp(fsName, kModulePrefix, prefixLength) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	const char *shortName = fsName + prefixLength;
	const char *slash = strchr(shortName, '/');
	if (slash == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(shortName);
	}

	// copy everything up to (but excluding) the trailing "/v1"
	size_t nameLength = slash - shortName;
	char *name = (char *)malloc(nameLength + 1);
	if (name == NULL)
		return NULL;

	memcpy(name, shortName, nameLength);
	name[nameLength] = '\0';
	return name;
}
576 
577 
578 static int
579 vnode_compare(void *_vnode, const void *_key)
580 {
581 	struct vnode *vnode = (struct vnode *)_vnode;
582 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
583 
584 	if (vnode->device == key->device && vnode->id == key->vnode)
585 		return 0;
586 
587 	return -1;
588 }
589 
590 
591 static uint32
592 vnode_hash(void *_vnode, const void *_key, uint32 range)
593 {
594 	struct vnode *vnode = (struct vnode *)_vnode;
595 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
596 
597 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
598 
599 	if (vnode != NULL)
600 		return VHASH(vnode->device, vnode->id) % range;
601 
602 	return VHASH(key->device, key->vnode) % range;
603 
604 #undef VHASH
605 }
606 
607 
/*!	Adds \a vnode to the vnode list of its \a mount.
	Acquires the mount's rlock, which guards that list.
*/
static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}
617 
618 
/*!	Removes \a vnode from the vnode list of its \a mount and clears the
	link pointers, so the node is recognizably unlinked.
	Acquires the mount's rlock, which guards that list.
*/
static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}
629 
630 
/*!	Allocates a new vnode for the given volume/node ID pair, registers it
	in sVnodeTable and in its mount's vnode list, and returns it with a
	reference count of 1.
	NOTE(review): called with sVnodeMutex held (cf. get_vnode()), which the
	hash_insert() into sVnodeTable relies on -- confirm for all callers.

	\return \c B_OK on success, \c B_NO_MEMORY if the allocation failed,
		\c B_ENTRY_NOT_FOUND if the mount doesn't exist or is currently
		being unmounted.
*/
static status_t
create_new_vnode(struct vnode **_vnode, dev_t mountID, ino_t vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}
664 
665 
/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.

	The vnode must be busy and have a reference count of 0.

	\param vnode the vnode to destroy.
	\param reenter \c true, if called (indirectly) from within a file system.
*/
static void
free_vnode(struct vnode *vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now, will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
	// cache either (i.e. no one can get a cache reference while we're deleting
	// the vnode).. This is, however, not the case for the page daemon. It gets
	// its cache references via the pages it scans, so it can in fact get a
	// vnode reference while we're deleting the vnode.

	// let the file system release the node's resources: remove_vnode() if the
	// node is marked for deletion, put_vnode() otherwise
	if (!vnode->unpublished) {
		if (vnode->remove) {
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie,
				vnode->private_node, reenter);
		} else {
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node,
				reenter);
		}
	}

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}
726 
727 
/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeMutex or the sMountMutex.

	\param vnode the vnode.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	mutex_lock(&sVnodeMutex);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		// we dropped the last reference
		if (vnode->busy)
			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes
				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		mutex_unlock(&sVnodeMutex);

		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}
784 
785 
/*!	\brief Increments the reference counter of the given vnode.

	The caller must either already have a reference to the vnode or hold
	the sVnodeMutex.

	\note The counter is incremented atomically; no lock is acquired here.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode *vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}
799 
800 
/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeMutex.
	Does not change the node's reference count.

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode *
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode *)hash_lookup(sVnodeTable, &key);
}
821 
822 
/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeMutex or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait whether to wait (up to about 10 seconds) for a busy vnode
		   to become unbusy, instead of failing with \c B_BUSY right away.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

	int32 tries = 1000;
		// try for 10 secs
restart:
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		// the node is being constructed or destructed by someone else --
		// wait for it to become unbusy and look it up again
		mutex_unlock(&sVnodeMutex);
		if (!canWait || --tries < 0) {
			// vnode doesn't seem to become unbusy
			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
			return B_BUSY;
		}
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in

		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		// mark the new node busy while the file system loads it, so that
		// concurrent lookups wait instead of using a half-initialized node
		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID,
			&vnode->private_node, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}
911 
912 
/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeMutex or the sMountMutex.

	Convenience wrapper for dec_vnode_ref_count() with \c reenter == false.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode *vnode)
{
	dec_vnode_ref_count(vnode, false);
}
927 
928 
/*!	vm_low_memory handler: writes back and frees unused vnodes in order to
	reclaim memory. The more severe the low memory \a level, the larger the
	fraction of sUnusedVnodes that is freed.
*/
static void
vnode_low_memory_handler(void */*data*/, int32 level)
{
	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));

	// determine how many unused vnodes to free, depending on the severity
	uint32 count = 1;
	switch (level) {
		case B_NO_LOW_MEMORY:
			return;
		case B_LOW_MEMORY_NOTE:
			count = sUnusedVnodes / 100;
			break;
		case B_LOW_MEMORY_WARNING:
			count = sUnusedVnodes / 10;
			break;
		case B_LOW_MEMORY_CRITICAL:
			count = sUnusedVnodes;
			break;
	}

	if (count > sUnusedVnodes)
		count = sUnusedVnodes;

	// first, write back the modified pages of some unused vnodes

	uint32 freeCount = count;

	for (uint32 i = 0; i < count; i++) {
		mutex_lock(&sVnodeMutex);
		struct vnode *vnode = (struct vnode *)list_remove_head_item(
			&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}

		// hold a reference while writing back, then drop it again, which
		// re-inserts the node at the head of the unused list
		inc_vnode_ref_count(vnode);
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		if (vnode->cache != NULL)
			vm_cache_write_modified(vnode->cache, false);

		dec_vnode_ref_count(vnode, false);
	}

	// and then free them

	for (uint32 i = 0; i < freeCount; i++) {
		mutex_lock(&sVnodeMutex);

		// We're removing vnodes from the tail of the list - hoping it's
		// one of those we have just written back; otherwise we'll write
		// back the vnode with the busy flag turned on, and that might
		// take some time.
		struct vnode *vnode = (struct vnode *)list_remove_tail_item(
			&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}
		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));

		vnode->busy = true;
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		free_vnode(vnode, false);
	}
}
1001 
1002 
/*!	Unlocks an advisory_locking object previously locked via
	get_advisory_locking(), by releasing its lock semaphore.
*/
static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}
1008 
1009 
/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking *
get_advisory_locking(struct vnode *vnode)
{
	mutex_lock(&sVnodeMutex);

	// grab the semaphore ID while holding sVnodeMutex -- the locking object
	// itself may be deleted as soon as we release the mutex
	struct advisory_locking *locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	mutex_unlock(&sVnodeMutex);

	if (lock >= B_OK)
		lock = acquire_sem(lock);
	if (lock < B_OK) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
1038 
1039 
/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the mean time, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode *vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	struct advisory_locking *locking = new(std::nothrow) advisory_locking;
	if (locking == NULL)
		return B_NO_MEMORY;

	status_t status;

	locking->wait_sem = create_sem(0, "advisory lock");
	if (locking->wait_sem < B_OK) {
		status = locking->wait_sem;
		goto err1;
	}

	locking->lock = create_sem(0, "advisory locking");
		// count 0, i.e. the object starts out locked
	if (locking->lock < B_OK) {
		status = locking->lock;
		goto err2;
	}

	// We need to set the locking structure atomically - someone
	// else might set one at the same time
	do {
		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking,
				(addr_t)locking, (addr_t)NULL) == (addr_t)NULL)
			return B_OK;
	} while (get_advisory_locking(vnode) == NULL);
		// someone else attached an object first; loop until we manage to
		// lock theirs (it might get deleted again in the mean time)

	status = B_OK;
		// we delete the one we've just created, but nevertheless, the vnode
		// does have a locking structure now

	delete_sem(locking->lock);
err2:
	delete_sem(locking->wait_sem);
err1:
	delete locking;
	return status;
}
1089 
1090 
/*!	Retrieves the first lock that has been set by the current team.

	Fills in \a flock's \c l_start and \c l_len fields from the found lock.

	\return \c B_OK if a lock of the current team was found,
		\c B_BAD_VALUE otherwise.
*/
static status_t
get_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_BAD_VALUE;

	// TODO: this should probably get the flock by its file descriptor!
	team_id team = team_get_current_team_id();
	status_t status = B_BAD_VALUE;

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock *lock = iterator.Next();

		if (lock->team == team) {
			flock->l_start = lock->start;
			flock->l_len = lock->end - lock->start + 1;
			status = B_OK;
			break;
		}
	}

	put_advisory_locking(locking);
	return status;
}
1119 
1120 
1121 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1122 	with the advisory_lock \a lock.
1123 */
1124 static bool
1125 advisory_lock_intersects(struct advisory_lock *lock, struct flock *flock)
1126 {
1127 	if (flock == NULL)
1128 		return true;
1129 
1130 	return lock->start <= flock->l_start - 1 + flock->l_len
1131 		&& lock->end >= flock->l_start;
1132 }
1133 
1134 
1135 /*!	Removes the specified lock, or all locks of the calling team
1136 	if \a flock is NULL.
1137 */
1138 static status_t
1139 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1140 {
1141 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1142 
1143 	struct advisory_locking *locking = get_advisory_locking(vnode);
1144 	if (locking == NULL)
1145 		return B_OK;
1146 
1147 	// TODO: use the thread ID instead??
1148 	team_id team = team_get_current_team_id();
1149 	pid_t session = thread_get_current_thread()->team->session_id;
1150 
1151 	// find matching lock entries
1152 
1153 	LockList::Iterator iterator = locking->locks.GetIterator();
1154 	while (iterator.HasNext()) {
1155 		struct advisory_lock *lock = iterator.Next();
1156 		bool removeLock = false;
1157 
1158 		if (lock->session == session)
1159 			removeLock = true;
1160 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1161 			bool endsBeyond = false;
1162 			bool startsBefore = false;
1163 			if (flock != NULL) {
1164 				startsBefore = lock->start < flock->l_start;
1165 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1166 			}
1167 
1168 			if (!startsBefore && !endsBeyond) {
1169 				// lock is completely contained in flock
1170 				removeLock = true;
1171 			} else if (startsBefore && !endsBeyond) {
1172 				// cut the end of the lock
1173 				lock->end = flock->l_start - 1;
1174 			} else if (!startsBefore && endsBeyond) {
1175 				// cut the start of the lock
1176 				lock->start = flock->l_start + flock->l_len;
1177 			} else {
1178 				// divide the lock into two locks
1179 				struct advisory_lock *secondLock = new advisory_lock;
1180 				if (secondLock == NULL) {
1181 					// TODO: we should probably revert the locks we already
1182 					// changed... (ie. allocate upfront)
1183 					put_advisory_locking(locking);
1184 					return B_NO_MEMORY;
1185 				}
1186 
1187 				lock->end = flock->l_start - 1;
1188 
1189 				secondLock->team = lock->team;
1190 				secondLock->session = lock->session;
1191 				// values must already be normalized when getting here
1192 				secondLock->start = flock->l_start + flock->l_len;
1193 				secondLock->end = lock->end;
1194 				secondLock->shared = lock->shared;
1195 
1196 				locking->locks.Add(secondLock);
1197 			}
1198 		}
1199 
1200 		if (removeLock) {
1201 			// this lock is no longer used
1202 			iterator.Remove();
1203 			free(lock);
1204 		}
1205 	}
1206 
1207 	bool removeLocking = locking->locks.IsEmpty();
1208 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1209 
1210 	put_advisory_locking(locking);
1211 
1212 	if (removeLocking) {
1213 		// We can remove the whole advisory locking structure; it's no
1214 		// longer used
1215 		locking = get_advisory_locking(vnode);
1216 		if (locking != NULL) {
1217 			// the locking could have been changed in the mean time
1218 			if (locking->locks.IsEmpty()) {
1219 				vnode->advisory_locking = NULL;
1220 
1221 				// we've detached the locking from the vnode, so we can
1222 				// safely delete it
1223 				delete_sem(locking->lock);
1224 				delete_sem(locking->wait_sem);
1225 				delete locking;
1226 			} else {
1227 				// the locking is in use again
1228 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1229 			}
1230 		}
1231 	}
1232 
1233 	return B_OK;
1234 }
1235 
1236 
/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line to what the BSD's are doing).

	\param flock The (already normalized) lock range and type; F_RDLCK
		   requests a shared lock, anything else an exclusive one.
*/
static status_t
acquire_advisory_lock(struct vnode *vnode, pid_t session, struct flock *flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding file lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	team_id team = team_get_current_team_id();
	sem_id waitForLock = -1;

	if (locking != NULL) {
		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock *lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		// If we aren't going to wait below, unlock the locking object now;
		// otherwise keep it locked so switch_sem_etc() can release it
		// atomically while starting to wait.
		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		else {
			// atomically releases locking->lock and blocks on wait_sem;
			// release_advisory_lock() broadcasts wait_sem when locks change
			status = switch_sem_etc(locking->lock, waitForLock, 1,
				B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install new lock

	locking = get_advisory_locking(vnode);
	if (locking == NULL) {
		// we need to create a new locking object
		status = create_advisory_locking(vnode);
		if (status < B_OK)
			return status;

		locking = vnode->advisory_locking;
			// we own the locking object, so it can't go away
	}

	struct advisory_lock *lock = (struct advisory_lock *)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		// wake any threads we may have been holding off, then unlock
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
1338 
1339 
/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.

	\return B_OK on success, EOPNOTSUPP if the file system cannot report
		the file size for SEEK_END, or B_BAD_VALUE for an unknown
		l_whence.
*/
static status_t
normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			// l_start is already absolute
			break;
		case SEEK_CUR:
			// make the offset absolute by adding the current file position
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			// make the offset absolute by adding the file size
			struct vnode *vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (FS_CALL(vnode, read_stat) == NULL)
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
				vnode->private_node, &stat);
			if (status < B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		// a zero length means "lock until the end of the file"
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		// NOTE(review): l_start can end up negative here when l_len reaches
		// back past offset 0 - POSIX mandates EINVAL in that case; confirm
		// callers' expectations before adding that check.
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
1391 
1392 
1393 /*!	Disconnects all file descriptors that are associated with the
1394 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1395 	\a mount object.
1396 
1397 	Note, after you've called this function, there might still be ongoing
1398 	accesses - they won't be interrupted if they already happened before.
1399 	However, any subsequent access will fail.
1400 
1401 	This is not a cheap function and should be used with care and rarely.
1402 	TODO: there is currently no means to stop a blocking read/write!
1403 */
1404 void
1405 disconnect_mount_or_vnode_fds(struct fs_mount *mount,
1406 	struct vnode *vnodeToDisconnect)
1407 {
1408 	// iterate over all teams and peek into their file descriptors
1409 	int32 nextTeamID = 0;
1410 
1411 	while (true) {
1412 		struct io_context *context = NULL;
1413 		sem_id contextMutex = -1;
1414 		struct team *team = NULL;
1415 		team_id lastTeamID;
1416 
1417 		cpu_status state = disable_interrupts();
1418 		GRAB_TEAM_LOCK();
1419 
1420 		lastTeamID = peek_next_thread_id();
1421 		if (nextTeamID < lastTeamID) {
1422 			// get next valid team
1423 			while (nextTeamID < lastTeamID
1424 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1425 				nextTeamID++;
1426 			}
1427 
1428 			if (team) {
1429 				context = (io_context *)team->io_context;
1430 				contextMutex = context->io_mutex.sem;
1431 				nextTeamID++;
1432 			}
1433 		}
1434 
1435 		RELEASE_TEAM_LOCK();
1436 		restore_interrupts(state);
1437 
1438 		if (context == NULL)
1439 			break;
1440 
1441 		// we now have a context - since we couldn't lock it while having
1442 		// safe access to the team structure, we now need to lock the mutex
1443 		// manually
1444 
1445 		if (acquire_sem(contextMutex) != B_OK) {
1446 			// team seems to be gone, go over to the next team
1447 			continue;
1448 		}
1449 
1450 		// the team cannot be deleted completely while we're owning its
1451 		// io_context mutex, so we can safely play with it now
1452 
1453 		context->io_mutex.holder = thread_get_current_thread_id();
1454 
1455 		if (context->cwd != NULL && context->cwd->mount == mount
1456 			&& (vnodeToDisconnect == NULL
1457 				|| vnodeToDisconnect == context->cwd)) {
1458 			put_vnode(context->cwd);
1459 				// Note: We're only accessing the pointer, not the vnode itself
1460 				// in the lines below.
1461 
1462 			if (context->cwd == mount->root_vnode) {
1463 				// redirect the current working directory to the covered vnode
1464 				context->cwd = mount->covers_vnode;
1465 				inc_vnode_ref_count(context->cwd);
1466 			} else
1467 				context->cwd = NULL;
1468 		}
1469 
1470 		for (uint32 i = 0; i < context->table_size; i++) {
1471 			if (struct file_descriptor *descriptor = context->fds[i]) {
1472 				inc_fd_ref_count(descriptor);
1473 
1474 				// if this descriptor points at this mount, we
1475 				// need to disconnect it to be able to unmount
1476 				struct vnode *vnode = fd_vnode(descriptor);
1477 				if (vnodeToDisconnect != NULL) {
1478 					if (vnode == vnodeToDisconnect)
1479 						disconnect_fd(descriptor);
1480 				} else if (vnode != NULL && vnode->mount == mount
1481 					|| vnode == NULL && descriptor->u.mount == mount)
1482 					disconnect_fd(descriptor);
1483 
1484 				put_fd(descriptor);
1485 			}
1486 		}
1487 
1488 		mutex_unlock(&context->io_mutex);
1489 	}
1490 }
1491 
1492 
1493 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1494 		   by.
1495 
1496 	Given an arbitrary vnode, the function checks, whether the node is covered
1497 	by the root of a volume. If it is the function obtains a reference to the
1498 	volume root node and returns it.
1499 
1500 	\param vnode The vnode in question.
1501 	\return The volume root vnode the vnode cover is covered by, if it is
1502 			indeed a mount point, or \c NULL otherwise.
1503 */
1504 static struct vnode *
1505 resolve_mount_point_to_volume_root(struct vnode *vnode)
1506 {
1507 	if (!vnode)
1508 		return NULL;
1509 
1510 	struct vnode *volumeRoot = NULL;
1511 
1512 	mutex_lock(&sVnodeCoveredByMutex);
1513 	if (vnode->covered_by) {
1514 		volumeRoot = vnode->covered_by;
1515 		inc_vnode_ref_count(volumeRoot);
1516 	}
1517 	mutex_unlock(&sVnodeCoveredByMutex);
1518 
1519 	return volumeRoot;
1520 }
1521 
1522 
1523 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1524 		   by.
1525 
1526 	Given an arbitrary vnode (identified by mount and node ID), the function
1527 	checks, whether the node is covered by the root of a volume. If it is the
1528 	function returns the mount and node ID of the volume root node. Otherwise
1529 	it simply returns the supplied mount and node ID.
1530 
1531 	In case of error (e.g. the supplied node could not be found) the variables
1532 	for storing the resolved mount and node ID remain untouched and an error
1533 	code is returned.
1534 
1535 	\param mountID The mount ID of the vnode in question.
1536 	\param nodeID The node ID of the vnode in question.
1537 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1538 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1539 	\return
1540 	- \c B_OK, if everything went fine,
1541 	- another error code, if something went wrong.
1542 */
1543 status_t
1544 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1545 	dev_t *resolvedMountID, ino_t *resolvedNodeID)
1546 {
1547 	// get the node
1548 	struct vnode *node;
1549 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1550 	if (error != B_OK)
1551 		return error;
1552 
1553 	// resolve the node
1554 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1555 	if (resolvedNode) {
1556 		put_vnode(node);
1557 		node = resolvedNode;
1558 	}
1559 
1560 	// set the return values
1561 	*resolvedMountID = node->device;
1562 	*resolvedNodeID = node->id;
1563 
1564 	put_vnode(node);
1565 
1566 	return B_OK;
1567 }
1568 
1569 
1570 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1571 
1572 	Given an arbitrary vnode, the function checks, whether the node is the
1573 	root of a volume. If it is (and if it is not "/"), the function obtains
1574 	a reference to the underlying mount point node and returns it.
1575 
1576 	\param vnode The vnode in question (caller must have a reference).
1577 	\return The mount point vnode the vnode covers, if it is indeed a volume
1578 			root and not "/", or \c NULL otherwise.
1579 */
1580 static struct vnode *
1581 resolve_volume_root_to_mount_point(struct vnode *vnode)
1582 {
1583 	if (!vnode)
1584 		return NULL;
1585 
1586 	struct vnode *mountPoint = NULL;
1587 
1588 	struct fs_mount *mount = vnode->mount;
1589 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1590 		mountPoint = mount->covers_vnode;
1591 		inc_vnode_ref_count(mountPoint);
1592 	}
1593 
1594 	return mountPoint;
1595 }
1596 
1597 
1598 /*!	\brief Gets the directory path and leaf name for a given path.
1599 
1600 	The supplied \a path is transformed to refer to the directory part of
1601 	the entry identified by the original path, and into the buffer \a filename
1602 	the leaf name of the original entry is written.
1603 	Neither the returned path nor the leaf name can be expected to be
1604 	canonical.
1605 
1606 	\param path The path to be analyzed. Must be able to store at least one
1607 		   additional character.
1608 	\param filename The buffer into which the leaf name will be written.
1609 		   Must be of size B_FILE_NAME_LENGTH at least.
1610 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1611 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
1612 		   if the given path name is empty.
1613 */
1614 static status_t
1615 get_dir_path_and_leaf(char *path, char *filename)
1616 {
1617 	if (*path == '\0')
1618 		return B_ENTRY_NOT_FOUND;
1619 
1620 	char *p = strrchr(path, '/');
1621 		// '/' are not allowed in file names!
1622 
1623 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1624 
1625 	if (!p) {
1626 		// this path is single segment with no '/' in it
1627 		// ex. "foo"
1628 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1629 			return B_NAME_TOO_LONG;
1630 		strcpy(path, ".");
1631 	} else {
1632 		p++;
1633 		if (*p == '\0') {
1634 			// special case: the path ends in '/'
1635 			strcpy(filename, ".");
1636 		} else {
1637 			// normal leaf: replace the leaf portion of the path with a '.'
1638 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1639 				>= B_FILE_NAME_LENGTH) {
1640 				return B_NAME_TOO_LONG;
1641 			}
1642 		}
1643 		p[0] = '.';
1644 		p[1] = '\0';
1645 	}
1646 	return B_OK;
1647 }
1648 
1649 
1650 static status_t
1651 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char *name,
1652 	bool traverse, struct vnode **_vnode)
1653 {
1654 	char clonedName[B_FILE_NAME_LENGTH + 1];
1655 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1656 		return B_NAME_TOO_LONG;
1657 
1658 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1659 	struct vnode *directory;
1660 
1661 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
1662 	if (status < 0)
1663 		return status;
1664 
1665 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, _vnode, NULL,
1666 		NULL);
1667 }
1668 
1669 
/*!	Returns the vnode for the relative path starting at the specified \a vnode.
	\a path must not be NULL.
	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it does contain more than one component.
	Note, this reduces the ref_count of the starting \a vnode, no matter if
	it is successful or not!

	\param vnode The directory to start resolving from; its reference is
		   always consumed, even on error.
	\param path The relative path; parsed/modified in place.
	\param traverseLeafLink Whether a symlink in the last component is
		   followed as well.
	\param count Current symlink nesting depth (checked against
		   B_MAX_SYMLINKS).
	\param _vnode Where the resolved vnode (with a reference for the
		   caller) is stored.
	\param _parentID Where the ID of the last component's parent directory
		   is stored. May be \c NULL.
	\param _type Where the type of the last looked-up entry is stored.
		   May be \c NULL.
*/
static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, ino_t *_parentID, int *_type)
{
	status_t status = 0;
	ino_t lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL) {
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	if (*path == '\0') {
		put_vnode(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	while (true) {
		struct vnode *nextVnode;
		ino_t vnodeID;
		char *nextPath;

		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			// terminate the current component, then skip all consecutive
			// slashes so nextPath points at the next component
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying filesystem
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the filesystem to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
		// from inside the filesystem, thus the vnode would have to be in the list and it's
		// ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			TRACE(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			if (count + 1 > B_MAX_SYMLINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			if (FS_CALL(nextVnode, read_symlink) != NULL) {
				// reserve one byte for the terminating '\0'
				bufferSize--;
				status = FS_CALL(nextVnode, read_symlink)(
					nextVnode->mount->cookie, nextVnode->private_node, buffer,
					&bufferSize);
				// null-terminate
				if (status >= 0)
					buffer[bufferSize] = '\0';
			} else
				status = B_BAD_VALUE;

			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			bool absoluteSymlink = false;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);

				absoluteSymlink = true;
			}

			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the
				// ref_count of the vnode, no matter if we succeeded or not

			if (absoluteSymlink && *path == '\0') {
				// symlink was just "/"
				nextVnode = vnode;
			} else {
				status = vnode_path_to_vnode(vnode, path, traverseLeafLink,
					count + 1, &nextVnode, &lastParentID, _type);
			}

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}
1864 
1865 
1866 static status_t
1867 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1868 	ino_t *_parentID, bool kernel)
1869 {
1870 	struct vnode *start = NULL;
1871 
1872 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1873 
1874 	if (!path)
1875 		return B_BAD_VALUE;
1876 
1877 	if (*path == '\0')
1878 		return B_ENTRY_NOT_FOUND;
1879 
1880 	// figure out if we need to start at root or at cwd
1881 	if (*path == '/') {
1882 		if (sRoot == NULL) {
1883 			// we're a bit early, aren't we?
1884 			return B_ERROR;
1885 		}
1886 
1887 		while (*++path == '/')
1888 			;
1889 		start = sRoot;
1890 		inc_vnode_ref_count(start);
1891 
1892 		if (*path == '\0') {
1893 			*_vnode = start;
1894 			return B_OK;
1895 		}
1896 
1897 	} else {
1898 		struct io_context *context = get_current_io_context(kernel);
1899 
1900 		mutex_lock(&context->io_mutex);
1901 		start = context->cwd;
1902 		if (start != NULL)
1903 			inc_vnode_ref_count(start);
1904 		mutex_unlock(&context->io_mutex);
1905 
1906 		if (start == NULL)
1907 			return B_ERROR;
1908 	}
1909 
1910 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1911 }
1912 
1913 
1914 /*! Returns the vnode in the next to last segment of the path, and returns
1915 	the last portion in filename.
1916 	The path buffer must be able to store at least one additional character.
1917 */
1918 static status_t
1919 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1920 {
1921 	status_t status = get_dir_path_and_leaf(path, filename);
1922 	if (status != B_OK)
1923 		return status;
1924 
1925 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1926 }
1927 
1928 
1929 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
1930 		   to by a FD + path pair.
1931 
1932 	\a path must be given in either case. \a fd might be omitted, in which
1933 	case \a path is either an absolute path or one relative to the current
1934 	directory. If both a supplied and \a path is relative it is reckoned off
1935 	of the directory referred to by \a fd. If \a path is absolute \a fd is
1936 	ignored.
1937 
1938 	The caller has the responsibility to call put_vnode() on the returned
1939 	directory vnode.
1940 
1941 	\param fd The FD. May be < 0.
1942 	\param path The absolute or relative path. Must not be \c NULL. The buffer
1943 	       is modified by this function. It must have at least room for a
1944 	       string one character longer than the path it contains.
1945 	\param _vnode A pointer to a variable the directory vnode shall be written
1946 		   into.
1947 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1948 		   the leaf name of the specified entry will be written.
1949 	\param kernel \c true, if invoked from inside the kernel, \c false if
1950 		   invoked from userland.
1951 	\return \c B_OK, if everything went fine, another error code otherwise.
1952 */
1953 static status_t
1954 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1955 	char *filename, bool kernel)
1956 {
1957 	if (!path)
1958 		return B_BAD_VALUE;
1959 	if (*path == '\0')
1960 		return B_ENTRY_NOT_FOUND;
1961 	if (fd < 0)
1962 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1963 
1964 	status_t status = get_dir_path_and_leaf(path, filename);
1965 	if (status != B_OK)
1966 		return status;
1967 
1968 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1969 }
1970 
1971 
1972 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
1973 		   to by a vnode + path pair.
1974 
1975 	\a path must be given in either case. \a vnode might be omitted, in which
1976 	case \a path is either an absolute path or one relative to the current
1977 	directory. If both a supplied and \a path is relative it is reckoned off
1978 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
1979 	ignored.
1980 
1981 	The caller has the responsibility to call put_vnode() on the returned
1982 	directory vnode.
1983 
1984 	\param vnode The vnode. May be \c NULL.
1985 	\param path The absolute or relative path. Must not be \c NULL. The buffer
1986 	       is modified by this function. It must have at least room for a
1987 	       string one character longer than the path it contains.
1988 	\param _vnode A pointer to a variable the directory vnode shall be written
1989 		   into.
1990 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1991 		   the leaf name of the specified entry will be written.
1992 	\param kernel \c true, if invoked from inside the kernel, \c false if
1993 		   invoked from userland.
1994 	\return \c B_OK, if everything went fine, another error code otherwise.
1995 */
1996 static status_t
1997 vnode_and_path_to_dir_vnode(struct vnode* vnode, char *path,
1998 	struct vnode **_vnode, char *filename, bool kernel)
1999 {
2000 	if (!path)
2001 		return B_BAD_VALUE;
2002 	if (*path == '\0')
2003 		return B_ENTRY_NOT_FOUND;
2004 	if (vnode == NULL || path[0] == '/')
2005 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2006 
2007 	status_t status = get_dir_path_and_leaf(path, filename);
2008 	if (status != B_OK)
2009 		return status;
2010 
2011 	inc_vnode_ref_count(vnode);
2012 		// vnode_path_to_vnode() always decrements the ref count
2013 
2014 	return vnode_path_to_vnode(vnode, path, true, 0, _vnode, NULL, NULL);
2015 }
2016 
2017 
2018 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2019 */
2020 static status_t
2021 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
2022 	size_t bufferSize)
2023 {
2024 	if (bufferSize < sizeof(struct dirent))
2025 		return B_BAD_VALUE;
2026 
2027 	// See if vnode is the root of a mount and move to the covered
2028 	// vnode so we get the underlying file system
2029 	VNodePutter vnodePutter;
2030 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
2031 		vnode = vnode->mount->covers_vnode;
2032 		inc_vnode_ref_count(vnode);
2033 		vnodePutter.SetTo(vnode);
2034 	}
2035 
2036 	if (FS_CALL(vnode, get_vnode_name)) {
2037 		// The FS supports getting the name of a vnode.
2038 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
2039 			vnode->private_node, buffer->d_name,
2040 			(char*)buffer + bufferSize - buffer->d_name);
2041 	}
2042 
2043 	// The FS doesn't support getting the name of a vnode. So we search the
2044 	// parent directory for the vnode, if the caller let us.
2045 
2046 	if (parent == NULL)
2047 		return EOPNOTSUPP;
2048 
2049 	fs_cookie cookie;
2050 
2051 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
2052 		parent->private_node, &cookie);
2053 	if (status >= B_OK) {
2054 		while (true) {
2055 			uint32 num = 1;
2056 			status = dir_read(parent, cookie, buffer, bufferSize, &num);
2057 			if (status < B_OK)
2058 				break;
2059 			if (num == 0) {
2060 				status = B_ENTRY_NOT_FOUND;
2061 				break;
2062 			}
2063 
2064 			if (vnode->id == buffer->d_ino) {
2065 				// found correct entry!
2066 				break;
2067 			}
2068 		}
2069 
2070 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node,
2071 			cookie);
2072 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie,
2073 			vnode->private_node, cookie);
2074 	}
2075 	return status;
2076 }
2077 
2078 
2079 static status_t
2080 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
2081 	size_t nameSize)
2082 {
2083 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2084 	struct dirent *dirent = (struct dirent *)buffer;
2085 
2086 	status_t status = get_vnode_name(vnode, parent, buffer, sizeof(buffer));
2087 	if (status != B_OK)
2088 		return status;
2089 
2090 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2091 		return B_BUFFER_OVERFLOW;
2092 
2093 	return B_OK;
2094 }
2095 
2096 
2097 /*!	Gets the full path to a given directory vnode.
2098 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2099 	file system doesn't support this call, it will fall back to iterating
2100 	through the parent directory to get the name of the child.
2101 
2102 	To protect against circular loops, it supports a maximum tree depth
2103 	of 256 levels.
2104 
2105 	Note that the path may not be correct the time this function returns!
2106 	It doesn't use any locking to prevent returning the correct path, as
2107 	paths aren't safe anyway: the path to a file can change at any time.
2108 
2109 	It might be a good idea, though, to check if the returned path exists
2110 	in the calling function (it's not done here because of efficiency)
2111 */
static status_t
dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
{
	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));

	if (vnode == NULL || buffer == NULL)
		return B_BAD_VALUE;

	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// The path is built back to front in 'path'; 'insert' is the index of
	// its first valid character.
	char *path = pathBuffer.LockBuffer();
	int32 insert = pathBuffer.BufferSize();
	int32 maxLevel = 256;
	int32 length;
	status_t status;

	// we don't use get_vnode() here because this call is more
	// efficient and does all we need from get_vnode()
	inc_vnode_ref_count(vnode);

	// resolve a volume root to its mount point
	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
	if (mountPoint) {
		put_vnode(vnode);
		vnode = mountPoint;
	}

	path[--insert] = '\0';

	// Walk up the tree via "..", prepending each component's name until we
	// reach the root (parent == child).
	while (true) {
		// the name buffer is also used for fs_read_dir()
		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		char *name = &((struct dirent *)nameBuffer)->d_name[0];
		struct vnode *parentVnode;
		ino_t parentID;
		int type;

		// lookup the parent vnode
		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
			&parentID, &type);
		if (status < B_OK)
			goto out;

		mutex_lock(&sVnodeMutex);
		parentVnode = lookup_vnode(vnode->device, parentID);
		mutex_unlock(&sVnodeMutex);

		if (parentVnode == NULL) {
			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, parentID);
			status = B_ENTRY_NOT_FOUND;
			goto out;
		}

		// get the node's name
		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
			sizeof(nameBuffer));
			// note: the status is only evaluated after the references have
			// been juggled below, so the error path releases the right vnode

		// resolve a volume root to its mount point
		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
		if (mountPoint) {
			put_vnode(parentVnode);
			parentVnode = mountPoint;
			parentID = parentVnode->id;
		}

		// "/" is its own parent -- detect that we reached the top
		bool hitRoot = (parentVnode == vnode);

		// release the current vnode, we only need its parent from now on
		put_vnode(vnode);
		vnode = parentVnode;

		if (status < B_OK)
			goto out;

		if (hitRoot) {
			// we have reached "/", which means we have constructed the full
			// path
			break;
		}

		// ToDo: add an explicit check for loops in about 10 levels to do
		// real loop detection

		// don't go deeper as 'maxLevel' to prevent circular loops
		if (maxLevel-- < 0) {
			status = ELOOP;
			goto out;
		}

		// add the name in front of the current path
		name[B_FILE_NAME_LENGTH - 1] = '\0';
		length = strlen(name);
		insert -= length;
		if (insert <= 0) {
			status = ENOBUFS;
			goto out;
		}
		memcpy(path + insert, name, length);
		path[--insert] = '/';
	}

	// the root dir will result in an empty path: fix it
	if (path[insert] == '\0')
		path[--insert] = '/';

	TRACE(("  path is: %s\n", path + insert));

	// copy the path to the output buffer
	length = pathBuffer.BufferSize() - insert;
	if (length <= (int)bufferSize)
		memcpy(buffer, path + insert, length);
	else
		status = ENOBUFS;

out:
	put_vnode(vnode);
	return status;
}
2234 
2235 
2236 /*!	Checks the length of every path component, and adds a '.'
2237 	if the path ends in a slash.
2238 	The given path buffer must be able to store at least one
2239 	additional character.
2240 */
2241 static status_t
2242 check_path(char *to)
2243 {
2244 	int32 length = 0;
2245 
2246 	// check length of every path component
2247 
2248 	while (*to) {
2249 		char *begin;
2250 		if (*to == '/')
2251 			to++, length++;
2252 
2253 		begin = to;
2254 		while (*to != '/' && *to)
2255 			to++, length++;
2256 
2257 		if (to - begin > B_FILE_NAME_LENGTH)
2258 			return B_NAME_TOO_LONG;
2259 	}
2260 
2261 	if (length == 0)
2262 		return B_ENTRY_NOT_FOUND;
2263 
2264 	// complete path if there is a slash at the end
2265 
2266 	if (*(to - 1) == '/') {
2267 		if (length > B_PATH_NAME_LENGTH - 2)
2268 			return B_NAME_TOO_LONG;
2269 
2270 		to[0] = '.';
2271 		to[1] = '\0';
2272 	}
2273 
2274 	return B_OK;
2275 }
2276 
2277 
2278 static struct file_descriptor *
2279 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2280 {
2281 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2282 	if (descriptor == NULL)
2283 		return NULL;
2284 
2285 	if (fd_vnode(descriptor) == NULL) {
2286 		put_fd(descriptor);
2287 		return NULL;
2288 	}
2289 
2290 	// ToDo: when we can close a file descriptor at any point, investigate
2291 	//	if this is still valid to do (accessing the vnode without ref_count
2292 	//	or locking)
2293 	*_vnode = descriptor->u.vnode;
2294 	return descriptor;
2295 }
2296 
2297 
2298 static struct vnode *
2299 get_vnode_from_fd(int fd, bool kernel)
2300 {
2301 	struct file_descriptor *descriptor;
2302 	struct vnode *vnode;
2303 
2304 	descriptor = get_fd(get_current_io_context(kernel), fd);
2305 	if (descriptor == NULL)
2306 		return NULL;
2307 
2308 	vnode = fd_vnode(descriptor);
2309 	if (vnode != NULL)
2310 		inc_vnode_ref_count(vnode);
2311 
2312 	put_fd(descriptor);
2313 	return vnode;
2314 }
2315 
2316 
2317 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2318 	only the path will be considered. In this case, the \a path must not be
2319 	NULL.
2320 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2321 	and should be NULL for files.
2322 */
2323 static status_t
2324 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2325 	struct vnode **_vnode, ino_t *_parentID, bool kernel)
2326 {
2327 	if (fd < 0 && !path)
2328 		return B_BAD_VALUE;
2329 
2330 	if (path != NULL && *path == '\0')
2331 		return B_ENTRY_NOT_FOUND;
2332 
2333 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2334 		// no FD or absolute path
2335 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2336 	}
2337 
2338 	// FD only, or FD + relative path
2339 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2340 	if (!vnode)
2341 		return B_FILE_ERROR;
2342 
2343 	if (path != NULL) {
2344 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2345 			_vnode, _parentID, NULL);
2346 	}
2347 
2348 	// there is no relative path to take into account
2349 
2350 	*_vnode = vnode;
2351 	if (_parentID)
2352 		*_parentID = -1;
2353 
2354 	return B_OK;
2355 }
2356 
2357 
2358 static int
2359 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2360 	fs_cookie cookie, int openMode, bool kernel)
2361 {
2362 	struct file_descriptor *descriptor;
2363 	int fd;
2364 
2365 	// if the vnode is locked, we don't allow creating a new file descriptor for it
2366 	if (vnode && vnode->mandatory_locked_by != NULL)
2367 		return B_BUSY;
2368 
2369 	descriptor = alloc_fd();
2370 	if (!descriptor)
2371 		return B_NO_MEMORY;
2372 
2373 	if (vnode)
2374 		descriptor->u.vnode = vnode;
2375 	else
2376 		descriptor->u.mount = mount;
2377 	descriptor->cookie = cookie;
2378 
2379 	switch (type) {
2380 		// vnode types
2381 		case FDTYPE_FILE:
2382 			descriptor->ops = &sFileOps;
2383 			break;
2384 		case FDTYPE_DIR:
2385 			descriptor->ops = &sDirectoryOps;
2386 			break;
2387 		case FDTYPE_ATTR:
2388 			descriptor->ops = &sAttributeOps;
2389 			break;
2390 		case FDTYPE_ATTR_DIR:
2391 			descriptor->ops = &sAttributeDirectoryOps;
2392 			break;
2393 
2394 		// mount types
2395 		case FDTYPE_INDEX_DIR:
2396 			descriptor->ops = &sIndexDirectoryOps;
2397 			break;
2398 		case FDTYPE_QUERY:
2399 			descriptor->ops = &sQueryOps;
2400 			break;
2401 
2402 		default:
2403 			panic("get_new_fd() called with unknown type %d\n", type);
2404 			break;
2405 	}
2406 	descriptor->type = type;
2407 	descriptor->open_mode = openMode;
2408 
2409 	fd = new_fd(get_current_io_context(kernel), descriptor);
2410 	if (fd < 0) {
2411 		free(descriptor);
2412 		return B_NO_MORE_FDS;
2413 	}
2414 
2415 	return fd;
2416 }
2417 
2418 #ifdef ADD_DEBUGGER_COMMANDS
2419 
2420 
2421 static void
2422 _dump_advisory_locking(advisory_locking *locking)
2423 {
2424 	if (locking == NULL)
2425 		return;
2426 
2427 	kprintf("   lock:        %ld", locking->lock);
2428 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2429 
2430 	int32 index = 0;
2431 	LockList::Iterator iterator = locking->locks.GetIterator();
2432 	while (iterator.HasNext()) {
2433 		struct advisory_lock *lock = iterator.Next();
2434 
2435 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2436 		kprintf("        start:  %Ld\n", lock->start);
2437 		kprintf("        end:    %Ld\n", lock->end);
2438 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2439 	}
2440 }
2441 
2442 
/*!	Kernel debugger helper: prints the fields of \a mount and exports the
	interesting pointers as debugger variables (_cookie, _root, _covers,
	_partition) for follow-up commands.
*/
static void
_dump_mount(struct fs_mount *mount)
{
	kprintf("MOUNT: %p\n", mount);
	kprintf(" id:            %ld\n", mount->id);
	kprintf(" device_name:   %s\n", mount->device_name);
	kprintf(" fs_name:       %s\n", mount->fs_name);
	kprintf(" cookie:        %p\n", mount->cookie);
	kprintf(" root_vnode:    %p\n", mount->root_vnode);
	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
	kprintf(" partition:     %p\n", mount->partition);
	kprintf(" lock:          %ld\n", mount->rlock.sem);
	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
		mount->owns_file_device ? " owns_file_device" : "");

	set_debug_variable("_cookie", (addr_t)mount->cookie);
	set_debug_variable("_root", (addr_t)mount->root_vnode);
	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
	set_debug_variable("_partition", (addr_t)mount->partition);
}
2463 
2464 
/*!	Kernel debugger helper: prints the fields of \a vnode (including its
	advisory locks) and exports pointers of interest as debugger variables
	(_node, _mount, _covered_by, _adv_lock).
*/
static void
_dump_vnode(struct vnode *vnode)
{
	kprintf("VNODE: %p\n", vnode);
	kprintf(" device:        %ld\n", vnode->device);
	kprintf(" id:            %Ld\n", vnode->id);
	kprintf(" ref_count:     %ld\n", vnode->ref_count);
	kprintf(" private_node:  %p\n", vnode->private_node);
	kprintf(" mount:         %p\n", vnode->mount);
	kprintf(" covered_by:    %p\n", vnode->covered_by);
	kprintf(" cache:         %p\n", vnode->cache);
	// flags: r = marked for removal, b = busy, u = not yet published
	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);

	_dump_advisory_locking(vnode->advisory_locking);

	set_debug_variable("_node", (addr_t)vnode->private_node);
	set_debug_variable("_mount", (addr_t)vnode->mount);
	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
}
2487 
2488 
2489 static int
2490 dump_mount(int argc, char **argv)
2491 {
2492 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2493 		kprintf("usage: %s [id|address]\n", argv[0]);
2494 		return 0;
2495 	}
2496 
2497 	uint32 id = parse_expression(argv[1]);
2498 	struct fs_mount *mount = NULL;
2499 
2500 	mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2501 	if (mount == NULL) {
2502 		if (IS_USER_ADDRESS(id)) {
2503 			kprintf("fs_mount not found\n");
2504 			return 0;
2505 		}
2506 		mount = (fs_mount *)id;
2507 	}
2508 
2509 	_dump_mount(mount);
2510 	return 0;
2511 }
2512 
2513 
2514 static int
2515 dump_mounts(int argc, char **argv)
2516 {
2517 	if (argc != 1) {
2518 		kprintf("usage: %s\n", argv[0]);
2519 		return 0;
2520 	}
2521 
2522 	kprintf("address     id root       covers     cookie     fs_name\n");
2523 
2524 	struct hash_iterator iterator;
2525 	struct fs_mount *mount;
2526 
2527 	hash_open(sMountsTable, &iterator);
2528 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2529 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
2530 			mount->covers_vnode, mount->cookie, mount->fs_name);
2531 	}
2532 
2533 	hash_close(sMountsTable, &iterator, false);
2534 	return 0;
2535 }
2536 
2537 
2538 static int
2539 dump_vnode(int argc, char **argv)
2540 {
2541 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
2542 		kprintf("usage: %s <device> <id>\n"
2543 			"   or: %s <address>\n", argv[0], argv[0]);
2544 		return 0;
2545 	}
2546 
2547 	struct vnode *vnode = NULL;
2548 
2549 	if (argc == 2) {
2550 		vnode = (struct vnode *)parse_expression(argv[1]);
2551 		if (IS_USER_ADDRESS(vnode)) {
2552 			kprintf("invalid vnode address\n");
2553 			return 0;
2554 		}
2555 		_dump_vnode(vnode);
2556 		return 0;
2557 	}
2558 
2559 	struct hash_iterator iterator;
2560 	dev_t device = parse_expression(argv[1]);
2561 	ino_t id = atoll(argv[2]);
2562 
2563 	hash_open(sVnodeTable, &iterator);
2564 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2565 		if (vnode->id != id || vnode->device != device)
2566 			continue;
2567 
2568 		_dump_vnode(vnode);
2569 	}
2570 
2571 	hash_close(sVnodeTable, &iterator, false);
2572 	return 0;
2573 }
2574 
2575 
2576 static int
2577 dump_vnodes(int argc, char **argv)
2578 {
2579 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2580 		kprintf("usage: %s [device]\n", argv[0]);
2581 		return 0;
2582 	}
2583 
2584 	// restrict dumped nodes to a certain device if requested
2585 	dev_t device = parse_expression(argv[1]);
2586 
2587 	struct hash_iterator iterator;
2588 	struct vnode *vnode;
2589 
2590 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
2591 		"flags\n");
2592 
2593 	hash_open(sVnodeTable, &iterator);
2594 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2595 		if (vnode->device != device)
2596 			continue;
2597 
2598 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
2599 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
2600 			vnode->advisory_locking, vnode->remove ? "r" : "-",
2601 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2602 	}
2603 
2604 	hash_close(sVnodeTable, &iterator, false);
2605 	return 0;
2606 }
2607 
2608 
2609 static int
2610 dump_vnode_caches(int argc, char **argv)
2611 {
2612 	struct hash_iterator iterator;
2613 	struct vnode *vnode;
2614 
2615 	if (argc > 2 || !strcmp(argv[1], "--help")) {
2616 		kprintf("usage: %s [device]\n", argv[0]);
2617 		return 0;
2618 	}
2619 
2620 	// restrict dumped nodes to a certain device if requested
2621 	dev_t device = -1;
2622 	if (argc > 1)
2623 		device = atoi(argv[1]);
2624 
2625 	kprintf("address    dev     inode cache          size   pages\n");
2626 
2627 	hash_open(sVnodeTable, &iterator);
2628 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2629 		if (vnode->cache == NULL)
2630 			continue;
2631 		if (device != -1 && vnode->device != device)
2632 			continue;
2633 
2634 		// count pages in cache
2635 		size_t numPages = 0;
2636 		for (struct vm_page *page = vnode->cache->page_list;
2637 				page != NULL; page = page->cache_next) {
2638 			numPages++;
2639 		}
2640 
2641 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
2642 			vnode->cache, (vnode->cache->virtual_size + B_PAGE_SIZE - 1)
2643 				/ B_PAGE_SIZE, numPages);
2644 	}
2645 
2646 	hash_close(sVnodeTable, &iterator, false);
2647 	return 0;
2648 }
2649 
2650 
2651 int
2652 dump_io_context(int argc, char **argv)
2653 {
2654 	if (argc > 2 || !strcmp(argv[1], "--help")) {
2655 		kprintf("usage: %s [team-id|address]\n", argv[0]);
2656 		return 0;
2657 	}
2658 
2659 	struct io_context *context = NULL;
2660 
2661 	if (argc > 1) {
2662 		uint32 num = parse_expression(argv[1]);
2663 		if (IS_KERNEL_ADDRESS(num))
2664 			context = (struct io_context *)num;
2665 		else {
2666 			struct team *team = team_get_team_struct_locked(num);
2667 			if (team == NULL) {
2668 				kprintf("could not find team with ID %ld\n", num);
2669 				return 0;
2670 			}
2671 			context = (struct io_context *)team->io_context;
2672 		}
2673 	} else
2674 		context = get_current_io_context(true);
2675 
2676 	kprintf("I/O CONTEXT: %p\n", context);
2677 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2678 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2679 	kprintf(" max fds:\t%lu\n", context->table_size);
2680 
2681 	if (context->num_used_fds)
2682 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2683 
2684 	for (uint32 i = 0; i < context->table_size; i++) {
2685 		struct file_descriptor *fd = context->fds[i];
2686 		if (fd == NULL)
2687 			continue;
2688 
2689 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2690 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2691 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2692 			fd->u.vnode);
2693 	}
2694 
2695 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2696 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2697 
2698 	set_debug_variable("_cwd", (addr_t)context->cwd);
2699 
2700 	return 0;
2701 }
2702 
2703 
2704 int
2705 dump_vnode_usage(int argc, char **argv)
2706 {
2707 	if (argc != 1) {
2708 		kprintf("usage: %s\n", argv[0]);
2709 		return 0;
2710 	}
2711 
2712 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
2713 		kMaxUnusedVnodes);
2714 
2715 	struct hash_iterator iterator;
2716 	hash_open(sVnodeTable, &iterator);
2717 
2718 	uint32 count = 0;
2719 	struct vnode *vnode;
2720 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2721 		count++;
2722 	}
2723 
2724 	hash_close(sVnodeTable, &iterator, false);
2725 
2726 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2727 	return 0;
2728 }
2729 
2730 #endif	// ADD_DEBUGGER_COMMANDS
2731 
2732 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
2733 	and calls the file system hooks to read/write the request to disk.
2734 */
static status_t
common_file_io_vec_pages(struct vnode *vnode, void *cookie,
	const file_io_vec *fileVecs, size_t fileVecCount, const iovec *vecs,
	size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset, size_t *_numBytes,
	bool doWrite)
{
	if (fileVecCount == 0) {
		// There are no file vecs at this offset, so we're obviously trying
		// to access the file outside of its bounds
		return B_BAD_VALUE;
	}

	// the iovec position (index + offset within that iovec) is carried in
	// and out via _vecIndex/_vecOffset so the caller can resume where we stop
	size_t numBytes = *_numBytes;
	uint32 fileVecIndex;
	size_t vecOffset = *_vecOffset;
	uint32 vecIndex = *_vecIndex;
	status_t status;
	size_t size;

	if (!doWrite && vecOffset == 0) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly

		size = fileVecs[0].length;
		if (size > numBytes)
			size = numBytes;

		status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
			vnode->private_node, cookie, fileVecs[0].offset, &vecs[vecIndex],
			vecCount - vecIndex, &size, false);
		if (status < B_OK)
			return status;

		// TODO: this is a work-around for buggy device drivers!
		//	When our own drivers honour the length, we can:
		//	a) also use this direct I/O for writes (otherwise, it would
		//	   overwrite precious data)
		//	b) panic if the term below is true (at least for writes)
		if (size > fileVecs[0].length) {
			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
			size = fileVecs[0].length;
		}

		ASSERT(size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return B_OK;

		// if we reached the end of the file, we can return as well
		if (size != fileVecs[0].length) {
			*_numBytes = size;
			return B_OK;
		}

		fileVecIndex = 1;

		// first, find out where we have to continue in our iovecs
		// (skip the iovecs the direct transfer above already filled)
		for (; vecIndex < vecCount; vecIndex++) {
			if (size < vecs[vecIndex].iov_len)
				break;

			size -= vecs[vecIndex].iov_len;
		}

		vecOffset = size;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	size_t totalSize = size;
	size_t bytesLeft = numBytes - size;

	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
		const file_io_vec &fileVec = fileVecs[fileVecIndex];
		off_t fileOffset = fileVec.offset;
		off_t fileLeft = min_c(fileVec.length, bytesLeft);

		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));

		// process the complete fileVec
		while (fileLeft > 0) {
			// batch up to MAX_TEMP_IO_VECS pieces of the caller's iovecs
			// for a single read_pages()/write_pages() call
			iovec tempVecs[MAX_TEMP_IO_VECS];
			uint32 tempCount = 0;

			// size tracks how much of what is left of the current fileVec
			// (fileLeft) has been assigned to tempVecs
			size = 0;

			// assign what is left of the current fileVec to the tempVecs
			for (size = 0; size < fileLeft && vecIndex < vecCount
					&& tempCount < MAX_TEMP_IO_VECS;) {
				// try to satisfy one iovec per iteration (or as much as
				// possible)

				// bytes left of the current iovec
				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
				if (vecLeft == 0) {
					vecOffset = 0;
					vecIndex++;
					continue;
				}

				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
					vecIndex, vecOffset, size));

				// actually available bytes
				size_t tempVecSize = min_c(vecLeft, fileLeft - size);

				tempVecs[tempCount].iov_base
					= (void *)((addr_t)vecs[vecIndex].iov_base + vecOffset);
				tempVecs[tempCount].iov_len = tempVecSize;
				tempCount++;

				size += tempVecSize;
				vecOffset += tempVecSize;
			}

			size_t bytes = size;
			if (doWrite) {
				status = FS_CALL(vnode, write_pages)(vnode->mount->cookie,
					vnode->private_node, cookie, fileOffset, tempVecs,
					tempCount, &bytes, false);
			} else {
				status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
					vnode->private_node, cookie, fileOffset, tempVecs,
					tempCount, &bytes, false);
			}
			if (status < B_OK)
				return status;

			totalSize += bytes;
			bytesLeft -= size;
			fileOffset += size;
			fileLeft -= size;
			//dprintf("-> file left = %Lu\n", fileLeft);

			// a short transfer (bytes != size) means end of file/device
			if (size != bytes || vecIndex >= vecCount) {
				// there are no more bytes or iovecs, let's bail out
				*_numBytes = totalSize;
				return B_OK;
			}
		}
	}

	// report the position where the caller has to continue
	*_vecIndex = vecIndex;
	*_vecOffset = vecOffset;
	*_numBytes = totalSize;
	return B_OK;
}
2888 
2889 
2890 //	#pragma mark - public API for file systems
2891 
2892 
2893 extern "C" status_t
2894 new_vnode(dev_t mountID, ino_t vnodeID, fs_vnode privateNode)
2895 {
2896 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2897 		mountID, vnodeID, privateNode));
2898 
2899 	if (privateNode == NULL)
2900 		return B_BAD_VALUE;
2901 
2902 	mutex_lock(&sVnodeMutex);
2903 
2904 	// file system integrity check:
2905 	// test if the vnode already exists and bail out if this is the case!
2906 
2907 	// ToDo: the R5 implementation obviously checks for a different cookie
2908 	//	and doesn't panic if they are equal
2909 
2910 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2911 	if (vnode != NULL)
2912 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2913 
2914 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2915 	if (status == B_OK) {
2916 		vnode->private_node = privateNode;
2917 		vnode->busy = true;
2918 		vnode->unpublished = true;
2919 	}
2920 
2921 	TRACE(("returns: %s\n", strerror(status)));
2922 
2923 	mutex_unlock(&sVnodeMutex);
2924 	return status;
2925 }
2926 
2927 
2928 extern "C" status_t
2929 publish_vnode(dev_t mountID, ino_t vnodeID, fs_vnode privateNode)
2930 {
2931 	FUNCTION(("publish_vnode()\n"));
2932 
2933 	mutex_lock(&sVnodeMutex);
2934 
2935 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2936 	status_t status = B_OK;
2937 
2938 	if (vnode != NULL && vnode->busy && vnode->unpublished
2939 		&& vnode->private_node == privateNode) {
2940 		vnode->busy = false;
2941 		vnode->unpublished = false;
2942 	} else if (vnode == NULL && privateNode != NULL) {
2943 		status = create_new_vnode(&vnode, mountID, vnodeID);
2944 		if (status == B_OK)
2945 			vnode->private_node = privateNode;
2946 	} else
2947 		status = B_BAD_VALUE;
2948 
2949 	TRACE(("returns: %s\n", strerror(status)));
2950 
2951 	mutex_unlock(&sVnodeMutex);
2952 	return status;
2953 }
2954 
2955 
2956 extern "C" status_t
2957 get_vnode(dev_t mountID, ino_t vnodeID, fs_vnode *_fsNode)
2958 {
2959 	struct vnode *vnode;
2960 
2961 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
2962 	if (status < B_OK)
2963 		return status;
2964 
2965 	*_fsNode = vnode->private_node;
2966 	return B_OK;
2967 }
2968 
2969 
2970 extern "C" status_t
2971 put_vnode(dev_t mountID, ino_t vnodeID)
2972 {
2973 	struct vnode *vnode;
2974 
2975 	mutex_lock(&sVnodeMutex);
2976 	vnode = lookup_vnode(mountID, vnodeID);
2977 	mutex_unlock(&sVnodeMutex);
2978 
2979 	if (vnode)
2980 		dec_vnode_ref_count(vnode, true);
2981 
2982 	return B_OK;
2983 }
2984 
2985 
2986 extern "C" status_t
2987 remove_vnode(dev_t mountID, ino_t vnodeID)
2988 {
2989 	struct vnode *vnode;
2990 	bool remove = false;
2991 
2992 	MutexLocker locker(sVnodeMutex);
2993 
2994 	vnode = lookup_vnode(mountID, vnodeID);
2995 	if (vnode == NULL)
2996 		return B_ENTRY_NOT_FOUND;
2997 
2998 	if (vnode->covered_by != NULL) {
2999 		// this vnode is in use
3000 		mutex_unlock(&sVnodeMutex);
3001 		return B_BUSY;
3002 	}
3003 
3004 	vnode->remove = true;
3005 	if (vnode->unpublished) {
3006 		// prepare the vnode for deletion
3007 		vnode->busy = true;
3008 		remove = true;
3009 	}
3010 
3011 	locker.Unlock();
3012 
3013 	if (remove) {
3014 		// if the vnode hasn't been published yet, we delete it here
3015 		atomic_add(&vnode->ref_count, -1);
3016 		free_vnode(vnode, true);
3017 	}
3018 
3019 	return B_OK;
3020 }
3021 
3022 
3023 extern "C" status_t
3024 unremove_vnode(dev_t mountID, ino_t vnodeID)
3025 {
3026 	struct vnode *vnode;
3027 
3028 	mutex_lock(&sVnodeMutex);
3029 
3030 	vnode = lookup_vnode(mountID, vnodeID);
3031 	if (vnode)
3032 		vnode->remove = false;
3033 
3034 	mutex_unlock(&sVnodeMutex);
3035 	return B_OK;
3036 }
3037 
3038 
3039 extern "C" status_t
3040 get_vnode_removed(dev_t mountID, ino_t vnodeID, bool* removed)
3041 {
3042 	mutex_lock(&sVnodeMutex);
3043 
3044 	status_t result;
3045 
3046 	if (struct vnode* vnode = lookup_vnode(mountID, vnodeID)) {
3047 		if (removed)
3048 			*removed = vnode->remove;
3049 		result = B_OK;
3050 	} else
3051 		result = B_BAD_VALUE;
3052 
3053 	mutex_unlock(&sVnodeMutex);
3054 	return result;
3055 }
3056 
3057 
3058 extern "C" status_t
3059 read_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3060 	size_t *_numBytes, bool fsReenter)
3061 {
3062 	struct file_descriptor *descriptor;
3063 	struct vnode *vnode;
3064 
3065 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3066 	if (descriptor == NULL)
3067 		return B_FILE_ERROR;
3068 
3069 	status_t status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
3070 		vnode->private_node, descriptor->cookie, pos, vecs, count, _numBytes,
3071 		fsReenter);
3072 
3073 	put_fd(descriptor);
3074 	return status;
3075 }
3076 
3077 
3078 extern "C" status_t
3079 write_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3080 	size_t *_numBytes, bool fsReenter)
3081 {
3082 	struct file_descriptor *descriptor;
3083 	struct vnode *vnode;
3084 
3085 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3086 	if (descriptor == NULL)
3087 		return B_FILE_ERROR;
3088 
3089 	status_t status = FS_CALL(vnode, write_pages)(vnode->mount->cookie,
3090 		vnode->private_node, descriptor->cookie, pos, vecs, count, _numBytes,
3091 		fsReenter);
3092 
3093 	put_fd(descriptor);
3094 	return status;
3095 }
3096 
3097 
3098 extern "C" status_t
3099 read_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3100 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3101 	size_t *_bytes)
3102 {
3103 	struct file_descriptor *descriptor;
3104 	struct vnode *vnode;
3105 
3106 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3107 	if (descriptor == NULL)
3108 		return B_FILE_ERROR;
3109 
3110 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3111 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3112 		false);
3113 
3114 	put_fd(descriptor);
3115 	return status;
3116 }
3117 
3118 
3119 extern "C" status_t
3120 write_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3121 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3122 	size_t *_bytes)
3123 {
3124 	struct file_descriptor *descriptor;
3125 	struct vnode *vnode;
3126 
3127 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3128 	if (descriptor == NULL)
3129 		return B_FILE_ERROR;
3130 
3131 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3132 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3133 		true);
3134 
3135 	put_fd(descriptor);
3136 	return status;
3137 }
3138 
3139 
3140 //	#pragma mark - private VFS API
3141 //	Functions the VFS exports for other parts of the kernel
3142 
3143 
3144 /*! Acquires another reference to the vnode that has to be released
3145 	by calling vfs_put_vnode().
3146 */
void
vfs_acquire_vnode(struct vnode *vnode)
{
	// Bump the vnode's reference count; must be balanced by vfs_put_vnode().
	inc_vnode_ref_count(vnode);
}
3152 
3153 
3154 /*! This is currently called from file_cache_create() only.
3155 	It's probably a temporary solution as long as devfs requires that
3156 	fs_read_pages()/fs_write_pages() are called with the standard
3157 	open cookie and not with a device cookie.
3158 	If that's done differently, remove this call; it has no other
3159 	purpose.
3160 */
extern "C" status_t
vfs_get_cookie_from_fd(int fd, void **_cookie)
{
	struct file_descriptor *descriptor;

	// Look the FD up in the kernel I/O context (first argument true).
	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// NOTE(review): the reference acquired by get_fd() is never released
	// (no put_fd()), which pins the descriptor while the raw cookie is in
	// use -- confirm this is intentional and that the caller compensates.
	*_cookie = descriptor->cookie;
	return B_OK;
}
3173 
3174 
3175 extern "C" int
3176 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode **vnode)
3177 {
3178 	*vnode = get_vnode_from_fd(fd, kernel);
3179 
3180 	if (*vnode == NULL)
3181 		return B_FILE_ERROR;
3182 
3183 	return B_NO_ERROR;
3184 }
3185 
3186 
3187 extern "C" status_t
3188 vfs_get_vnode_from_path(const char *path, bool kernel, struct vnode **_vnode)
3189 {
3190 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3191 		path, kernel));
3192 
3193 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3194 	if (pathBuffer.InitCheck() != B_OK)
3195 		return B_NO_MEMORY;
3196 
3197 	char *buffer = pathBuffer.LockBuffer();
3198 	strlcpy(buffer, path, pathBuffer.BufferSize());
3199 
3200 	struct vnode *vnode;
3201 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3202 	if (status < B_OK)
3203 		return status;
3204 
3205 	*_vnode = vnode;
3206 	return B_OK;
3207 }
3208 
3209 
3210 extern "C" status_t
3211 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode **_vnode)
3212 {
3213 	struct vnode *vnode;
3214 
3215 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3216 	if (status < B_OK)
3217 		return status;
3218 
3219 	*_vnode = vnode;
3220 	return B_OK;
3221 }
3222 
3223 
extern "C" status_t
vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
	const char *name, struct vnode **_vnode)
{
	// The "false" matches the traverse argument used at other call sites
	// (e.g. file_open_entry_ref()), i.e. a symlink leaf is presumably not
	// followed -- confirm against entry_ref_to_vnode()'s definition.
	return entry_ref_to_vnode(mountID, directoryID, name, false, _vnode);
}
3230 
3231 
extern "C" void
vfs_vnode_to_node_ref(struct vnode *vnode, dev_t *_mountID, ino_t *_vnodeID)
{
	// Extract the (device, node) identity pair; no reference is acquired
	// or released here.
	*_mountID = vnode->device;
	*_vnodeID = vnode->id;
}
3238 
3239 
3240 /*!	Looks up a vnode with the given mount and vnode ID.
3241 	Must only be used with "in-use" vnodes as it doesn't grab a reference
3242 	to the node.
	It's currently only used by file_cache_create().
3244 */
3245 extern "C" status_t
3246 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode)
3247 {
3248 	mutex_lock(&sVnodeMutex);
3249 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
3250 	mutex_unlock(&sVnodeMutex);
3251 
3252 	if (vnode == NULL)
3253 		return B_ERROR;
3254 
3255 	*_vnode = vnode;
3256 	return B_OK;
3257 }
3258 
3259 
extern "C" status_t
vfs_get_fs_node_from_path(dev_t mountID, const char *path, bool kernel,
	void **_node)
{
	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
		mountID, path, kernel));

	// Mutable scratch copy of the path (the lookup routines clobber it).
	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// Pin the mount so its root vnode stays valid during the lookup.
	fs_mount *mount;
	status_t status = get_mount(mountID, &mount);
	if (status < B_OK)
		return status;

	char *buffer = pathBuffer.LockBuffer();
	strlcpy(buffer, path, pathBuffer.BufferSize());

	struct vnode *vnode = mount->root_vnode;

	// Absolute paths resolve from the global root (and may therefore leave
	// this mount -- guarded by the device check below); relative paths
	// resolve from the mount's root vnode.
	if (buffer[0] == '/')
		status = path_to_vnode(buffer, true, &vnode, NULL, true);
	else {
		inc_vnode_ref_count(vnode);
			// vnode_path_to_vnode() releases a reference to the starting vnode
		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
	}

	put_mount(mount);

	if (status < B_OK)
		return status;

	if (vnode->device != mountID) {
		// wrong mount ID - must not gain access on foreign file system nodes
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	// Hand out the FS-private node. The vnode reference acquired above is
	// kept -- NOTE(review): the caller appears to be responsible for
	// releasing it eventually; confirm against the callers.
	*_node = vnode->private_node;
	return B_OK;
}
3303 
3304 
3305 /*!	Finds the full path to the file that contains the module \a moduleName,
3306 	puts it into \a pathBuffer, and returns B_OK for success.
	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
	\c B_ENTRY_NOT_FOUND if no file could be found.
	\a pathBuffer is clobbered in any case and must not be relied on if this
	function returns unsuccessfully.
3311 	\a basePath and \a pathBuffer must not point to the same space.
3312 */
status_t
vfs_get_module_path(const char *basePath, const char *moduleName,
	char *pathBuffer, size_t bufferSize)
{
	struct vnode *dir, *file;
	status_t status;
	size_t length;
	char *path;

	// reject a zero-size buffer or a base path that doesn't fit
	if (bufferSize == 0
		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
		return B_BUFFER_OVERFLOW;

	// resolve the base directory; on success "dir" holds a reference
	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
	if (status < B_OK)
		return status;

	// the path buffer had been clobbered by the above call
	length = strlcpy(pathBuffer, basePath, bufferSize);
	if (pathBuffer[length - 1] != '/')
		pathBuffer[length++] = '/';

	// "path" marks where the next component will be appended
	path = pathBuffer + length;
	bufferSize -= length;

	// walk moduleName one '/'-separated component at a time
	while (moduleName) {
		int type;

		char *nextPath = strchr(moduleName, '/');
		if (nextPath == NULL)
			length = strlen(moduleName);
		else {
			length = nextPath - moduleName;
			nextPath++;
		}

		// +1 for either the trailing '/' (directory) or the terminator
		if (length + 1 >= bufferSize) {
			status = B_BUFFER_OVERFLOW;
			goto err;
		}

		memcpy(path, moduleName, length);
		path[length] = '\0';
		moduleName = nextPath;

		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
		if (status < B_OK) {
			// vnode_path_to_vnode() has already released the reference to dir
			return status;
		}

		if (S_ISDIR(type)) {
			// goto the next directory
			path[length] = '/';
			path[length + 1] = '\0';
			path += length + 1;
			bufferSize -= length + 1;

			// descend: "file" becomes the new "dir" (reference carries over)
			dir = file;
		} else if (S_ISREG(type)) {
			// it's a file so it should be what we've searched for
			put_vnode(file);

			return B_OK;
		} else {
			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
			status = B_ERROR;
			// transfer ownership so the common cleanup below releases "file"
			dir = file;
			goto err;
		}
	}

	// if we got here, the moduleName just pointed to a directory, not to
	// a real module - what should we do in this case?
	status = B_ENTRY_NOT_FOUND;

err:
	put_vnode(dir);
	return status;
}
3393 
3394 
3395 /*!	\brief Normalizes a given path.
3396 
3397 	The path must refer to an existing or non-existing entry in an existing
3398 	directory, that is chopping off the leaf component the remaining path must
3399 	refer to an existing directory.
3400 
	The returned path will be canonical in that it will be absolute, will not
	contain any "." or ".." components or duplicate occurrences of '/'s,
	and none of the directory components will be symbolic links.
3404 
3405 	Any two paths referring to the same entry, will result in the same
3406 	normalized path (well, that is pretty much the definition of `normalized',
3407 	isn't it :-).
3408 
3409 	\param path The path to be normalized.
3410 	\param buffer The buffer into which the normalized path will be written.
3411 		   May be the same one as \a path.
3412 	\param bufferSize The size of \a buffer.
3413 	\param kernel \c true, if the IO context of the kernel shall be used,
3414 		   otherwise that of the team this thread belongs to. Only relevant,
3415 		   if the path is relative (to get the CWD).
3416 	\return \c B_OK if everything went fine, another error code otherwise.
3417 */
status_t
vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
	bool kernel)
{
	if (!path || !buffer || bufferSize < 1)
		return B_BAD_VALUE;

	TRACE(("vfs_normalize_path(`%s')\n", path));

	// copy the supplied path to the stack, so it can be modified
	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
	if (mutablePathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char *mutablePath = mutablePathBuffer.LockBuffer();
	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the dir vnode and the leaf name
	struct vnode *dirNode;
	char leaf[B_FILE_NAME_LENGTH];
	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
		return error;
	}

	// if the leaf is "." or "..", we directly get the correct directory
	// vnode and ignore the leaf later
	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
	if (isDir)
		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
		// (vnode_path_to_vnode() releases the reference to the starting
		// node, so no extra put_vnode() is needed on its error path)
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
			strerror(error)));
		return error;
	}

	// get the directory path
	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
	put_vnode(dirNode);
	if (error < B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
		return error;
	}

	// append the leaf name
	if (!isDir) {
		// insert a directory separator only if this is not the file system root
		if ((strcmp(buffer, "/") != 0
			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
			return B_NAME_TOO_LONG;
		}
	}

	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
	return B_OK;
}
3477 
3478 
extern "C" void
vfs_put_vnode(struct vnode *vnode)
{
	// Release one reference previously acquired via vfs_get_vnode(),
	// vfs_acquire_vnode(), or a path/entry_ref lookup.
	put_vnode(vnode);
}
3484 
3485 
3486 extern "C" status_t
3487 vfs_get_cwd(dev_t *_mountID, ino_t *_vnodeID)
3488 {
3489 	// Get current working directory from io context
3490 	struct io_context *context = get_current_io_context(false);
3491 	status_t status = B_OK;
3492 
3493 	mutex_lock(&context->io_mutex);
3494 
3495 	if (context->cwd != NULL) {
3496 		*_mountID = context->cwd->device;
3497 		*_vnodeID = context->cwd->id;
3498 	} else
3499 		status = B_ERROR;
3500 
3501 	mutex_unlock(&context->io_mutex);
3502 	return status;
3503 }
3504 
3505 
status_t
vfs_unmount(dev_t mountID, uint32 flags)
{
	// NULL path: unmount by mount ID only. The trailing "true" presumably
	// selects the kernel context -- compare fs_unmount()'s signature.
	return fs_unmount(NULL, mountID, flags, true);
}
3511 
3512 
3513 extern "C" status_t
3514 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
3515 {
3516 	struct vnode *vnode;
3517 
3518 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
3519 	if (status < B_OK)
3520 		return status;
3521 
3522 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3523 	put_vnode(vnode);
3524 	return B_OK;
3525 }
3526 
3527 
extern "C" void
vfs_free_unused_vnodes(int32 level)
{
	// Reuse the low-memory handler to flush unused vnodes at the given
	// memory-pressure level.
	vnode_low_memory_handler(NULL, level);
}
3533 
3534 
3535 extern "C" bool
3536 vfs_can_page(struct vnode *vnode, void *cookie)
3537 {
3538 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
3539 
3540 	if (FS_CALL(vnode, can_page)) {
3541 		return FS_CALL(vnode, can_page)(vnode->mount->cookie,
3542 			vnode->private_node, cookie);
3543 	}
3544 	return false;
3545 }
3546 
3547 
3548 extern "C" status_t
3549 vfs_read_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
3550 	size_t count, size_t *_numBytes, bool fsReenter)
3551 {
3552 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3553 
3554 	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
3555 		cookie, pos, vecs, count, _numBytes, fsReenter);
3556 }
3557 
3558 
3559 extern "C" status_t
3560 vfs_write_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
3561 	size_t count, size_t *_numBytes, bool fsReenter)
3562 {
3563 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3564 
3565 	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
3566 		cookie, pos, vecs, count, _numBytes, fsReenter);
3567 }
3568 
3569 
3570 /*!	Gets the vnode's vm_cache object. If it didn't have one, it will be
3571 	created if \a allocate is \c true.
3572 	In case it's successful, it will also grab a reference to the cache
3573 	it returns.
3574 */
extern "C" status_t
vfs_get_vnode_cache(struct vnode *vnode, vm_cache **_cache, bool allocate)
{
	// fast path: the cache already exists, just reference and return it
	if (vnode->cache != NULL) {
		vm_cache_acquire_ref(vnode->cache);
		*_cache = vnode->cache;
		return B_OK;
	}

	mutex_lock(&sVnodeMutex);

	status_t status = B_OK;

	// The cache could have been created in the meantime
	if (vnode->cache == NULL) {
		if (allocate) {
			// TODO: actually the vnode need to be busy already here, or
			//	else this won't work...
			// mark the vnode busy while the lock is dropped for the
			// (potentially blocking) cache creation
			bool wasBusy = vnode->busy;
			vnode->busy = true;
			mutex_unlock(&sVnodeMutex);

			status = vm_create_vnode_cache(vnode, &vnode->cache);

			mutex_lock(&sVnodeMutex);
			vnode->busy = wasBusy;
		} else
			status = B_BAD_VALUE;
	}

	// on success, hand out a new reference to the (possibly fresh) cache
	if (status == B_OK) {
		vm_cache_acquire_ref(vnode->cache);
		*_cache = vnode->cache;
	}

	mutex_unlock(&sVnodeMutex);
	return status;
}
3613 
3614 
3615 status_t
3616 vfs_get_file_map(struct vnode *vnode, off_t offset, size_t size,
3617 	file_io_vec *vecs, size_t *_count)
3618 {
3619 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
3620 
3621 	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie,
3622 		vnode->private_node, offset, size, vecs, _count);
3623 }
3624 
3625 
3626 status_t
3627 vfs_stat_vnode(struct vnode *vnode, struct stat *stat)
3628 {
3629 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3630 		vnode->private_node, stat);
3631 
3632 	// fill in the st_dev and st_ino fields
3633 	if (status == B_OK) {
3634 		stat->st_dev = vnode->device;
3635 		stat->st_ino = vnode->id;
3636 	}
3637 
3638 	return status;
3639 }
3640 
3641 
status_t
vfs_get_vnode_name(struct vnode *vnode, char *name, size_t nameSize)
{
	// Retrieves the entry name of \a vnode into \a name (at most
	// \a nameSize bytes). The NULL argument presumably means "no known
	// parent directory" -- confirm against get_vnode_name().
	return get_vnode_name(vnode, NULL, name, nameSize);
}
3647 
3648 
/*!	Resolves an entry_ref (\a device, \a inode, \a leaf) to an absolute
	path in \a path. A NULL \a leaf refers to the directory itself.
*/
status_t
vfs_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
	char *path, size_t pathLength)
{
	struct vnode *vnode;
	status_t status;

	// filter invalid leaf names
	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
		return B_BAD_VALUE;

	// get the vnode matching the dir's node_ref
	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
		// special cases "." and "..": we can directly get the vnode of the
		// referenced directory
		status = entry_ref_to_vnode(device, inode, leaf, false, &vnode);
		leaf = NULL;
			// the leaf is already part of the resolved directory now,
			// so it must not be appended again below
	} else
		status = get_vnode(device, inode, &vnode, true, false);
	if (status < B_OK)
		return status;

	// get the directory path
	status = dir_vnode_to_path(vnode, path, pathLength);
	put_vnode(vnode);
		// we don't need the vnode anymore
	if (status < B_OK)
		return status;

	// append the leaf name
	if (leaf) {
		// insert a directory separator if this is not the file system root
		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
				>= pathLength)
			|| strlcat(path, leaf, pathLength) >= pathLength) {
			return B_NAME_TOO_LONG;
		}
	}

	return B_OK;
}
3690 
3691 
3692 /*!	If the given descriptor locked its vnode, that lock will be released. */
3693 void
3694 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3695 {
3696 	struct vnode *vnode = fd_vnode(descriptor);
3697 
3698 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3699 		vnode->mandatory_locked_by = NULL;
3700 }
3701 
3702 
3703 /*!	Closes all file descriptors of the specified I/O context that
3704 	have the O_CLOEXEC flag set.
3705 */
void
vfs_exec_io_context(void *_context)
{
	struct io_context *context = (struct io_context *)_context;
	uint32 i;

	for (i = 0; i < context->table_size; i++) {
		// the lock is taken and released per slot so that close_fd()
		// below runs without the I/O context lock held
		mutex_lock(&context->io_mutex);

		struct file_descriptor *descriptor = context->fds[i];
		bool remove = false;

		if (descriptor != NULL && fd_close_on_exec(context, i)) {
			// detach the descriptor from the table while holding the lock
			context->fds[i] = NULL;
			context->num_used_fds--;

			remove = true;
		}

		mutex_unlock(&context->io_mutex);

		if (remove) {
			// close and drop the table's reference outside the lock
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}
}
3733 
3734 
/*! Sets up a new io_context structure, and inherits the properties
	of the parent io_context if it is given.
*/
void *
vfs_new_io_context(void *_parentContext)
{
	size_t tableSize;
	struct io_context *context;
	struct io_context *parentContext;

	context = (io_context *)malloc(sizeof(struct io_context));
	if (context == NULL)
		return NULL;

	memset(context, 0, sizeof(struct io_context));

	// inherit the parent's FD table size, or fall back to the default
	parentContext = (struct io_context *)_parentContext;
	if (parentContext)
		tableSize = parentContext->table_size;
	else
		tableSize = DEFAULT_FD_TABLE_SIZE;

	// allocate space for FDs and their close-on-exec flag
	// (a single chunk holding the FD table, the select-info table, and the
	// close-on-exec bitmap, in that order)
	context->fds = (file_descriptor**)malloc(
		sizeof(struct file_descriptor*) * tableSize
		+ sizeof(struct select_sync*) * tableSize
		+ (tableSize + 7) / 8);
	if (context->fds == NULL) {
		free(context);
		return NULL;
	}

	context->select_infos = (select_info**)(context->fds + tableSize);
	context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize);

	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
		+ sizeof(struct select_sync*) * tableSize
		+ (tableSize + 7) / 8);

	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
		free(context->fds);
		free(context);
		return NULL;
	}

	// Copy all parent file descriptors

	if (parentContext) {
		size_t i;

		mutex_lock(&parentContext->io_mutex);

		// share the parent's working directory
		context->cwd = parentContext->cwd;
		if (context->cwd)
			inc_vnode_ref_count(context->cwd);

		for (i = 0; i < tableSize; i++) {
			struct file_descriptor *descriptor = parentContext->fds[i];

			if (descriptor != NULL) {
				// the child shares the parent's descriptor, so both the
				// reference and the open count are bumped
				context->fds[i] = descriptor;
				context->num_used_fds++;
				atomic_add(&descriptor->ref_count, 1);
				atomic_add(&descriptor->open_count, 1);

				if (fd_close_on_exec(parentContext, i))
					fd_set_close_on_exec(context, i, true);
			}
		}

		mutex_unlock(&parentContext->io_mutex);
	} else {
		// no parent: start out in the global root directory
		context->cwd = sRoot;

		if (context->cwd)
			inc_vnode_ref_count(context->cwd);
	}

	context->table_size = tableSize;

	list_init(&context->node_monitors);
	context->max_monitors = DEFAULT_NODE_MONITORS;

	return context;
}
3820 
3821 
status_t
vfs_free_io_context(void *_ioContext)
{
	struct io_context *context = (struct io_context *)_ioContext;
	uint32 i;

	// drop the context's reference to its working directory
	if (context->cwd)
		dec_vnode_ref_count(context->cwd, false);

	mutex_lock(&context->io_mutex);

	// close and release every descriptor still held by this context
	for (i = 0; i < context->table_size; i++) {
		if (struct file_descriptor *descriptor = context->fds[i]) {
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}

	mutex_destroy(&context->io_mutex);

	remove_node_monitors(context);
	free(context->fds);
		// frees the FD, select-info, and close-on-exec tables at once --
		// they share a single allocation (see vfs_new_io_context())
	free(context);

	return B_OK;
}
3848 
3849 
3850 static status_t
3851 vfs_resize_fd_table(struct io_context *context, const int newSize)
3852 {
3853 	struct file_descriptor **fds;
3854 
3855 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3856 		return EINVAL;
3857 
3858 	MutexLocker(context->io_mutex);
3859 
3860 	int oldSize = context->table_size;
3861 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
3862 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
3863 
3864 	// If the tables shrink, make sure none of the fds being dropped are in use.
3865 	if (newSize < oldSize) {
3866 		for (int i = oldSize; i-- > newSize;) {
3867 			if (context->fds[i])
3868 				return EBUSY;
3869 		}
3870 	}
3871 
3872 	// store pointers to the old tables
3873 	file_descriptor** oldFDs = context->fds;
3874 	select_info** oldSelectInfos = context->select_infos;
3875 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
3876 
3877 	// allocate new tables
3878 	file_descriptor** newFDs = (file_descriptor**)malloc(
3879 		sizeof(struct file_descriptor*) * newSize
3880 		+ sizeof(struct select_sync*) * newSize
3881 		+ newCloseOnExitBitmapSize);
3882 	if (newFDs == NULL)
3883 		return ENOMEM;
3884 
3885 	context->fds = newFDs;
3886 	context->select_infos = (select_info**)(context->fds + newSize);
3887 	context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize);
3888 	context->table_size = newSize;
3889 
3890 	// copy entries from old tables
3891 	int toCopy = min_c(oldSize, newSize);
3892 
3893 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
3894 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
3895 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
3896 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
3897 
3898 	// clear additional entries, if the tables grow
3899 	if (newSize > oldSize) {
3900 		memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
3901 		memset(context->select_infos + oldSize, 0,
3902 			sizeof(void *) * (newSize - oldSize));
3903 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
3904 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
3905 	}
3906 
3907 	free(oldFDs);
3908 
3909 	return B_OK;
3910 }
3911 
3912 
3913 static status_t
3914 vfs_resize_monitor_table(struct io_context *context, const int newSize)
3915 {
3916 	void *fds;
3917 	int	status = B_OK;
3918 
3919 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3920 		return EINVAL;
3921 
3922 	mutex_lock(&context->io_mutex);
3923 
3924 	if ((size_t)newSize < context->num_monitors) {
3925 		status = EBUSY;
3926 		goto out;
3927 	}
3928 	context->max_monitors = newSize;
3929 
3930 out:
3931 	mutex_unlock(&context->io_mutex);
3932 	return status;
3933 }
3934 
3935 
3936 int
3937 vfs_getrlimit(int resource, struct rlimit * rlp)
3938 {
3939 	if (!rlp)
3940 		return B_BAD_ADDRESS;
3941 
3942 	switch (resource) {
3943 		case RLIMIT_NOFILE:
3944 		{
3945 			struct io_context *ioctx = get_current_io_context(false);
3946 
3947 			mutex_lock(&ioctx->io_mutex);
3948 
3949 			rlp->rlim_cur = ioctx->table_size;
3950 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3951 
3952 			mutex_unlock(&ioctx->io_mutex);
3953 
3954 			return 0;
3955 		}
3956 
3957 		case RLIMIT_NOVMON:
3958 		{
3959 			struct io_context *ioctx = get_current_io_context(false);
3960 
3961 			mutex_lock(&ioctx->io_mutex);
3962 
3963 			rlp->rlim_cur = ioctx->max_monitors;
3964 			rlp->rlim_max = MAX_NODE_MONITORS;
3965 
3966 			mutex_unlock(&ioctx->io_mutex);
3967 
3968 			return 0;
3969 		}
3970 
3971 		default:
3972 			return EINVAL;
3973 	}
3974 }
3975 
3976 
3977 int
3978 vfs_setrlimit(int resource, const struct rlimit * rlp)
3979 {
3980 	if (!rlp)
3981 		return B_BAD_ADDRESS;
3982 
3983 	switch (resource) {
3984 		case RLIMIT_NOFILE:
3985 			/* TODO: check getuid() */
3986 			if (rlp->rlim_max != RLIM_SAVED_MAX
3987 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
3988 				return EPERM;
3989 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3990 
3991 		case RLIMIT_NOVMON:
3992 			/* TODO: check getuid() */
3993 			if (rlp->rlim_max != RLIM_SAVED_MAX
3994 				&& rlp->rlim_max != MAX_NODE_MONITORS)
3995 				return EPERM;
3996 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
3997 
3998 		default:
3999 			return EINVAL;
4000 	}
4001 }
4002 
4003 
status_t
vfs_init(kernel_args *args)
{
	// vnode table: hashed per (device, id), chained through vnode::next
	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
		&vnode_compare, &vnode_hash);
	if (sVnodeTable == NULL)
		panic("vfs_init: error creating vnode hash table\n");

	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));

	// mount table, chained through fs_mount::next
	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
		&mount_compare, &mount_hash);
	if (sMountsTable == NULL)
		panic("vfs_init: error creating mounts hash table\n");

	node_monitor_init();

	// no root vnode yet -- set by the first mount
	sRoot = NULL;

	// global VFS locks; any failure here is fatal for the kernel
	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
		panic("vfs_init: error allocating file systems lock\n");

	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
		panic("vfs_init: error allocating mount op lock\n");

	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
		panic("vfs_init: error allocating mount lock\n");

	if (mutex_init(&sVnodeCoveredByMutex, "vfs_vnode_covered_by_lock") < 0)
		panic("vfs_init: error allocating vnode::covered_by lock\n");

	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
		panic("vfs_init: error allocating vnode lock\n");

	if (block_cache_init() != B_OK)
		return B_ERROR;

#ifdef ADD_DEBUGGER_COMMANDS
	// add some debugger commands
	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
#endif

	// flush unused vnodes when memory gets tight
	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);

	return file_cache_init();
}
4056 
4057 
4058 //	#pragma mark - fd_ops implementations
4059 
4060 
4061 /*!
4062 	Calls fs_open() on the given vnode and returns a new
4063 	file descriptor for it
4064 */
static int
create_vnode(struct vnode *directory, const char *name, int openMode,
	int perms, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	ino_t newID;
	int status;

	// the file system must support creating entries at all
	if (FS_CALL(directory, create) == NULL)
		return EROFS;

	status = FS_CALL(directory, create)(directory->mount->cookie,
		directory->private_node, name, openMode, perms, &cookie, &newID);
	if (status < B_OK)
		return status;

	// the freshly created node must now appear in the vnode table
	mutex_lock(&sVnodeMutex);
	vnode = lookup_vnode(directory->device, newID);
	mutex_unlock(&sVnodeMutex);

	if (vnode == NULL) {
		panic("vfs: fs_create() returned success but there is no vnode, mount ID %ld!\n",
			directory->device);
		return B_BAD_VALUE;
	}

	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// something went wrong, clean up

	// close and free the open cookie, drop the vnode reference, and remove
	// the just-created entry again
	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
	put_vnode(vnode);

	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);

	return status;
}
4105 
4106 
4107 /*!
4108 	Calls fs_open() on the given vnode and returns a new
4109 	file descriptor for it
4110 */
4111 static int
4112 open_vnode(struct vnode *vnode, int openMode, bool kernel)
4113 {
4114 	fs_cookie cookie;
4115 	int status;
4116 
4117 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
4118 	if (status < 0)
4119 		return status;
4120 
4121 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4122 	if (status < 0) {
4123 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
4124 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4125 	}
4126 	return status;
4127 }
4128 
4129 
4130 /*! Calls fs open_dir() on the given vnode and returns a new
4131 	file descriptor for it
4132 */
4133 static int
4134 open_dir_vnode(struct vnode *vnode, bool kernel)
4135 {
4136 	fs_cookie cookie;
4137 	int status;
4138 
4139 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
4140 	if (status < B_OK)
4141 		return status;
4142 
4143 	// file is opened, create a fd
4144 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
4145 	if (status >= 0)
4146 		return status;
4147 
4148 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
4149 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4150 
4151 	return status;
4152 }
4153 
4154 
4155 /*! Calls fs open_attr_dir() on the given vnode and returns a new
4156 	file descriptor for it.
4157 	Used by attr_dir_open(), and attr_dir_open_fd().
4158 */
4159 static int
4160 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
4161 {
4162 	fs_cookie cookie;
4163 	int status;
4164 
4165 	if (FS_CALL(vnode, open_attr_dir) == NULL)
4166 		return EOPNOTSUPP;
4167 
4168 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
4169 	if (status < 0)
4170 		return status;
4171 
4172 	// file is opened, create a fd
4173 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
4174 	if (status >= 0)
4175 		return status;
4176 
4177 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
4178 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4179 
4180 	return status;
4181 }
4182 
4183 
4184 static int
4185 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
4186 	int openMode, int perms, bool kernel)
4187 {
4188 	struct vnode *directory;
4189 	int status;
4190 
4191 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
4192 
4193 	// get directory to put the new file in
4194 	status = get_vnode(mountID, directoryID, &directory, true, false);
4195 	if (status < B_OK)
4196 		return status;
4197 
4198 	status = create_vnode(directory, name, openMode, perms, kernel);
4199 	put_vnode(directory);
4200 
4201 	return status;
4202 }
4203 
4204 
4205 static int
4206 file_create(int fd, char *path, int openMode, int perms, bool kernel)
4207 {
4208 	char name[B_FILE_NAME_LENGTH];
4209 	struct vnode *directory;
4210 	int status;
4211 
4212 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
4213 
4214 	// get directory to put the new file in
4215 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4216 	if (status < 0)
4217 		return status;
4218 
4219 	status = create_vnode(directory, name, openMode, perms, kernel);
4220 
4221 	put_vnode(directory);
4222 	return status;
4223 }
4224 
4225 
4226 static int
4227 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
4228 	int openMode, bool kernel)
4229 {
4230 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
4231 	struct vnode *vnode;
4232 	int status;
4233 
4234 	if (name == NULL || *name == '\0')
4235 		return B_BAD_VALUE;
4236 
4237 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
4238 		mountID, directoryID, name, openMode));
4239 
4240 	// get the vnode matching the entry_ref
4241 	status = entry_ref_to_vnode(mountID, directoryID, name, traverse, &vnode);
4242 	if (status < B_OK)
4243 		return status;
4244 
4245 	status = open_vnode(vnode, openMode, kernel);
4246 	if (status < B_OK)
4247 		put_vnode(vnode);
4248 
4249 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID,
4250 		vnode->id, name);
4251 	return status;
4252 }
4253 
4254 
4255 static int
4256 file_open(int fd, char *path, int openMode, bool kernel)
4257 {
4258 	int status = B_OK;
4259 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
4260 
4261 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
4262 		fd, path, openMode, kernel));
4263 
4264 	// get the vnode matching the vnode + path combination
4265 	struct vnode *vnode = NULL;
4266 	ino_t parentID;
4267 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
4268 	if (status != B_OK)
4269 		return status;
4270 
4271 	// open the vnode
4272 	status = open_vnode(vnode, openMode, kernel);
4273 	// put only on error -- otherwise our reference was transferred to the FD
4274 	if (status < B_OK)
4275 		put_vnode(vnode);
4276 
4277 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
4278 		vnode->device, parentID, vnode->id, NULL);
4279 
4280 	return status;
4281 }
4282 
4283 
4284 static status_t
4285 file_close(struct file_descriptor *descriptor)
4286 {
4287 	struct vnode *vnode = descriptor->u.vnode;
4288 	status_t status = B_OK;
4289 
4290 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
4291 
4292 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
4293 	if (FS_CALL(vnode, close))
4294 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4295 
4296 	if (status == B_OK) {
4297 		// remove all outstanding locks for this team
4298 		release_advisory_lock(vnode, NULL);
4299 	}
4300 	return status;
4301 }
4302 
4303 
4304 static void
4305 file_free_fd(struct file_descriptor *descriptor)
4306 {
4307 	struct vnode *vnode = descriptor->u.vnode;
4308 
4309 	if (vnode != NULL) {
4310 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4311 		put_vnode(vnode);
4312 	}
4313 }
4314 
4315 
4316 static status_t
4317 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4318 {
4319 	struct vnode *vnode = descriptor->u.vnode;
4320 
4321 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4322 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4323 }
4324 
4325 
4326 static status_t
4327 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4328 {
4329 	struct vnode *vnode = descriptor->u.vnode;
4330 
4331 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4332 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4333 }
4334 
4335 
/*! Implements lseek() semantics for file descriptors: computes the new
	position relative to SEEK_SET/SEEK_CUR/SEEK_END, stores it in the
	descriptor, and returns it. Returns a negative error code on failure.
*/
static off_t
file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
{
	off_t offset;

	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));

	// stat() the node
	struct vnode *vnode = descriptor->u.vnode;
	if (FS_CALL(vnode, read_stat) == NULL)
		return EOPNOTSUPP;

	struct stat stat;
	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
		vnode->private_node, &stat);
	if (status < B_OK)
		return status;

	// some kinds of files are not seekable
	switch (stat.st_mode & S_IFMT) {
		case S_IFIFO:
			return ESPIPE;
// TODO: We don't catch sockets here, but they are not seekable either (ESPIPE)!
		// The Open Group Base Specs don't mention any file types besides pipes,
		// fifos, and sockets specially, so we allow seeking them.
		case S_IFREG:
		case S_IFBLK:
		case S_IFDIR:
		case S_IFLNK:
		case S_IFCHR:
			break;
	}

	// determine the base the caller-supplied offset is relative to
	switch (seekType) {
		case SEEK_SET:
			offset = 0;
			break;
		case SEEK_CUR:
			offset = descriptor->pos;
			break;
		case SEEK_END:
			// base is the current file size as reported by the FS
			offset = stat.st_size;
			break;
		default:
			return B_BAD_VALUE;
	}

	// reject positions that would overflow the (64 bit) position
	// assumes off_t is 64 bits wide
	if (offset > 0 && LONGLONG_MAX - offset < pos)
		return EOVERFLOW;

	pos += offset;
	if (pos < 0)
		return B_BAD_VALUE;

	// store and return the new position
	return descriptor->pos = pos;
}
4393 
4394 
4395 static status_t
4396 file_select(struct file_descriptor *descriptor, uint8 event,
4397 	struct selectsync *sync)
4398 {
4399 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
4400 
4401 	struct vnode *vnode = descriptor->u.vnode;
4402 
4403 	// If the FS has no select() hook, notify select() now.
4404 	if (FS_CALL(vnode, select) == NULL)
4405 		return notify_select_event(sync, event);
4406 
4407 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
4408 		descriptor->cookie, event, 0, sync);
4409 }
4410 
4411 
4412 static status_t
4413 file_deselect(struct file_descriptor *descriptor, uint8 event,
4414 	struct selectsync *sync)
4415 {
4416 	struct vnode *vnode = descriptor->u.vnode;
4417 
4418 	if (FS_CALL(vnode, deselect) == NULL)
4419 		return B_OK;
4420 
4421 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
4422 		descriptor->cookie, event, sync);
4423 }
4424 
4425 
4426 static status_t
4427 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char *name, int perms, bool kernel)
4428 {
4429 	struct vnode *vnode;
4430 	ino_t newID;
4431 	status_t status;
4432 
4433 	if (name == NULL || *name == '\0')
4434 		return B_BAD_VALUE;
4435 
4436 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
4437 
4438 	status = get_vnode(mountID, parentID, &vnode, true, false);
4439 	if (status < B_OK)
4440 		return status;
4441 
4442 	if (FS_CALL(vnode, create_dir))
4443 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
4444 	else
4445 		status = EROFS;
4446 
4447 	put_vnode(vnode);
4448 	return status;
4449 }
4450 
4451 
4452 static status_t
4453 dir_create(int fd, char *path, int perms, bool kernel)
4454 {
4455 	char filename[B_FILE_NAME_LENGTH];
4456 	struct vnode *vnode;
4457 	ino_t newID;
4458 	status_t status;
4459 
4460 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
4461 
4462 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4463 	if (status < 0)
4464 		return status;
4465 
4466 	if (FS_CALL(vnode, create_dir))
4467 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
4468 	else
4469 		status = EROFS;
4470 
4471 	put_vnode(vnode);
4472 	return status;
4473 }
4474 
4475 
4476 static int
4477 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char *name, bool kernel)
4478 {
4479 	struct vnode *vnode;
4480 	int status;
4481 
4482 	FUNCTION(("dir_open_entry_ref()\n"));
4483 
4484 	if (name && *name == '\0')
4485 		return B_BAD_VALUE;
4486 
4487 	// get the vnode matching the entry_ref/node_ref
4488 	if (name)
4489 		status = entry_ref_to_vnode(mountID, parentID, name, true, &vnode);
4490 	else
4491 		status = get_vnode(mountID, parentID, &vnode, true, false);
4492 	if (status < B_OK)
4493 		return status;
4494 
4495 	status = open_dir_vnode(vnode, kernel);
4496 	if (status < B_OK)
4497 		put_vnode(vnode);
4498 
4499 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
4500 		vnode->id, name);
4501 	return status;
4502 }
4503 
4504 
4505 static int
4506 dir_open(int fd, char *path, bool kernel)
4507 {
4508 	int status = B_OK;
4509 
4510 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
4511 
4512 	// get the vnode matching the vnode + path combination
4513 	struct vnode *vnode = NULL;
4514 	ino_t parentID;
4515 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
4516 	if (status != B_OK)
4517 		return status;
4518 
4519 	// open the dir
4520 	status = open_dir_vnode(vnode, kernel);
4521 	if (status < B_OK)
4522 		put_vnode(vnode);
4523 
4524 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
4525 	return status;
4526 }
4527 
4528 
4529 static status_t
4530 dir_close(struct file_descriptor *descriptor)
4531 {
4532 	struct vnode *vnode = descriptor->u.vnode;
4533 
4534 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
4535 
4536 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
4537 	if (FS_CALL(vnode, close_dir))
4538 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4539 
4540 	return B_OK;
4541 }
4542 
4543 
4544 static void
4545 dir_free_fd(struct file_descriptor *descriptor)
4546 {
4547 	struct vnode *vnode = descriptor->u.vnode;
4548 
4549 	if (vnode != NULL) {
4550 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4551 		put_vnode(vnode);
4552 	}
4553 }
4554 
4555 
4556 static status_t
4557 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4558 {
4559 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
4560 }
4561 
4562 
/*! Post-processes a dirent returned by a file system: fills in the parent
	device/node fields, and rewrites d_dev/d_ino so that mount boundaries
	are transparent to the caller (".." at an FS root resolves into the
	underlying FS, and mount points report the covering node).
	Fix-ups are best effort -- on lookup failure the entry is left as the
	FS reported it.
*/
static void
fix_dirent(struct vnode *parent, struct dirent *entry)
{
	// set d_pdev and d_pino
	entry->d_pdev = parent->device;
	entry->d_pino = parent->id;

	// If this is the ".." entry and the directory is the root of a FS,
	// we need to replace d_dev and d_ino with the actual values.
	if (strcmp(entry->d_name, "..") == 0
		&& parent->mount->root_vnode == parent
		&& parent->mount->covers_vnode) {
		inc_vnode_ref_count(parent);
			// vnode_path_to_vnode() puts the node, so we must hold an extra
			// reference while it walks ".."

		// ".." is guaranteed to be clobbered by this call
		struct vnode *vnode;
		status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
			&vnode, NULL, NULL);

		if (status == B_OK) {
			entry->d_dev = vnode->device;
			entry->d_ino = vnode->id;
		}
	} else {
		// resolve mount points
		struct vnode *vnode = NULL;
		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, true,
			false);
		if (status != B_OK)
			return;

		// covered_by may change concurrently; read it under the mutex
		mutex_lock(&sVnodeCoveredByMutex);
		if (vnode->covered_by) {
			entry->d_dev = vnode->covered_by->device;
			entry->d_ino = vnode->covered_by->id;
		}
		mutex_unlock(&sVnodeCoveredByMutex);

		put_vnode(vnode);
	}
}
4605 
4606 
4607 static status_t
4608 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4609 {
4610 	if (!FS_CALL(vnode, read_dir))
4611 		return EOPNOTSUPP;
4612 
4613 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie,vnode->private_node,cookie,buffer,bufferSize,_count);
4614 	if (error != B_OK)
4615 		return error;
4616 
4617 	// we need to adjust the read dirents
4618 	if (*_count > 0) {
4619 		// XXX: Currently reading only one dirent is supported. Make this a loop!
4620 		fix_dirent(vnode, buffer);
4621 	}
4622 
4623 	return error;
4624 }
4625 
4626 
4627 static status_t
4628 dir_rewind(struct file_descriptor *descriptor)
4629 {
4630 	struct vnode *vnode = descriptor->u.vnode;
4631 
4632 	if (FS_CALL(vnode, rewind_dir))
4633 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie,vnode->private_node,descriptor->cookie);
4634 
4635 	return EOPNOTSUPP;
4636 }
4637 
4638 
/*! Removes the directory at the given fd + path location via the FS's
	remove_dir() hook. Refuses to remove ".." entries, and normalizes
	trailing "/", "/.", and duplicate slashes in place before resolving
	the parent directory (\a path is modified).
*/
static status_t
dir_remove(int fd, char *path, bool kernel)
{
	char name[B_FILE_NAME_LENGTH];
	struct vnode *directory;
	status_t status;

	if (path != NULL) {
		// we need to make sure our path name doesn't stop with "/", ".", or ".."
		char *lastSlash = strrchr(path, '/');
		if (lastSlash != NULL) {
			char *leaf = lastSlash + 1;
			if (!strcmp(leaf, ".."))
				return B_NOT_ALLOWED;

			// omit multiple slashes
			while (lastSlash > path && lastSlash[-1] == '/') {
				lastSlash--;
			}

			if (!leaf[0]
				|| !strcmp(leaf, ".")) {
				// "name/" -> "name", or "name/." -> "name"
				// (truncate at the first of the run of slashes)
				lastSlash[0] = '\0';
			}
		} else if (!strcmp(path, ".."))
			return B_NOT_ALLOWED;
	}

	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
	if (status < B_OK)
		return status;

	if (FS_CALL(directory, remove_dir)) {
		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
			directory->private_node, name);
	} else
		status = EROFS;

	put_vnode(directory);
	return status;
}
4681 
4682 
4683 static status_t
4684 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer,
4685 	size_t length)
4686 {
4687 	struct vnode *vnode = descriptor->u.vnode;
4688 
4689 	if (FS_CALL(vnode, ioctl)) {
4690 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4691 			descriptor->cookie, op, buffer, length);
4692 	}
4693 
4694 	return EOPNOTSUPP;
4695 }
4696 
4697 
4698 static status_t
4699 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4700 {
4701 	struct file_descriptor *descriptor;
4702 	struct vnode *vnode;
4703 	struct flock flock;
4704 
4705 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4706 		fd, op, argument, kernel ? "kernel" : "user"));
4707 
4708 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4709 	if (descriptor == NULL)
4710 		return B_FILE_ERROR;
4711 
4712 	status_t status = B_OK;
4713 
4714 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4715 		if (descriptor->type != FDTYPE_FILE)
4716 			status = B_BAD_VALUE;
4717 		else if (user_memcpy(&flock, (struct flock *)argument,
4718 				sizeof(struct flock)) < B_OK)
4719 			status = B_BAD_ADDRESS;
4720 
4721 		if (status != B_OK) {
4722 			put_fd(descriptor);
4723 			return status;
4724 		}
4725 	}
4726 
4727 	switch (op) {
4728 		case F_SETFD:
4729 		{
4730 			struct io_context *context = get_current_io_context(kernel);
4731 			// Set file descriptor flags
4732 
4733 			// O_CLOEXEC is the only flag available at this time
4734 			mutex_lock(&context->io_mutex);
4735 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
4736 			mutex_unlock(&context->io_mutex);
4737 
4738 			status = B_OK;
4739 			break;
4740 		}
4741 
4742 		case F_GETFD:
4743 		{
4744 			struct io_context *context = get_current_io_context(kernel);
4745 
4746 			// Get file descriptor flags
4747 			mutex_lock(&context->io_mutex);
4748 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4749 			mutex_unlock(&context->io_mutex);
4750 			break;
4751 		}
4752 
4753 		case F_SETFL:
4754 			// Set file descriptor open mode
4755 			if (FS_CALL(vnode, set_flags)) {
4756 				// we only accept changes to O_APPEND and O_NONBLOCK
4757 				argument &= O_APPEND | O_NONBLOCK;
4758 
4759 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4760 					vnode->private_node, descriptor->cookie, (int)argument);
4761 				if (status == B_OK) {
4762 					// update this descriptor's open_mode field
4763 					descriptor->open_mode = (descriptor->open_mode
4764 						& ~(O_APPEND | O_NONBLOCK)) | argument;
4765 				}
4766 			} else
4767 				status = EOPNOTSUPP;
4768 			break;
4769 
4770 		case F_GETFL:
4771 			// Get file descriptor open mode
4772 			status = descriptor->open_mode;
4773 			break;
4774 
4775 		case F_DUPFD:
4776 		{
4777 			struct io_context *context = get_current_io_context(kernel);
4778 
4779 			status = new_fd_etc(context, descriptor, (int)argument);
4780 			if (status >= 0) {
4781 				mutex_lock(&context->io_mutex);
4782 				fd_set_close_on_exec(context, fd, false);
4783 				mutex_unlock(&context->io_mutex);
4784 
4785 				atomic_add(&descriptor->ref_count, 1);
4786 			}
4787 			break;
4788 		}
4789 
4790 		case F_GETLK:
4791 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4792 			if (status == B_OK) {
4793 				// copy back flock structure
4794 				status = user_memcpy((struct flock *)argument, &flock,
4795 					sizeof(struct flock));
4796 			}
4797 			break;
4798 
4799 		case F_SETLK:
4800 		case F_SETLKW:
4801 			status = normalize_flock(descriptor, &flock);
4802 			if (status < B_OK)
4803 				break;
4804 
4805 			if (flock.l_type == F_UNLCK)
4806 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4807 			else {
4808 				// the open mode must match the lock type
4809 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
4810 						&& flock.l_type == F_WRLCK
4811 					|| (descriptor->open_mode & O_RWMASK) == O_WRONLY
4812 						&& flock.l_type == F_RDLCK)
4813 					status = B_FILE_ERROR;
4814 				else {
4815 					status = acquire_advisory_lock(descriptor->u.vnode, -1,
4816 						&flock, op == F_SETLKW);
4817 				}
4818 			}
4819 			break;
4820 
4821 		// ToDo: add support for more ops?
4822 
4823 		default:
4824 			status = B_BAD_VALUE;
4825 	}
4826 
4827 	put_fd(descriptor);
4828 	return status;
4829 }
4830 
4831 
4832 static status_t
4833 common_sync(int fd, bool kernel)
4834 {
4835 	struct file_descriptor *descriptor;
4836 	struct vnode *vnode;
4837 	status_t status;
4838 
4839 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
4840 
4841 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4842 	if (descriptor == NULL)
4843 		return B_FILE_ERROR;
4844 
4845 	if (FS_CALL(vnode, fsync) != NULL)
4846 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4847 	else
4848 		status = EOPNOTSUPP;
4849 
4850 	put_fd(descriptor);
4851 	return status;
4852 }
4853 
4854 
/*! Marks the node referenced by \a fd as mandatorily locked by this
	descriptor. Fails with B_BUSY if another descriptor already holds the
	node lock.
*/
static status_t
common_lock_node(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = B_OK;

	// We need to set the locking atomically - someone
	// else might set one at the same time
	// (compare-and-swap: only succeeds if no descriptor holds the lock yet)
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			(addr_t)descriptor, (addr_t)NULL) != (addr_t)NULL)
		status = B_BUSY;

	put_fd(descriptor);
	return status;
}
4876 
4877 
/*! Releases the mandatory node lock held via \a fd. Fails with B_BAD_VALUE
	if this descriptor is not the current lock holder.
*/
static status_t
common_unlock_node(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = B_OK;

	// We need to set the locking atomically - someone
	// else might set one at the same time
	// (compare-and-swap: clear only if we are the current holder)
	// NOTE(review): the (int32)descriptor cast truncates on 64-bit targets,
	// unlike the (addr_t) casts used in common_lock_node() -- verify
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			(addr_t)NULL, (addr_t)descriptor) != (int32)descriptor)
		status = B_BAD_VALUE;

	put_fd(descriptor);
	return status;
}
4899 
4900 
4901 static status_t
4902 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4903 	bool kernel)
4904 {
4905 	struct vnode *vnode;
4906 	status_t status;
4907 
4908 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4909 	if (status < B_OK)
4910 		return status;
4911 
4912 	if (FS_CALL(vnode, read_symlink) != NULL) {
4913 		status = FS_CALL(vnode, read_symlink)(vnode->mount->cookie,
4914 			vnode->private_node, buffer, _bufferSize);
4915 	} else
4916 		status = B_BAD_VALUE;
4917 
4918 	put_vnode(vnode);
4919 	return status;
4920 }
4921 
4922 
4923 static status_t
4924 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4925 	bool kernel)
4926 {
4927 	// path validity checks have to be in the calling function!
4928 	char name[B_FILE_NAME_LENGTH];
4929 	struct vnode *vnode;
4930 	status_t status;
4931 
4932 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4933 
4934 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4935 	if (status < B_OK)
4936 		return status;
4937 
4938 	if (FS_CALL(vnode, create_symlink) != NULL)
4939 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4940 	else
4941 		status = EROFS;
4942 
4943 	put_vnode(vnode);
4944 
4945 	return status;
4946 }
4947 
4948 
4949 static status_t
4950 common_create_link(char *path, char *toPath, bool kernel)
4951 {
4952 	// path validity checks have to be in the calling function!
4953 	char name[B_FILE_NAME_LENGTH];
4954 	struct vnode *directory, *vnode;
4955 	status_t status;
4956 
4957 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4958 
4959 	status = path_to_dir_vnode(path, &directory, name, kernel);
4960 	if (status < B_OK)
4961 		return status;
4962 
4963 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4964 	if (status < B_OK)
4965 		goto err;
4966 
4967 	if (directory->mount != vnode->mount) {
4968 		status = B_CROSS_DEVICE_LINK;
4969 		goto err1;
4970 	}
4971 
4972 	if (FS_CALL(vnode, link) != NULL)
4973 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4974 	else
4975 		status = EROFS;
4976 
4977 err1:
4978 	put_vnode(vnode);
4979 err:
4980 	put_vnode(directory);
4981 
4982 	return status;
4983 }
4984 
4985 
4986 static status_t
4987 common_unlink(int fd, char *path, bool kernel)
4988 {
4989 	char filename[B_FILE_NAME_LENGTH];
4990 	struct vnode *vnode;
4991 	status_t status;
4992 
4993 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4994 
4995 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4996 	if (status < 0)
4997 		return status;
4998 
4999 	if (FS_CALL(vnode, unlink) != NULL)
5000 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
5001 	else
5002 		status = EROFS;
5003 
5004 	put_vnode(vnode);
5005 
5006 	return status;
5007 }
5008 
5009 
5010 static status_t
5011 common_access(char *path, int mode, bool kernel)
5012 {
5013 	struct vnode *vnode;
5014 	status_t status;
5015 
5016 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
5017 	if (status < B_OK)
5018 		return status;
5019 
5020 	if (FS_CALL(vnode, access) != NULL)
5021 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
5022 	else
5023 		status = B_OK;
5024 
5025 	put_vnode(vnode);
5026 
5027 	return status;
5028 }
5029 
5030 
5031 static status_t
5032 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
5033 {
5034 	struct vnode *fromVnode, *toVnode;
5035 	char fromName[B_FILE_NAME_LENGTH];
5036 	char toName[B_FILE_NAME_LENGTH];
5037 	status_t status;
5038 
5039 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
5040 
5041 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
5042 	if (status < 0)
5043 		return status;
5044 
5045 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
5046 	if (status < 0)
5047 		goto err;
5048 
5049 	if (fromVnode->device != toVnode->device) {
5050 		status = B_CROSS_DEVICE_LINK;
5051 		goto err1;
5052 	}
5053 
5054 	if (FS_CALL(fromVnode, rename) != NULL)
5055 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
5056 	else
5057 		status = EROFS;
5058 
5059 err1:
5060 	put_vnode(toVnode);
5061 err:
5062 	put_vnode(fromVnode);
5063 
5064 	return status;
5065 }
5066 
5067 
5068 static status_t
5069 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5070 {
5071 	struct vnode *vnode = descriptor->u.vnode;
5072 
5073 	FUNCTION(("common_read_stat: stat %p\n", stat));
5074 
5075 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
5076 		vnode->private_node, stat);
5077 
5078 	// fill in the st_dev and st_ino fields
5079 	if (status == B_OK) {
5080 		stat->st_dev = vnode->device;
5081 		stat->st_ino = vnode->id;
5082 	}
5083 
5084 	return status;
5085 }
5086 
5087 
5088 static status_t
5089 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5090 {
5091 	struct vnode *vnode = descriptor->u.vnode;
5092 
5093 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
5094 	if (!FS_CALL(vnode, write_stat))
5095 		return EROFS;
5096 
5097 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
5098 }
5099 
5100 
5101 static status_t
5102 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
5103 	struct stat *stat, bool kernel)
5104 {
5105 	struct vnode *vnode;
5106 	status_t status;
5107 
5108 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
5109 
5110 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5111 	if (status < 0)
5112 		return status;
5113 
5114 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
5115 
5116 	// fill in the st_dev and st_ino fields
5117 	if (status == B_OK) {
5118 		stat->st_dev = vnode->device;
5119 		stat->st_ino = vnode->id;
5120 	}
5121 
5122 	put_vnode(vnode);
5123 	return status;
5124 }
5125 
5126 
5127 static status_t
5128 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
5129 	const struct stat *stat, int statMask, bool kernel)
5130 {
5131 	struct vnode *vnode;
5132 	status_t status;
5133 
5134 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
5135 
5136 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5137 	if (status < 0)
5138 		return status;
5139 
5140 	if (FS_CALL(vnode, write_stat))
5141 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
5142 	else
5143 		status = EROFS;
5144 
5145 	put_vnode(vnode);
5146 
5147 	return status;
5148 }
5149 
5150 
5151 static int
5152 attr_dir_open(int fd, char *path, bool kernel)
5153 {
5154 	struct vnode *vnode;
5155 	int status;
5156 
5157 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
5158 
5159 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5160 	if (status < B_OK)
5161 		return status;
5162 
5163 	status = open_attr_dir_vnode(vnode, kernel);
5164 	if (status < 0)
5165 		put_vnode(vnode);
5166 
5167 	return status;
5168 }
5169 
5170 
5171 static status_t
5172 attr_dir_close(struct file_descriptor *descriptor)
5173 {
5174 	struct vnode *vnode = descriptor->u.vnode;
5175 
5176 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
5177 
5178 	if (FS_CALL(vnode, close_attr_dir))
5179 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5180 
5181 	return B_OK;
5182 }
5183 
5184 
5185 static void
5186 attr_dir_free_fd(struct file_descriptor *descriptor)
5187 {
5188 	struct vnode *vnode = descriptor->u.vnode;
5189 
5190 	if (vnode != NULL) {
5191 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5192 		put_vnode(vnode);
5193 	}
5194 }
5195 
5196 
5197 static status_t
5198 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5199 {
5200 	struct vnode *vnode = descriptor->u.vnode;
5201 
5202 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
5203 
5204 	if (FS_CALL(vnode, read_attr_dir))
5205 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
5206 
5207 	return EOPNOTSUPP;
5208 }
5209 
5210 
5211 static status_t
5212 attr_dir_rewind(struct file_descriptor *descriptor)
5213 {
5214 	struct vnode *vnode = descriptor->u.vnode;
5215 
5216 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
5217 
5218 	if (FS_CALL(vnode, rewind_attr_dir))
5219 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5220 
5221 	return EOPNOTSUPP;
5222 }
5223 
5224 
5225 static int
5226 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
5227 {
5228 	struct vnode *vnode;
5229 	fs_cookie cookie;
5230 	int status;
5231 
5232 	if (name == NULL || *name == '\0')
5233 		return B_BAD_VALUE;
5234 
5235 	vnode = get_vnode_from_fd(fd, kernel);
5236 	if (vnode == NULL)
5237 		return B_FILE_ERROR;
5238 
5239 	if (FS_CALL(vnode, create_attr) == NULL) {
5240 		status = EROFS;
5241 		goto err;
5242 	}
5243 
5244 	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
5245 	if (status < B_OK)
5246 		goto err;
5247 
5248 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5249 		return status;
5250 
5251 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
5252 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
5253 
5254 	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
5255 
5256 err:
5257 	put_vnode(vnode);
5258 
5259 	return status;
5260 }
5261 
5262 
5263 static int
5264 attr_open(int fd, const char *name, int openMode, bool kernel)
5265 {
5266 	struct vnode *vnode;
5267 	fs_cookie cookie;
5268 	int status;
5269 
5270 	if (name == NULL || *name == '\0')
5271 		return B_BAD_VALUE;
5272 
5273 	vnode = get_vnode_from_fd(fd, kernel);
5274 	if (vnode == NULL)
5275 		return B_FILE_ERROR;
5276 
5277 	if (FS_CALL(vnode, open_attr) == NULL) {
5278 		status = EOPNOTSUPP;
5279 		goto err;
5280 	}
5281 
5282 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
5283 	if (status < B_OK)
5284 		goto err;
5285 
5286 	// now we only need a file descriptor for this attribute and we're done
5287 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5288 		return status;
5289 
5290 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
5291 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
5292 
5293 err:
5294 	put_vnode(vnode);
5295 
5296 	return status;
5297 }
5298 
5299 
5300 static status_t
5301 attr_close(struct file_descriptor *descriptor)
5302 {
5303 	struct vnode *vnode = descriptor->u.vnode;
5304 
5305 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
5306 
5307 	if (FS_CALL(vnode, close_attr))
5308 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5309 
5310 	return B_OK;
5311 }
5312 
5313 
5314 static void
5315 attr_free_fd(struct file_descriptor *descriptor)
5316 {
5317 	struct vnode *vnode = descriptor->u.vnode;
5318 
5319 	if (vnode != NULL) {
5320 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5321 		put_vnode(vnode);
5322 	}
5323 }
5324 
5325 
5326 static status_t
5327 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
5328 {
5329 	struct vnode *vnode = descriptor->u.vnode;
5330 
5331 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
5332 	if (!FS_CALL(vnode, read_attr))
5333 		return EOPNOTSUPP;
5334 
5335 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
5336 }
5337 
5338 
5339 static status_t
5340 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
5341 {
5342 	struct vnode *vnode = descriptor->u.vnode;
5343 
5344 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5345 	if (!FS_CALL(vnode, write_attr))
5346 		return EOPNOTSUPP;
5347 
5348 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
5349 }
5350 
5351 
5352 static off_t
5353 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
5354 {
5355 	off_t offset;
5356 
5357 	switch (seekType) {
5358 		case SEEK_SET:
5359 			offset = 0;
5360 			break;
5361 		case SEEK_CUR:
5362 			offset = descriptor->pos;
5363 			break;
5364 		case SEEK_END:
5365 		{
5366 			struct vnode *vnode = descriptor->u.vnode;
5367 			struct stat stat;
5368 			status_t status;
5369 
5370 			if (FS_CALL(vnode, read_stat) == NULL)
5371 				return EOPNOTSUPP;
5372 
5373 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
5374 			if (status < B_OK)
5375 				return status;
5376 
5377 			offset = stat.st_size;
5378 			break;
5379 		}
5380 		default:
5381 			return B_BAD_VALUE;
5382 	}
5383 
5384 	// assumes off_t is 64 bits wide
5385 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5386 		return EOVERFLOW;
5387 
5388 	pos += offset;
5389 	if (pos < 0)
5390 		return B_BAD_VALUE;
5391 
5392 	return descriptor->pos = pos;
5393 }
5394 
5395 
5396 static status_t
5397 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5398 {
5399 	struct vnode *vnode = descriptor->u.vnode;
5400 
5401 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
5402 
5403 	if (!FS_CALL(vnode, read_attr_stat))
5404 		return EOPNOTSUPP;
5405 
5406 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
5407 }
5408 
5409 
5410 static status_t
5411 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5412 {
5413 	struct vnode *vnode = descriptor->u.vnode;
5414 
5415 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
5416 
5417 	if (!FS_CALL(vnode, write_attr_stat))
5418 		return EROFS;
5419 
5420 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
5421 }
5422 
5423 
5424 static status_t
5425 attr_remove(int fd, const char *name, bool kernel)
5426 {
5427 	struct file_descriptor *descriptor;
5428 	struct vnode *vnode;
5429 	status_t status;
5430 
5431 	if (name == NULL || *name == '\0')
5432 		return B_BAD_VALUE;
5433 
5434 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
5435 
5436 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5437 	if (descriptor == NULL)
5438 		return B_FILE_ERROR;
5439 
5440 	if (FS_CALL(vnode, remove_attr))
5441 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
5442 	else
5443 		status = EROFS;
5444 
5445 	put_fd(descriptor);
5446 
5447 	return status;
5448 }
5449 
5450 
5451 static status_t
5452 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
5453 {
5454 	struct file_descriptor *fromDescriptor, *toDescriptor;
5455 	struct vnode *fromVnode, *toVnode;
5456 	status_t status;
5457 
5458 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
5459 		return B_BAD_VALUE;
5460 
5461 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
5462 
5463 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
5464 	if (fromDescriptor == NULL)
5465 		return B_FILE_ERROR;
5466 
5467 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
5468 	if (toDescriptor == NULL) {
5469 		status = B_FILE_ERROR;
5470 		goto err;
5471 	}
5472 
5473 	// are the files on the same volume?
5474 	if (fromVnode->device != toVnode->device) {
5475 		status = B_CROSS_DEVICE_LINK;
5476 		goto err1;
5477 	}
5478 
5479 	if (FS_CALL(fromVnode, rename_attr))
5480 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
5481 	else
5482 		status = EROFS;
5483 
5484 err1:
5485 	put_fd(toDescriptor);
5486 err:
5487 	put_fd(fromDescriptor);
5488 
5489 	return status;
5490 }
5491 
5492 
5493 static status_t
5494 index_dir_open(dev_t mountID, bool kernel)
5495 {
5496 	struct fs_mount *mount;
5497 	fs_cookie cookie;
5498 
5499 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
5500 
5501 	status_t status = get_mount(mountID, &mount);
5502 	if (status < B_OK)
5503 		return status;
5504 
5505 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
5506 		status = EOPNOTSUPP;
5507 		goto out;
5508 	}
5509 
5510 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
5511 	if (status < B_OK)
5512 		goto out;
5513 
5514 	// get fd for the index directory
5515 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
5516 	if (status >= 0)
5517 		goto out;
5518 
5519 	// something went wrong
5520 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
5521 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
5522 
5523 out:
5524 	put_mount(mount);
5525 	return status;
5526 }
5527 
5528 
5529 static status_t
5530 index_dir_close(struct file_descriptor *descriptor)
5531 {
5532 	struct fs_mount *mount = descriptor->u.mount;
5533 
5534 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
5535 
5536 	if (FS_MOUNT_CALL(mount, close_index_dir))
5537 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
5538 
5539 	return B_OK;
5540 }
5541 
5542 
5543 static void
5544 index_dir_free_fd(struct file_descriptor *descriptor)
5545 {
5546 	struct fs_mount *mount = descriptor->u.mount;
5547 
5548 	if (mount != NULL) {
5549 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
5550 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5551 		//put_vnode(vnode);
5552 	}
5553 }
5554 
5555 
5556 static status_t
5557 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5558 {
5559 	struct fs_mount *mount = descriptor->u.mount;
5560 
5561 	if (FS_MOUNT_CALL(mount, read_index_dir))
5562 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5563 
5564 	return EOPNOTSUPP;
5565 }
5566 
5567 
5568 static status_t
5569 index_dir_rewind(struct file_descriptor *descriptor)
5570 {
5571 	struct fs_mount *mount = descriptor->u.mount;
5572 
5573 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
5574 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
5575 
5576 	return EOPNOTSUPP;
5577 }
5578 
5579 
5580 static status_t
5581 index_create(dev_t mountID, const char *name, uint32 type, uint32 flags, bool kernel)
5582 {
5583 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5584 
5585 	struct fs_mount *mount;
5586 	status_t status = get_mount(mountID, &mount);
5587 	if (status < B_OK)
5588 		return status;
5589 
5590 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
5591 		status = EROFS;
5592 		goto out;
5593 	}
5594 
5595 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
5596 
5597 out:
5598 	put_mount(mount);
5599 	return status;
5600 }
5601 
5602 
5603 #if 0
// Disabled (#if 0): would stat a single index via an index FD; currently
// always answers EOPNOTSUPP even when the FS provides the hook.
static status_t
index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
{
	struct vnode *vnode = descriptor->u.vnode;

	// ToDo: currently unused!
	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
	if (!FS_CALL(vnode, read_index_stat))
		return EOPNOTSUPP;

	return EOPNOTSUPP;
	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
}
5617 
5618 
// Disabled (#if 0): FD free hook for index descriptors - would release the
// FS cookie and the vnode reference held by the descriptor.
static void
index_free_fd(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;

	if (vnode != NULL) {
		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
		put_vnode(vnode);
	}
}
5629 #endif
5630 
5631 
5632 static status_t
5633 index_name_read_stat(dev_t mountID, const char *name, struct stat *stat, bool kernel)
5634 {
5635 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5636 
5637 	struct fs_mount *mount;
5638 	status_t status = get_mount(mountID, &mount);
5639 	if (status < B_OK)
5640 		return status;
5641 
5642 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
5643 		status = EOPNOTSUPP;
5644 		goto out;
5645 	}
5646 
5647 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
5648 
5649 out:
5650 	put_mount(mount);
5651 	return status;
5652 }
5653 
5654 
5655 static status_t
5656 index_remove(dev_t mountID, const char *name, bool kernel)
5657 {
5658 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5659 
5660 	struct fs_mount *mount;
5661 	status_t status = get_mount(mountID, &mount);
5662 	if (status < B_OK)
5663 		return status;
5664 
5665 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
5666 		status = EROFS;
5667 		goto out;
5668 	}
5669 
5670 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
5671 
5672 out:
5673 	put_mount(mount);
5674 	return status;
5675 }
5676 
5677 
5678 /*!	ToDo: the query FS API is still the pretty much the same as in R5.
5679 		It would be nice if the FS would find some more kernel support
5680 		for them.
5681 		For example, query parsing should be moved into the kernel.
5682 */
5683 static int
5684 query_open(dev_t device, const char *query, uint32 flags,
5685 	port_id port, int32 token, bool kernel)
5686 {
5687 	struct fs_mount *mount;
5688 	fs_cookie cookie;
5689 
5690 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
5691 
5692 	status_t status = get_mount(device, &mount);
5693 	if (status < B_OK)
5694 		return status;
5695 
5696 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
5697 		status = EOPNOTSUPP;
5698 		goto out;
5699 	}
5700 
5701 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
5702 	if (status < B_OK)
5703 		goto out;
5704 
5705 	// get fd for the index directory
5706 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
5707 	if (status >= 0)
5708 		goto out;
5709 
5710 	// something went wrong
5711 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
5712 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
5713 
5714 out:
5715 	put_mount(mount);
5716 	return status;
5717 }
5718 
5719 
5720 static status_t
5721 query_close(struct file_descriptor *descriptor)
5722 {
5723 	struct fs_mount *mount = descriptor->u.mount;
5724 
5725 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5726 
5727 	if (FS_MOUNT_CALL(mount, close_query))
5728 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5729 
5730 	return B_OK;
5731 }
5732 
5733 
5734 static void
5735 query_free_fd(struct file_descriptor *descriptor)
5736 {
5737 	struct fs_mount *mount = descriptor->u.mount;
5738 
5739 	if (mount != NULL) {
5740 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5741 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5742 		//put_vnode(vnode);
5743 	}
5744 }
5745 
5746 
5747 static status_t
5748 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5749 {
5750 	struct fs_mount *mount = descriptor->u.mount;
5751 
5752 	if (FS_MOUNT_CALL(mount, read_query))
5753 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5754 
5755 	return EOPNOTSUPP;
5756 }
5757 
5758 
5759 static status_t
5760 query_rewind(struct file_descriptor *descriptor)
5761 {
5762 	struct fs_mount *mount = descriptor->u.mount;
5763 
5764 	if (FS_MOUNT_CALL(mount, rewind_query))
5765 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5766 
5767 	return EOPNOTSUPP;
5768 }
5769 
5770 
5771 //	#pragma mark - General File System functions
5772 
5773 
5774 static dev_t
5775 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5776 	const char *args, bool kernel)
5777 {
5778 	struct fs_mount *mount;
5779 	status_t status = 0;
5780 
5781 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5782 
5783 	// The path is always safe, we just have to make sure that fsName is
5784 	// almost valid - we can't make any assumptions about args, though.
5785 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5786 	// We'll get it from the DDM later.
5787 	if (fsName == NULL) {
5788 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
5789 			return B_BAD_VALUE;
5790 	} else if (fsName[0] == '\0')
5791 		return B_BAD_VALUE;
5792 
5793 	RecursiveLocker mountOpLocker(sMountOpLock);
5794 
5795 	// Helper to delete a newly created file device on failure.
5796 	// Not exactly beautiful, but helps to keep the code below cleaner.
5797 	struct FileDeviceDeleter {
5798 		FileDeviceDeleter() : id(-1) {}
5799 		~FileDeviceDeleter()
5800 		{
5801 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5802 		}
5803 
5804 		partition_id id;
5805 	} fileDeviceDeleter;
5806 
5807 	// If the file system is not a "virtual" one, the device argument should
5808 	// point to a real file/device (if given at all).
5809 	// get the partition
5810 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5811 	KPartition *partition = NULL;
5812 	KPath normalizedDevice;
5813 	bool newlyCreatedFileDevice = false;
5814 
5815 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5816 		// normalize the device path
5817 		status = normalizedDevice.SetTo(device, true);
5818 		if (status != B_OK)
5819 			return status;
5820 
5821 		// get a corresponding partition from the DDM
5822 		partition = ddm->RegisterPartition(normalizedDevice.Path());
5823 
5824 		if (!partition) {
5825 			// Partition not found: This either means, the user supplied
5826 			// an invalid path, or the path refers to an image file. We try
5827 			// to let the DDM create a file device for the path.
5828 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5829 				&newlyCreatedFileDevice);
5830 			if (deviceID >= 0) {
5831 				partition = ddm->RegisterPartition(deviceID);
5832 				if (newlyCreatedFileDevice)
5833 					fileDeviceDeleter.id = deviceID;
5834 			}
5835 		}
5836 
5837 		if (!partition) {
5838 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5839 				normalizedDevice.Path()));
5840 			return B_ENTRY_NOT_FOUND;
5841 		}
5842 
5843 		device = normalizedDevice.Path();
5844 			// correct path to file device
5845 	}
5846 	PartitionRegistrar partitionRegistrar(partition, true);
5847 
5848 	// Write lock the partition's device. For the time being, we keep the lock
5849 	// until we're done mounting -- not nice, but ensure, that no-one is
5850 	// interfering.
5851 	// TODO: Find a better solution.
5852 	KDiskDevice *diskDevice = NULL;
5853 	if (partition) {
5854 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5855 		if (!diskDevice) {
5856 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5857 			return B_ERROR;
5858 		}
5859 	}
5860 
5861 	DeviceWriteLocker writeLocker(diskDevice, true);
5862 		// this takes over the write lock acquired before
5863 
5864 	if (partition) {
5865 		// make sure, that the partition is not busy
5866 		if (partition->IsBusy()) {
5867 			TRACE(("fs_mount(): Partition is busy.\n"));
5868 			return B_BUSY;
5869 		}
5870 
5871 		// if no FS name had been supplied, we get it from the partition
5872 		if (!fsName) {
5873 			KDiskSystem *diskSystem = partition->DiskSystem();
5874 			if (!diskSystem) {
5875 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5876 					"recognize it.\n"));
5877 				return B_BAD_VALUE;
5878 			}
5879 
5880 			if (!diskSystem->IsFileSystem()) {
5881 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5882 					"partitioning system.\n"));
5883 				return B_BAD_VALUE;
5884 			}
5885 
5886 			// The disk system name will not change, and the KDiskSystem
5887 			// object will not go away while the disk device is locked (and
5888 			// the partition has a reference to it), so this is safe.
5889 			fsName = diskSystem->Name();
5890 		}
5891 	}
5892 
5893 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5894 	if (mount == NULL)
5895 		return B_NO_MEMORY;
5896 
5897 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5898 
5899 	mount->fs_name = get_file_system_name(fsName);
5900 	if (mount->fs_name == NULL) {
5901 		status = B_NO_MEMORY;
5902 		goto err1;
5903 	}
5904 
5905 	mount->device_name = strdup(device);
5906 		// "device" can be NULL
5907 
5908 	mount->fs = get_file_system(fsName);
5909 	if (mount->fs == NULL) {
5910 		status = ENODEV;
5911 		goto err3;
5912 	}
5913 
5914 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5915 	if (status < B_OK)
5916 		goto err4;
5917 
5918 	// initialize structure
5919 	mount->id = sNextMountID++;
5920 	mount->partition = NULL;
5921 	mount->root_vnode = NULL;
5922 	mount->covers_vnode = NULL;
5923 	mount->cookie = NULL;
5924 	mount->unmounting = false;
5925 	mount->owns_file_device = false;
5926 
5927 	// insert mount struct into list before we call FS's mount() function
5928 	// so that vnodes can be created for this mount
5929 	mutex_lock(&sMountMutex);
5930 	hash_insert(sMountsTable, mount);
5931 	mutex_unlock(&sMountMutex);
5932 
5933 	ino_t rootID;
5934 
5935 	if (!sRoot) {
5936 		// we haven't mounted anything yet
5937 		if (strcmp(path, "/") != 0) {
5938 			status = B_ERROR;
5939 			goto err5;
5940 		}
5941 
5942 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5943 		if (status < 0) {
5944 			// ToDo: why should we hide the error code from the file system here?
5945 			//status = ERR_VFS_GENERAL;
5946 			goto err5;
5947 		}
5948 	} else {
5949 		struct vnode *coveredVnode;
5950 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5951 		if (status < B_OK)
5952 			goto err5;
5953 
5954 		// make sure covered_vnode is a DIR
5955 		struct stat coveredNodeStat;
5956 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5957 			coveredVnode->private_node, &coveredNodeStat);
5958 		if (status < B_OK)
5959 			goto err5;
5960 
5961 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5962 			status = B_NOT_A_DIRECTORY;
5963 			goto err5;
5964 		}
5965 
5966 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5967 			// this is already a mount point
5968 			status = B_BUSY;
5969 			goto err5;
5970 		}
5971 
5972 		mount->covers_vnode = coveredVnode;
5973 
5974 		// mount it
5975 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5976 		if (status < B_OK)
5977 			goto err6;
5978 	}
5979 
5980 	// the root node is supposed to be owned by the file system - it must
5981 	// exist at this point
5982 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5983 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5984 		panic("fs_mount: file system does not own its root node!\n");
5985 		status = B_ERROR;
5986 		goto err7;
5987 	}
5988 
5989 	// No race here, since fs_mount() is the only function changing
5990 	// covers_vnode (and holds sMountOpLock at that time).
5991 	mutex_lock(&sVnodeCoveredByMutex);
5992 	if (mount->covers_vnode)
5993 		mount->covers_vnode->covered_by = mount->root_vnode;
5994 	mutex_unlock(&sVnodeCoveredByMutex);
5995 
5996 	if (!sRoot)
5997 		sRoot = mount->root_vnode;
5998 
5999 	// supply the partition (if any) with the mount cookie and mark it mounted
6000 	if (partition) {
6001 		partition->SetMountCookie(mount->cookie);
6002 		partition->SetVolumeID(mount->id);
6003 
6004 		// keep a partition reference as long as the partition is mounted
6005 		partitionRegistrar.Detach();
6006 		mount->partition = partition;
6007 		mount->owns_file_device = newlyCreatedFileDevice;
6008 		fileDeviceDeleter.id = -1;
6009 	}
6010 
6011 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
6012 		mount->covers_vnode ? mount->covers_vnode->id : -1);
6013 
6014 	return mount->id;
6015 
6016 err7:
6017 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
6018 err6:
6019 	if (mount->covers_vnode)
6020 		put_vnode(mount->covers_vnode);
6021 err5:
6022 	mutex_lock(&sMountMutex);
6023 	hash_remove(sMountsTable, mount);
6024 	mutex_unlock(&sMountMutex);
6025 
6026 	recursive_lock_destroy(&mount->rlock);
6027 err4:
6028 	put_file_system(mount->fs);
6029 	free(mount->device_name);
6030 err3:
6031 	free(mount->fs_name);
6032 err1:
6033 	free(mount);
6034 
6035 	return status;
6036 }
6037 
6038 
/*!	Unmounts the volume identified either by \a path (its mount point) or,
	if \a path is NULL, by \a mountID. With B_FORCE_UNMOUNT set, open file
	descriptors on the volume are forcibly disconnected first.
	\return B_OK on success, B_BUSY if the volume is still in use (and no
		force was requested), or another error code.
*/
static status_t
fs_unmount(char *path, dev_t mountID, uint32 flags, bool kernel)
{
	struct vnode *vnode = NULL;
	struct fs_mount *mount;
	status_t err;

	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
		kernel));

	if (path != NULL) {
		err = path_to_vnode(path, true, &vnode, NULL, kernel);
		if (err != B_OK)
			return B_ENTRY_NOT_FOUND;
	}

	RecursiveLocker mountOpLocker(sMountOpLock);

	mount = find_mount(path != NULL ? vnode->device : mountID);
	if (mount == NULL) {
		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
			vnode);
	}

	if (path != NULL) {
		put_vnode(vnode);
			// NOTE(review): the reference is dropped before the pointer
			// comparison below; only the pointer value is used afterwards

		if (mount->root_vnode != vnode) {
			// not mountpoint
			return B_BAD_VALUE;
		}
	}

	// if the volume is associated with a partition, lock the device of the
	// partition as long as we are unmounting
	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
	KPartition *partition = mount->partition;
	KDiskDevice *diskDevice = NULL;
	if (partition) {
		if (partition->Device() == NULL) {
			dprintf("fs_unmount(): There is no device!\n");
			return B_ERROR;
		}
		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
		if (!diskDevice) {
			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
			return B_ERROR;
		}
	}
	DeviceWriteLocker writeLocker(diskDevice, true);
		// releases the write lock when going out of scope

	// make sure, that the partition is not busy
	if (partition) {
		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
			TRACE(("fs_unmount(): Partition is busy.\n"));
			return B_BUSY;
		}
	}

	// grab the vnode master mutex to keep someone from creating
	// a vnode while we're figuring out if we can continue
	mutex_lock(&sVnodeMutex);

	bool disconnectedDescriptors = false;

	while (true) {
		bool busy = false;

		// cycle through the list of vnodes associated with this mount and
		// make sure all of them are not busy or have refs on them
		vnode = NULL;
		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
				vnode)) != NULL) {
			// The root vnode ref_count needs to be 1 here (the mount has a
			// reference).
			if (vnode->busy
				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
				// there are still vnodes in use on this mount, so we cannot
				// unmount yet
				busy = true;
				break;
			}
		}

		if (!busy)
			break;

		if ((flags & B_FORCE_UNMOUNT) == 0) {
			// caller did not ask for a forced unmount - give up
			mutex_unlock(&sVnodeMutex);
			put_vnode(mount->root_vnode);

			return B_BUSY;
		}

		if (disconnectedDescriptors) {
			// wait a bit until the last access is finished, and then try again
			mutex_unlock(&sVnodeMutex);
			snooze(100000);
			// TODO: if there is some kind of bug that prevents the ref counts
			//	from getting back to zero, this will fall into an endless loop...
			mutex_lock(&sVnodeMutex);
			continue;
		}

		// the file system is still busy - but we're forced to unmount it,
		// so let's disconnect all open file descriptors

		mount->unmounting = true;
			// prevent new vnodes from being created

		mutex_unlock(&sVnodeMutex);

		disconnect_mount_or_vnode_fds(mount, NULL);
		disconnectedDescriptors = true;

		mutex_lock(&sVnodeMutex);
	}

	// we can safely continue, mark all of the vnodes busy and this mount
	// structure in unmounting state
	mount->unmounting = true;

	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
		vnode->busy = true;

		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
	}

	// The ref_count of the root node is 1 at this point, see above why this is
	mount->root_vnode->ref_count--;

	mutex_unlock(&sVnodeMutex);

	// detach the mount point from the vnode it covers
	mutex_lock(&sVnodeCoveredByMutex);
	mount->covers_vnode->covered_by = NULL;
	mutex_unlock(&sVnodeCoveredByMutex);
	put_vnode(mount->covers_vnode);

	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so
	// we don't have to do this.
	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes))
			!= NULL) {
		free_vnode(vnode, false);
	}

	// remove the mount structure from the hash table
	mutex_lock(&sMountMutex);
	hash_remove(sMountsTable, mount);
	mutex_unlock(&sMountMutex);

	mountOpLocker.Unlock();

	// let the file system clean up; it may no longer be referenced afterwards
	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
	notify_unmount(mount->id);

	// release the file system
	put_file_system(mount->fs);

	// dereference the partition and mark it unmounted
	if (partition) {
		partition->SetVolumeID(-1);
		partition->SetMountCookie(NULL);

		if (mount->owns_file_device)
			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
		partition->Unregister();
	}

	free(mount->device_name);
	free(mount->fs_name);
	free(mount);

	return B_OK;
}
6219 
6220 
/*!	Synchronizes the volume \a device: first writes back the modified file
	cache of every vnode on the mount, then invokes the file system's own
	sync() hook.
*/
static status_t
fs_sync(dev_t device)
{
	struct fs_mount *mount;
	status_t status = get_mount(device, &mount);
	if (status < B_OK)
		return status;

	// First, synchronize all file caches

	struct vnode *previousVnode = NULL;
	while (true) {
		// synchronize access to vnode list
		recursive_lock_lock(&mount->rlock);

		// advance to the next vnode that actually has a cache to flush
		struct vnode *vnode = previousVnode;
		do {
			// TODO: we could track writes (and writable mapped vnodes)
			//	and have a simple flag that we could test for here
			vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode);
		} while (vnode != NULL && vnode->cache == NULL);

		// remember the ID only - the vnode pointer may become invalid once
		// the list lock is dropped
		ino_t id = -1;
		if (vnode != NULL)
			id = vnode->id;

		recursive_lock_unlock(&mount->rlock);

		if (vnode == NULL)
			break;

		// acquire a reference to the vnode

		if (get_vnode(mount->id, id, &vnode, true, false) == B_OK) {
			// the previous vnode's reference kept our list position stable;
			// it can be released now that we hold the next one
			if (previousVnode != NULL)
				put_vnode(previousVnode);

			if (vnode->cache != NULL)
				vm_cache_write_modified(vnode->cache, false);

			// the next vnode might change until we lock the vnode list again,
			// but this vnode won't go away since we keep a reference to it.
			previousVnode = vnode;
		} else {
			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n",
				mount->id, id);
			break;
		}
	}

	if (previousVnode != NULL)
		put_vnode(previousVnode);

	// And then, let the file systems do their synchronizing work

	mutex_lock(&sMountMutex);

	if (FS_MOUNT_CALL(mount, sync))
		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);

	mutex_unlock(&sMountMutex);

	put_mount(mount);
	return status;
}
6286 
6287 
6288 static status_t
6289 fs_read_info(dev_t device, struct fs_info *info)
6290 {
6291 	struct fs_mount *mount;
6292 	status_t status = get_mount(device, &mount);
6293 	if (status < B_OK)
6294 		return status;
6295 
6296 	memset(info, 0, sizeof(struct fs_info));
6297 
6298 	if (FS_MOUNT_CALL(mount, read_fs_info))
6299 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
6300 
6301 	// fill in info the file system doesn't (have to) know about
6302 	if (status == B_OK) {
6303 		info->dev = mount->id;
6304 		info->root = mount->root_vnode->id;
6305 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
6306 		if (mount->device_name != NULL) {
6307 			strlcpy(info->device_name, mount->device_name,
6308 				sizeof(info->device_name));
6309 		}
6310 	}
6311 
6312 	// if the call is not supported by the file system, there are still
6313 	// the parts that we filled out ourselves
6314 
6315 	put_mount(mount);
6316 	return status;
6317 }
6318 
6319 
6320 static status_t
6321 fs_write_info(dev_t device, const struct fs_info *info, int mask)
6322 {
6323 	struct fs_mount *mount;
6324 	status_t status = get_mount(device, &mount);
6325 	if (status < B_OK)
6326 		return status;
6327 
6328 	if (FS_MOUNT_CALL(mount, write_fs_info))
6329 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
6330 	else
6331 		status = EROFS;
6332 
6333 	put_mount(mount);
6334 	return status;
6335 }
6336 
6337 
static dev_t
fs_next_device(int32 *_cookie)
{
	// Iterates over the mounted volumes. \a *_cookie is the device ID at
	// which to continue the search; on return it has been advanced past the
	// volume found, so the next call resumes behind it. Returns the ID of
	// the next mounted volume, or B_BAD_VALUE when there are no more.
	struct fs_mount *mount = NULL;
	dev_t device = *_cookie;

	mutex_lock(&sMountMutex);

	// Since device IDs are assigned sequentially, this algorithm
	// works well enough. It makes sure that the device list
	// returned is sorted, and that no device is skipped when an
	// already visited device got unmounted.

	while (device < sNextMountID) {
		// a mount with a NULL cookie is not (or no longer) fully mounted
		// and is therefore skipped
		mount = find_mount(device++);
		if (mount != NULL && mount->cookie != NULL)
			break;
	}

	// store the position for the next call (one past the mount we found)
	*_cookie = device;

	if (mount != NULL)
		device = mount->id;
	else
		device = B_BAD_VALUE;

	mutex_unlock(&sMountMutex);

	return device;
}
6368 
6369 
6370 static status_t
6371 get_cwd(char *buffer, size_t size, bool kernel)
6372 {
6373 	// Get current working directory from io context
6374 	struct io_context *context = get_current_io_context(kernel);
6375 	status_t status;
6376 
6377 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
6378 
6379 	mutex_lock(&context->io_mutex);
6380 
6381 	if (context->cwd)
6382 		status = dir_vnode_to_path(context->cwd, buffer, size);
6383 	else
6384 		status = B_ERROR;
6385 
6386 	mutex_unlock(&context->io_mutex);
6387 	return status;
6388 }
6389 
6390 
static status_t
set_cwd(int fd, char *path, bool kernel)
{
	// Sets the current working directory of the I/O context selected by
	// \a kernel to the directory specified by the \a fd + \a path pair.
	// On success the context takes over the reference to the new directory
	// vnode; the reference to the previous working directory is released.
	struct io_context *context;
	struct vnode *vnode = NULL;
	struct vnode *oldDirectory;
	struct stat stat;
	status_t status;

	FUNCTION(("set_cwd: path = \'%s\'\n", path));

	// Get vnode for passed path, and bail if it failed
	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
	if (status < 0)
		return status;

	// stat the node to make sure it actually is a directory
	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
	if (status < 0)
		goto err;

	if (!S_ISDIR(stat.st_mode)) {
		// nope, can't cwd to here
		status = B_NOT_A_DIRECTORY;
		goto err;
	}

	// Get current io context and lock
	context = get_current_io_context(kernel);
	mutex_lock(&context->io_mutex);

	// save the old current working directory first
	oldDirectory = context->cwd;
	context->cwd = vnode;
		// the context keeps the reference we got from fd_and_path_to_vnode()

	mutex_unlock(&context->io_mutex);

	// release the reference outside the lock
	if (oldDirectory)
		put_vnode(oldDirectory);

	return B_NO_ERROR;

err:
	put_vnode(vnode);
	return status;
}
6436 
6437 
6438 //	#pragma mark - kernel mirrored syscalls
6439 
6440 
6441 dev_t
6442 _kern_mount(const char *path, const char *device, const char *fsName,
6443 	uint32 flags, const char *args, size_t argsLength)
6444 {
6445 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6446 	if (pathBuffer.InitCheck() != B_OK)
6447 		return B_NO_MEMORY;
6448 
6449 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
6450 }
6451 
6452 
6453 status_t
6454 _kern_unmount(const char *path, uint32 flags)
6455 {
6456 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6457 	if (pathBuffer.InitCheck() != B_OK)
6458 		return B_NO_MEMORY;
6459 
6460 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
6461 }
6462 
6463 
6464 status_t
6465 _kern_read_fs_info(dev_t device, struct fs_info *info)
6466 {
6467 	if (info == NULL)
6468 		return B_BAD_VALUE;
6469 
6470 	return fs_read_info(device, info);
6471 }
6472 
6473 
6474 status_t
6475 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
6476 {
6477 	if (info == NULL)
6478 		return B_BAD_VALUE;
6479 
6480 	return fs_write_info(device, info, mask);
6481 }
6482 
6483 
6484 status_t
6485 _kern_sync(void)
6486 {
6487 	// Note: _kern_sync() is also called from _user_sync()
6488 	int32 cookie = 0;
6489 	dev_t device;
6490 	while ((device = next_dev(&cookie)) >= 0) {
6491 		status_t status = fs_sync(device);
6492 		if (status != B_OK && status != B_BAD_VALUE)
6493 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
6494 	}
6495 
6496 	return B_OK;
6497 }
6498 
6499 
dev_t
_kern_next_device(int32 *_cookie)
{
	// Kernel mirror of next_dev(): returns the ID of the next mounted
	// volume and advances \a *_cookie.
	return fs_next_device(_cookie);
}
6505 
6506 
status_t
_kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
	size_t infoSize)
{
	// Iterates over the open file descriptors of team \a teamID. \a *_cookie
	// is the FD slot to start at and is advanced past the slot returned.
	// Returns B_ENTRY_NOT_FOUND once all slots have been visited.
	if (infoSize != sizeof(fd_info))
		return B_BAD_VALUE;

	struct io_context *context = NULL;
	sem_id contextMutex = -1;
	struct team *team = NULL;

	// fetch the team's io_context (and its mutex semaphore) under the team
	// lock -- we cannot block on the mutex while holding that spinlock
	cpu_status state = disable_interrupts();
	GRAB_TEAM_LOCK();

	team = team_get_team_struct_locked(teamID);
	if (team) {
		context = (io_context *)team->io_context;
		contextMutex = context->io_mutex.sem;
	}

	RELEASE_TEAM_LOCK();
	restore_interrupts(state);

	// we now have a context - since we couldn't lock it while having
	// safe access to the team structure, we now need to lock the mutex
	// manually

	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
		// team doesn't exist or seems to be gone
		return B_BAD_TEAM_ID;
	}

	// the team cannot be deleted completely while we're owning its
	// io_context mutex, so we can safely play with it now

	// we acquired the mutex's semaphore directly, so fix up its holder field
	context->io_mutex.holder = thread_get_current_thread_id();

	uint32 slot = *_cookie;

	// find the next occupied FD slot
	struct file_descriptor *descriptor;
	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
		slot++;

	if (slot >= context->table_size) {
		mutex_unlock(&context->io_mutex);
		return B_ENTRY_NOT_FOUND;
	}

	info->number = slot;
	info->open_mode = descriptor->open_mode;

	// descriptors either refer to a vnode or (e.g. attr dirs) to a mount
	struct vnode *vnode = fd_vnode(descriptor);
	if (vnode != NULL) {
		info->device = vnode->device;
		info->node = vnode->id;
	} else if (descriptor->u.mount != NULL) {
		info->device = descriptor->u.mount->id;
		info->node = -1;
	}

	mutex_unlock(&context->io_mutex);

	*_cookie = slot + 1;
	return B_OK;
}
6572 
6573 
6574 int
6575 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
6576 {
6577 	if (openMode & O_CREAT)
6578 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
6579 
6580 	return file_open_entry_ref(device, inode, name, openMode, true);
6581 }
6582 
6583 
6584 /*!	\brief Opens a node specified by a FD + path pair.
6585 
6586 	At least one of \a fd and \a path must be specified.
6587 	If only \a fd is given, the function opens the node identified by this
6588 	FD. If only a path is given, this path is opened. If both are given and
6589 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6590 	of the directory (!) identified by \a fd.
6591 
6592 	\param fd The FD. May be < 0.
6593 	\param path The absolute or relative path. May be \c NULL.
6594 	\param openMode The open mode.
6595 	\return A FD referring to the newly opened node, or an error code,
6596 			if an error occurs.
6597 */
6598 int
6599 _kern_open(int fd, const char *path, int openMode, int perms)
6600 {
6601 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6602 	if (pathBuffer.InitCheck() != B_OK)
6603 		return B_NO_MEMORY;
6604 
6605 	if (openMode & O_CREAT)
6606 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
6607 
6608 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
6609 }
6610 
6611 
6612 /*!	\brief Opens a directory specified by entry_ref or node_ref.
6613 
6614 	The supplied name may be \c NULL, in which case directory identified
6615 	by \a device and \a inode will be opened. Otherwise \a device and
6616 	\a inode identify the parent directory of the directory to be opened
6617 	and \a name its entry name.
6618 
6619 	\param device If \a name is specified the ID of the device the parent
6620 		   directory of the directory to be opened resides on, otherwise
6621 		   the device of the directory itself.
6622 	\param inode If \a name is specified the node ID of the parent
6623 		   directory of the directory to be opened, otherwise node ID of the
6624 		   directory itself.
6625 	\param name The entry name of the directory to be opened. If \c NULL,
6626 		   the \a device + \a inode pair identify the node to be opened.
6627 	\return The FD of the newly opened directory or an error code, if
6628 			something went wrong.
6629 */
int
_kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
{
	// see the documentation comment above; kernel == true selects the
	// kernel I/O context
	return dir_open_entry_ref(device, inode, name, true);
}
6635 
6636 
6637 /*!	\brief Opens a directory specified by a FD + path pair.
6638 
6639 	At least one of \a fd and \a path must be specified.
6640 	If only \a fd is given, the function opens the directory identified by this
6641 	FD. If only a path is given, this path is opened. If both are given and
6642 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6643 	of the directory (!) identified by \a fd.
6644 
6645 	\param fd The FD. May be < 0.
6646 	\param path The absolute or relative path. May be \c NULL.
6647 	\return A FD referring to the newly opened directory, or an error code,
6648 			if an error occurs.
6649 */
6650 int
6651 _kern_open_dir(int fd, const char *path)
6652 {
6653 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6654 	if (pathBuffer.InitCheck() != B_OK)
6655 		return B_NO_MEMORY;
6656 
6657 	return dir_open(fd, pathBuffer.LockBuffer(), true);
6658 }
6659 
6660 
status_t
_kern_fcntl(int fd, int op, uint32 argument)
{
	// Kernel mirror of fcntl(); delegates to the common implementation
	// with the kernel I/O context.
	return common_fcntl(fd, op, argument, true);
}
6666 
6667 
status_t
_kern_fsync(int fd)
{
	// Kernel mirror of fsync(); delegates to the common implementation
	// with the kernel I/O context.
	return common_sync(fd, true);
}
6673 
6674 
status_t
_kern_lock_node(int fd)
{
	// Locks the node referred to by \a fd (BeOS-style node locking).
	return common_lock_node(fd, true);
}
6680 
6681 
status_t
_kern_unlock_node(int fd)
{
	// Releases the node lock previously acquired via _kern_lock_node().
	return common_unlock_node(fd, true);
}
6687 
6688 
status_t
_kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
{
	// Creates directory \a name with permissions \a perms in the parent
	// directory given by the \a device + \a inode pair.
	return dir_create_entry_ref(device, inode, name, perms, true);
}
6694 
6695 
6696 /*!	\brief Creates a directory specified by a FD + path pair.
6697 
6698 	\a path must always be specified (it contains the name of the new directory
6699 	at least). If only a path is given, this path identifies the location at
6700 	which the directory shall be created. If both \a fd and \a path are given and
6701 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6702 	of the directory (!) identified by \a fd.
6703 
6704 	\param fd The FD. May be < 0.
6705 	\param path The absolute or relative path. Must not be \c NULL.
6706 	\param perms The access permissions the new directory shall have.
6707 	\return \c B_OK, if the directory has been created successfully, another
6708 			error code otherwise.
6709 */
6710 status_t
6711 _kern_create_dir(int fd, const char *path, int perms)
6712 {
6713 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6714 	if (pathBuffer.InitCheck() != B_OK)
6715 		return B_NO_MEMORY;
6716 
6717 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6718 }
6719 
6720 
6721 status_t
6722 _kern_remove_dir(int fd, const char *path)
6723 {
6724 	if (path) {
6725 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6726 		if (pathBuffer.InitCheck() != B_OK)
6727 			return B_NO_MEMORY;
6728 
6729 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6730 	}
6731 
6732 	return dir_remove(fd, NULL, true);
6733 }
6734 
6735 
6736 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
6737 
6738 	At least one of \a fd and \a path must be specified.
	If only \a fd is given, the symlink to be read is the node identified by this
6740 	identified by this FD. If only a path is given, this path identifies the
6741 	symlink to be read. If both are given and the path is absolute, \a fd is
6742 	ignored; a relative path is reckoned off of the directory (!) identified
6743 	by \a fd.
6744 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6745 	will still be updated to reflect the required buffer size.
6746 
6747 	\param fd The FD. May be < 0.
6748 	\param path The absolute or relative path. May be \c NULL.
6749 	\param buffer The buffer into which the contents of the symlink shall be
6750 		   written.
6751 	\param _bufferSize A pointer to the size of the supplied buffer.
6752 	\return The length of the link on success or an appropriate error code
6753 */
6754 status_t
6755 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6756 {
6757 	status_t status;
6758 
6759 	if (path) {
6760 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6761 		if (pathBuffer.InitCheck() != B_OK)
6762 			return B_NO_MEMORY;
6763 
6764 		return common_read_link(fd, pathBuffer.LockBuffer(),
6765 			buffer, _bufferSize, true);
6766 	}
6767 
6768 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6769 }
6770 
6771 
6772 /*!	\brief Creates a symlink specified by a FD + path pair.
6773 
6774 	\a path must always be specified (it contains the name of the new symlink
6775 	at least). If only a path is given, this path identifies the location at
6776 	which the symlink shall be created. If both \a fd and \a path are given and
6777 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6778 	of the directory (!) identified by \a fd.
6779 
	\param fd The FD. May be < 0.
	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the symlink shall point to.
	\param mode The access permissions the new symlink shall have.
6783 	\return \c B_OK, if the symlink has been created successfully, another
6784 			error code otherwise.
6785 */
6786 status_t
6787 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6788 {
6789 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6790 	if (pathBuffer.InitCheck() != B_OK)
6791 		return B_NO_MEMORY;
6792 
6793 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6794 		toPath, mode, true);
6795 }
6796 
6797 
6798 status_t
6799 _kern_create_link(const char *path, const char *toPath)
6800 {
6801 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6802 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6803 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6804 		return B_NO_MEMORY;
6805 
6806 	return common_create_link(pathBuffer.LockBuffer(),
6807 		toPathBuffer.LockBuffer(), true);
6808 }
6809 
6810 
6811 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
6812 
6813 	\a path must always be specified (it contains at least the name of the entry
6814 	to be deleted). If only a path is given, this path identifies the entry
6815 	directly. If both \a fd and \a path are given and the path is absolute,
6816 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6817 	identified by \a fd.
6818 
6819 	\param fd The FD. May be < 0.
6820 	\param path The absolute or relative path. Must not be \c NULL.
6821 	\return \c B_OK, if the entry has been removed successfully, another
6822 			error code otherwise.
6823 */
6824 status_t
6825 _kern_unlink(int fd, const char *path)
6826 {
6827 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6828 	if (pathBuffer.InitCheck() != B_OK)
6829 		return B_NO_MEMORY;
6830 
6831 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6832 }
6833 
6834 
6835 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
6836 		   by another FD + path pair.
6837 
6838 	\a oldPath and \a newPath must always be specified (they contain at least
6839 	the name of the entry). If only a path is given, this path identifies the
6840 	entry directly. If both a FD and a path are given and the path is absolute,
6841 	the FD is ignored; a relative path is reckoned off of the directory (!)
6842 	identified by the respective FD.
6843 
6844 	\param oldFD The FD of the old location. May be < 0.
6845 	\param oldPath The absolute or relative path of the old location. Must not
6846 		   be \c NULL.
6847 	\param newFD The FD of the new location. May be < 0.
6848 	\param newPath The absolute or relative path of the new location. Must not
6849 		   be \c NULL.
6850 	\return \c B_OK, if the entry has been moved successfully, another
6851 			error code otherwise.
6852 */
6853 status_t
6854 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6855 {
6856 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6857 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6858 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6859 		return B_NO_MEMORY;
6860 
6861 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6862 		newFD, newPathBuffer.LockBuffer(), true);
6863 }
6864 
6865 
6866 status_t
6867 _kern_access(const char *path, int mode)
6868 {
6869 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6870 	if (pathBuffer.InitCheck() != B_OK)
6871 		return B_NO_MEMORY;
6872 
6873 	return common_access(pathBuffer.LockBuffer(), mode, true);
6874 }
6875 
6876 
6877 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
6878 
6879 	If only \a fd is given, the stat operation associated with the type
6880 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6881 	given, this path identifies the entry for whose node to retrieve the
6882 	stat data. If both \a fd and \a path are given and the path is absolute,
6883 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6884 	identified by \a fd and specifies the entry whose stat data shall be
6885 	retrieved.
6886 
6887 	\param fd The FD. May be < 0.
6888 	\param path The absolute or relative path. Must not be \c NULL.
6889 	\param traverseLeafLink If \a path is given, \c true specifies that the
6890 		   function shall not stick to symlinks, but traverse them.
6891 	\param stat The buffer the stat data shall be written into.
6892 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data have been read successfully, another
6894 			error code otherwise.
6895 */
status_t
_kern_read_stat(int fd, const char *path, bool traverseLeafLink,
	struct stat *stat, size_t statSize)
{
	struct stat completeStat;
	struct stat *originalStat = NULL;
	status_t status;

	// a caller must never claim a struct stat larger than ours
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions
	if (statSize < sizeof(struct stat)) {
		// the caller uses an older, smaller struct stat: stat into a
		// full-sized kernel copy and truncate on the way out (below)
		originalStat = stat;
		stat = &completeStat;
	}

	if (path) {
		// path given: get the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, stat);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	// copy the first statSize bytes back into the caller's smaller buffer
	if (status == B_OK && originalStat != NULL)
		memcpy(originalStat, stat, statSize);

	return status;
}
6941 
6942 
6943 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
6944 
6945 	If only \a fd is given, the stat operation associated with the type
6946 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6947 	given, this path identifies the entry for whose node to write the
6948 	stat data. If both \a fd and \a path are given and the path is absolute,
6949 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6950 	identified by \a fd and specifies the entry whose stat data shall be
6951 	written.
6952 
6953 	\param fd The FD. May be < 0.
6954 	\param path The absolute or relative path. Must not be \c NULL.
6955 	\param traverseLeafLink If \a path is given, \c true specifies that the
6956 		   function shall not stick to symlinks, but traverse them.
6957 	\param stat The buffer containing the stat data to be written.
6958 	\param statSize The size of the supplied stat buffer.
6959 	\param statMask A mask specifying which parts of the stat data shall be
6960 		   written.
	\return \c B_OK, if the stat data have been written successfully,
6962 			another error code otherwise.
6963 */
status_t
_kern_write_stat(int fd, const char *path, bool traverseLeafLink,
	const struct stat *stat, size_t statSize, int statMask)
{
	struct stat completeStat;

	// a caller must never claim a struct stat larger than ours
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions
	if (statSize < sizeof(struct stat)) {
		// the caller uses an older, smaller struct stat: widen it into a
		// zero-padded full-sized copy before passing it on
		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
		memcpy(&completeStat, stat, statSize);
		stat = &completeStat;
	}

	status_t status;

	if (path) {
		// path given: write the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, statMask, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	return status;
}
7007 
7008 
7009 int
7010 _kern_open_attr_dir(int fd, const char *path)
7011 {
7012 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7013 	if (pathBuffer.InitCheck() != B_OK)
7014 		return B_NO_MEMORY;
7015 
7016 	if (path != NULL)
7017 		pathBuffer.SetTo(path);
7018 
7019 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
7020 }
7021 
7022 
int
_kern_create_attr(int fd, const char *name, uint32 type, int openMode)
{
	// Creates attribute \a name of type \a type on the node given by \a fd.
	return attr_create(fd, name, type, openMode, true);
}
7028 
7029 
int
_kern_open_attr(int fd, const char *name, int openMode)
{
	// Opens attribute \a name of the node given by \a fd.
	return attr_open(fd, name, openMode, true);
}
7035 
7036 
status_t
_kern_remove_attr(int fd, const char *name)
{
	// Removes attribute \a name from the node given by \a fd.
	return attr_remove(fd, name, true);
}
7042 
7043 
status_t
_kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
{
	// Renames/moves attribute \a fromName of \a fromFile to \a toName of
	// \a toFile.
	return attr_rename(fromFile, fromName, toFile, toName, true);
}
7049 
7050 
int
_kern_open_index_dir(dev_t device)
{
	// Opens the index directory of the volume given by \a device.
	return index_dir_open(device, true);
}
7056 
7057 
status_t
_kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
{
	// Creates index \a name of type \a type on the volume given by \a device.
	return index_create(device, name, type, flags, true);
}
7063 
7064 
status_t
_kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
{
	// Retrieves stat information for index \a name on volume \a device.
	return index_name_read_stat(device, name, stat, true);
}
7070 
7071 
status_t
_kern_remove_index(dev_t device, const char *name)
{
	// Removes index \a name from the volume given by \a device.
	return index_remove(device, name, true);
}
7077 
7078 
status_t
_kern_getcwd(char *buffer, size_t size)
{
	// Kernel mirror of getcwd(): writes the kernel I/O context's current
	// working directory into \a buffer (at most \a size bytes).
	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));

	// Call vfs to get current working directory
	return get_cwd(buffer, size, true);
}
7087 
7088 
7089 status_t
7090 _kern_setcwd(int fd, const char *path)
7091 {
7092 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7093 	if (pathBuffer.InitCheck() != B_OK)
7094 		return B_NO_MEMORY;
7095 
7096 	if (path != NULL)
7097 		pathBuffer.SetTo(path);
7098 
7099 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
7100 }
7101 
7102 
7103 //	#pragma mark - userland syscalls
7104 
7105 
dev_t
_user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
	uint32 flags, const char *userArgs, size_t argsLength)
{
	// Userland mount(): copies all string arguments into kernel space and
	// delegates to fs_mount(). \a userDevice and \a userFileSystem may be
	// NULL; \a userArgs are file-system specific mount options.
	char fileSystem[B_OS_NAME_LENGTH];
	KPath path, device;
	char *args = NULL;
	status_t status;

	// NOTE(review): userDevice/userFileSystem may legitimately be NULL (the
	// calls below treat NULL as "not given"), yet both are passed to
	// IS_USER_ADDRESS() unconditionally -- confirm IS_USER_ADDRESS(NULL)
	// yields the intended result here.
	if (!IS_USER_ADDRESS(userPath)
		|| !IS_USER_ADDRESS(userFileSystem)
		|| !IS_USER_ADDRESS(userDevice))
		return B_BAD_ADDRESS;

	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
		return B_NO_MEMORY;

	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	if (userFileSystem != NULL
		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
		return B_BAD_ADDRESS;

	if (userDevice != NULL
		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	if (userArgs != NULL && argsLength > 0) {
		// this is a safety restriction
		if (argsLength >= 65536)
			return B_NAME_TOO_LONG;

		// args is heap-allocated and owned by this function; it is freed on
		// every exit path below
		args = (char *)malloc(argsLength + 1);
		if (args == NULL)
			return B_NO_MEMORY;

		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
			free(args);
			return B_BAD_ADDRESS;
		}
	}
	path.UnlockBuffer();
	device.UnlockBuffer();

	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
		userFileSystem ? fileSystem : NULL, flags, args, false);

	free(args);
	return status;
}
7157 
7158 
7159 status_t
7160 _user_unmount(const char *userPath, uint32 flags)
7161 {
7162 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7163 	if (pathBuffer.InitCheck() != B_OK)
7164 		return B_NO_MEMORY;
7165 
7166 	char *path = pathBuffer.LockBuffer();
7167 
7168 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7169 		return B_BAD_ADDRESS;
7170 
7171 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
7172 }
7173 
7174 
7175 status_t
7176 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
7177 {
7178 	struct fs_info info;
7179 	status_t status;
7180 
7181 	if (userInfo == NULL)
7182 		return B_BAD_VALUE;
7183 
7184 	if (!IS_USER_ADDRESS(userInfo))
7185 		return B_BAD_ADDRESS;
7186 
7187 	status = fs_read_info(device, &info);
7188 	if (status != B_OK)
7189 		return status;
7190 
7191 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
7192 		return B_BAD_ADDRESS;
7193 
7194 	return B_OK;
7195 }
7196 
7197 
7198 status_t
7199 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
7200 {
7201 	struct fs_info info;
7202 
7203 	if (userInfo == NULL)
7204 		return B_BAD_VALUE;
7205 
7206 	if (!IS_USER_ADDRESS(userInfo)
7207 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
7208 		return B_BAD_ADDRESS;
7209 
7210 	return fs_write_info(device, &info, mask);
7211 }
7212 
7213 
7214 dev_t
7215 _user_next_device(int32 *_userCookie)
7216 {
7217 	int32 cookie;
7218 	dev_t device;
7219 
7220 	if (!IS_USER_ADDRESS(_userCookie)
7221 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
7222 		return B_BAD_ADDRESS;
7223 
7224 	device = fs_next_device(&cookie);
7225 
7226 	if (device >= B_OK) {
7227 		// update user cookie
7228 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
7229 			return B_BAD_ADDRESS;
7230 	}
7231 
7232 	return device;
7233 }
7234 
7235 
status_t
_user_sync(void)
{
	// Userland sync() simply forwards to the kernel implementation.
	return _kern_sync();
}
7241 
7242 
7243 status_t
7244 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
7245 	size_t infoSize)
7246 {
7247 	struct fd_info info;
7248 	uint32 cookie;
7249 
7250 	// only root can do this (or should root's group be enough?)
7251 	if (geteuid() != 0)
7252 		return B_NOT_ALLOWED;
7253 
7254 	if (infoSize != sizeof(fd_info))
7255 		return B_BAD_VALUE;
7256 
7257 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
7258 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
7259 		return B_BAD_ADDRESS;
7260 
7261 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
7262 	if (status < B_OK)
7263 		return status;
7264 
7265 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
7266 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
7267 		return B_BAD_ADDRESS;
7268 
7269 	return status;
7270 }
7271 
7272 
status_t
_user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
	char *userPath, size_t pathLength)
{
	// Resolves the entry ref (\a device, \a inode, \a leaf) to an absolute
	// path and copies it into the userland buffer \a userPath (at most
	// \a pathLength bytes).
	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;

	KPath path(B_PATH_NAME_LENGTH + 1);
	if (path.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// copy the leaf name onto the stack
	char stackLeaf[B_FILE_NAME_LENGTH];
	if (leaf) {
		if (!IS_USER_ADDRESS(leaf))
			return B_BAD_ADDRESS;

		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
		if (length < 0)
			return length;
		if (length >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		leaf = stackLeaf;
	}

	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
		path.LockBuffer(), path.BufferSize());
	if (status < B_OK)
		return status;

	path.UnlockBuffer();

	// transfer the result; report overflow if the user buffer is too small
	int length = user_strlcpy(userPath, path.Path(), pathLength);
	if (length < 0)
		return length;
	if (length >= (int)pathLength)
		return B_BUFFER_OVERFLOW;

	return B_OK;
}
7314 
7315 
status_t
_user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
{
	// Normalizes \a userPath to an absolute path without "."/".."
	// components, optionally (\a traverseLink) resolving a trailing
	// symlink, and copies the result into the userland \a buffer
	// (B_PATH_NAME_LENGTH bytes).
	if (userPath == NULL || buffer == NULL)
		return B_BAD_VALUE;
	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
		return B_BAD_ADDRESS;

	// copy path from userland
	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;
	char* path = pathBuffer.LockBuffer();

	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	// buffer for the leaf part
	KPath leafBuffer(B_PATH_NAME_LENGTH + 1);
	if (leafBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;
	char* leaf = leafBuffer.LockBuffer();

	VNodePutter dirPutter;
	struct vnode* dir = NULL;
	status_t error;

	// Each iteration resolves one symlink; bail out after B_MAX_SYMLINKS
	// iterations to break cycles.
	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
		// get dir vnode + leaf name
		struct vnode* nextDir;
		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, false);
		if (error != B_OK)
			return error;

		dir = nextDir;
		strcpy(path, leaf);
		dirPutter.SetTo(dir);
			// dirPutter releases the previous dir reference and now owns
			// the new one

		// get file vnode
		inc_vnode_ref_count(dir);
			// extra reference, since vnode_path_to_vnode() consumes one
		struct vnode* fileVnode;
		int type;
		error = vnode_path_to_vnode(dir, path, false, 0, &fileVnode, NULL,
			&type);
		if (error != B_OK)
			return error;
		VNodePutter fileVnodePutter(fileVnode);

		if (!traverseLink || !S_ISLNK(type)) {
			// we're done -- construct the path
			bool hasLeaf = true;
			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
				// special cases "." and ".." -- get the dir, forget the leaf
				inc_vnode_ref_count(dir);
				error = vnode_path_to_vnode(dir, leaf, false, 0, &nextDir, NULL,
					NULL);
				if (error != B_OK)
					return error;
				dir = nextDir;
				dirPutter.SetTo(dir);
				hasLeaf = false;
			}

			// get the directory path
			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH);
			if (error != B_OK)
				return error;

			// append the leaf name
			if (hasLeaf) {
				// insert a directory separator if this is not the file system
				// root
				if ((strcmp(path, "/") != 0
					&& strlcat(path, "/", pathBuffer.BufferSize())
						>= pathBuffer.BufferSize())
					|| strlcat(path, leaf, pathBuffer.BufferSize())
						>= pathBuffer.BufferSize()) {
					return B_NAME_TOO_LONG;
				}
			}

			// copy back to userland
			int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
			if (len < 0)
				return len;
			if (len >= B_PATH_NAME_LENGTH)
				return B_BUFFER_OVERFLOW;

			return B_OK;
		}

		// read link -- the link contents replace "path" for the next
		// iteration
		struct stat st;
		if (FS_CALL(fileVnode, read_symlink) != NULL) {
			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
			error = FS_CALL(fileVnode, read_symlink)(fileVnode->mount->cookie,
				fileVnode->private_node, path, &bufferSize);
			if (error != B_OK)
				return error;
			path[bufferSize] = '\0';
		} else
			return B_BAD_VALUE;
	}

	return B_LINK_LIMIT;
}
7422 
7423 
7424 int
7425 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
7426 	int openMode, int perms)
7427 {
7428 	char name[B_FILE_NAME_LENGTH];
7429 
7430 	if (userName == NULL || device < 0 || inode < 0)
7431 		return B_BAD_VALUE;
7432 	if (!IS_USER_ADDRESS(userName)
7433 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7434 		return B_BAD_ADDRESS;
7435 
7436 	if (openMode & O_CREAT)
7437 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
7438 
7439 	return file_open_entry_ref(device, inode, name, openMode, false);
7440 }
7441 
7442 
7443 int
7444 _user_open(int fd, const char *userPath, int openMode, int perms)
7445 {
7446 	KPath path(B_PATH_NAME_LENGTH + 1);
7447 	if (path.InitCheck() != B_OK)
7448 		return B_NO_MEMORY;
7449 
7450 	char *buffer = path.LockBuffer();
7451 
7452 	if (!IS_USER_ADDRESS(userPath)
7453 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7454 		return B_BAD_ADDRESS;
7455 
7456 	if (openMode & O_CREAT)
7457 		return file_create(fd, buffer, openMode, perms, false);
7458 
7459 	return file_open(fd, buffer, openMode, false);
7460 }
7461 
7462 
7463 int
7464 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
7465 {
7466 	if (userName != NULL) {
7467 		char name[B_FILE_NAME_LENGTH];
7468 
7469 		if (!IS_USER_ADDRESS(userName)
7470 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7471 			return B_BAD_ADDRESS;
7472 
7473 		return dir_open_entry_ref(device, inode, name, false);
7474 	}
7475 	return dir_open_entry_ref(device, inode, NULL, false);
7476 }
7477 
7478 
7479 int
7480 _user_open_dir(int fd, const char *userPath)
7481 {
7482 	KPath path(B_PATH_NAME_LENGTH + 1);
7483 	if (path.InitCheck() != B_OK)
7484 		return B_NO_MEMORY;
7485 
7486 	char *buffer = path.LockBuffer();
7487 
7488 	if (!IS_USER_ADDRESS(userPath)
7489 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7490 		return B_BAD_ADDRESS;
7491 
7492 	return dir_open(fd, buffer, false);
7493 }
7494 
7495 
7496 /*!	\brief Opens a directory's parent directory and returns the entry name
7497 		   of the former.
7498 
7499 	Aside from that is returns the directory's entry name, this method is
7500 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
7501 	equivalent, if \a userName is \c NULL.
7502 
7503 	If a name buffer is supplied and the name does not fit the buffer, the
7504 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
7505 
7506 	\param fd A FD referring to a directory.
7507 	\param userName Buffer the directory's entry name shall be written into.
7508 		   May be \c NULL.
7509 	\param nameLength Size of the name buffer.
7510 	\return The file descriptor of the opened parent directory, if everything
7511 			went fine, an error code otherwise.
7512 */
int
_user_open_parent_dir(int fd, char *userName, size_t nameLength)
{
	// all work is done on behalf of userland
	bool kernel = false;

	if (userName && !IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	// open the parent dir
	int parentFD = dir_open(fd, "..", kernel);
	if (parentFD < 0)
		return parentFD;
	FDCloser fdCloser(parentFD, kernel);
		// closes parentFD again on every error path; Detach()ed on success

	if (userName) {
		// get the vnodes
		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
		VNodePutter parentVNodePutter(parentVNode);
		VNodePutter dirVNodePutter(dirVNode);
		if (!parentVNode || !dirVNode)
			return B_FILE_ERROR;

		// get the vnode name
		// on-stack dirent with room for a full file name after the header
		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		struct dirent *buffer = (struct dirent*)_buffer;
		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
			sizeof(_buffer));
		if (status != B_OK)
			return status;

		// copy the name to the userland buffer
		int len = user_strlcpy(userName, buffer->d_name, nameLength);
		if (len < 0)
			return len;
		if (len >= (int)nameLength)
			return B_BUFFER_OVERFLOW;
			// name did not fit the supplied buffer -- fail as documented
	}

	// success: hand the parent FD over to userland without closing it
	return fdCloser.Detach();
}
7554 
7555 
7556 status_t
7557 _user_fcntl(int fd, int op, uint32 argument)
7558 {
7559 	status_t status = common_fcntl(fd, op, argument, false);
7560 	if (op == F_SETLKW)
7561 		syscall_restart_handle_post(status);
7562 
7563 	return status;
7564 }
7565 
7566 
7567 status_t
7568 _user_fsync(int fd)
7569 {
7570 	return common_sync(fd, false);
7571 }
7572 
7573 
7574 status_t
7575 _user_flock(int fd, int op)
7576 {
7577 	struct file_descriptor *descriptor;
7578 	struct vnode *vnode;
7579 	struct flock flock;
7580 	status_t status;
7581 
7582 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, op));
7583 
7584 	descriptor = get_fd_and_vnode(fd, &vnode, false);
7585 	if (descriptor == NULL)
7586 		return B_FILE_ERROR;
7587 
7588 	if (descriptor->type != FDTYPE_FILE) {
7589 		put_fd(descriptor);
7590 		return B_BAD_VALUE;
7591 	}
7592 
7593 	flock.l_start = 0;
7594 	flock.l_len = OFF_MAX;
7595 	flock.l_whence = 0;
7596 	flock.l_type = (op & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
7597 
7598 	if ((op & LOCK_UN) != 0)
7599 		status = release_advisory_lock(descriptor->u.vnode, &flock);
7600 	else {
7601 		status = acquire_advisory_lock(descriptor->u.vnode,
7602 			thread_get_current_thread()->team->session_id, &flock,
7603 			(op & LOCK_NB) == 0);
7604 	}
7605 
7606 	syscall_restart_handle_post(status);
7607 
7608 	put_fd(descriptor);
7609 	return status;
7610 }
7611 
7612 
7613 status_t
7614 _user_lock_node(int fd)
7615 {
7616 	return common_lock_node(fd, false);
7617 }
7618 
7619 
7620 status_t
7621 _user_unlock_node(int fd)
7622 {
7623 	return common_unlock_node(fd, false);
7624 }
7625 
7626 
7627 status_t
7628 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
7629 {
7630 	char name[B_FILE_NAME_LENGTH];
7631 	status_t status;
7632 
7633 	if (!IS_USER_ADDRESS(userName))
7634 		return B_BAD_ADDRESS;
7635 
7636 	status = user_strlcpy(name, userName, sizeof(name));
7637 	if (status < 0)
7638 		return status;
7639 
7640 	return dir_create_entry_ref(device, inode, name, perms, false);
7641 }
7642 
7643 
7644 status_t
7645 _user_create_dir(int fd, const char *userPath, int perms)
7646 {
7647 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7648 	if (pathBuffer.InitCheck() != B_OK)
7649 		return B_NO_MEMORY;
7650 
7651 	char *path = pathBuffer.LockBuffer();
7652 
7653 	if (!IS_USER_ADDRESS(userPath)
7654 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7655 		return B_BAD_ADDRESS;
7656 
7657 	return dir_create(fd, path, perms, false);
7658 }
7659 
7660 
7661 status_t
7662 _user_remove_dir(int fd, const char *userPath)
7663 {
7664 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7665 	if (pathBuffer.InitCheck() != B_OK)
7666 		return B_NO_MEMORY;
7667 
7668 	char *path = pathBuffer.LockBuffer();
7669 
7670 	if (userPath != NULL) {
7671 		if (!IS_USER_ADDRESS(userPath)
7672 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7673 			return B_BAD_ADDRESS;
7674 	}
7675 
7676 	return dir_remove(fd, userPath ? path : NULL, false);
7677 }
7678 
7679 
7680 status_t
7681 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
7682 {
7683 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
7684 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
7685 		return B_NO_MEMORY;
7686 
7687 	size_t bufferSize;
7688 
7689 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
7690 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
7691 		return B_BAD_ADDRESS;
7692 
7693 	char *path = pathBuffer.LockBuffer();
7694 	char *buffer = linkBuffer.LockBuffer();
7695 
7696 	if (userPath) {
7697 		if (!IS_USER_ADDRESS(userPath)
7698 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7699 			return B_BAD_ADDRESS;
7700 
7701 		if (bufferSize > B_PATH_NAME_LENGTH)
7702 			bufferSize = B_PATH_NAME_LENGTH;
7703 	}
7704 
7705 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
7706 		&bufferSize, false);
7707 
7708 	// we also update the bufferSize in case of errors
7709 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
7710 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
7711 		return B_BAD_ADDRESS;
7712 
7713 	if (status < B_OK)
7714 		return status;
7715 
7716 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
7717 		return B_BAD_ADDRESS;
7718 
7719 	return B_OK;
7720 }
7721 
7722 
7723 status_t
7724 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
7725 	int mode)
7726 {
7727 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7728 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7729 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7730 		return B_NO_MEMORY;
7731 
7732 	char *path = pathBuffer.LockBuffer();
7733 	char *toPath = toPathBuffer.LockBuffer();
7734 
7735 	if (!IS_USER_ADDRESS(userPath)
7736 		|| !IS_USER_ADDRESS(userToPath)
7737 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7738 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7739 		return B_BAD_ADDRESS;
7740 
7741 	return common_create_symlink(fd, path, toPath, mode, false);
7742 }
7743 
7744 
7745 status_t
7746 _user_create_link(const char *userPath, const char *userToPath)
7747 {
7748 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7749 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7750 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7751 		return B_NO_MEMORY;
7752 
7753 	char *path = pathBuffer.LockBuffer();
7754 	char *toPath = toPathBuffer.LockBuffer();
7755 
7756 	if (!IS_USER_ADDRESS(userPath)
7757 		|| !IS_USER_ADDRESS(userToPath)
7758 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7759 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7760 		return B_BAD_ADDRESS;
7761 
7762 	status_t status = check_path(toPath);
7763 	if (status < B_OK)
7764 		return status;
7765 
7766 	return common_create_link(path, toPath, false);
7767 }
7768 
7769 
7770 status_t
7771 _user_unlink(int fd, const char *userPath)
7772 {
7773 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7774 	if (pathBuffer.InitCheck() != B_OK)
7775 		return B_NO_MEMORY;
7776 
7777 	char *path = pathBuffer.LockBuffer();
7778 
7779 	if (!IS_USER_ADDRESS(userPath)
7780 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7781 		return B_BAD_ADDRESS;
7782 
7783 	return common_unlink(fd, path, false);
7784 }
7785 
7786 
7787 status_t
7788 _user_rename(int oldFD, const char *userOldPath, int newFD,
7789 	const char *userNewPath)
7790 {
7791 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7792 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7793 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7794 		return B_NO_MEMORY;
7795 
7796 	char *oldPath = oldPathBuffer.LockBuffer();
7797 	char *newPath = newPathBuffer.LockBuffer();
7798 
7799 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7800 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7801 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7802 		return B_BAD_ADDRESS;
7803 
7804 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7805 }
7806 
7807 
7808 status_t
7809 _user_access(const char *userPath, int mode)
7810 {
7811 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7812 	if (pathBuffer.InitCheck() != B_OK)
7813 		return B_NO_MEMORY;
7814 
7815 	char *path = pathBuffer.LockBuffer();
7816 
7817 	if (!IS_USER_ADDRESS(userPath)
7818 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7819 		return B_BAD_ADDRESS;
7820 
7821 	return common_access(path, mode, false);
7822 }
7823 
7824 
/*!	Implements the stat()/lstat()/fstat() family of syscalls. The node is
	designated either by \a userPath (relative to \a fd) or, if \a userPath
	is \c NULL, by \a fd itself. \a statSize allows smaller userland stat
	structures; only that many bytes are copied back.
*/
status_t
_user_read_stat(int fd, const char *userPath, bool traverseLink,
	struct stat *userStat, size_t statSize)
{
	struct stat stat;
	status_t status;

	// refuse structures larger than what the kernel knows about
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userStat))
		return B_BAD_ADDRESS;

	if (userPath) {
		// path given: get the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char *path = pathBuffer.LockBuffer();

		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
		if (length < B_OK)
			return length;
		if (length >= B_PATH_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, &stat);
		else
			status = EOPNOTSUPP;

		// balances the get_fd() reference above
		put_fd(descriptor);
	}

	if (status < B_OK)
		return status;

	// copy only as many bytes as the caller's stat structure holds
	return user_memcpy(userStat, &stat, statSize);
}
7876 
7877 
/*!	Writes stat fields selected by \a statMask to the node designated either
	by \a userPath (relative to \a fd) or, if \a userPath is \c NULL, by
	\a fd itself. \a statSize allows smaller userland stat structures; the
	missing trailing fields are zero-filled.
*/
status_t
_user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
	const struct stat *userStat, size_t statSize, int statMask)
{
	// refuse structures larger than what the kernel knows about
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	struct stat stat;

	if (!IS_USER_ADDRESS(userStat)
		|| user_memcpy(&stat, userStat, statSize) < B_OK)
		return B_BAD_ADDRESS;

	// clear additional stat fields
	if (statSize < sizeof(struct stat))
		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);

	status_t status;

	if (userPath) {
		// path given: write the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char *path = pathBuffer.LockBuffer();

		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
		if (length < B_OK)
			return length;
		if (length >= B_PATH_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
			statMask, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
		else
			status = EOPNOTSUPP;

		// balances the get_fd() reference above
		put_fd(descriptor);
	}

	return status;
}
7933 
7934 
7935 int
7936 _user_open_attr_dir(int fd, const char *userPath)
7937 {
7938 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7939 	if (pathBuffer.InitCheck() != B_OK)
7940 		return B_NO_MEMORY;
7941 
7942 	char *path = pathBuffer.LockBuffer();
7943 
7944 	if (userPath != NULL) {
7945 		if (!IS_USER_ADDRESS(userPath)
7946 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7947 			return B_BAD_ADDRESS;
7948 	}
7949 
7950 	return attr_dir_open(fd, userPath ? path : NULL, false);
7951 }
7952 
7953 
7954 int
7955 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7956 {
7957 	char name[B_FILE_NAME_LENGTH];
7958 
7959 	if (!IS_USER_ADDRESS(userName)
7960 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7961 		return B_BAD_ADDRESS;
7962 
7963 	return attr_create(fd, name, type, openMode, false);
7964 }
7965 
7966 
7967 int
7968 _user_open_attr(int fd, const char *userName, int openMode)
7969 {
7970 	char name[B_FILE_NAME_LENGTH];
7971 
7972 	if (!IS_USER_ADDRESS(userName)
7973 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7974 		return B_BAD_ADDRESS;
7975 
7976 	return attr_open(fd, name, openMode, false);
7977 }
7978 
7979 
7980 status_t
7981 _user_remove_attr(int fd, const char *userName)
7982 {
7983 	char name[B_FILE_NAME_LENGTH];
7984 
7985 	if (!IS_USER_ADDRESS(userName)
7986 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7987 		return B_BAD_ADDRESS;
7988 
7989 	return attr_remove(fd, name, false);
7990 }
7991 
7992 
7993 status_t
7994 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
7995 {
7996 	if (!IS_USER_ADDRESS(userFromName)
7997 		|| !IS_USER_ADDRESS(userToName))
7998 		return B_BAD_ADDRESS;
7999 
8000 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
8001 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
8002 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
8003 		return B_NO_MEMORY;
8004 
8005 	char *fromName = fromNameBuffer.LockBuffer();
8006 	char *toName = toNameBuffer.LockBuffer();
8007 
8008 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
8009 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
8010 		return B_BAD_ADDRESS;
8011 
8012 	return attr_rename(fromFile, fromName, toFile, toName, false);
8013 }
8014 
8015 
8016 int
8017 _user_open_index_dir(dev_t device)
8018 {
8019 	return index_dir_open(device, false);
8020 }
8021 
8022 
8023 status_t
8024 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
8025 {
8026 	char name[B_FILE_NAME_LENGTH];
8027 
8028 	if (!IS_USER_ADDRESS(userName)
8029 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8030 		return B_BAD_ADDRESS;
8031 
8032 	return index_create(device, name, type, flags, false);
8033 }
8034 
8035 
8036 status_t
8037 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
8038 {
8039 	char name[B_FILE_NAME_LENGTH];
8040 	struct stat stat;
8041 	status_t status;
8042 
8043 	if (!IS_USER_ADDRESS(userName)
8044 		|| !IS_USER_ADDRESS(userStat)
8045 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8046 		return B_BAD_ADDRESS;
8047 
8048 	status = index_name_read_stat(device, name, &stat, false);
8049 	if (status == B_OK) {
8050 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
8051 			return B_BAD_ADDRESS;
8052 	}
8053 
8054 	return status;
8055 }
8056 
8057 
8058 status_t
8059 _user_remove_index(dev_t device, const char *userName)
8060 {
8061 	char name[B_FILE_NAME_LENGTH];
8062 
8063 	if (!IS_USER_ADDRESS(userName)
8064 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8065 		return B_BAD_ADDRESS;
8066 
8067 	return index_remove(device, name, false);
8068 }
8069 
8070 
8071 status_t
8072 _user_getcwd(char *userBuffer, size_t size)
8073 {
8074 	if (!IS_USER_ADDRESS(userBuffer))
8075 		return B_BAD_ADDRESS;
8076 
8077 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8078 	if (pathBuffer.InitCheck() != B_OK)
8079 		return B_NO_MEMORY;
8080 
8081 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
8082 
8083 	if (size > B_PATH_NAME_LENGTH)
8084 		size = B_PATH_NAME_LENGTH;
8085 
8086 	char *path = pathBuffer.LockBuffer();
8087 
8088 	status_t status = get_cwd(path, size, false);
8089 	if (status < B_OK)
8090 		return status;
8091 
8092 	// Copy back the result
8093 	if (user_strlcpy(userBuffer, path, size) < B_OK)
8094 		return B_BAD_ADDRESS;
8095 
8096 	return status;
8097 }
8098 
8099 
8100 status_t
8101 _user_setcwd(int fd, const char *userPath)
8102 {
8103 	TRACE(("user_setcwd: path = %p\n", userPath));
8104 
8105 	KPath pathBuffer(B_PATH_NAME_LENGTH);
8106 	if (pathBuffer.InitCheck() != B_OK)
8107 		return B_NO_MEMORY;
8108 
8109 	char *path = pathBuffer.LockBuffer();
8110 
8111 	if (userPath != NULL) {
8112 		if (!IS_USER_ADDRESS(userPath)
8113 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8114 			return B_BAD_ADDRESS;
8115 	}
8116 
8117 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
8118 }
8119 
8120 
8121 int
8122 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
8123 	uint32 flags, port_id port, int32 token)
8124 {
8125 	char *query;
8126 
8127 	if (device < 0 || userQuery == NULL || queryLength == 0)
8128 		return B_BAD_VALUE;
8129 
8130 	// this is a safety restriction
8131 	if (queryLength >= 65536)
8132 		return B_NAME_TOO_LONG;
8133 
8134 	query = (char *)malloc(queryLength + 1);
8135 	if (query == NULL)
8136 		return B_NO_MEMORY;
8137 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
8138 		free(query);
8139 		return B_BAD_ADDRESS;
8140 	}
8141 
8142 	int fd = query_open(device, query, flags, port, token, false);
8143 
8144 	free(query);
8145 	return fd;
8146 }
8147