// xref: /haiku/src/system/kernel/fs/vfs.cpp (revision a381c8a06378de22ff08adf4282b4e3f7e50d250)
/*
 * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */

/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <OS.h>
#include <StorageDefs.h>

#include <util/AutoLock.h>

#include <block_cache.h>
#include <fd.h>
#include <file_cache.h>
#include <khash.h>
#include <KPath.h>
#include <lock.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <vfs.h>
#include <vm.h>
#include <vm_cache.h>
#include <vm_low_memory.h>

#include <boot/kernel_args.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fs/node_monitor.h>


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS

const static uint32 kMaxUnusedVnodes = 8192;
	// This is the maximum number of unused vnodes that the system
	// will keep around (a weak limit: if there is enough memory left,
	// they won't get flushed even when hitting that limit).
	// It could be chosen with respect to the available memory or refined
	// by some timestamp/frequency heuristic.

struct vnode {
	struct vnode	*next;
	vm_cache		*cache;
	dev_t			device;
	list_link		mount_link;
	list_link		unused_link;
	ino_t			id;
	fs_vnode		private_node;
	struct fs_mount	*mount;
	struct vnode	*covered_by;
	int32			ref_count;
	uint8			remove : 1;
	uint8			busy : 1;
	uint8			unpublished : 1;
	struct advisory_locking	*advisory_locking;
	struct file_descriptor *mandatory_locked_by;
};

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

#define FS_CALL(vnode, op) (vnode->mount->fs->op)
#define FS_MOUNT_CALL(mount, op) (mount->fs->op)

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and covers_vnode fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking.
	Moreover, while mounted the mount holds a reference to the
	covers_vnode, thus making the access path
	vnode->mount->covers_vnode->mount->... safe if a reference to vnode
	is held (note that for the root mount covers_vnode is NULL, though).
*/
struct fs_mount {
	struct fs_mount	*next;
	file_system_module_info *fs;
	dev_t			id;
	void			*cookie;
	char			*device_name;
	char			*fs_name;
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;
	struct vnode	*covers_vnode;
	KPartition		*partition;
	struct list		vnodes;
	bool			unmounting;
	bool			owns_file_device;
};
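
#if 0
// Illustrative sketch (not part of the original file): the doc comment above
// notes that following vnode->mount->covers_vnode->... is safe as long as a
// reference to the starting vnode is held. Walking up to the root mount could
// then look like this (the helper name is hypothetical):
static struct fs_mount *
walk_to_root_mount(struct vnode *vnode)
{
	struct fs_mount *mount = vnode->mount;
	while (mount->covers_vnode != NULL)
		mount = mount->covers_vnode->mount;

	// the root mount is the only one whose covers_vnode is NULL
	return mount;
}
#endif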

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;
};

static mutex sFileSystemsMutex;

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static mutex sMountMutex;

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, holding the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are
	  immutable after initialization will not be modified,
	- vnode::covered_by of any vnode in sVnodeTable will not be modified.

	The thread trying to lock the lock must not hold sVnodeMutex or
	sMountMutex.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards the vnode::covered_by field of any vnode

	The holder is allowed read access to the vnode::covered_by field of any
	vnode. Additionally holding sMountOpLock allows for write access.

	The thread trying to lock this mutex must not hold sVnodeMutex.
*/
static mutex sVnodeCoveredByMutex;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, except for the immutable fields (device,
	id, private_node, mount), to which only read-only access is allowed,
	and for the covered_by field, which is guarded by sMountOpLock and
	sVnodeCoveredByMutex.

	The thread trying to lock the mutex must not hold sMountMutex.
	You must not have this mutex held when calling create_sem(), as this
	might call vfs_free_unused_vnodes().
*/
static mutex sVnodeMutex;

#define VNODE_HASH_TABLE_SIZE 1024
static hash_table *sVnodeTable;
static list sUnusedVnodeList;
static uint32 sUnusedVnodes = 0;
static struct vnode *sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static hash_table *sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

mode_t __gUmask = 022;

/* function declarations */

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
static void file_free_fd(struct file_descriptor *);
static status_t file_close(struct file_descriptor *);
static status_t file_select(struct file_descriptor *, uint8 event,
	struct selectsync *sync);
static status_t file_deselect(struct file_descriptor *, uint8 event,
	struct selectsync *sync);
static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t dir_rewind(struct file_descriptor *);
static void dir_free_fd(struct file_descriptor *);
static status_t dir_close(struct file_descriptor *);
static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t attr_dir_rewind(struct file_descriptor *);
static void attr_dir_free_fd(struct file_descriptor *);
static status_t attr_dir_close(struct file_descriptor *);
static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
static void attr_free_fd(struct file_descriptor *);
static status_t attr_close(struct file_descriptor *);
static status_t attr_read_stat(struct file_descriptor *, struct stat *);
static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t index_dir_rewind(struct file_descriptor *);
static void index_dir_free_fd(struct file_descriptor *);
static status_t index_dir_close(struct file_descriptor *);
static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
static status_t query_rewind(struct file_descriptor *);
static void query_free_fd(struct file_descriptor *);
static status_t query_close(struct file_descriptor *);

static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
static status_t common_read_stat(struct file_descriptor *, struct stat *);
static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);

static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
	bool traverseLeafLink, int count, struct vnode **_vnode, ino_t *_parentID, int *_type);
static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
	struct vnode **_vnode, ino_t *_parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode *vnode);
static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
static inline void put_vnode(struct vnode *vnode);
static status_t fs_unmount(char *path, dev_t mountID, uint32 flags,
	bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


// VNodePutter
class VNodePutter {
public:
	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}

	~VNodePutter()
	{
		Put();
	}

	void SetTo(struct vnode *vnode)
	{
		Put();
		fVNode = vnode;
	}

	void Put()
	{
		if (fVNode) {
			put_vnode(fVNode);
			fVNode = NULL;
		}
	}

	struct vnode *Detach()
	{
		struct vnode *vnode = fVNode;
		fVNode = NULL;
		return vnode;
	}

private:
	struct vnode *fVNode;
};
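
#if 0
// Usage sketch (illustrative only, not part of the original file): VNodePutter
// is a scope-bound guard that put_vnode()s on destruction, so every early
// return releases the reference automatically.
static status_t
example_vnode_putter(dev_t mountID, ino_t vnodeID)
{
	struct vnode *vnode;
	status_t status = get_vnode(mountID, vnodeID, &vnode, true, false);
	if (status != B_OK)
		return status;

	VNodePutter putter(vnode);
		// from here on, every return path put_vnode()s the reference

	if (vnode->cache == NULL)
		return B_ERROR;
			// early return: putter releases the reference here

	return B_OK;
		// normal return: the reference is released here, too
}
#endif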


class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};
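
#if 0
// Usage sketch (illustrative only, not part of the original file): FDCloser
// closes the descriptor unless Detach() is called, which is handy when an FD
// may only survive the success path. open_some_fd() and check_something()
// are hypothetical placeholders.
static int
example_fd_closer(bool kernel)
{
	int fd = open_some_fd();
	if (fd < 0)
		return fd;

	FDCloser fdCloser(fd, kernel);

	status_t status = check_something();
	if (status != B_OK)
		return status;
			// fdCloser closes the FD on this path

	return fdCloser.Detach();
		// success: hand the still-open FD to the caller
}
#endif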


static int
mount_compare(void *_m, const void *_key)
{
	struct fs_mount *mount = (fs_mount *)_m;
	const dev_t *id = (dev_t *)_key;

	if (mount->id == *id)
		return 0;

	return -1;
}


static uint32
mount_hash(void *_m, const void *_key, uint32 range)
{
	struct fs_mount *mount = (fs_mount *)_m;
	const dev_t *id = (dev_t *)_key;

	if (mount)
		return mount->id % range;

	return (uint32)*id % range;
}


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount *
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}


static status_t
get_mount(dev_t id, struct fs_mount **_mount)
{
	struct fs_mount *mount;
	status_t status;

	MutexLocker nodeLocker(sVnodeMutex);
	MutexLocker mountLocker(sMountMutex);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(mount->root_vnode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount *mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info *
get_file_system(const char *fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info *info;
	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
		return NULL;

	return info;
}
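
#if 0
// Example (illustrative only): both of these calls resolve to the same
// module, since a short name is expanded to the "file_systems/<name>/v1"
// form before get_module() is called:
file_system_module_info *bfs1 = get_file_system("bfs");
file_system_module_info *bfs2 = get_file_system("file_systems/bfs/v1");
#endif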


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char *
get_file_system_name(const char *fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name is not a module path; assume it's already the short name
		return strdup(fsName);
	}

	fsName += length;
	const char *end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char *name = (char *)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
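
#if 0
// Example (illustrative only): the "file_systems/" prefix and the trailing
// version component are cut off; short names pass through unchanged:
char *a = get_file_system_name("file_systems/bfs/v1");	// -> "bfs"
char *b = get_file_system_name("bfs");					// -> "bfs"
free(a);
free(b);
#endif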


static int
vnode_compare(void *_vnode, const void *_key)
{
	struct vnode *vnode = (struct vnode *)_vnode;
	const struct vnode_hash_key *key = (vnode_hash_key *)_key;

	if (vnode->device == key->device && vnode->id == key->vnode)
		return 0;

	return -1;
}


static uint32
vnode_hash(void *_vnode, const void *_key, uint32 range)
{
	struct vnode *vnode = (struct vnode *)_vnode;
	const struct vnode_hash_key *key = (vnode_hash_key *)_key;

#define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	if (vnode != NULL)
		return VHASH(vnode->device, vnode->id) % range;

	return VHASH(key->device, key->vnode) % range;

#undef VHASH
}
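
// Worked example (illustrative only) of the VHASH mix above: for a vnode with
// device 3 and the 64-bit ID 0x0000000200000005, the two ID halves are folded
// together and XORed with the device:
//   ((uint32)(0x0000000200000005 >> 32) + (uint32)0x0000000200000005) ^ 3
//   = (2 + 5) ^ 3 = 4,
// which is then taken modulo the table size.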


static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}


static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}


static status_t
create_new_vnode(struct vnode **_vnode, dev_t mountID, ino_t vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode *vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has a chance of
	// ever dropping to 0. Deleting the file cache now will cause the
	// next-to-last cache reference to be released, which will also release
	// a (no longer existing) vnode reference. To avoid problems, we set the
	// vnode's ref count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
	// cache either (i.e. no one can get a cache reference while we're deleting
	// the vnode). This is, however, not the case for the page daemon. It gets
	// its cache references via the pages it scans, so it can in fact get a
	// vnode reference while we're deleting the vnode.

	if (!vnode->unpublished) {
		if (vnode->remove) {
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie,
				vnode->private_node, reenter);
		} else {
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node,
				reenter);
		}
	}

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeMutex or the sMountMutex.

	\param vnode the vnode.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	mutex_lock(&sVnodeMutex);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		if (vnode->busy)
			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes
				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		mutex_unlock(&sVnodeMutex);

		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must either already have a reference to the vnode or hold
	the sVnodeMutex.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode *vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode *
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return (vnode *)hash_lookup(sVnodeTable, &key);
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeMutex or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if it is allowed to wait (and retry) should the
		   vnode currently be busy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

	int32 tries = 1000;
		// try for 10 secs
restart:
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		mutex_unlock(&sVnodeMutex);
		if (!canWait || --tries < 0) {
			// vnode doesn't seem to become unbusy
			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
			return B_BUSY;
		}
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID,
			&vnode->private_node, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeMutex or the sMountMutex.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode *vnode)
{
	dec_vnode_ref_count(vnode, false);
}


static void
vnode_low_memory_handler(void */*data*/, int32 level)
{
	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));

	uint32 count = 1;
	switch (level) {
		case B_NO_LOW_MEMORY:
			return;
		case B_LOW_MEMORY_NOTE:
			count = sUnusedVnodes / 100;
			break;
		case B_LOW_MEMORY_WARNING:
			count = sUnusedVnodes / 10;
			break;
		case B_LOW_MEMORY_CRITICAL:
			count = sUnusedVnodes;
			break;
	}

	if (count > sUnusedVnodes)
		count = sUnusedVnodes;

	// first, write back the modified pages of some unused vnodes

	uint32 freeCount = count;

	for (uint32 i = 0; i < count; i++) {
		mutex_lock(&sVnodeMutex);
		struct vnode *vnode = (struct vnode *)list_remove_head_item(
			&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}

		inc_vnode_ref_count(vnode);
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		if (vnode->cache != NULL)
			vm_cache_write_modified(vnode->cache, false);

		dec_vnode_ref_count(vnode, false);
	}

	// and then free them

	for (uint32 i = 0; i < freeCount; i++) {
		mutex_lock(&sVnodeMutex);

		// We're removing vnodes from the tail of the list - hoping it's
		// one of those we have just written back; otherwise we'll write
		// back the vnode with the busy flag turned on, and that might
		// take some time.
		struct vnode *vnode = (struct vnode *)list_remove_tail_item(
			&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}
		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));

		vnode->busy = true;
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		free_vnode(vnode, false);
	}
}


static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking *
get_advisory_locking(struct vnode *vnode)
{
	mutex_lock(&sVnodeMutex);

	struct advisory_locking *locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	mutex_unlock(&sVnodeMutex);

	if (lock >= B_OK)
		lock = acquire_sem(lock);
	if (lock < B_OK) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success - also if the vnode got such an
	object from someone else in the mean time, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode *vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	struct advisory_locking *locking = new(std::nothrow) advisory_locking;
	if (locking == NULL)
		return B_NO_MEMORY;

	status_t status;

	locking->wait_sem = create_sem(0, "advisory lock");
	if (locking->wait_sem < B_OK) {
		status = locking->wait_sem;
		goto err1;
	}

	locking->lock = create_sem(0, "advisory locking");
	if (locking->lock < B_OK) {
		status = locking->lock;
		goto err2;
	}

	// We need to set the locking structure atomically - someone
	// else might set one at the same time
	do {
		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking,
				(addr_t)locking, NULL) == NULL)
			return B_OK;
	} while (get_advisory_locking(vnode) == NULL);

	status = B_OK;
		// we delete the one we've just created, but nevertheless, the vnode
		// does have a locking structure now

	delete_sem(locking->lock);
err2:
	delete_sem(locking->wait_sem);
err1:
	delete locking;
	return status;
}
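
// Illustrative walk-through (not part of the original file) of the race
// resolution above: two threads T1 and T2 both call create_advisory_locking()
// on the same vnode.
//  1. T1's atomic_test_and_set() succeeds; the vnode now points to T1's
//     locking object, which T1 still holds locked (its sem was created with
//     a count of 0) - T1 returns B_OK.
//  2. T2's atomic_test_and_set() fails, so T2 tries get_advisory_locking(),
//     which blocks on T1's lock sem. Once that succeeds, T2 deletes its own,
//     never-published object and returns B_OK holding T1's.
//  3. Should T1's object disappear before T2 gets the sem, acquire_sem()
//     fails, get_advisory_locking() returns NULL, and T2 retries the
//     atomic set.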


/*!	Retrieves the first lock that has been set by the current team.
*/
static status_t
get_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_BAD_VALUE;

	// TODO: this should probably get the flock by its file descriptor!
	team_id team = team_get_current_team_id();
	status_t status = B_BAD_VALUE;

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock *lock = iterator.Next();

		if (lock->team == team) {
			flock->l_start = lock->start;
			flock->l_len = lock->end - lock->start + 1;
			status = B_OK;
			break;
		}
	}

	put_advisory_locking(locking);
	return status;
}


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock *lock, struct flock *flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
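
// Worked example (illustrative only): a lock covering bytes [100, 199]
// (start = 100, end = 199) and an flock with l_start = 150, l_len = 100
// (i.e. bytes [150, 249]) intersect, since
//   100 <= 150 - 1 + 100 = 249  and  199 >= 150.
// An flock at l_start = 200 would not: 199 >= 200 is false.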


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// TODO: use the thread ID instead??
	team_id team = team_get_current_team_id();
	pid_t session = thread_get_current_thread()->team->session_id;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock *lock = iterator.Next();
		bool removeLock = false;

		if (lock->session == session)
			removeLock = true;
		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock *secondLock = (struct advisory_lock *)
					malloc(sizeof(struct advisory_lock));
					// allocated with malloc() to match how the locks are
					// created in acquire_advisory_lock() and freed below
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// copy the original end before truncating the first lock
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			free(lock);
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			// the locking could have been changed in the mean time
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete_sem(locking->lock);
				delete_sem(locking->wait_sem);
				delete locking;
			} else {
				// the locking is in use again
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}
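
// Worked example (illustrative only) of the four region cases above, with an
// existing lock on bytes [100, 199]:
//  - flock [100, 199] (or larger): the lock is removed entirely.
//  - flock [150, 199]: "cut the end" leaves [100, 149].
//  - flock [100, 149]: "cut the start" leaves [150, 199].
//  - flock [140, 159]: the lock is split into [100, 139] and [160, 199].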


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).

	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode *vnode, pid_t session, struct flock *flock,
	bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

	// TODO: do deadlock detection!

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding file lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	team_id team = team_get_current_team_id();
	sem_id waitForLock = -1;

	if (locking != NULL) {
		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock *lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		else {
			status = switch_sem_etc(locking->lock, waitForLock, 1,
				B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install new lock

	locking = get_advisory_locking(vnode);
	if (locking == NULL) {
		// we need to create a new locking object
		status = create_advisory_locking(vnode);
		if (status < B_OK)
			return status;

		locking = vnode->advisory_locking;
			// we own the locking object, so it can't go away
	}

	struct advisory_lock *lock = (struct advisory_lock *)malloc(
		sizeof(struct advisory_lock));
	if (lock == NULL) {
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	lock->session = session;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}


/*!	Normalizes the \a flock structure to make it easier to compare the
	structure with others. The l_start and l_len fields are set to absolute
	values according to the l_whence field.
*/
static status_t
normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			struct vnode *vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (FS_CALL(vnode, read_stat) == NULL)
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
				vnode->private_node, &stat);
			if (status < B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
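
// Example (illustrative only): with a descriptor positioned at pos = 1000,
// an flock of { l_whence = SEEK_CUR, l_start = -200, l_len = 100 } is
// normalized to l_start = 800, l_len = 100 (bytes [800, 899]). A negative
// length such as { SEEK_SET, l_start = 500, l_len = -100 } becomes
// l_start = 400, l_len = 100, and l_len = 0 extends the region to OFF_MAX.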


/*!	Disconnects all file descriptors that are associated with the
	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
	\a mount object.

	Note, after you've called this function, there might still be ongoing
	accesses - accesses that started before the call won't be interrupted,
	but any subsequent access will fail.

	This is not a cheap function and should be used with care and rarely.
	TODO: there is currently no means to stop a blocking read/write!
*/
void
disconnect_mount_or_vnode_fds(struct fs_mount *mount,
	struct vnode *vnodeToDisconnect)
{
	// iterate over all teams and peek into their file descriptors
	int32 nextTeamID = 0;

	while (true) {
		struct io_context *context = NULL;
		sem_id contextMutex = -1;
		struct team *team = NULL;
		team_id lastTeamID;

		cpu_status state = disable_interrupts();
		GRAB_TEAM_LOCK();

		lastTeamID = peek_next_thread_id();
		if (nextTeamID < lastTeamID) {
			// get next valid team
			while (nextTeamID < lastTeamID
				&& !(team = team_get_team_struct_locked(nextTeamID))) {
				nextTeamID++;
			}

			if (team) {
				context = (io_context *)team->io_context;
				contextMutex = context->io_mutex.sem;
				nextTeamID++;
			}
		}

		RELEASE_TEAM_LOCK();
		restore_interrupts(state);

		if (context == NULL)
			break;

		// we now have a context - since we couldn't lock it while having
		// safe access to the team structure, we now need to lock the mutex
		// manually

		if (acquire_sem(contextMutex) != B_OK) {
			// team seems to be gone, go over to the next team
			continue;
		}

		// the team cannot be deleted completely while we're owning its
		// io_context mutex, so we can safely play with it now

		context->io_mutex.holder = thread_get_current_thread_id();

		if (context->cwd != NULL && context->cwd->mount == mount
			&& (vnodeToDisconnect == NULL
				|| vnodeToDisconnect == context->cwd)) {
			put_vnode(context->cwd);
				// Note: We're only accessing the pointer, not the vnode itself
				// in the lines below.

			if (context->cwd == mount->root_vnode) {
				// redirect the current working directory to the covered vnode
				context->cwd = mount->covers_vnode;
				inc_vnode_ref_count(context->cwd);
			} else
				context->cwd = NULL;
		}

		for (uint32 i = 0; i < context->table_size; i++) {
			if (struct file_descriptor *descriptor = context->fds[i]) {
				inc_fd_ref_count(descriptor);

				// if this descriptor points at this mount, we
				// need to disconnect it to be able to unmount
				struct vnode *vnode = fd_vnode(descriptor);
				if (vnodeToDisconnect != NULL) {
					if (vnode == vnodeToDisconnect)
						disconnect_fd(descriptor);
				} else if ((vnode != NULL && vnode->mount == mount)
					|| (vnode == NULL && descriptor->u.mount == mount))
					disconnect_fd(descriptor);

				put_fd(descriptor);
			}
		}

		mutex_unlock(&context->io_mutex);
	}
}


/*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
		   by.

	Given an arbitrary vnode, the function checks, whether the node is covered
	by the root of a volume. If it is, the function obtains a reference to the
	volume root node and returns it.

	\param vnode The vnode in question.
	\return The volume root vnode the vnode is covered by, if it is
			indeed a mount point, or \c NULL otherwise.
*/
static struct vnode *
resolve_mount_point_to_volume_root(struct vnode *vnode)
{
	if (!vnode)
		return NULL;

	struct vnode *volumeRoot = NULL;

	mutex_lock(&sVnodeCoveredByMutex);
	if (vnode->covered_by) {
		volumeRoot = vnode->covered_by;
		inc_vnode_ref_count(volumeRoot);
	}
	mutex_unlock(&sVnodeCoveredByMutex);

	return volumeRoot;
}


/*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
		   by.

	Given an arbitrary vnode (identified by mount and node ID), the function
	checks, whether the node is covered by the root of a volume. If it is, the
	function returns the mount and node ID of the volume root node. Otherwise
	it simply returns the supplied mount and node ID.

	In case of error (e.g. the supplied node could not be found) the variables
	for storing the resolved mount and node ID remain untouched and an error
	code is returned.

	\param mountID The mount ID of the vnode in question.
	\param nodeID The node ID of the vnode in question.
	\param resolvedMountID Pointer to storage for the resolved mount ID.
	\param resolvedNodeID Pointer to storage for the resolved node ID.
	\return
	- \c B_OK, if everything went fine,
	- another error code, if something went wrong.
*/
status_t
resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
	dev_t *resolvedMountID, ino_t *resolvedNodeID)
{
	// get the node
	struct vnode *node;
	status_t error = get_vnode(mountID, nodeID, &node, true, false);
	if (error != B_OK)
		return error;

	// resolve the node
	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
	if (resolvedNode) {
		put_vnode(node);
		node = resolvedNode;
	}

	// set the return values
	*resolvedMountID = node->device;
	*resolvedNodeID = node->id;

	put_vnode(node);

	return B_OK;
}


/*!	\brief Resolves a volume root vnode to the underlying mount point vnode.

	Given an arbitrary vnode, the function checks, whether the node is the
	root of a volume. If it is (and if it is not "/"), the function obtains
	a reference to the underlying mount point node and returns it.

	\param vnode The vnode in question (caller must have a reference).
	\return The mount point vnode the vnode covers, if it is indeed a volume
			root and not "/", or \c NULL otherwise.
*/
static struct vnode *
resolve_volume_root_to_mount_point(struct vnode *vnode)
{
	if (!vnode)
		return NULL;

	struct vnode *mountPoint = NULL;

	struct fs_mount *mount = vnode->mount;
	if (vnode == mount->root_vnode && mount->covers_vnode) {
		mountPoint = mount->covers_vnode;
		inc_vnode_ref_count(mountPoint);
	}

	return mountPoint;
}


/*!	\brief Gets the directory path and leaf name for a given path.

	The supplied \a path is transformed to refer to the directory part of
	the entry identified by the original path, and into the buffer \a filename
	the leaf name of the original entry is written.
	Neither the returned path nor the leaf name can be expected to be
	canonical.

	\param path The path to be analyzed. Must be able to store at least one
		   additional character.
	\param filename The buffer into which the leaf name will be written.
		   Must be of size B_FILE_NAME_LENGTH at least.
	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
		   if the given path name is empty.
*/
static status_t
get_dir_path_and_leaf(char *path, char *filename)
{
	if (*path == '\0')
		return B_ENTRY_NOT_FOUND;

	char *p = strrchr(path, '/');
		// '/' are not allowed in file names!

	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));

	if (!p) {
		// this path is single segment with no '/' in it
		// ex. "foo"
		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
		strcpy(path, ".");
	} else {
		p++;
		if (*p == '\0') {
			// special case: the path ends in '/'
			strcpy(filename, ".");
		} else {
			// normal leaf: replace the leaf portion of the path with a '.'
			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
				>= B_FILE_NAME_LENGTH) {
				return B_NAME_TOO_LONG;
			}
		}
		p[0] = '.';
		p[1] = '\0';
	}
	return B_OK;
}
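
// Examples (illustrative only) of the in-place transformation above:
//   path "a/b/c"  -> path "a/b/.",   filename "c"
//   path "a/b/c/" -> path "a/b/c/.", filename "."
//   path "foo"    -> path ".",       filename "foo"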


static status_t
entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char *name,
	struct vnode **_vnode)
{
	char clonedName[B_FILE_NAME_LENGTH + 1];
	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the directory vnode and let vnode_path_to_vnode() do the rest
	struct vnode *directory;

	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
	if (status < 0)
		return status;

	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL,
		NULL);
}


/*!	Returns the vnode for the relative path starting at the specified \a vnode.
	\a path must not be NULL.
	If it returns successfully, \a path contains the name of the last path
	component.
	Note, this reduces the ref_count of the starting \a vnode, no matter if
	it is successful or not!
*/
static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, ino_t *_parentID, int *_type)
{
	status_t status = 0;
	ino_t lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL) {
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	if (*path == '\0') {
		put_vnode(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	while (true) {
		struct vnode *nextVnode;
		ino_t vnodeID;
		char *nextPath;

		TRACE(("vnode_path_to_vnode: top of loop. path = %p, path = '%s'\n",
			path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying filesystem
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the filesystem to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
		// from inside the filesystem, thus the vnode would have to be in the list and its
		// ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			TRACE(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			if (count + 1 > B_MAX_SYMLINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			if (FS_CALL(nextVnode, read_symlink) != NULL) {
				status = FS_CALL(nextVnode, read_symlink)(
					nextVnode->mount->cookie, nextVnode->private_node, buffer,
					&bufferSize);
			} else
				status = B_BAD_VALUE;

			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			bool absoluteSymlink = false;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);

				absoluteSymlink = true;
			}

			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the
				// ref_count of the vnode, no matter if we succeeded or not

			if (absoluteSymlink && *path == '\0') {
				// symlink was just "/"
				nextVnode = vnode;
			} else {
				status = vnode_path_to_vnode(vnode, path, traverseLeafLink,
					count + 1, &nextVnode, &lastParentID, _type);
			}

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}
1859 
1860 
1861 static status_t
1862 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1863 	ino_t *_parentID, bool kernel)
1864 {
1865 	struct vnode *start = NULL;
1866 
1867 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1868 
1869 	if (!path)
1870 		return B_BAD_VALUE;
1871 
1872 	if (*path == '\0')
1873 		return B_ENTRY_NOT_FOUND;
1874 
1875 	// figure out if we need to start at root or at cwd
1876 	if (*path == '/') {
1877 		if (sRoot == NULL) {
1878 			// we're a bit early, aren't we?
1879 			return B_ERROR;
1880 		}
1881 
1882 		while (*++path == '/')
1883 			;
1884 		start = sRoot;
1885 		inc_vnode_ref_count(start);
1886 
1887 		if (*path == '\0') {
1888 			*_vnode = start;
1889 			return B_OK;
1890 		}
1891 
1892 	} else {
1893 		struct io_context *context = get_current_io_context(kernel);
1894 
1895 		mutex_lock(&context->io_mutex);
1896 		start = context->cwd;
1897 		if (start != NULL)
1898 			inc_vnode_ref_count(start);
1899 		mutex_unlock(&context->io_mutex);
1900 
1901 		if (start == NULL)
1902 			return B_ERROR;
1903 	}
1904 
1905 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1906 }
1907 
1908 
1909 /*! Returns the vnode of the next-to-last segment of the path, and returns
1910 	the last portion in filename.
1911 	The path buffer must be able to store at least one additional character.
1912 */
1913 static status_t
1914 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1915 {
1916 	status_t status = get_dir_path_and_leaf(path, filename);
1917 	if (status != B_OK)
1918 		return status;
1919 
1920 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1921 }
1922 
1923 
1924 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
1925 		   to by a FD + path pair.
1926 
1927 	\a path must be given in either case. \a fd might be omitted, in which
1928 	case \a path is either an absolute path or one relative to the current
1929 	directory. If both are supplied and \a path is relative, it is reckoned off
1930 	of the directory referred to by \a fd. If \a path is absolute \a fd is
1931 	ignored.
1932 
1933 	The caller has the responsibility to call put_vnode() on the returned
1934 	directory vnode.
1935 
1936 	\param fd The FD. May be < 0.
1937 	\param path The absolute or relative path. Must not be \c NULL. The buffer
1938 	       is modified by this function. It must have at least room for a
1939 	       string one character longer than the path it contains.
1940 	\param _vnode A pointer to a variable the directory vnode shall be written
1941 		   into.
1942 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1943 		   the leaf name of the specified entry will be written.
1944 	\param kernel \c true, if invoked from inside the kernel, \c false if
1945 		   invoked from userland.
1946 	\return \c B_OK, if everything went fine, another error code otherwise.
1947 */
1948 static status_t
1949 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1950 	char *filename, bool kernel)
1951 {
1952 	if (!path)
1953 		return B_BAD_VALUE;
1954 	if (*path == '\0')
1955 		return B_ENTRY_NOT_FOUND;
1956 	if (fd < 0)
1957 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1958 
1959 	status_t status = get_dir_path_and_leaf(path, filename);
1960 	if (status != B_OK)
1961 		return status;
1962 
1963 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1964 }
1965 
1966 
1967 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
1968 		   to by a vnode + path pair.
1969 
1970 	\a path must be given in either case. \a vnode might be omitted, in which
1971 	case \a path is either an absolute path or one relative to the current
1972 	directory. If both are supplied and \a path is relative, it is reckoned off
1973 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
1974 	ignored.
1975 
1976 	The caller has the responsibility to call put_vnode() on the returned
1977 	directory vnode.
1978 
1979 	\param vnode The vnode. May be \c NULL.
1980 	\param path The absolute or relative path. Must not be \c NULL. The buffer
1981 	       is modified by this function. It must have at least room for a
1982 	       string one character longer than the path it contains.
1983 	\param _vnode A pointer to a variable the directory vnode shall be written
1984 		   into.
1985 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1986 		   the leaf name of the specified entry will be written.
1987 	\param kernel \c true, if invoked from inside the kernel, \c false if
1988 		   invoked from userland.
1989 	\return \c B_OK, if everything went fine, another error code otherwise.
1990 */
1991 static status_t
1992 vnode_and_path_to_dir_vnode(struct vnode* vnode, char *path,
1993 	struct vnode **_vnode, char *filename, bool kernel)
1994 {
1995 	if (!path)
1996 		return B_BAD_VALUE;
1997 	if (*path == '\0')
1998 		return B_ENTRY_NOT_FOUND;
1999 	if (vnode == NULL || path[0] == '/')
2000 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2001 
2002 	status_t status = get_dir_path_and_leaf(path, filename);
2003 	if (status != B_OK)
2004 		return status;
2005 
2006 	inc_vnode_ref_count(vnode);
2007 		// vnode_path_to_vnode() always decrements the ref count
2008 
2009 	return vnode_path_to_vnode(vnode, path, true, 0, _vnode, NULL, NULL);
2010 }
2011 
2012 
2013 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2014 */
2015 static status_t
2016 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
2017 	size_t bufferSize)
2018 {
2019 	if (bufferSize < sizeof(struct dirent))
2020 		return B_BAD_VALUE;
2021 
2022 	// See if vnode is the root of a mount and move to the covered
2023 	// vnode so we get the underlying file system
2024 	VNodePutter vnodePutter;
2025 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
2026 		vnode = vnode->mount->covers_vnode;
2027 		inc_vnode_ref_count(vnode);
2028 		vnodePutter.SetTo(vnode);
2029 	}
2030 
2031 	if (FS_CALL(vnode, get_vnode_name)) {
2032 		// The FS supports getting the name of a vnode.
2033 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
2034 			vnode->private_node, buffer->d_name,
2035 			(char*)buffer + bufferSize - buffer->d_name);
2036 	}
2037 
2038 	// The FS doesn't support getting the name of a vnode. So we search the
2039 	// parent directory for the vnode, if the caller let us.
2040 
2041 	if (parent == NULL)
2042 		return EOPNOTSUPP;
2043 
2044 	fs_cookie cookie;
2045 
2046 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
2047 		parent->private_node, &cookie);
2048 	if (status >= B_OK) {
2049 		while (true) {
2050 			uint32 num = 1;
2051 			status = dir_read(parent, cookie, buffer, bufferSize, &num);
2052 			if (status < B_OK)
2053 				break;
2054 			if (num == 0) {
2055 				status = B_ENTRY_NOT_FOUND;
2056 				break;
2057 			}
2058 
2059 			if (vnode->id == buffer->d_ino) {
2060 				// found correct entry!
2061 				break;
2062 			}
2063 		}
2064 
2065 		FS_CALL(parent, close_dir)(parent->mount->cookie, parent->private_node,
2066 			cookie);
2067 		FS_CALL(parent, free_dir_cookie)(parent->mount->cookie,
2068 			parent->private_node, cookie);
2069 	}
2070 	return status;
2071 }
2072 
2073 
2074 static status_t
2075 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
2076 	size_t nameSize)
2077 {
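	// a dirent's d_name field is nominally sized - the extra B_FILE_NAME_LENGTH
	// bytes behind the struct give it room for a full file name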
2078 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2079 	struct dirent *dirent = (struct dirent *)buffer;
2080 
2081 	status_t status = get_vnode_name(vnode, parent, buffer, sizeof(buffer));
2082 	if (status != B_OK)
2083 		return status;
2084 
2085 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2086 		return B_BUFFER_OVERFLOW;
2087 
2088 	return B_OK;
2089 }
2090 
2091 
2092 /*!	Gets the full path to a given directory vnode.
2093 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2094 	file system doesn't support this call, it will fall back to iterating
2095 	through the parent directory to get the name of the child.
2096 
2097 	To protect against circular loops, it supports a maximum tree depth
2098 	of 256 levels.
2099 
2100 	Note that the path may no longer be correct by the time this function returns!
2101 	It doesn't use any locking to prevent returning a stale path, as
2102 	paths aren't safe anyway: the path to a file can change at any time.
2103 
2104 	It might be a good idea, though, to check if the returned path exists
2105 	in the calling function (it's not done here for efficiency reasons).
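	The path is built backwards: starting at the given vnode, each iteration
	looks up "..", determines the child's name within that parent, and
	prepends it to the buffer, until the walk reaches "/".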
2106 */
2107 static status_t
2108 dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
2109 {
2110 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2111 
2112 	if (vnode == NULL || buffer == NULL)
2113 		return B_BAD_VALUE;
2114 
2115 	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
2116 	KPath pathBuffer;
2117 	if (pathBuffer.InitCheck() != B_OK)
2118 		return B_NO_MEMORY;
2119 
2120 	char *path = pathBuffer.LockBuffer();
2121 	int32 insert = pathBuffer.BufferSize();
2122 	int32 maxLevel = 256;
2123 	int32 length;
2124 	status_t status;
2125 
2126 	// we don't use get_vnode() here because this call is more
2127 	// efficient and does all we need from get_vnode()
2128 	inc_vnode_ref_count(vnode);
2129 
2130 	// resolve a volume root to its mount point
2131 	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
2132 	if (mountPoint) {
2133 		put_vnode(vnode);
2134 		vnode = mountPoint;
2135 	}
2136 
2137 	path[--insert] = '\0';
2138 
2139 	while (true) {
2140 		// the name buffer is also used for fs_read_dir()
2141 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2142 		char *name = &((struct dirent *)nameBuffer)->d_name[0];
2143 		struct vnode *parentVnode;
2144 		ino_t parentID;
2145 		int type;
2146 
2147 		// lookup the parent vnode
2148 		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
2149 			&parentID, &type);
2150 		if (status < B_OK)
2151 			goto out;
2152 
2153 		mutex_lock(&sVnodeMutex);
2154 		parentVnode = lookup_vnode(vnode->device, parentID);
2155 		mutex_unlock(&sVnodeMutex);
2156 
2157 		if (parentVnode == NULL) {
2158 			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
2159 				vnode->device, parentID);
2160 			status = B_ENTRY_NOT_FOUND;
2161 			goto out;
2162 		}
2163 
2164 		// get the node's name
2165 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2166 			sizeof(nameBuffer));
2167 
2168 		// resolve a volume root to its mount point
2169 		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
2170 		if (mountPoint) {
2171 			put_vnode(parentVnode);
2172 			parentVnode = mountPoint;
2173 			parentID = parentVnode->id;
2174 		}
2175 
2176 		bool hitRoot = (parentVnode == vnode);
2177 
2178 		// release the current vnode, we only need its parent from now on
2179 		put_vnode(vnode);
2180 		vnode = parentVnode;
2181 
2182 		if (status < B_OK)
2183 			goto out;
2184 
2185 		if (hitRoot) {
2186 			// we have reached "/", which means we have constructed the full
2187 			// path
2188 			break;
2189 		}
2190 
2191 		// ToDo: add an explicit check for loops in about 10 levels to do
2192 		// real loop detection
2193 
2194 		// don't go deeper than 'maxLevel' to prevent circular loops
2195 		if (maxLevel-- < 0) {
2196 			status = ELOOP;
2197 			goto out;
2198 		}
2199 
2200 		// add the name in front of the current path
2201 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2202 		length = strlen(name);
2203 		insert -= length;
2204 		if (insert <= 0) {
2205 			status = ENOBUFS;
2206 			goto out;
2207 		}
2208 		memcpy(path + insert, name, length);
2209 		path[--insert] = '/';
2210 	}
2211 
2212 	// the root dir will result in an empty path: fix it
2213 	if (path[insert] == '\0')
2214 		path[--insert] = '/';
2215 
2216 	TRACE(("  path is: %s\n", path + insert));
2217 
2218 	// copy the path to the output buffer
2219 	length = pathBuffer.BufferSize() - insert;
2220 	if (length <= (int)bufferSize)
2221 		memcpy(buffer, path + insert, length);
2222 	else
2223 		status = ENOBUFS;
2224 
2225 out:
2226 	put_vnode(vnode);
2227 	return status;
2228 }
2229 
2230 
2231 /*!	Checks the length of every path component, and adds a '.'
2232 	if the path ends in a slash.
2233 	The given path buffer must be able to store at least one
2234 	additional character.
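	For example, "/foo/bar/" is turned into "/foo/bar/.".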
2235 */
2236 static status_t
2237 check_path(char *to)
2238 {
2239 	int32 length = 0;
2240 
2241 	// check length of every path component
2242 
2243 	while (*to) {
2244 		char *begin;
2245 		if (*to == '/')
2246 			to++, length++;
2247 
2248 		begin = to;
2249 		while (*to != '/' && *to)
2250 			to++, length++;
2251 
2252 		if (to - begin > B_FILE_NAME_LENGTH)
2253 			return B_NAME_TOO_LONG;
2254 	}
2255 
2256 	if (length == 0)
2257 		return B_ENTRY_NOT_FOUND;
2258 
2259 	// complete path if there is a slash at the end
2260 
2261 	if (*(to - 1) == '/') {
2262 		if (length > B_PATH_NAME_LENGTH - 2)
2263 			return B_NAME_TOO_LONG;
2264 
2265 		to[0] = '.';
2266 		to[1] = '\0';
2267 	}
2268 
2269 	return B_OK;
2270 }
2271 
2272 
2273 static struct file_descriptor *
2274 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2275 {
2276 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2277 	if (descriptor == NULL)
2278 		return NULL;
2279 
2280 	if (fd_vnode(descriptor) == NULL) {
2281 		put_fd(descriptor);
2282 		return NULL;
2283 	}
2284 
2285 	// ToDo: when we can close a file descriptor at any point, investigate
2286 	//	if this is still valid to do (accessing the vnode without ref_count
2287 	//	or locking)
2288 	*_vnode = descriptor->u.vnode;
2289 	return descriptor;
2290 }
2291 
2292 
2293 static struct vnode *
2294 get_vnode_from_fd(int fd, bool kernel)
2295 {
2296 	struct file_descriptor *descriptor;
2297 	struct vnode *vnode;
2298 
2299 	descriptor = get_fd(get_current_io_context(kernel), fd);
2300 	if (descriptor == NULL)
2301 		return NULL;
2302 
2303 	vnode = fd_vnode(descriptor);
2304 	if (vnode != NULL)
2305 		inc_vnode_ref_count(vnode);
2306 
2307 	put_fd(descriptor);
2308 	return vnode;
2309 }
2310 
2311 
2312 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2313 	only the path will be considered. In this case, the \a path must not be
2314 	NULL.
2315 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2316 	and should be NULL for files.
2317 */
2318 static status_t
2319 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2320 	struct vnode **_vnode, ino_t *_parentID, bool kernel)
2321 {
2322 	if (fd < 0 && !path)
2323 		return B_BAD_VALUE;
2324 
2325 	if (path != NULL && *path == '\0')
2326 		return B_ENTRY_NOT_FOUND;
2327 
2328 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2329 		// no FD or absolute path
2330 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2331 	}
2332 
2333 	// FD only, or FD + relative path
2334 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2335 	if (!vnode)
2336 		return B_FILE_ERROR;
2337 
2338 	if (path != NULL) {
2339 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2340 			_vnode, _parentID, NULL);
2341 	}
2342 
2343 	// there is no relative path to take into account
2344 
2345 	*_vnode = vnode;
2346 	if (_parentID)
2347 		*_parentID = -1;
2348 
2349 	return B_OK;
2350 }
2351 
2352 
2353 static int
2354 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2355 	fs_cookie cookie, int openMode, bool kernel)
2356 {
2357 	struct file_descriptor *descriptor;
2358 	int fd;
2359 
2360 	// if the vnode is locked, we don't allow creating a new file descriptor for it
2361 	if (vnode && vnode->mandatory_locked_by != NULL)
2362 		return B_BUSY;
2363 
2364 	descriptor = alloc_fd();
2365 	if (!descriptor)
2366 		return B_NO_MEMORY;
2367 
2368 	if (vnode)
2369 		descriptor->u.vnode = vnode;
2370 	else
2371 		descriptor->u.mount = mount;
2372 	descriptor->cookie = cookie;
2373 
2374 	switch (type) {
2375 		// vnode types
2376 		case FDTYPE_FILE:
2377 			descriptor->ops = &sFileOps;
2378 			break;
2379 		case FDTYPE_DIR:
2380 			descriptor->ops = &sDirectoryOps;
2381 			break;
2382 		case FDTYPE_ATTR:
2383 			descriptor->ops = &sAttributeOps;
2384 			break;
2385 		case FDTYPE_ATTR_DIR:
2386 			descriptor->ops = &sAttributeDirectoryOps;
2387 			break;
2388 
2389 		// mount types
2390 		case FDTYPE_INDEX_DIR:
2391 			descriptor->ops = &sIndexDirectoryOps;
2392 			break;
2393 		case FDTYPE_QUERY:
2394 			descriptor->ops = &sQueryOps;
2395 			break;
2396 
2397 		default:
2398 			panic("get_new_fd() called with unknown type %d\n", type);
2399 			break;
2400 	}
2401 	descriptor->type = type;
2402 	descriptor->open_mode = openMode;
2403 
2404 	fd = new_fd(get_current_io_context(kernel), descriptor);
2405 	if (fd < 0) {
2406 		free(descriptor);
2407 		return B_NO_MORE_FDS;
2408 	}
2409 
2410 	return fd;
2411 }
2412 
2413 #ifdef ADD_DEBUGGER_COMMANDS
2414 
2415 
2416 static void
2417 _dump_advisory_locking(advisory_locking *locking)
2418 {
2419 	if (locking == NULL)
2420 		return;
2421 
2422 	kprintf("   lock:        %ld\n", locking->lock);
2423 	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2424 
2425 	int32 index = 0;
2426 	LockList::Iterator iterator = locking->locks.GetIterator();
2427 	while (iterator.HasNext()) {
2428 		struct advisory_lock *lock = iterator.Next();
2429 
2430 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2431 		kprintf("        start:  %Ld\n", lock->start);
2432 		kprintf("        end:    %Ld\n", lock->end);
2433 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2434 	}
2435 }
2436 
2437 
2438 static void
2439 _dump_mount(struct fs_mount *mount)
2440 {
2441 	kprintf("MOUNT: %p\n", mount);
2442 	kprintf(" id:            %ld\n", mount->id);
2443 	kprintf(" device_name:   %s\n", mount->device_name);
2444 	kprintf(" fs_name:       %s\n", mount->fs_name);
2445 	kprintf(" cookie:        %p\n", mount->cookie);
2446 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2447 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2448 	kprintf(" partition:     %p\n", mount->partition);
2449 	kprintf(" lock:          %ld\n", mount->rlock.sem);
2450 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2451 		mount->owns_file_device ? " owns_file_device" : "");
2452 
2453 	set_debug_variable("_cookie", (addr_t)mount->cookie);
2454 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2455 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
2456 	set_debug_variable("_partition", (addr_t)mount->partition);
2457 }
2458 
2459 
2460 static void
2461 _dump_vnode(struct vnode *vnode)
2462 {
2463 	kprintf("VNODE: %p\n", vnode);
2464 	kprintf(" device:        %ld\n", vnode->device);
2465 	kprintf(" id:            %Ld\n", vnode->id);
2466 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
2467 	kprintf(" private_node:  %p\n", vnode->private_node);
2468 	kprintf(" mount:         %p\n", vnode->mount);
2469 	kprintf(" covered_by:    %p\n", vnode->covered_by);
2470 	kprintf(" cache:         %p\n", vnode->cache);
2471 	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
2472 		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2473 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
2474 
2475 	_dump_advisory_locking(vnode->advisory_locking);
2476 
2477 	set_debug_variable("_node", (addr_t)vnode->private_node);
2478 	set_debug_variable("_mount", (addr_t)vnode->mount);
2479 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
2480 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
2481 }
2482 
2483 
2484 static int
2485 dump_mount(int argc, char **argv)
2486 {
2487 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2488 		kprintf("usage: %s <id|address>\n", argv[0]);
2489 		return 0;
2490 	}
2491 
2492 	uint32 id = parse_expression(argv[1]);
2493 	struct fs_mount *mount = NULL;
2494 
2495 	mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2496 	if (mount == NULL) {
2497 		if (IS_USER_ADDRESS(id)) {
2498 			kprintf("fs_mount not found\n");
2499 			return 0;
2500 		}
2501 		mount = (fs_mount *)id;
2502 	}
2503 
2504 	_dump_mount(mount);
2505 	return 0;
2506 }
2507 
2508 
2509 static int
2510 dump_mounts(int argc, char **argv)
2511 {
2512 	if (argc != 1) {
2513 		kprintf("usage: %s\n", argv[0]);
2514 		return 0;
2515 	}
2516 
2517 	kprintf("address     id root       covers     cookie     fs_name\n");
2518 
2519 	struct hash_iterator iterator;
2520 	struct fs_mount *mount;
2521 
2522 	hash_open(sMountsTable, &iterator);
2523 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2524 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
2525 			mount->covers_vnode, mount->cookie, mount->fs_name);
2526 	}
2527 
2528 	hash_close(sMountsTable, &iterator, false);
2529 	return 0;
2530 }
2531 
2532 
2533 static int
2534 dump_vnode(int argc, char **argv)
2535 {
2536 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
2537 		kprintf("usage: %s <device> <id>\n"
2538 			"   or: %s <address>\n", argv[0], argv[0]);
2539 		return 0;
2540 	}
2541 
2542 	struct vnode *vnode = NULL;
2543 
2544 	if (argc == 2) {
2545 		vnode = (struct vnode *)parse_expression(argv[1]);
2546 		if (IS_USER_ADDRESS(vnode)) {
2547 			kprintf("invalid vnode address\n");
2548 			return 0;
2549 		}
2550 		_dump_vnode(vnode);
2551 		return 0;
2552 	}
2553 
2554 	struct hash_iterator iterator;
2555 	dev_t device = parse_expression(argv[1]);
2556 	ino_t id = atoll(argv[2]);
2557 
2558 	hash_open(sVnodeTable, &iterator);
2559 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2560 		if (vnode->id != id || vnode->device != device)
2561 			continue;
2562 
2563 		_dump_vnode(vnode);
2564 	}
2565 
2566 	hash_close(sVnodeTable, &iterator, false);
2567 	return 0;
2568 }
2569 
2570 
2571 static int
2572 dump_vnodes(int argc, char **argv)
2573 {
2574 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2575 		kprintf("usage: %s <device>\n", argv[0]);
2576 		return 0;
2577 	}
2578 
2579 	// restrict dumped nodes to a certain device if requested
2580 	dev_t device = parse_expression(argv[1]);
2581 
2582 	struct hash_iterator iterator;
2583 	struct vnode *vnode;
2584 
2585 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
2586 		"flags\n");
2587 
2588 	hash_open(sVnodeTable, &iterator);
2589 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2590 		if (vnode->device != device)
2591 			continue;
2592 
2593 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
2594 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
2595 			vnode->advisory_locking, vnode->remove ? "r" : "-",
2596 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2597 	}
2598 
2599 	hash_close(sVnodeTable, &iterator, false);
2600 	return 0;
2601 }
2602 
2603 
2604 static int
2605 dump_vnode_caches(int argc, char **argv)
2606 {
2607 	struct hash_iterator iterator;
2608 	struct vnode *vnode;
2609 
2610 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
2611 		kprintf("usage: %s [device]\n", argv[0]);
2612 		return 0;
2613 	}
2614 
2615 	// restrict dumped nodes to a certain device if requested
2616 	dev_t device = -1;
2617 	if (argc > 1)
2618 		device = atoi(argv[1]);
2619 
2620 	kprintf("address    dev     inode cache          size   pages\n");
2621 
2622 	hash_open(sVnodeTable, &iterator);
2623 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2624 		if (vnode->cache == NULL)
2625 			continue;
2626 		if (device != -1 && vnode->device != device)
2627 			continue;
2628 
2629 		// count pages in cache
2630 		size_t numPages = 0;
2631 		for (struct vm_page *page = vnode->cache->page_list;
2632 				page != NULL; page = page->cache_next) {
2633 			numPages++;
2634 		}
2635 
2636 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
2637 			vnode->cache, (vnode->cache->virtual_size + B_PAGE_SIZE - 1)
2638 				/ B_PAGE_SIZE, numPages);
2639 	}
2640 
2641 	hash_close(sVnodeTable, &iterator, false);
2642 	return 0;
2643 }
2644 
2645 
2646 int
2647 dump_io_context(int argc, char **argv)
2648 {
2649 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
2650 		kprintf("usage: %s [team-id|address]\n", argv[0]);
2651 		return 0;
2652 	}
2653 
2654 	struct io_context *context = NULL;
2655 
2656 	if (argc > 1) {
2657 		uint32 num = parse_expression(argv[1]);
2658 		if (IS_KERNEL_ADDRESS(num))
2659 			context = (struct io_context *)num;
2660 		else {
2661 			struct team *team = team_get_team_struct_locked(num);
2662 			if (team == NULL) {
2663 				kprintf("could not find team with ID %ld\n", num);
2664 				return 0;
2665 			}
2666 			context = (struct io_context *)team->io_context;
2667 		}
2668 	} else
2669 		context = get_current_io_context(true);
2670 
2671 	kprintf("I/O CONTEXT: %p\n", context);
2672 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2673 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2674 	kprintf(" max fds:\t%lu\n", context->table_size);
2675 
2676 	if (context->num_used_fds)
2677 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2678 
2679 	for (uint32 i = 0; i < context->table_size; i++) {
2680 		struct file_descriptor *fd = context->fds[i];
2681 		if (fd == NULL)
2682 			continue;
2683 
2684 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2685 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2686 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2687 			fd->u.vnode);
2688 	}
2689 
2690 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2691 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2692 
2693 	set_debug_variable("_cwd", (addr_t)context->cwd);
2694 
2695 	return 0;
2696 }
2697 
2698 
2699 int
2700 dump_vnode_usage(int argc, char **argv)
2701 {
2702 	if (argc != 1) {
2703 		kprintf("usage: %s\n", argv[0]);
2704 		return 0;
2705 	}
2706 
2707 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
2708 		kMaxUnusedVnodes);
2709 
2710 	struct hash_iterator iterator;
2711 	hash_open(sVnodeTable, &iterator);
2712 
2713 	uint32 count = 0;
2714 	struct vnode *vnode;
2715 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2716 		count++;
2717 	}
2718 
2719 	hash_close(sVnodeTable, &iterator, false);
2720 
2721 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2722 	return 0;
2723 }
2724 
2725 #endif	// ADD_DEBUGGER_COMMANDS
2726 
2727 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
2728 	and calls the file system hooks to read/write the request to disk.
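	The file_io_vecs describe where the file's data lives on disk, while the
	iovecs describe the caller's memory buffers; both lists are walked in
	lock step, and the transfer is split into temporary vec arrays of at
	most MAX_TEMP_IO_VECS entries per file system hook call.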
2729 */
2730 static status_t
2731 common_file_io_vec_pages(struct vnode *vnode, void *cookie,
2732 	const file_io_vec *fileVecs, size_t fileVecCount, const iovec *vecs,
2733 	size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset, size_t *_numBytes,
2734 	bool doWrite)
2735 {
2736 	if (fileVecCount == 0) {
2737 		// There are no file vecs at this offset, so we're obviously trying
2738 		// to access the file outside of its bounds
2739 		return B_BAD_VALUE;
2740 	}
2741 
2742 	size_t numBytes = *_numBytes;
2743 	uint32 fileVecIndex;
2744 	size_t vecOffset = *_vecOffset;
2745 	uint32 vecIndex = *_vecIndex;
2746 	status_t status;
2747 	size_t size;
2748 
2749 	if (!doWrite && vecOffset == 0) {
2750 		// now directly read the data from the device
2751 		// the first file_io_vec can be read directly
2752 
2753 		size = fileVecs[0].length;
2754 		if (size > numBytes)
2755 			size = numBytes;
2756 
2757 		status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
2758 			vnode->private_node, cookie, fileVecs[0].offset, &vecs[vecIndex],
2759 			vecCount - vecIndex, &size, false);
2760 		if (status < B_OK)
2761 			return status;
2762 
2763 		// TODO: this is a work-around for buggy device drivers!
2764 		//	When our own drivers honour the length, we can:
2765 		//	a) also use this direct I/O for writes (otherwise, it would
2766 		//	   overwrite precious data)
2767 		//	b) panic if the term below is true (at least for writes)
2768 		if (size > fileVecs[0].length) {
2769 			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
2770 			size = fileVecs[0].length;
2771 		}
2772 
2773 		ASSERT(size <= fileVecs[0].length);
2774 
2775 		// If the file portion was contiguous, we're already done now
2776 		if (size == numBytes)
2777 			return B_OK;
2778 
2779 		// if we reached the end of the file, we can return as well
2780 		if (size != fileVecs[0].length) {
2781 			*_numBytes = size;
2782 			return B_OK;
2783 		}
2784 
2785 		fileVecIndex = 1;
2786 
2787 		// first, find out where we have to continue in our iovecs
2788 		for (; vecIndex < vecCount; vecIndex++) {
2789 			if (size < vecs[vecIndex].iov_len)
2790 				break;
2791 
2792 			size -= vecs[vecIndex].iov_len;
2793 		}
2794 
2795 		vecOffset = size;
2796 	} else {
2797 		fileVecIndex = 0;
2798 		size = 0;
2799 	}
2800 
2801 	// Too bad, let's process the rest of the file_io_vecs
2802 
2803 	size_t totalSize = size;
2804 	size_t bytesLeft = numBytes - size;
2805 
2806 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
2807 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
2808 		off_t fileOffset = fileVec.offset;
2809 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
2810 
2811 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
2812 
2813 		// process the complete fileVec
2814 		while (fileLeft > 0) {
2815 			iovec tempVecs[MAX_TEMP_IO_VECS];
2816 			uint32 tempCount = 0;
2817 
2818 			// size tracks how much of what is left of the current fileVec
2819 			// (fileLeft) has been assigned to tempVecs
2821 
2822 			// assign what is left of the current fileVec to the tempVecs
2823 			for (size = 0; size < fileLeft && vecIndex < vecCount
2824 					&& tempCount < MAX_TEMP_IO_VECS;) {
2825 				// try to satisfy one iovec per iteration (or as much as
2826 				// possible)
2827 
2828 				// bytes left of the current iovec
2829 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
2830 				if (vecLeft == 0) {
2831 					vecOffset = 0;
2832 					vecIndex++;
2833 					continue;
2834 				}
2835 
2836 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
2837 					vecIndex, vecOffset, size));
2838 
2839 				// actually available bytes
2840 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
2841 
2842 				tempVecs[tempCount].iov_base
2843 					= (void *)((addr_t)vecs[vecIndex].iov_base + vecOffset);
2844 				tempVecs[tempCount].iov_len = tempVecSize;
2845 				tempCount++;
2846 
2847 				size += tempVecSize;
2848 				vecOffset += tempVecSize;
2849 			}
2850 
2851 			size_t bytes = size;
2852 			if (doWrite) {
2853 				status = FS_CALL(vnode, write_pages)(vnode->mount->cookie,
2854 					vnode->private_node, cookie, fileOffset, tempVecs,
2855 					tempCount, &bytes, false);
2856 			} else {
2857 				status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
2858 					vnode->private_node, cookie, fileOffset, tempVecs,
2859 					tempCount, &bytes, false);
2860 			}
2861 			if (status < B_OK)
2862 				return status;
2863 
2864 			totalSize += bytes;
2865 			bytesLeft -= size;
2866 			fileOffset += size;
2867 			fileLeft -= size;
2868 			//dprintf("-> file left = %Lu\n", fileLeft);
2869 
2870 			if (size != bytes || vecIndex >= vecCount) {
2871 				// there are no more bytes or iovecs, let's bail out
2872 				*_numBytes = totalSize;
2873 				return B_OK;
2874 			}
2875 		}
2876 	}
2877 
2878 	*_vecIndex = vecIndex;
2879 	*_vecOffset = vecOffset;
2880 	*_numBytes = totalSize;
2881 	return B_OK;
2882 }
2883 
2884 
2885 //	#pragma mark - public API for file systems
2886 
2887 
2888 extern "C" status_t
2889 new_vnode(dev_t mountID, ino_t vnodeID, fs_vnode privateNode)
2890 {
2891 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2892 		mountID, vnodeID, privateNode));
2893 
2894 	if (privateNode == NULL)
2895 		return B_BAD_VALUE;
2896 
2897 	mutex_lock(&sVnodeMutex);
2898 
2899 	// file system integrity check:
2900 	// test if the vnode already exists and bail out if this is the case!
2901 
2902 	// ToDo: the R5 implementation obviously checks for a different cookie
2903 	//	and doesn't panic if they are equal
2904 
2905 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2906 	if (vnode != NULL)
2907 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2908 
2909 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2910 	if (status == B_OK) {
2911 		vnode->private_node = privateNode;
2912 		vnode->busy = true;
2913 		vnode->unpublished = true;
2914 	}
2915 
2916 	TRACE(("returns: %s\n", strerror(status)));
2917 
2918 	mutex_unlock(&sVnodeMutex);
2919 	return status;
2920 }
2921 
2922 
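/*!	Makes the vnode available to others: new_vnode() enters the node busy
	and unpublished, so that it cannot be gotten by others until
	publish_vnode() clears both flags. It can also be called without a
	preceding new_vnode(), in which case the node is created and published
	in one go.
	A hypothetical file system would use the pair roughly like this
	(illustrative sketch only - "volumeID", "inodeID", and "inode" stand in
	for the FS's own data):

		if (new_vnode(volumeID, inodeID, inode) == B_OK) {
			// ... set up the node while no one else can access it ...
			publish_vnode(volumeID, inodeID, inode);
		}
*/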
2923 extern "C" status_t
2924 publish_vnode(dev_t mountID, ino_t vnodeID, fs_vnode privateNode)
2925 {
2926 	FUNCTION(("publish_vnode()\n"));
2927 
2928 	mutex_lock(&sVnodeMutex);
2929 
2930 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2931 	status_t status = B_OK;
2932 
2933 	if (vnode != NULL && vnode->busy && vnode->unpublished
2934 		&& vnode->private_node == privateNode) {
2935 		vnode->busy = false;
2936 		vnode->unpublished = false;
2937 	} else if (vnode == NULL && privateNode != NULL) {
2938 		status = create_new_vnode(&vnode, mountID, vnodeID);
2939 		if (status == B_OK)
2940 			vnode->private_node = privateNode;
2941 	} else
2942 		status = B_BAD_VALUE;
2943 
2944 	TRACE(("returns: %s\n", strerror(status)));
2945 
2946 	mutex_unlock(&sVnodeMutex);
2947 	return status;
2948 }
2949 
2950 
2951 extern "C" status_t
2952 get_vnode(dev_t mountID, ino_t vnodeID, fs_vnode *_fsNode)
2953 {
2954 	struct vnode *vnode;
2955 
2956 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
2957 	if (status < B_OK)
2958 		return status;
2959 
2960 	*_fsNode = vnode->private_node;
2961 	return B_OK;
2962 }
2963 
2964 
2965 extern "C" status_t
2966 put_vnode(dev_t mountID, ino_t vnodeID)
2967 {
2968 	struct vnode *vnode;
2969 
2970 	mutex_lock(&sVnodeMutex);
2971 	vnode = lookup_vnode(mountID, vnodeID);
2972 	mutex_unlock(&sVnodeMutex);
2973 
2974 	if (vnode)
2975 		dec_vnode_ref_count(vnode, true);
2976 
2977 	return B_OK;
2978 }
2979 
2980 
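/*!	Marks the vnode as about-to-be-removed: it will be deleted once its last
	reference has been released. A still unpublished node is deleted right
	away; a node that is covered by another vnode (i.e. one that serves as
	a mount point) cannot be removed and yields \c B_BUSY.
*/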
2981 extern "C" status_t
2982 remove_vnode(dev_t mountID, ino_t vnodeID)
2983 {
2984 	struct vnode *vnode;
2985 	bool remove = false;
2986 
2987 	MutexLocker locker(sVnodeMutex);
2988 
2989 	vnode = lookup_vnode(mountID, vnodeID);
2990 	if (vnode == NULL)
2991 		return B_ENTRY_NOT_FOUND;
2992 
2993 	if (vnode->covered_by != NULL) {
2994 		// this vnode is in use - the MutexLocker releases sVnodeMutex
2995 		// when we return
2996 		return B_BUSY;
2997 	}
2998 
2999 	vnode->remove = true;
3000 	if (vnode->unpublished) {
3001 		// prepare the vnode for deletion
3002 		vnode->busy = true;
3003 		remove = true;
3004 	}
3005 
3006 	locker.Unlock();
3007 
3008 	if (remove) {
3009 		// if the vnode hasn't been published yet, we delete it here
3010 		atomic_add(&vnode->ref_count, -1);
3011 		free_vnode(vnode, true);
3012 	}
3013 
3014 	return B_OK;
3015 }
3016 
3017 
3018 extern "C" status_t
3019 unremove_vnode(dev_t mountID, ino_t vnodeID)
3020 {
3021 	struct vnode *vnode;
3022 
3023 	mutex_lock(&sVnodeMutex);
3024 
3025 	vnode = lookup_vnode(mountID, vnodeID);
3026 	if (vnode)
3027 		vnode->remove = false;
3028 
3029 	mutex_unlock(&sVnodeMutex);
3030 	return B_OK;
3031 }
3032 
3033 
3034 extern "C" status_t
3035 get_vnode_removed(dev_t mountID, ino_t vnodeID, bool* removed)
3036 {
3037 	mutex_lock(&sVnodeMutex);
3038 
3039 	status_t result;
3040 
3041 	if (struct vnode* vnode = lookup_vnode(mountID, vnodeID)) {
3042 		if (removed)
3043 			*removed = vnode->remove;
3044 		result = B_OK;
3045 	} else
3046 		result = B_BAD_VALUE;
3047 
3048 	mutex_unlock(&sVnodeMutex);
3049 	return result;
3050 }
3051 
3052 
3053 extern "C" status_t
3054 read_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3055 	size_t *_numBytes, bool fsReenter)
3056 {
3057 	struct file_descriptor *descriptor;
3058 	struct vnode *vnode;
3059 
3060 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3061 	if (descriptor == NULL)
3062 		return B_FILE_ERROR;
3063 
3064 	status_t status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
3065 		vnode->private_node, descriptor->cookie, pos, vecs, count, _numBytes,
3066 		fsReenter);
3067 
3068 	put_fd(descriptor);
3069 	return status;
3070 }
3071 
3072 
3073 extern "C" status_t
3074 write_pages(int fd, off_t pos, const iovec *vecs, size_t count,
3075 	size_t *_numBytes, bool fsReenter)
3076 {
3077 	struct file_descriptor *descriptor;
3078 	struct vnode *vnode;
3079 
3080 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3081 	if (descriptor == NULL)
3082 		return B_FILE_ERROR;
3083 
3084 	status_t status = FS_CALL(vnode, write_pages)(vnode->mount->cookie,
3085 		vnode->private_node, descriptor->cookie, pos, vecs, count, _numBytes,
3086 		fsReenter);
3087 
3088 	put_fd(descriptor);
3089 	return status;
3090 }
3091 
3092 
3093 extern "C" status_t
3094 read_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3095 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3096 	size_t *_bytes)
3097 {
3098 	struct file_descriptor *descriptor;
3099 	struct vnode *vnode;
3100 
3101 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3102 	if (descriptor == NULL)
3103 		return B_FILE_ERROR;
3104 
3105 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3106 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3107 		false);
3108 
3109 	put_fd(descriptor);
3110 	return status;
3111 }
3112 
3113 
3114 extern "C" status_t
3115 write_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
3116 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
3117 	size_t *_bytes)
3118 {
3119 	struct file_descriptor *descriptor;
3120 	struct vnode *vnode;
3121 
3122 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3123 	if (descriptor == NULL)
3124 		return B_FILE_ERROR;
3125 
3126 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3127 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3128 		true);
3129 
3130 	put_fd(descriptor);
3131 	return status;
3132 }
3133 
3134 
3135 //	#pragma mark - private VFS API
3136 //	Functions the VFS exports for other parts of the kernel
3137 
3138 
3139 /*! Acquires another reference to the vnode that has to be released
3140 	by calling vfs_put_vnode().
3141 */
3142 void
3143 vfs_acquire_vnode(struct vnode *vnode)
3144 {
3145 	inc_vnode_ref_count(vnode);
3146 }
3147 
3148 
3149 /*! This is currently called from file_cache_create() only.
3150 	It's probably a temporary solution as long as devfs requires that
3151 	fs_read_pages()/fs_write_pages() are called with the standard
3152 	open cookie and not with a device cookie.
3153 	If that's done differently, remove this call; it has no other
3154 	purpose.
3155 */
3156 extern "C" status_t
3157 vfs_get_cookie_from_fd(int fd, void **_cookie)
3158 {
3159 	struct file_descriptor *descriptor;
3160 
3161 	descriptor = get_fd(get_current_io_context(true), fd);
3162 	if (descriptor == NULL)
3163 		return B_FILE_ERROR;
3164 
3165 	*_cookie = descriptor->cookie;
3166 	return B_OK;
3167 }
3168 
3169 
3170 extern "C" int
3171 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode **vnode)
3172 {
3173 	*vnode = get_vnode_from_fd(fd, kernel);
3174 
3175 	if (*vnode == NULL)
3176 		return B_FILE_ERROR;
3177 
3178 	return B_NO_ERROR;
3179 }
3180 
3181 
3182 extern "C" status_t
3183 vfs_get_vnode_from_path(const char *path, bool kernel, struct vnode **_vnode)
3184 {
3185 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3186 		path, kernel));
3187 
3188 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3189 	if (pathBuffer.InitCheck() != B_OK)
3190 		return B_NO_MEMORY;
3191 
3192 	char *buffer = pathBuffer.LockBuffer();
3193 	strlcpy(buffer, path, pathBuffer.BufferSize());
3194 
3195 	struct vnode *vnode;
3196 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3197 	if (status < B_OK)
3198 		return status;
3199 
3200 	*_vnode = vnode;
3201 	return B_OK;
3202 }
3203 
3204 
3205 extern "C" status_t
3206 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode **_vnode)
3207 {
3208 	struct vnode *vnode;
3209 
3210 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3211 	if (status < B_OK)
3212 		return status;
3213 
3214 	*_vnode = vnode;
3215 	return B_OK;
3216 }
3217 
3218 
3219 extern "C" status_t
3220 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
3221 	const char *name, struct vnode **_vnode)
3222 {
3223 	return entry_ref_to_vnode(mountID, directoryID, name, _vnode);
3224 }
3225 
3226 
3227 extern "C" void
3228 vfs_vnode_to_node_ref(struct vnode *vnode, dev_t *_mountID, ino_t *_vnodeID)
3229 {
3230 	*_mountID = vnode->device;
3231 	*_vnodeID = vnode->id;
3232 }
3233 
3234 
3235 /*!	Looks up a vnode with the given mount and vnode ID.
3236 	Must only be used with "in-use" vnodes as it doesn't grab a reference
3237 	to the node.
3238 	It's currently only used by file_cache_create().
3239 */
3240 extern "C" status_t
3241 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode)
3242 {
3243 	mutex_lock(&sVnodeMutex);
3244 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
3245 	mutex_unlock(&sVnodeMutex);
3246 
3247 	if (vnode == NULL)
3248 		return B_ERROR;
3249 
3250 	*_vnode = vnode;
3251 	return B_OK;
3252 }
3253 
3254 
3255 extern "C" status_t
3256 vfs_get_fs_node_from_path(dev_t mountID, const char *path, bool kernel,
3257 	void **_node)
3258 {
3259 	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
3260 		mountID, path, kernel));
3261 
3262 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3263 	if (pathBuffer.InitCheck() != B_OK)
3264 		return B_NO_MEMORY;
3265 
3266 	fs_mount *mount;
3267 	status_t status = get_mount(mountID, &mount);
3268 	if (status < B_OK)
3269 		return status;
3270 
3271 	char *buffer = pathBuffer.LockBuffer();
3272 	strlcpy(buffer, path, pathBuffer.BufferSize());
3273 
3274 	struct vnode *vnode = mount->root_vnode;
3275 
3276 	if (buffer[0] == '/')
3277 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
3278 	else {
3279 		inc_vnode_ref_count(vnode);
3280 			// vnode_path_to_vnode() releases a reference to the starting vnode
3281 		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
3282 	}
3283 
3284 	put_mount(mount);
3285 
3286 	if (status < B_OK)
3287 		return status;
3288 
3289 	if (vnode->device != mountID) {
3290 		// wrong mount ID - must not gain access on foreign file system nodes
3291 		put_vnode(vnode);
3292 		return B_BAD_VALUE;
3293 	}
3294 
3295 	*_node = vnode->private_node;
3296 	return B_OK;
3297 }
3298 
3299 
3300 /*!	Finds the full path to the file that contains the module \a moduleName,
3301 	puts it into \a pathBuffer, and returns B_OK for success.
3302 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
3303 	\c B_ENTRY_NOT_FOUND if no file could be found.
3304 	\a pathBuffer is clobbered in any case and must not be relied on if this
3305 	function returns unsuccessfully.
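	For instance (purely illustrative values), a \a basePath of
	"/boot/beos/system/add-ons/kernel" and a \a moduleName of
	"bus_managers/pci/v1" would yield
	"/boot/beos/system/add-ons/kernel/bus_managers/pci/v1", provided that
	file exists.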
3306 */
3307 status_t
3308 vfs_get_module_path(const char *basePath, const char *moduleName,
3309 	char *pathBuffer, size_t bufferSize)
3310 {
3311 	struct vnode *dir, *file;
3312 	status_t status;
3313 	size_t length;
3314 	char *path;
3315 
3316 	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
3317 		return B_BUFFER_OVERFLOW;
3318 
3319 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
3320 	if (status < B_OK)
3321 		return status;
3322 
3323 	// the path buffer has been clobbered by the above call
3324 	length = strlcpy(pathBuffer, basePath, bufferSize);
3325 	if (pathBuffer[length - 1] != '/')
3326 		pathBuffer[length++] = '/';
3327 
3328 	path = pathBuffer + length;
3329 	bufferSize -= length;
3330 
3331 	while (moduleName) {
3332 		int type;
3333 
3334 		char *nextPath = strchr(moduleName, '/');
3335 		if (nextPath == NULL)
3336 			length = strlen(moduleName);
3337 		else {
3338 			length = nextPath - moduleName;
3339 			nextPath++;
3340 		}
3341 
3342 		if (length + 1 >= bufferSize) {
3343 			status = B_BUFFER_OVERFLOW;
3344 			goto err;
3345 		}
3346 
3347 		memcpy(path, moduleName, length);
3348 		path[length] = '\0';
3349 		moduleName = nextPath;
3350 
3351 		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
3352 		if (status < B_OK) {
3353 			// vnode_path_to_vnode() has already released the reference to dir
3354 			return status;
3355 		}
3356 
3357 		if (S_ISDIR(type)) {
3358 			// go on to the next directory
3359 			path[length] = '/';
3360 			path[length + 1] = '\0';
3361 			path += length + 1;
3362 			bufferSize -= length + 1;
3363 
3364 			dir = file;
3365 		} else if (S_ISREG(type)) {
3366 			// it's a file so it should be what we've searched for
3367 			put_vnode(file);
3368 
3369 			return B_OK;
3370 		} else {
3371 			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
3372 			status = B_ERROR;
3373 			dir = file;
3374 			goto err;
3375 		}
3376 	}
3377 
3378 	// if we got here, the moduleName just pointed to a directory, not to
3379 	// a real module - what should we do in this case?
3380 	status = B_ENTRY_NOT_FOUND;
3381 
3382 err:
3383 	put_vnode(dir);
3384 	return status;
3385 }
3386 
3387 
3388 /*!	\brief Normalizes a given path.
3389 
3390 	The path must refer to an existing or non-existing entry in an existing
3391 	directory; that is, after chopping off the leaf component, the remaining
3392 	path must refer to an existing directory.
3393 
3394 	The returned path will be canonical in that it will be absolute, will not
3395 	contain any "." or ".." components or duplicate occurrences of '/'s,
3396 	and none of the directory components will be symbolic links.
3397 
3398 	Any two paths referring to the same entry will result in the same
3399 	normalized path (well, that is pretty much the definition of `normalized',
3400 	isn't it :-).
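	For example, "/boot/home/../home//Desktop" would normalize to
	"/boot/home/Desktop" (assuming no symbolic links are involved).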
3401 
3402 	\param path The path to be normalized.
3403 	\param buffer The buffer into which the normalized path will be written.
3404 		   May be the same one as \a path.
3405 	\param bufferSize The size of \a buffer.
3406 	\param kernel \c true, if the IO context of the kernel shall be used,
3407 		   otherwise that of the team this thread belongs to. Only relevant,
3408 		   if the path is relative (to get the CWD).
3409 	\return \c B_OK if everything went fine, another error code otherwise.
3410 */
3411 status_t
3412 vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
3413 	bool kernel)
3414 {
3415 	if (!path || !buffer || bufferSize < 1)
3416 		return B_BAD_VALUE;
3417 
3418 	TRACE(("vfs_normalize_path(`%s')\n", path));
3419 
3420 	// copy the supplied path into a buffer, so it can be modified
3421 	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
3422 	if (mutablePathBuffer.InitCheck() != B_OK)
3423 		return B_NO_MEMORY;
3424 
3425 	char *mutablePath = mutablePathBuffer.LockBuffer();
3426 	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
3427 		return B_NAME_TOO_LONG;
3428 
3429 	// get the dir vnode and the leaf name
3430 	struct vnode *dirNode;
3431 	char leaf[B_FILE_NAME_LENGTH];
3432 	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
3433 	if (error != B_OK) {
3434 		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
3435 		return error;
3436 	}
3437 
3438 	// if the leaf is "." or "..", we directly get the correct directory
3439 	// vnode and ignore the leaf later
3440 	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
3441 	if (isDir)
3442 		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
3443 	if (error != B_OK) {
3444 		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
3445 			strerror(error)));
3446 		return error;
3447 	}
3448 
3449 	// get the directory path
3450 	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
3451 	put_vnode(dirNode);
3452 	if (error < B_OK) {
3453 		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
3454 		return error;
3455 	}
3456 
3457 	// append the leaf name
3458 	if (!isDir) {
3459 		// insert a directory separator only if this is not the file system root
3460 		if ((strcmp(buffer, "/") != 0
3461 			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
3462 			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
3463 			return B_NAME_TOO_LONG;
3464 		}
3465 	}
3466 
3467 	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
3468 	return B_OK;
3469 }
3470 
3471 
3472 extern "C" void
3473 vfs_put_vnode(struct vnode *vnode)
3474 {
3475 	put_vnode(vnode);
3476 }
3477 
3478 
3479 extern "C" status_t
3480 vfs_get_cwd(dev_t *_mountID, ino_t *_vnodeID)
3481 {
3482 	// Get current working directory from io context
3483 	struct io_context *context = get_current_io_context(false);
3484 	status_t status = B_OK;
3485 
3486 	mutex_lock(&context->io_mutex);
3487 
3488 	if (context->cwd != NULL) {
3489 		*_mountID = context->cwd->device;
3490 		*_vnodeID = context->cwd->id;
3491 	} else
3492 		status = B_ERROR;
3493 
3494 	mutex_unlock(&context->io_mutex);
3495 	return status;
3496 }
3497 
3498 
3499 status_t
3500 vfs_unmount(dev_t mountID, uint32 flags)
3501 {
3502 	return fs_unmount(NULL, mountID, flags, true);
3503 }
3504 
3505 
3506 extern "C" status_t
3507 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
3508 {
3509 	struct vnode *vnode;
3510 
3511 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
3512 	if (status < B_OK)
3513 		return status;
3514 
3515 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
3516 	put_vnode(vnode);
3517 	return B_OK;
3518 }
3519 
3520 
3521 extern "C" void
3522 vfs_free_unused_vnodes(int32 level)
3523 {
3524 	vnode_low_memory_handler(NULL, level);
3525 }
3526 
3527 
3528 extern "C" bool
3529 vfs_can_page(struct vnode *vnode, void *cookie)
3530 {
3531 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
3532 
3533 	if (FS_CALL(vnode, can_page)) {
3534 		return FS_CALL(vnode, can_page)(vnode->mount->cookie,
3535 			vnode->private_node, cookie);
3536 	}
3537 	return false;
3538 }
3539 
3540 
3541 extern "C" status_t
3542 vfs_read_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
3543 	size_t count, size_t *_numBytes, bool fsReenter)
3544 {
3545 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3546 
3547 	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
3548 		cookie, pos, vecs, count, _numBytes, fsReenter);
3549 }
3550 
3551 
3552 extern "C" status_t
3553 vfs_write_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
3554 	size_t count, size_t *_numBytes, bool fsReenter)
3555 {
3556 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
3557 
3558 	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
3559 		cookie, pos, vecs, count, _numBytes, fsReenter);
3560 }
3561 
3562 
3563 /*!	Gets the vnode's vm_cache object. If it didn't have one, it will be
3564 	created if \a allocate is \c true.
3565 	In case it's successful, it will also grab a reference to the cache
3566 	it returns.
3567 */
3568 extern "C" status_t
3569 vfs_get_vnode_cache(struct vnode *vnode, vm_cache **_cache, bool allocate)
3570 {
3571 	if (vnode->cache != NULL) {
3572 		vm_cache_acquire_ref(vnode->cache);
3573 		*_cache = vnode->cache;
3574 		return B_OK;
3575 	}
3576 
3577 	mutex_lock(&sVnodeMutex);
3578 
3579 	status_t status = B_OK;
3580 
3581 	// The cache could have been created in the meantime
3582 	if (vnode->cache == NULL) {
3583 		if (allocate) {
3584 			// TODO: actually the vnode needs to be busy already here, or
3585 			//	else this won't work...
3586 			bool wasBusy = vnode->busy;
3587 			vnode->busy = true;
3588 			mutex_unlock(&sVnodeMutex);
3589 
3590 			status = vm_create_vnode_cache(vnode, &vnode->cache);
3591 
3592 			mutex_lock(&sVnodeMutex);
3593 			vnode->busy = wasBusy;
3594 		} else
3595 			status = B_BAD_VALUE;
3596 	}
3597 
3598 	if (status == B_OK) {
3599 		vm_cache_acquire_ref(vnode->cache);
3600 		*_cache = vnode->cache;
3601 	}
3602 
3603 	mutex_unlock(&sVnodeMutex);
3604 	return status;
3605 }
3606 
3607 
3608 status_t
3609 vfs_get_file_map(struct vnode *vnode, off_t offset, size_t size,
3610 	file_io_vec *vecs, size_t *_count)
3611 {
3612 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
3613 
3614 	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie,
3615 		vnode->private_node, offset, size, vecs, _count);
3616 }
3617 
3618 
3619 status_t
3620 vfs_stat_vnode(struct vnode *vnode, struct stat *stat)
3621 {
3622 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3623 		vnode->private_node, stat);
3624 
3625 	// fill in the st_dev and st_ino fields
3626 	if (status == B_OK) {
3627 		stat->st_dev = vnode->device;
3628 		stat->st_ino = vnode->id;
3629 	}
3630 
3631 	return status;
3632 }
3633 
3634 
3635 status_t
3636 vfs_get_vnode_name(struct vnode *vnode, char *name, size_t nameSize)
3637 {
3638 	return get_vnode_name(vnode, NULL, name, nameSize);
3639 }
3640 
3641 
3642 /*!	If the given descriptor locked its vnode, that lock will be released. */
3643 void
3644 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3645 {
3646 	struct vnode *vnode = fd_vnode(descriptor);
3647 
3648 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3649 		vnode->mandatory_locked_by = NULL;
3650 }
3651 
3652 
3653 /*!	Closes all file descriptors of the specified I/O context that
3654 	have the O_CLOEXEC flag set.
3655 */
3656 void
3657 vfs_exec_io_context(void *_context)
3658 {
3659 	struct io_context *context = (struct io_context *)_context;
3660 	uint32 i;
3661 
3662 	for (i = 0; i < context->table_size; i++) {
3663 		mutex_lock(&context->io_mutex);
3664 
3665 		struct file_descriptor *descriptor = context->fds[i];
3666 		bool remove = false;
3667 
3668 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
3669 			context->fds[i] = NULL;
3670 			context->num_used_fds--;
3671 
3672 			remove = true;
3673 		}
3674 
3675 		mutex_unlock(&context->io_mutex);
3676 
3677 		if (remove) {
3678 			close_fd(descriptor);
3679 			put_fd(descriptor);
3680 		}
3681 	}
3682 }
3683 
3684 
3685 /*! Sets up a new io_context structure, and inherits the properties
3686 	of the parent io_context if it is given.
3687 */
3688 void *
3689 vfs_new_io_context(void *_parentContext)
3690 {
3691 	size_t tableSize;
3692 	struct io_context *context;
3693 	struct io_context *parentContext;
3694 
3695 	context = (io_context *)malloc(sizeof(struct io_context));
3696 	if (context == NULL)
3697 		return NULL;
3698 
3699 	memset(context, 0, sizeof(struct io_context));
3700 
3701 	parentContext = (struct io_context *)_parentContext;
3702 	if (parentContext)
3703 		tableSize = parentContext->table_size;
3704 	else
3705 		tableSize = DEFAULT_FD_TABLE_SIZE;
3706 
3707 	// allocate space for FDs and their close-on-exec flag
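	// layout of the single allocation: tableSize file_descriptor pointers,
	// then tableSize select_info pointers, then a close-on-exec bitmap
	// with one bit per FD, i.e. (tableSize + 7) / 8 bytes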
3708 	context->fds = (file_descriptor**)malloc(
3709 		sizeof(struct file_descriptor*) * tableSize
3710 		+ sizeof(struct select_sync*) * tableSize
3711 		+ (tableSize + 7) / 8);
3712 	if (context->fds == NULL) {
3713 		free(context);
3714 		return NULL;
3715 	}
3716 
3717 	context->select_infos = (select_info**)(context->fds + tableSize);
3718 	context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize);
3719 
3720 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
3721 		+ sizeof(struct select_sync*) * tableSize
3722 		+ (tableSize + 7) / 8);
3723 
3724 	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
3725 		free(context->fds);
3726 		free(context);
3727 		return NULL;
3728 	}
3729 
3730 	// Copy all parent file descriptors
3731 
3732 	if (parentContext) {
3733 		size_t i;
3734 
3735 		mutex_lock(&parentContext->io_mutex);
3736 
3737 		context->cwd = parentContext->cwd;
3738 		if (context->cwd)
3739 			inc_vnode_ref_count(context->cwd);
3740 
3741 		for (i = 0; i < tableSize; i++) {
3742 			struct file_descriptor *descriptor = parentContext->fds[i];
3743 
3744 			if (descriptor != NULL) {
3745 				context->fds[i] = descriptor;
3746 				context->num_used_fds++;
3747 				atomic_add(&descriptor->ref_count, 1);
3748 				atomic_add(&descriptor->open_count, 1);
3749 			}
3750 		}
3751 
3752 		mutex_unlock(&parentContext->io_mutex);
3753 	} else {
3754 		context->cwd = sRoot;
3755 
3756 		if (context->cwd)
3757 			inc_vnode_ref_count(context->cwd);
3758 	}
3759 
3760 	context->table_size = tableSize;
3761 
3762 	list_init(&context->node_monitors);
3763 	context->max_monitors = DEFAULT_NODE_MONITORS;
3764 
3765 	return context;
3766 }
3767 
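/*	[Editor's example] Sketch (kept out of the build) of how the single
	allocation made by vfs_new_io_context() above is laid out. The table
	size of 32 is hypothetical; the three regions and the
	(tableSize + 7) / 8 bitmap size follow directly from the code above.
*/
#if 0
static void
example_io_context_layout()
{
	const size_t tableSize = 32;	// hypothetical table size

	// one allocation, three consecutive regions:
	//   context->fds               - tableSize file_descriptor pointers
	//   context->select_infos      - tableSize select_info pointers
	//   context->fds_close_on_exec - one bit per FD, rounded up to bytes
	size_t bytes = sizeof(struct file_descriptor*) * tableSize
		+ sizeof(struct select_sync*) * tableSize
		+ (tableSize + 7) / 8;	// 32 FDs -> 4 bitmap bytes

	dprintf("io_context tables: %lu bytes in one block\n", bytes);
}
#endif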
3768 
3769 status_t
3770 vfs_free_io_context(void *_ioContext)
3771 {
3772 	struct io_context *context = (struct io_context *)_ioContext;
3773 	uint32 i;
3774 
3775 	if (context->cwd)
3776 		dec_vnode_ref_count(context->cwd, false);
3777 
3778 	mutex_lock(&context->io_mutex);
3779 
3780 	for (i = 0; i < context->table_size; i++) {
3781 		if (struct file_descriptor *descriptor = context->fds[i]) {
3782 			close_fd(descriptor);
3783 			put_fd(descriptor);
3784 		}
3785 	}
3786 
3787 	mutex_destroy(&context->io_mutex);
3788 
3789 	remove_node_monitors(context);
3790 	free(context->fds);
3791 	free(context);
3792 
3793 	return B_OK;
3794 }
3795 
3796 
3797 static status_t
3798 vfs_resize_fd_table(struct io_context *context, const int newSize)
3799 {
3800 	struct file_descriptor **fds;
3801 
3802 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3803 		return EINVAL;
3804 
3805 	MutexLocker locker(context->io_mutex);
3806 
3807 	int oldSize = context->table_size;
3808 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
3809 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
3810 
3811 	// If the tables shrink, make sure none of the fds being dropped are in use.
3812 	if (newSize < oldSize) {
3813 		for (int i = oldSize; i-- > newSize;) {
3814 			if (context->fds[i])
3815 				return EBUSY;
3816 		}
3817 	}
3818 
3819 	// store pointers to the old tables
3820 	file_descriptor** oldFDs = context->fds;
3821 	select_info** oldSelectInfos = context->select_infos;
3822 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
3823 
3824 	// allocate new tables
3825 	file_descriptor** newFDs = (file_descriptor**)malloc(
3826 		sizeof(struct file_descriptor*) * newSize
3827 		+ sizeof(struct select_sync*) * newSize
3828 		+ newCloseOnExecBitmapSize);
3829 	if (newFDs == NULL)
3830 		return ENOMEM;
3831 
3832 	context->fds = newFDs;
3833 	context->select_infos = (select_info**)(context->fds + newSize);
3834 	context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize);
3835 	context->table_size = newSize;
3836 
3837 	// copy entries from old tables
3838 	int toCopy = min_c(oldSize, newSize);
3839 
3840 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
3841 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
3842 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
3843 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
3844 
3845 	// clear additional entries, if the tables grow
3846 	if (newSize > oldSize) {
3847 		memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
3848 		memset(context->select_infos + oldSize, 0,
3849 			sizeof(void *) * (newSize - oldSize));
3850 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
3851 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
3852 	}
3853 
3854 	free(oldFDs);
3855 
3856 	return B_OK;
3857 }
3858 
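/*	[Editor's example] The close-on-exec table resized above is a plain
	bitmap of (tableSize + 7) / 8 bytes. This sketch (kept out of the
	build) shows the assumed indexing; the real accessors are
	fd_close_on_exec()/fd_set_close_on_exec() in fd.c.
*/
#if 0
static inline bool
example_close_on_exec(struct io_context *context, int fd)
{
	// byte fd / 8, bit fd % 8
	return (context->fds_close_on_exec[fd / 8] & (1 << (fd % 8))) != 0;
}
#endif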
3859 
3860 static status_t
3861 vfs_resize_monitor_table(struct io_context *context, const int newSize)
3862 {
3863 	status_t status = B_OK;
3865 
3866 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3867 		return EINVAL;
3868 
3869 	mutex_lock(&context->io_mutex);
3870 
3871 	if ((size_t)newSize < context->num_monitors) {
3872 		status = EBUSY;
3873 		goto out;
3874 	}
3875 	context->max_monitors = newSize;
3876 
3877 out:
3878 	mutex_unlock(&context->io_mutex);
3879 	return status;
3880 }
3881 
3882 
3883 int
3884 vfs_getrlimit(int resource, struct rlimit * rlp)
3885 {
3886 	if (!rlp)
3887 		return B_BAD_ADDRESS;
3888 
3889 	switch (resource) {
3890 		case RLIMIT_NOFILE:
3891 		{
3892 			struct io_context *ioctx = get_current_io_context(false);
3893 
3894 			mutex_lock(&ioctx->io_mutex);
3895 
3896 			rlp->rlim_cur = ioctx->table_size;
3897 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3898 
3899 			mutex_unlock(&ioctx->io_mutex);
3900 
3901 			return 0;
3902 		}
3903 
3904 		case RLIMIT_NOVMON:
3905 		{
3906 			struct io_context *ioctx = get_current_io_context(false);
3907 
3908 			mutex_lock(&ioctx->io_mutex);
3909 
3910 			rlp->rlim_cur = ioctx->max_monitors;
3911 			rlp->rlim_max = MAX_NODE_MONITORS;
3912 
3913 			mutex_unlock(&ioctx->io_mutex);
3914 
3915 			return 0;
3916 		}
3917 
3918 		default:
3919 			return EINVAL;
3920 	}
3921 }
3922 
3923 
3924 int
3925 vfs_setrlimit(int resource, const struct rlimit * rlp)
3926 {
3927 	if (!rlp)
3928 		return B_BAD_ADDRESS;
3929 
3930 	switch (resource) {
3931 		case RLIMIT_NOFILE:
3932 			/* TODO: check getuid() */
3933 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3934 			    rlp->rlim_max != MAX_FD_TABLE_SIZE)
3935 				return EPERM;
3936 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3937 
3938 		case RLIMIT_NOVMON:
3939 			/* TODO: check getuid() */
3940 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3941 			    rlp->rlim_max != MAX_NODE_MONITORS)
3942 				return EPERM;
3943 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
3944 
3945 		default:
3946 			return EINVAL;
3947 	}
3948 }
3949 
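/*	[Editor's example] Userland sketch (kept out of the build) of how the
	two limits above are driven: the FD table is grown via the standard
	setrlimit() interface. Shrinking below an FD that is still open fails
	with EBUSY in vfs_resize_fd_table().
*/
#if 0
#include <sys/resource.h>

static int
example_grow_fd_table()
{
	struct rlimit limit;
	if (getrlimit(RLIMIT_NOFILE, &limit) < 0)
		return -1;

	// rlim_max as returned above (MAX_FD_TABLE_SIZE) passes the EPERM
	// check; a doubled rlim_cur beyond that limit would yield EINVAL
	limit.rlim_cur *= 2;
	return setrlimit(RLIMIT_NOFILE, &limit);
		// -> vfs_setrlimit() -> vfs_resize_fd_table()
}
#endif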
3950 
3951 status_t
3952 vfs_init(kernel_args *args)
3953 {
3954 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
3955 		&vnode_compare, &vnode_hash);
3956 	if (sVnodeTable == NULL)
3957 		panic("vfs_init: error creating vnode hash table\n");
3958 
3959 	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));
3960 
3961 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
3962 		&mount_compare, &mount_hash);
3963 	if (sMountsTable == NULL)
3964 		panic("vfs_init: error creating mounts hash table\n");
3965 
3966 	node_monitor_init();
3967 
3968 	sRoot = NULL;
3969 
3970 	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
3971 		panic("vfs_init: error allocating file systems lock\n");
3972 
3973 	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
3974 		panic("vfs_init: error allocating mount op lock\n");
3975 
3976 	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
3977 		panic("vfs_init: error allocating mount lock\n");
3978 
3979 	if (mutex_init(&sVnodeCoveredByMutex, "vfs_vnode_covered_by_lock") < 0)
3980 		panic("vfs_init: error allocating vnode::covered_by lock\n");
3981 
3982 	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
3983 		panic("vfs_init: error allocating vnode lock\n");
3984 
3985 	if (block_cache_init() != B_OK)
3986 		return B_ERROR;
3987 
3988 #ifdef ADD_DEBUGGER_COMMANDS
3989 	// add some debugger commands
3990 	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
3991 	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
3992 	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
3993 	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
3994 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
3995 	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
3996 	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
3997 #endif
3998 
3999 	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);
4000 
4001 	return file_cache_init();
4002 }
4003 
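/*	[Editor's note] The commands registered in vfs_init() above are meant
	for the kernel debugger (KDL). An illustrative session (output
	abbreviated, prompt name assumed):

		kdebug> mounts          # list all fs_mounts
		kdebug> vnodes 1        # list the vnodes of device 1
		kdebug> vnode_usage     # used vs. unused vnode counts
*/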
4004 
4005 //	#pragma mark - fd_ops implementations
4006 
4007 
4008 /*!
4009 	Calls fs create() on the given directory vnode and returns a new
4010 	file descriptor for the created and opened file
4011 */
4012 static int
4013 create_vnode(struct vnode *directory, const char *name, int openMode,
4014 	int perms, bool kernel)
4015 {
4016 	struct vnode *vnode;
4017 	fs_cookie cookie;
4018 	ino_t newID;
4019 	int status;
4020 
4021 	if (FS_CALL(directory, create) == NULL)
4022 		return EROFS;
4023 
4024 	status = FS_CALL(directory, create)(directory->mount->cookie,
4025 		directory->private_node, name, openMode, perms, &cookie, &newID);
4026 	if (status < B_OK)
4027 		return status;
4028 
4029 	mutex_lock(&sVnodeMutex);
4030 	vnode = lookup_vnode(directory->device, newID);
4031 	mutex_unlock(&sVnodeMutex);
4032 
4033 	if (vnode == NULL) {
4034 		panic("vfs: fs_create() returned success but there is no vnode, mount ID %ld!\n",
4035 			directory->device);
4036 		return B_BAD_VALUE;
4037 	}
4038 
4039 	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
4040 		return status;
4041 
4042 	// something went wrong, clean up
4043 
4044 	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
4045 	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4046 	put_vnode(vnode);
4047 
4048 	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);
4049 
4050 	return status;
4051 }
4052 
4053 
4054 /*!
4055 	Calls fs_open() on the given vnode and returns a new
4056 	file descriptor for it
4057 */
4058 static int
4059 open_vnode(struct vnode *vnode, int openMode, bool kernel)
4060 {
4061 	fs_cookie cookie;
4062 	int status;
4063 
4064 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
4065 	if (status < 0)
4066 		return status;
4067 
4068 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
4069 	if (status < 0) {
4070 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
4071 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4072 	}
4073 	return status;
4074 }
4075 
4076 
4077 /*! Calls fs open_dir() on the given vnode and returns a new
4078 	file descriptor for it
4079 */
4080 static int
4081 open_dir_vnode(struct vnode *vnode, bool kernel)
4082 {
4083 	fs_cookie cookie;
4084 	int status;
4085 
4086 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
4087 	if (status < B_OK)
4088 		return status;
4089 
4090 	// file is opened, create a fd
4091 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
4092 	if (status >= 0)
4093 		return status;
4094 
4095 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
4096 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4097 
4098 	return status;
4099 }
4100 
4101 
4102 /*! Calls fs open_attr_dir() on the given vnode and returns a new
4103 	file descriptor for it.
4104 	Used by attr_dir_open(), and attr_dir_open_fd().
4105 */
4106 static int
4107 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
4108 {
4109 	fs_cookie cookie;
4110 	int status;
4111 
4112 	if (FS_CALL(vnode, open_attr_dir) == NULL)
4113 		return EOPNOTSUPP;
4114 
4115 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
4116 	if (status < 0)
4117 		return status;
4118 
4119 	// file is opened, create a fd
4120 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
4121 	if (status >= 0)
4122 		return status;
4123 
4124 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
4125 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
4126 
4127 	return status;
4128 }
4129 
4130 
4131 static int
4132 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
4133 	int openMode, int perms, bool kernel)
4134 {
4135 	struct vnode *directory;
4136 	int status;
4137 
4138 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
4139 
4140 	// get directory to put the new file in
4141 	status = get_vnode(mountID, directoryID, &directory, true, false);
4142 	if (status < B_OK)
4143 		return status;
4144 
4145 	status = create_vnode(directory, name, openMode, perms, kernel);
4146 	put_vnode(directory);
4147 
4148 	return status;
4149 }
4150 
4151 
4152 static int
4153 file_create(int fd, char *path, int openMode, int perms, bool kernel)
4154 {
4155 	char name[B_FILE_NAME_LENGTH];
4156 	struct vnode *directory;
4157 	int status;
4158 
4159 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
4160 
4161 	// get directory to put the new file in
4162 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4163 	if (status < 0)
4164 		return status;
4165 
4166 	status = create_vnode(directory, name, openMode, perms, kernel);
4167 
4168 	put_vnode(directory);
4169 	return status;
4170 }
4171 
4172 
4173 static int
4174 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char *name, int openMode, bool kernel)
4175 {
4176 	struct vnode *vnode;
4177 	int status;
4178 
4179 	if (name == NULL || *name == '\0')
4180 		return B_BAD_VALUE;
4181 
4182 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
4183 		mountID, directoryID, name, openMode));
4184 
4185 	// get the vnode matching the entry_ref
4186 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
4187 	if (status < B_OK)
4188 		return status;
4189 
4190 	status = open_vnode(vnode, openMode, kernel);
4191 	if (status < B_OK)
4192 		put_vnode(vnode);	// releases our reference; vnode must not be used below
4193 	else
4194 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
4195 	return status;
4196 }
4197 
4198 
4199 static int
4200 file_open(int fd, char *path, int openMode, bool kernel)
4201 {
4202 	int status = B_OK;
4203 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
4204 
4205 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
4206 		fd, path, openMode, kernel));
4207 
4208 	// get the vnode matching the vnode + path combination
4209 	struct vnode *vnode = NULL;
4210 	ino_t parentID;
4211 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
4212 	if (status != B_OK)
4213 		return status;
4214 
4215 	// open the vnode
4216 	status = open_vnode(vnode, openMode, kernel);
4217 	// put only on error -- otherwise our reference was transferred to the FD
4218 	if (status < B_OK)
4219 		put_vnode(vnode);
4220 	else {
4221 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
4222 			vnode->device, parentID, vnode->id, NULL);
4223 	}
4224 	return status;
4225 }
4226 
4227 
4228 static status_t
4229 file_close(struct file_descriptor *descriptor)
4230 {
4231 	struct vnode *vnode = descriptor->u.vnode;
4232 	status_t status = B_OK;
4233 
4234 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
4235 
4236 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
4237 	if (FS_CALL(vnode, close))
4238 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4239 
4240 	if (status == B_OK) {
4241 		// remove all outstanding locks for this team
4242 		release_advisory_lock(vnode, NULL);
4243 	}
4244 	return status;
4245 }
4246 
4247 
4248 static void
4249 file_free_fd(struct file_descriptor *descriptor)
4250 {
4251 	struct vnode *vnode = descriptor->u.vnode;
4252 
4253 	if (vnode != NULL) {
4254 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4255 		put_vnode(vnode);
4256 	}
4257 }
4258 
4259 
4260 static status_t
4261 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4262 {
4263 	struct vnode *vnode = descriptor->u.vnode;
4264 
4265 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4266 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4267 }
4268 
4269 
4270 static status_t
4271 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4272 {
4273 	struct vnode *vnode = descriptor->u.vnode;
4274 
4275 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4276 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4277 }
4278 
4279 
4280 static off_t
4281 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4282 {
4283 	off_t offset;
4284 
4285 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
4286 
4287 	// stat() the node
4288 	struct vnode *vnode = descriptor->u.vnode;
4289 	if (FS_CALL(vnode, read_stat) == NULL)
4290 		return EOPNOTSUPP;
4291 
4292 	struct stat stat;
4293 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4294 		vnode->private_node, &stat);
4295 	if (status < B_OK)
4296 		return status;
4297 
4298 	// some kinds of files are not seekable
4299 	switch (stat.st_mode & S_IFMT) {
4300 		case S_IFIFO:
4301 			return ESPIPE;
4302 // TODO: We don't catch sockets here, but they are not seekable either (ESPIPE)!
4303 		// The Open Group Base Specs don't mention any file types besides pipes,
4304 		// fifos, and sockets specially, so we allow seeking all other types.
4305 		case S_IFREG:
4306 		case S_IFBLK:
4307 		case S_IFDIR:
4308 		case S_IFLNK:
4309 		case S_IFCHR:
4310 			break;
4311 	}
4312 
4313 	switch (seekType) {
4314 		case SEEK_SET:
4315 			offset = 0;
4316 			break;
4317 		case SEEK_CUR:
4318 			offset = descriptor->pos;
4319 			break;
4320 		case SEEK_END:
4321 			offset = stat.st_size;
4322 			break;
4323 		default:
4324 			return B_BAD_VALUE;
4325 	}
4326 
4327 	// assumes off_t is 64 bits wide
4328 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4329 		return EOVERFLOW;
4330 
4331 	pos += offset;
4332 	if (pos < 0)
4333 		return B_BAD_VALUE;
4334 
4335 	return descriptor->pos = pos;
4336 }
4337 
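/*	[Editor's example] A worked instance (kept out of the build) of the
	overflow guard in file_seek() above: the test is written as
	"LONGLONG_MAX - offset < pos" so the potentially wrapping addition is
	never performed. The concrete values are made up.
*/
#if 0
static void
example_seek_overflow()
{
	off_t offset = LONGLONG_MAX - 0xf;	// hypothetical st_size for SEEK_END
	off_t pos = 0x20;					// requested relative offset

	// LONGLONG_MAX - offset == 0xf < pos, so file_seek() would return
	// EOVERFLOW here instead of computing a negative position
	if (offset > 0 && LONGLONG_MAX - offset < pos)
		dprintf("seek would overflow\n");
}
#endif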
4338 
4339 static status_t
4340 file_select(struct file_descriptor *descriptor, uint8 event,
4341 	struct selectsync *sync)
4342 {
4343 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
4344 
4345 	struct vnode *vnode = descriptor->u.vnode;
4346 
4347 	// If the FS has no select() hook, notify select() now.
4348 	if (FS_CALL(vnode, select) == NULL)
4349 		return notify_select_event(sync, event);
4350 
4351 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
4352 		descriptor->cookie, event, 0, sync);
4353 }
4354 
4355 
4356 static status_t
4357 file_deselect(struct file_descriptor *descriptor, uint8 event,
4358 	struct selectsync *sync)
4359 {
4360 	struct vnode *vnode = descriptor->u.vnode;
4361 
4362 	if (FS_CALL(vnode, deselect) == NULL)
4363 		return B_OK;
4364 
4365 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
4366 		descriptor->cookie, event, sync);
4367 }
4368 
4369 
4370 static status_t
4371 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char *name, int perms, bool kernel)
4372 {
4373 	struct vnode *vnode;
4374 	ino_t newID;
4375 	status_t status;
4376 
4377 	if (name == NULL || *name == '\0')
4378 		return B_BAD_VALUE;
4379 
4380 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
4381 
4382 	status = get_vnode(mountID, parentID, &vnode, true, false);
4383 	if (status < B_OK)
4384 		return status;
4385 
4386 	if (FS_CALL(vnode, create_dir))
4387 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
4388 	else
4389 		status = EROFS;
4390 
4391 	put_vnode(vnode);
4392 	return status;
4393 }
4394 
4395 
4396 static status_t
4397 dir_create(int fd, char *path, int perms, bool kernel)
4398 {
4399 	char filename[B_FILE_NAME_LENGTH];
4400 	struct vnode *vnode;
4401 	ino_t newID;
4402 	status_t status;
4403 
4404 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
4405 
4406 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4407 	if (status < 0)
4408 		return status;
4409 
4410 	if (FS_CALL(vnode, create_dir))
4411 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
4412 	else
4413 		status = EROFS;
4414 
4415 	put_vnode(vnode);
4416 	return status;
4417 }
4418 
4419 
4420 static int
4421 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char *name, bool kernel)
4422 {
4423 	struct vnode *vnode;
4424 	int status;
4425 
4426 	FUNCTION(("dir_open_entry_ref()\n"));
4427 
4428 	if (name && *name == '\0')
4429 		return B_BAD_VALUE;
4430 
4431 	// get the vnode matching the entry_ref/node_ref
4432 	if (name)
4433 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
4434 	else
4435 		status = get_vnode(mountID, parentID, &vnode, true, false);
4436 	if (status < B_OK)
4437 		return status;
4438 
4439 	status = open_dir_vnode(vnode, kernel);
4440 	if (status < B_OK)
4441 		put_vnode(vnode);	// releases our reference; vnode must not be used below
4442 	else
4443 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
4444 	return status;
4445 }
4446 
4447 
4448 static int
4449 dir_open(int fd, char *path, bool kernel)
4450 {
4451 	int status = B_OK;
4452 
4453 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
4454 
4455 	// get the vnode matching the vnode + path combination
4456 	struct vnode *vnode = NULL;
4457 	ino_t parentID;
4458 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
4459 	if (status != B_OK)
4460 		return status;
4461 
4462 	// open the dir
4463 	status = open_dir_vnode(vnode, kernel);
4464 	if (status < B_OK)
4465 		put_vnode(vnode);	// releases our reference; vnode must not be used below
4466 	else
4467 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
4468 	return status;
4469 }
4470 
4471 
4472 static status_t
4473 dir_close(struct file_descriptor *descriptor)
4474 {
4475 	struct vnode *vnode = descriptor->u.vnode;
4476 
4477 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
4478 
4479 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
4480 	if (FS_CALL(vnode, close_dir))
4481 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4482 
4483 	return B_OK;
4484 }
4485 
4486 
4487 static void
4488 dir_free_fd(struct file_descriptor *descriptor)
4489 {
4490 	struct vnode *vnode = descriptor->u.vnode;
4491 
4492 	if (vnode != NULL) {
4493 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4494 		put_vnode(vnode);
4495 	}
4496 }
4497 
4498 
4499 static status_t
4500 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4501 {
4502 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
4503 }
4504 
4505 
4506 static void
4507 fix_dirent(struct vnode *parent, struct dirent *entry)
4508 {
4509 	// set d_pdev and d_pino
4510 	entry->d_pdev = parent->device;
4511 	entry->d_pino = parent->id;
4512 
4513 	// If this is the ".." entry and the directory is the root of a FS,
4514 	// we need to replace d_dev and d_ino with the actual values.
4515 	if (strcmp(entry->d_name, "..") == 0
4516 		&& parent->mount->root_vnode == parent
4517 		&& parent->mount->covers_vnode) {
4518 		inc_vnode_ref_count(parent);
4519 			// vnode_path_to_vnode() puts the node
4520 
4521 		struct vnode *vnode;
4522 		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
4523 			NULL, NULL);
4524 
4525 		if (status == B_OK) {
4526 			entry->d_dev = vnode->device;
4527 			entry->d_ino = vnode->id;
4528 			put_vnode(vnode);	// release the reference we were just handed
		}
4529 	} else {
4530 		// resolve mount points
4531 		struct vnode *vnode = NULL;
4532 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, true,
4533 			false);
4534 		if (status != B_OK)
4535 			return;
4536 
4537 		mutex_lock(&sVnodeCoveredByMutex);
4538 		if (vnode->covered_by) {
4539 			entry->d_dev = vnode->covered_by->device;
4540 			entry->d_ino = vnode->covered_by->id;
4541 		}
4542 		mutex_unlock(&sVnodeCoveredByMutex);
4543 
4544 		put_vnode(vnode);
4545 	}
4546 }
4547 
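/*	[Editor's example] Sketch (kept out of the build) of the mount point
	case fix_dirent() above handles: an entry that a volume is mounted on
	must report the dev/ino of the mounted volume's root (covered_by),
	not of the covered vnode on the parent FS.
*/
#if 0
static void
example_mount_point_entry(struct dirent *entry)
{
	// e.g. reading "/" yields an entry "mnt" whose d_dev/d_ino still
	// refer to the covered vnode until covered_by is followed
	struct vnode *vnode;
	if (get_vnode(entry->d_dev, entry->d_ino, &vnode, true, false) == B_OK) {
		mutex_lock(&sVnodeCoveredByMutex);
		if (vnode->covered_by != NULL) {
			entry->d_dev = vnode->covered_by->device;
			entry->d_ino = vnode->covered_by->id;
		}
		mutex_unlock(&sVnodeCoveredByMutex);
		put_vnode(vnode);
	}
}
#endif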
4548 
4549 static status_t
4550 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4551 {
4552 	if (!FS_CALL(vnode, read_dir))
4553 		return EOPNOTSUPP;
4554 
4555 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie, vnode->private_node, cookie, buffer, bufferSize, _count);
4556 	if (error != B_OK)
4557 		return error;
4558 
4559 	// we need to adjust the read dirents
4560 	if (*_count > 0) {
4561 		// XXX: Currently reading only one dirent is supported. Make this a loop (see the sketch after this function)!
4562 		fix_dirent(vnode, buffer);
4563 	}
4564 
4565 	return error;
4566 }
4567 
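/*	[Editor's example] Sketch (kept out of the build) of the loop the XXX
	above asks for: fixing up every dirent in the buffer, advancing by
	d_reclen, instead of only the first one.
*/
#if 0
static void
example_fix_all_dirents(struct vnode *vnode, struct dirent *buffer,
	uint32 count)
{
	struct dirent *entry = buffer;

	for (uint32 i = 0; i < count; i++) {
		fix_dirent(vnode, entry);
		// d_reclen covers the whole record including the name
		entry = (struct dirent *)((uint8 *)entry + entry->d_reclen);
	}
}
#endif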
4568 
4569 static status_t
4570 dir_rewind(struct file_descriptor *descriptor)
4571 {
4572 	struct vnode *vnode = descriptor->u.vnode;
4573 
4574 	if (FS_CALL(vnode, rewind_dir))
4575 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4576 
4577 	return EOPNOTSUPP;
4578 }
4579 
4580 
4581 static status_t
4582 dir_remove(int fd, char *path, bool kernel)
4583 {
4584 	char name[B_FILE_NAME_LENGTH];
4585 	struct vnode *directory;
4586 	status_t status;
4587 
4588 	if (path != NULL) {
4589 		// we need to make sure our path name doesn't end with "/", ".", or ".."
4590 		char *lastSlash = strrchr(path, '/');
4591 		if (lastSlash != NULL) {
4592 			char *leaf = lastSlash + 1;
4593 			if (!strcmp(leaf, ".."))
4594 				return B_NOT_ALLOWED;
4595 
4596 			// omit multiple slashes
4597 			while (lastSlash > path && lastSlash[-1] == '/') {
4598 				lastSlash--;
4599 			}
4600 
4601 			if (!leaf[0]
4602 				|| !strcmp(leaf, ".")) {
4603 				// "name/" -> "name", or "name/." -> "name"
4604 				lastSlash[0] = '\0';
4605 			}
4606 		} else if (!strcmp(path, ".."))
4607 			return B_NOT_ALLOWED;
4608 	}
4609 
4610 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4611 	if (status < B_OK)
4612 		return status;
4613 
4614 	if (FS_CALL(directory, remove_dir)) {
4615 		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
4616 			directory->private_node, name);
4617 	} else
4618 		status = EROFS;
4619 
4620 	put_vnode(directory);
4621 	return status;
4622 }
4623 
4624 
4625 static status_t
4626 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer,
4627 	size_t length)
4628 {
4629 	struct vnode *vnode = descriptor->u.vnode;
4630 
4631 	if (FS_CALL(vnode, ioctl)) {
4632 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4633 			descriptor->cookie, op, buffer, length);
4634 	}
4635 
4636 	return EOPNOTSUPP;
4637 }
4638 
4639 
4640 static status_t
4641 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4642 {
4643 	struct file_descriptor *descriptor;
4644 	struct vnode *vnode;
4645 	struct flock flock;
4646 
4647 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4648 		fd, op, argument, kernel ? "kernel" : "user"));
4649 
4650 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4651 	if (descriptor == NULL)
4652 		return B_FILE_ERROR;
4653 
4654 	status_t status = B_OK;
4655 
4656 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4657 		if (descriptor->type != FDTYPE_FILE)
4658 			status = B_BAD_VALUE;
4659 		else if (user_memcpy(&flock, (struct flock *)argument,
4660 				sizeof(struct flock)) < B_OK)
4661 			status = B_BAD_ADDRESS;
4662 
4663 		if (status != B_OK) {
4664 			put_fd(descriptor);
4665 			return status;
4666 		}
4667 	}
4668 
4669 	switch (op) {
4670 		case F_SETFD:
4671 		{
4672 			struct io_context *context = get_current_io_context(kernel);
4673 			// Set file descriptor flags
4674 
4675 			// O_CLOEXEC is the only flag available at this time
4676 			mutex_lock(&context->io_mutex);
4677 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
4678 			mutex_unlock(&context->io_mutex);
4679 
4680 			status = B_OK;
4681 			break;
4682 		}
4683 
4684 		case F_GETFD:
4685 		{
4686 			struct io_context *context = get_current_io_context(kernel);
4687 
4688 			// Get file descriptor flags
4689 			mutex_lock(&context->io_mutex);
4690 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4691 			mutex_unlock(&context->io_mutex);
4692 			break;
4693 		}
4694 
4695 		case F_SETFL:
4696 			// Set file descriptor open mode
4697 			if (FS_CALL(vnode, set_flags)) {
4698 				// we only accept changes to O_APPEND and O_NONBLOCK
4699 				argument &= O_APPEND | O_NONBLOCK;
4700 
4701 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4702 					vnode->private_node, descriptor->cookie, (int)argument);
4703 				if (status == B_OK) {
4704 					// update this descriptor's open_mode field
4705 					descriptor->open_mode = (descriptor->open_mode
4706 						& ~(O_APPEND | O_NONBLOCK)) | argument;
4707 				}
4708 			} else
4709 				status = EOPNOTSUPP;
4710 			break;
4711 
4712 		case F_GETFL:
4713 			// Get file descriptor open mode
4714 			status = descriptor->open_mode;
4715 			break;
4716 
4717 		case F_DUPFD:
4718 		{
4719 			struct io_context *context = get_current_io_context(kernel);
4720 
4721 			status = new_fd_etc(context, descriptor, (int)argument);
4722 			if (status >= 0) {
4723 				mutex_lock(&context->io_mutex);
4724 				fd_set_close_on_exec(context, status, false);	// on the new FD
4725 				mutex_unlock(&context->io_mutex);
4726 
4727 				atomic_add(&descriptor->ref_count, 1);
4728 			}
4729 			break;
4730 		}
4731 
4732 		case F_GETLK:
4733 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4734 			if (status == B_OK) {
4735 				// copy back flock structure
4736 				status = user_memcpy((struct flock *)argument, &flock,
4737 					sizeof(struct flock));
4738 			}
4739 			break;
4740 
4741 		case F_SETLK:
4742 		case F_SETLKW:
4743 			status = normalize_flock(descriptor, &flock);
4744 			if (status < B_OK)
4745 				break;
4746 
4747 			if (flock.l_type == F_UNLCK)
4748 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4749 			else {
4750 				// the open mode must match the lock type
4751 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
4752 						&& flock.l_type == F_WRLCK)
4753 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
4754 						&& flock.l_type == F_RDLCK))
4755 					status = B_FILE_ERROR;
4756 				else {
4757 					status = acquire_advisory_lock(descriptor->u.vnode, -1,
4758 						&flock, op == F_SETLKW);
4759 				}
4760 			}
4761 			break;
4762 
4763 		// ToDo: add support for more ops?
4764 
4765 		default:
4766 			status = B_BAD_VALUE;
4767 	}
4768 
4769 	put_fd(descriptor);
4770 	return status;
4771 }
4772 
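/*	[Editor's example] Userland sketch (kept out of the build) of two of
	the ops handled above. Note that F_DUPFD leaves the duplicate without
	the close-on-exec flag, so it has to be set explicitly if wanted.
*/
#if 0
#include <fcntl.h>

static int
example_dup_cloexec(int fd)
{
	int copy = fcntl(fd, F_DUPFD, 10);	// lowest free slot >= 10
	if (copy < 0)
		return -1;

	// mark only the copy close-on-exec
	if (fcntl(copy, F_SETFD, FD_CLOEXEC) < 0)
		return -1;

	return copy;
}
#endif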
4773 
4774 static status_t
4775 common_sync(int fd, bool kernel)
4776 {
4777 	struct file_descriptor *descriptor;
4778 	struct vnode *vnode;
4779 	status_t status;
4780 
4781 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
4782 
4783 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4784 	if (descriptor == NULL)
4785 		return B_FILE_ERROR;
4786 
4787 	if (FS_CALL(vnode, fsync) != NULL)
4788 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4789 	else
4790 		status = EOPNOTSUPP;
4791 
4792 	put_fd(descriptor);
4793 	return status;
4794 }
4795 
4796 
4797 static status_t
4798 common_lock_node(int fd, bool kernel)
4799 {
4800 	struct file_descriptor *descriptor;
4801 	struct vnode *vnode;
4802 
4803 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4804 	if (descriptor == NULL)
4805 		return B_FILE_ERROR;
4806 
4807 	status_t status = B_OK;
4808 
4809 	// We need to set the locking atomically - someone
4810 	// else might set one at the same time
4811 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
4812 			(addr_t)descriptor, NULL) != NULL)
4813 		status = B_BUSY;
4814 
4815 	put_fd(descriptor);
4816 	return status;
4817 }
4818 
4819 
4820 static status_t
4821 common_unlock_node(int fd, bool kernel)
4822 {
4823 	struct file_descriptor *descriptor;
4824 	struct vnode *vnode;
4825 
4826 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4827 	if (descriptor == NULL)
4828 		return B_FILE_ERROR;
4829 
4830 	status_t status = B_OK;
4831 
4832 	// We need to clear the lock atomically - someone
4833 	// else might set one at the same time
4834 	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
4835 			NULL, (addr_t)descriptor) != (int32)descriptor)
4836 		status = B_BAD_VALUE;
4837 
4838 	put_fd(descriptor);
4839 	return status;
4840 }
4841 
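/*	[Editor's example] The lock/unlock pair above relies on the
	compare-and-swap semantics of atomic_test_and_set(value, newValue,
	testAgainst): the new value is stored only if *value still equals
	testAgainst, and the previous value is always returned. A sketch
	(kept out of the build):
*/
#if 0
static status_t
example_try_mandatory_lock(struct vnode *vnode,
	struct file_descriptor *descriptor)
{
	// expect "no owner" (NULL); install ourselves only in that case
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			(addr_t)descriptor, 0) != 0)
		return B_BUSY;	// somebody else beat us to it

	return B_OK;
}
#endif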
4842 
4843 static status_t
4844 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4845 	bool kernel)
4846 {
4847 	struct vnode *vnode;
4848 	status_t status;
4849 
4850 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4851 	if (status < B_OK)
4852 		return status;
4853 
4854 	if (FS_CALL(vnode, read_symlink) != NULL) {
4855 		status = FS_CALL(vnode, read_symlink)(vnode->mount->cookie,
4856 			vnode->private_node, buffer, _bufferSize);
4857 	} else
4858 		status = B_BAD_VALUE;
4859 
4860 	put_vnode(vnode);
4861 	return status;
4862 }
4863 
4864 
4865 static status_t
4866 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4867 	bool kernel)
4868 {
4869 	// path validity checks have to be in the calling function!
4870 	char name[B_FILE_NAME_LENGTH];
4871 	struct vnode *vnode;
4872 	status_t status;
4873 
4874 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4875 
4876 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4877 	if (status < B_OK)
4878 		return status;
4879 
4880 	if (FS_CALL(vnode, create_symlink) != NULL)
4881 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4882 	else
4883 		status = EROFS;
4884 
4885 	put_vnode(vnode);
4886 
4887 	return status;
4888 }
4889 
4890 
4891 static status_t
4892 common_create_link(char *path, char *toPath, bool kernel)
4893 {
4894 	// path validity checks have to be in the calling function!
4895 	char name[B_FILE_NAME_LENGTH];
4896 	struct vnode *directory, *vnode;
4897 	status_t status;
4898 
4899 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4900 
4901 	status = path_to_dir_vnode(path, &directory, name, kernel);
4902 	if (status < B_OK)
4903 		return status;
4904 
4905 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4906 	if (status < B_OK)
4907 		goto err;
4908 
4909 	if (directory->mount != vnode->mount) {
4910 		status = B_CROSS_DEVICE_LINK;
4911 		goto err1;
4912 	}
4913 
4914 	if (FS_CALL(vnode, link) != NULL)
4915 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4916 	else
4917 		status = EROFS;
4918 
4919 err1:
4920 	put_vnode(vnode);
4921 err:
4922 	put_vnode(directory);
4923 
4924 	return status;
4925 }
4926 
4927 
4928 static status_t
4929 common_unlink(int fd, char *path, bool kernel)
4930 {
4931 	char filename[B_FILE_NAME_LENGTH];
4932 	struct vnode *vnode;
4933 	status_t status;
4934 
4935 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4936 
4937 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4938 	if (status < 0)
4939 		return status;
4940 
4941 	if (FS_CALL(vnode, unlink) != NULL)
4942 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4943 	else
4944 		status = EROFS;
4945 
4946 	put_vnode(vnode);
4947 
4948 	return status;
4949 }
4950 
4951 
4952 static status_t
4953 common_access(char *path, int mode, bool kernel)
4954 {
4955 	struct vnode *vnode;
4956 	status_t status;
4957 
4958 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4959 	if (status < B_OK)
4960 		return status;
4961 
4962 	if (FS_CALL(vnode, access) != NULL)
4963 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4964 	else
4965 		status = B_OK;
4966 
4967 	put_vnode(vnode);
4968 
4969 	return status;
4970 }
4971 
4972 
4973 static status_t
4974 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4975 {
4976 	struct vnode *fromVnode, *toVnode;
4977 	char fromName[B_FILE_NAME_LENGTH];
4978 	char toName[B_FILE_NAME_LENGTH];
4979 	status_t status;
4980 
4981 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4982 
4983 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4984 	if (status < 0)
4985 		return status;
4986 
4987 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4988 	if (status < 0)
4989 		goto err;
4990 
4991 	if (fromVnode->device != toVnode->device) {
4992 		status = B_CROSS_DEVICE_LINK;
4993 		goto err1;
4994 	}
4995 
4996 	if (FS_CALL(fromVnode, rename) != NULL)
4997 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4998 	else
4999 		status = EROFS;
5000 
5001 err1:
5002 	put_vnode(toVnode);
5003 err:
5004 	put_vnode(fromVnode);
5005 
5006 	return status;
5007 }
5008 
5009 
5010 static status_t
5011 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5012 {
5013 	struct vnode *vnode = descriptor->u.vnode;
5014 
5015 	FUNCTION(("common_read_stat: stat %p\n", stat));
5016 
5017 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
5018 		vnode->private_node, stat);
5019 
5020 	// fill in the st_dev and st_ino fields
5021 	if (status == B_OK) {
5022 		stat->st_dev = vnode->device;
5023 		stat->st_ino = vnode->id;
5024 	}
5025 
5026 	return status;
5027 }
5028 
5029 
5030 static status_t
5031 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5032 {
5033 	struct vnode *vnode = descriptor->u.vnode;
5034 
5035 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
5036 	if (!FS_CALL(vnode, write_stat))
5037 		return EROFS;
5038 
5039 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
5040 }
5041 
5042 
5043 static status_t
5044 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
5045 	struct stat *stat, bool kernel)
5046 {
5047 	struct vnode *vnode;
5048 	status_t status;
5049 
5050 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
5051 
5052 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5053 	if (status < 0)
5054 		return status;
5055 
5056 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
5057 
5058 	// fill in the st_dev and st_ino fields
5059 	if (status == B_OK) {
5060 		stat->st_dev = vnode->device;
5061 		stat->st_ino = vnode->id;
5062 	}
5063 
5064 	put_vnode(vnode);
5065 	return status;
5066 }
5067 
5068 
5069 static status_t
5070 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
5071 	const struct stat *stat, int statMask, bool kernel)
5072 {
5073 	struct vnode *vnode;
5074 	status_t status;
5075 
5076 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
5077 
5078 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
5079 	if (status < 0)
5080 		return status;
5081 
5082 	if (FS_CALL(vnode, write_stat))
5083 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
5084 	else
5085 		status = EROFS;
5086 
5087 	put_vnode(vnode);
5088 
5089 	return status;
5090 }
5091 
5092 
5093 static int
5094 attr_dir_open(int fd, char *path, bool kernel)
5095 {
5096 	struct vnode *vnode;
5097 	int status;
5098 
5099 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
5100 
5101 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5102 	if (status < B_OK)
5103 		return status;
5104 
5105 	status = open_attr_dir_vnode(vnode, kernel);
5106 	if (status < 0)
5107 		put_vnode(vnode);
5108 
5109 	return status;
5110 }
5111 
5112 
5113 static status_t
5114 attr_dir_close(struct file_descriptor *descriptor)
5115 {
5116 	struct vnode *vnode = descriptor->u.vnode;
5117 
5118 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
5119 
5120 	if (FS_CALL(vnode, close_attr_dir))
5121 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5122 
5123 	return B_OK;
5124 }
5125 
5126 
5127 static void
5128 attr_dir_free_fd(struct file_descriptor *descriptor)
5129 {
5130 	struct vnode *vnode = descriptor->u.vnode;
5131 
5132 	if (vnode != NULL) {
5133 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5134 		put_vnode(vnode);
5135 	}
5136 }
5137 
5138 
5139 static status_t
5140 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5141 {
5142 	struct vnode *vnode = descriptor->u.vnode;
5143 
5144 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
5145 
5146 	if (FS_CALL(vnode, read_attr_dir))
5147 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
5148 
5149 	return EOPNOTSUPP;
5150 }
5151 
5152 
5153 static status_t
5154 attr_dir_rewind(struct file_descriptor *descriptor)
5155 {
5156 	struct vnode *vnode = descriptor->u.vnode;
5157 
5158 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
5159 
5160 	if (FS_CALL(vnode, rewind_attr_dir))
5161 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5162 
5163 	return EOPNOTSUPP;
5164 }
5165 
5166 
5167 static int
5168 attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
5169 {
5170 	struct vnode *vnode;
5171 	fs_cookie cookie;
5172 	int status;
5173 
5174 	if (name == NULL || *name == '\0')
5175 		return B_BAD_VALUE;
5176 
5177 	vnode = get_vnode_from_fd(fd, kernel);
5178 	if (vnode == NULL)
5179 		return B_FILE_ERROR;
5180 
5181 	if (FS_CALL(vnode, create_attr) == NULL) {
5182 		status = EROFS;
5183 		goto err;
5184 	}
5185 
5186 	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
5187 	if (status < B_OK)
5188 		goto err;
5189 
5190 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5191 		return status;
5192 
5193 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
5194 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
5195 
5196 	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
5197 
5198 err:
5199 	put_vnode(vnode);
5200 
5201 	return status;
5202 }
5203 
5204 
5205 static int
5206 attr_open(int fd, const char *name, int openMode, bool kernel)
5207 {
5208 	struct vnode *vnode;
5209 	fs_cookie cookie;
5210 	int status;
5211 
5212 	if (name == NULL || *name == '\0')
5213 		return B_BAD_VALUE;
5214 
5215 	vnode = get_vnode_from_fd(fd, kernel);
5216 	if (vnode == NULL)
5217 		return B_FILE_ERROR;
5218 
5219 	if (FS_CALL(vnode, open_attr) == NULL) {
5220 		status = EOPNOTSUPP;
5221 		goto err;
5222 	}
5223 
5224 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
5225 	if (status < B_OK)
5226 		goto err;
5227 
5228 	// now we only need a file descriptor for this attribute and we're done
5229 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5230 		return status;
5231 
5232 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
5233 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
5234 
5235 err:
5236 	put_vnode(vnode);
5237 
5238 	return status;
5239 }
5240 
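/*	[Editor's example] Userland sketch (kept out of the build) of what
	lands in attr_create()/attr_open() above: the one-shot <fs_attr.h>
	helper, which opens, writes, and closes the attribute in one call.
	The attribute name is made up.
*/
#if 0
#include <string.h>
#include <fs_attr.h>
#include <TypeConstants.h>

static ssize_t
example_write_comment(int fd)
{
	const char *comment = "reviewed";

	return fs_write_attr(fd, "note:comment", B_STRING_TYPE, 0,
		comment, strlen(comment) + 1);
}
#endif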
5241 
5242 static status_t
5243 attr_close(struct file_descriptor *descriptor)
5244 {
5245 	struct vnode *vnode = descriptor->u.vnode;
5246 
5247 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
5248 
5249 	if (FS_CALL(vnode, close_attr))
5250 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5251 
5252 	return B_OK;
5253 }
5254 
5255 
5256 static void
5257 attr_free_fd(struct file_descriptor *descriptor)
5258 {
5259 	struct vnode *vnode = descriptor->u.vnode;
5260 
5261 	if (vnode != NULL) {
5262 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5263 		put_vnode(vnode);
5264 	}
5265 }
5266 
5267 
5268 static status_t
5269 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
5270 {
5271 	struct vnode *vnode = descriptor->u.vnode;
5272 
5273 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
5274 	if (!FS_CALL(vnode, read_attr))
5275 		return EOPNOTSUPP;
5276 
5277 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
5278 }
5279 
5280 
5281 static status_t
5282 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
5283 {
5284 	struct vnode *vnode = descriptor->u.vnode;
5285 
5286 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5287 	if (!FS_CALL(vnode, write_attr))
5288 		return EOPNOTSUPP;
5289 
5290 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
5291 }
5292 
5293 
5294 static off_t
5295 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
5296 {
5297 	off_t offset;
5298 
5299 	switch (seekType) {
5300 		case SEEK_SET:
5301 			offset = 0;
5302 			break;
5303 		case SEEK_CUR:
5304 			offset = descriptor->pos;
5305 			break;
5306 		case SEEK_END:
5307 		{
5308 			struct vnode *vnode = descriptor->u.vnode;
5309 			struct stat stat;
5310 			status_t status;
5311 
5312 			if (FS_CALL(vnode, read_attr_stat) == NULL)
5313 				return EOPNOTSUPP;
5314 
5315 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
5316 			if (status < B_OK)
5317 				return status;
5318 
5319 			offset = stat.st_size;
5320 			break;
5321 		}
5322 		default:
5323 			return B_BAD_VALUE;
5324 	}
5325 
5326 	// assumes off_t is 64 bits wide
5327 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5328 		return EOVERFLOW;
5329 
5330 	pos += offset;
5331 	if (pos < 0)
5332 		return B_BAD_VALUE;
5333 
5334 	return descriptor->pos = pos;
5335 }
5336 
5337 
5338 static status_t
5339 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5340 {
5341 	struct vnode *vnode = descriptor->u.vnode;
5342 
5343 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
5344 
5345 	if (!FS_CALL(vnode, read_attr_stat))
5346 		return EOPNOTSUPP;
5347 
5348 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
5349 }
5350 
5351 
5352 static status_t
5353 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5354 {
5355 	struct vnode *vnode = descriptor->u.vnode;
5356 
5357 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
5358 
5359 	if (!FS_CALL(vnode, write_attr_stat))
5360 		return EROFS;
5361 
5362 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
5363 }
5364 
5365 
5366 static status_t
5367 attr_remove(int fd, const char *name, bool kernel)
5368 {
5369 	struct file_descriptor *descriptor;
5370 	struct vnode *vnode;
5371 	status_t status;
5372 
5373 	if (name == NULL || *name == '\0')
5374 		return B_BAD_VALUE;
5375 
5376 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
5377 
5378 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5379 	if (descriptor == NULL)
5380 		return B_FILE_ERROR;
5381 
5382 	if (FS_CALL(vnode, remove_attr))
5383 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
5384 	else
5385 		status = EROFS;
5386 
5387 	put_fd(descriptor);
5388 
5389 	return status;
5390 }
5391 
5392 
5393 static status_t
5394 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
5395 {
5396 	struct file_descriptor *fromDescriptor, *toDescriptor;
5397 	struct vnode *fromVnode, *toVnode;
5398 	status_t status;
5399 
5400 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
5401 		return B_BAD_VALUE;
5402 
5403 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
5404 
5405 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
5406 	if (fromDescriptor == NULL)
5407 		return B_FILE_ERROR;
5408 
5409 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
5410 	if (toDescriptor == NULL) {
5411 		status = B_FILE_ERROR;
5412 		goto err;
5413 	}
5414 
5415 	// are the files on the same volume?
5416 	if (fromVnode->device != toVnode->device) {
5417 		status = B_CROSS_DEVICE_LINK;
5418 		goto err1;
5419 	}
5420 
5421 	if (FS_CALL(fromVnode, rename_attr))
5422 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
5423 	else
5424 		status = EROFS;
5425 
5426 err1:
5427 	put_fd(toDescriptor);
5428 err:
5429 	put_fd(fromDescriptor);
5430 
5431 	return status;
5432 }
5433 
5434 
5435 static status_t
5436 index_dir_open(dev_t mountID, bool kernel)
5437 {
5438 	struct fs_mount *mount;
5439 	fs_cookie cookie;
5440 
5441 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
5442 
5443 	status_t status = get_mount(mountID, &mount);
5444 	if (status < B_OK)
5445 		return status;
5446 
5447 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
5448 		status = EOPNOTSUPP;
5449 		goto out;
5450 	}
5451 
5452 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
5453 	if (status < B_OK)
5454 		goto out;
5455 
5456 	// get fd for the index directory
5457 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
5458 	if (status >= 0)
5459 		goto out;
5460 
5461 	// something went wrong
5462 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
5463 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
5464 
5465 out:
5466 	put_mount(mount);
5467 	return status;
5468 }
5469 
5470 
5471 static status_t
5472 index_dir_close(struct file_descriptor *descriptor)
5473 {
5474 	struct fs_mount *mount = descriptor->u.mount;
5475 
5476 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
5477 
5478 	if (FS_MOUNT_CALL(mount, close_index_dir))
5479 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
5480 
5481 	return B_OK;
5482 }
5483 
5484 
5485 static void
5486 index_dir_free_fd(struct file_descriptor *descriptor)
5487 {
5488 	struct fs_mount *mount = descriptor->u.mount;
5489 
5490 	if (mount != NULL) {
5491 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
5492 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5493 		//put_vnode(vnode);
5494 	}
5495 }
5496 
5497 
5498 static status_t
5499 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5500 {
5501 	struct fs_mount *mount = descriptor->u.mount;
5502 
5503 	if (FS_MOUNT_CALL(mount, read_index_dir))
5504 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5505 
5506 	return EOPNOTSUPP;
5507 }
5508 
5509 
5510 static status_t
5511 index_dir_rewind(struct file_descriptor *descriptor)
5512 {
5513 	struct fs_mount *mount = descriptor->u.mount;
5514 
5515 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
5516 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
5517 
5518 	return EOPNOTSUPP;
5519 }
5520 
5521 
5522 static status_t
5523 index_create(dev_t mountID, const char *name, uint32 type, uint32 flags, bool kernel)
5524 {
5525 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5526 
5527 	struct fs_mount *mount;
5528 	status_t status = get_mount(mountID, &mount);
5529 	if (status < B_OK)
5530 		return status;
5531 
5532 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
5533 		status = EROFS;
5534 		goto out;
5535 	}
5536 
5537 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
5538 
5539 out:
5540 	put_mount(mount);
5541 	return status;
5542 }
5543 
5544 
5545 #if 0
5546 static status_t
5547 index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5548 {
5549 	struct vnode *vnode = descriptor->u.vnode;
5550 
5551 	// ToDo: currently unused!
5552 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
5553 	if (!FS_CALL(vnode, read_index_stat))
5554 		return EOPNOTSUPP;
5555 
5556 	return EOPNOTSUPP;
5557 	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
5558 }
5559 
5560 
5561 static void
5562 index_free_fd(struct file_descriptor *descriptor)
5563 {
5564 	struct vnode *vnode = descriptor->u.vnode;
5565 
5566 	if (vnode != NULL) {
5567 		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5568 		put_vnode(vnode);
5569 	}
5570 }
5571 #endif
5572 
5573 
5574 static status_t
5575 index_name_read_stat(dev_t mountID, const char *name, struct stat *stat, bool kernel)
5576 {
5577 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5578 
5579 	struct fs_mount *mount;
5580 	status_t status = get_mount(mountID, &mount);
5581 	if (status < B_OK)
5582 		return status;
5583 
5584 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
5585 		status = EOPNOTSUPP;
5586 		goto out;
5587 	}
5588 
5589 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
5590 
5591 out:
5592 	put_mount(mount);
5593 	return status;
5594 }
5595 
5596 
5597 static status_t
5598 index_remove(dev_t mountID, const char *name, bool kernel)
5599 {
5600 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5601 
5602 	struct fs_mount *mount;
5603 	status_t status = get_mount(mountID, &mount);
5604 	if (status < B_OK)
5605 		return status;
5606 
5607 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
5608 		status = EROFS;
5609 		goto out;
5610 	}
5611 
5612 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
5613 
5614 out:
5615 	put_mount(mount);
5616 	return status;
5617 }
5618 
5619 
5620 /*!	ToDo: the query FS API is still pretty much the same as in R5.
5621 		It would be nice if the file systems would get some more kernel
5622 		support for queries.
5623 		For example, query parsing should be moved into the kernel.
5624 */
5625 static int
5626 query_open(dev_t device, const char *query, uint32 flags,
5627 	port_id port, int32 token, bool kernel)
5628 {
5629 	struct fs_mount *mount;
5630 	fs_cookie cookie;
5631 
5632 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
5633 
5634 	status_t status = get_mount(device, &mount);
5635 	if (status < B_OK)
5636 		return status;
5637 
5638 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
5639 		status = EOPNOTSUPP;
5640 		goto out;
5641 	}
5642 
5643 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
5644 	if (status < B_OK)
5645 		goto out;
5646 
5647 	// get fd for the query
5648 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
5649 	if (status >= 0)
5650 		goto out;
5651 
5652 	// something went wrong
5653 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
5654 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
5655 
5656 out:
5657 	put_mount(mount);
5658 	return status;
5659 }
5660 
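/*	[Editor's example] Userland sketch (kept out of the build) of the
	<fs_query.h> API that ends up in query_open() above. The query string
	is made up; its syntax is parsed by the file system (see the ToDo
	above).
*/
#if 0
#include <stdio.h>
#include <fs_query.h>

static void
example_run_query(dev_t device)
{
	DIR *query = fs_open_query(device, "name==*.cpp", 0);
	if (query == NULL)
		return;

	struct dirent *entry;
	while ((entry = fs_read_query(query)) != NULL)
		printf("match: %s\n", entry->d_name);

	fs_close_query(query);
}
#endif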
5661 
5662 static status_t
5663 query_close(struct file_descriptor *descriptor)
5664 {
5665 	struct fs_mount *mount = descriptor->u.mount;
5666 
5667 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5668 
5669 	if (FS_MOUNT_CALL(mount, close_query))
5670 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5671 
5672 	return B_OK;
5673 }
5674 
5675 
5676 static void
5677 query_free_fd(struct file_descriptor *descriptor)
5678 {
5679 	struct fs_mount *mount = descriptor->u.mount;
5680 
5681 	if (mount != NULL) {
5682 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5683 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5684 		//put_vnode(vnode);
5685 	}
5686 }
5687 
5688 
5689 static status_t
5690 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5691 {
5692 	struct fs_mount *mount = descriptor->u.mount;
5693 
5694 	if (FS_MOUNT_CALL(mount, read_query))
5695 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5696 
5697 	return EOPNOTSUPP;
5698 }
5699 
5700 
5701 static status_t
5702 query_rewind(struct file_descriptor *descriptor)
5703 {
5704 	struct fs_mount *mount = descriptor->u.mount;
5705 
5706 	if (FS_MOUNT_CALL(mount, rewind_query))
5707 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5708 
5709 	return EOPNOTSUPP;
5710 }
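

/*	Illustrative sketch (not part of this file): the query FDs produced by
	query_open() above are what the userland C API from <fs_query.h> is
	built on. A typical consumer, assuming a query-capable (e.g. BFS)
	volume mounted at "/boot", looks roughly like this:

		#include <fs_info.h>
		#include <fs_query.h>
		#include <stdio.h>

		dev_t device = dev_for_path("/boot");
		DIR *query = fs_open_query(device, "name==*.cpp", 0);
		if (query != NULL) {
			struct dirent *entry;
			while ((entry = fs_read_query(query)) != NULL)
				printf("match: %s\n", entry->d_name);
			fs_close_query(query);
		}
*/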
5711 
5712 
5713 //	#pragma mark - General File System functions
5714 
5715 
5716 static dev_t
5717 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5718 	const char *args, bool kernel)
5719 {
5720 	struct fs_mount *mount;
5721 	status_t status = 0;
5722 
5723 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5724 
5725 	// The path is always safe, we just have to make sure that fsName is
5726 	// at least minimally valid - we can't make any assumptions about args, though.
5727 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5728 	// We'll get it from the DDM later.
5729 	if (fsName == NULL) {
5730 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
5731 			return B_BAD_VALUE;
5732 	} else if (fsName[0] == '\0')
5733 		return B_BAD_VALUE;
5734 
5735 	RecursiveLocker mountOpLocker(sMountOpLock);
5736 
5737 	// Helper to delete a newly created file device on failure.
5738 	// Not exactly beautiful, but helps to keep the code below cleaner.
5739 	struct FileDeviceDeleter {
5740 		FileDeviceDeleter() : id(-1) {}
5741 		~FileDeviceDeleter()
5742 		{
5743 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5744 		}
5745 
5746 		partition_id id;
5747 	} fileDeviceDeleter;
5748 
5749 	// If the file system is not a "virtual" one, the device argument should
5750 	// point to a real file/device (if given at all).
5751 	// get the partition
5752 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5753 	KPartition *partition = NULL;
5754 	KPath normalizedDevice;
5755 	bool newlyCreatedFileDevice = false;
5756 
5757 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5758 		// normalize the device path
5759 		status = normalizedDevice.SetTo(device, true);
5760 		if (status != B_OK)
5761 			return status;
5762 
5763 		// get a corresponding partition from the DDM
5764 		partition = ddm->RegisterPartition(normalizedDevice.Path());
5765 
5766 		if (!partition) {
5767 			// Partition not found: This either means the user supplied
5768 			// an invalid path, or the path refers to an image file. We try
5769 			// to let the DDM create a file device for the path.
5770 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5771 				&newlyCreatedFileDevice);
5772 			if (deviceID >= 0) {
5773 				partition = ddm->RegisterPartition(deviceID);
5774 				if (newlyCreatedFileDevice)
5775 					fileDeviceDeleter.id = deviceID;
5776 			}
5777 		}
5778 
5779 		if (!partition) {
5780 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5781 				normalizedDevice.Path()));
5782 			return B_ENTRY_NOT_FOUND;
5783 		}
5784 
5785 		device = normalizedDevice.Path();
5786 			// correct path to file device
5787 	}
5788 	PartitionRegistrar partitionRegistrar(partition, true);
5789 
5790 	// Write lock the partition's device. For the time being, we keep the lock
5791 	// until we're done mounting -- not nice, but it ensures that no one is
5792 	// interfering.
5793 	// TODO: Find a better solution.
5794 	KDiskDevice *diskDevice = NULL;
5795 	if (partition) {
5796 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5797 		if (!diskDevice) {
5798 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5799 			return B_ERROR;
5800 		}
5801 	}
5802 
5803 	DeviceWriteLocker writeLocker(diskDevice, true);
5804 		// this takes over the write lock acquired before
5805 
5806 	if (partition) {
5807 	// make sure that the partition is not busy
5808 		if (partition->IsBusy()) {
5809 			TRACE(("fs_mount(): Partition is busy.\n"));
5810 			return B_BUSY;
5811 		}
5812 
5813 		// if no FS name had been supplied, we get it from the partition
5814 		if (!fsName) {
5815 			KDiskSystem *diskSystem = partition->DiskSystem();
5816 			if (!diskSystem) {
5817 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5818 					"recognize it.\n"));
5819 				return B_BAD_VALUE;
5820 			}
5821 
5822 			if (!diskSystem->IsFileSystem()) {
5823 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5824 					"partitioning system.\n"));
5825 				return B_BAD_VALUE;
5826 			}
5827 
5828 			// The disk system name will not change, and the KDiskSystem
5829 			// object will not go away while the disk device is locked (and
5830 			// the partition has a reference to it), so this is safe.
5831 			fsName = diskSystem->Name();
5832 		}
5833 	}
5834 
5835 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5836 	if (mount == NULL)
5837 		return B_NO_MEMORY;
5838 
5839 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5840 
5841 	mount->fs_name = get_file_system_name(fsName);
5842 	if (mount->fs_name == NULL) {
5843 		status = B_NO_MEMORY;
5844 		goto err1;
5845 	}
5846 
5847 	mount->device_name = strdup(device);
5848 		// "device" can be NULL
5849 
5850 	mount->fs = get_file_system(fsName);
5851 	if (mount->fs == NULL) {
5852 		status = ENODEV;
5853 		goto err3;
5854 	}
5855 
5856 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5857 	if (status < B_OK)
5858 		goto err4;
5859 
5860 	// initialize structure
5861 	mount->id = sNextMountID++;
5862 	mount->partition = NULL;
5863 	mount->root_vnode = NULL;
5864 	mount->covers_vnode = NULL;
5865 	mount->cookie = NULL;
5866 	mount->unmounting = false;
5867 	mount->owns_file_device = false;
5868 
5869 	// insert mount struct into list before we call FS's mount() function
5870 	// so that vnodes can be created for this mount
5871 	mutex_lock(&sMountMutex);
5872 	hash_insert(sMountsTable, mount);
5873 	mutex_unlock(&sMountMutex);
5874 
5875 	ino_t rootID;
5876 
5877 	if (!sRoot) {
5878 		// we haven't mounted anything yet
5879 		if (strcmp(path, "/") != 0) {
5880 			status = B_ERROR;
5881 			goto err5;
5882 		}
5883 
5884 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5885 		if (status < 0) {
5886 			// ToDo: why should we hide the error code from the file system here?
5887 			//status = ERR_VFS_GENERAL;
5888 			goto err5;
5889 		}
5890 	} else {
5891 		struct vnode *coveredVnode;
5892 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5893 		if (status < B_OK)
5894 			goto err5;
5895 
5896 		// make sure the covered vnode is a directory
5897 		struct stat coveredNodeStat;
5898 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5899 			coveredVnode->private_node, &coveredNodeStat);
5900 		if (status < B_OK)
5901 			goto err5;
5902 
5903 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5904 			status = B_NOT_A_DIRECTORY;
5905 			goto err5;
5906 		}
5907 
5908 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5909 			// this is already a mount point
5910 			status = B_BUSY;
5911 			goto err5;
5912 		}
5913 
5914 		mount->covers_vnode = coveredVnode;
5915 
5916 		// mount it
5917 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5918 		if (status < B_OK)
5919 			goto err6;
5920 	}
5921 
5922 	// the root node is supposed to be owned by the file system - it must
5923 	// exist at this point
5924 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5925 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5926 		panic("fs_mount: file system does not own its root node!\n");
5927 		status = B_ERROR;
5928 		goto err7;
5929 	}
5930 
5931 	// No race here, since fs_mount() is the only function changing
5932 	// covers_vnode (and holds sMountOpLock at that time).
5933 	mutex_lock(&sVnodeCoveredByMutex);
5934 	if (mount->covers_vnode)
5935 		mount->covers_vnode->covered_by = mount->root_vnode;
5936 	mutex_unlock(&sVnodeCoveredByMutex);
5937 
5938 	if (!sRoot)
5939 		sRoot = mount->root_vnode;
5940 
5941 	// supply the partition (if any) with the mount cookie and mark it mounted
5942 	if (partition) {
5943 		partition->SetMountCookie(mount->cookie);
5944 		partition->SetVolumeID(mount->id);
5945 
5946 		// keep a partition reference as long as the partition is mounted
5947 		partitionRegistrar.Detach();
5948 		mount->partition = partition;
5949 		mount->owns_file_device = newlyCreatedFileDevice;
5950 		fileDeviceDeleter.id = -1;
5951 	}
5952 
5953 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
5954 		mount->covers_vnode ? mount->covers_vnode->id : -1);
5955 
5956 	return mount->id;
5957 
5958 err7:
5959 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5960 err6:
5961 	if (mount->covers_vnode)
5962 		put_vnode(mount->covers_vnode);
5963 err5:
5964 	mutex_lock(&sMountMutex);
5965 	hash_remove(sMountsTable, mount);
5966 	mutex_unlock(&sMountMutex);
5967 
5968 	recursive_lock_destroy(&mount->rlock);
5969 err4:
5970 	put_file_system(mount->fs);
5971 	free(mount->device_name);
5972 err3:
5973 	free(mount->fs_name);
5974 err1:
5975 	free(mount);
5976 
5977 	return status;
5978 }
5979 
5980 
5981 static status_t
5982 fs_unmount(char *path, dev_t mountID, uint32 flags, bool kernel)
5983 {
5984 	struct vnode *vnode = NULL;
5985 	struct fs_mount *mount;
5986 	status_t err;
5987 
5988 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
5989 		kernel));
5990 
5991 	if (path != NULL) {
5992 		err = path_to_vnode(path, true, &vnode, NULL, kernel);
5993 		if (err != B_OK)
5994 			return B_ENTRY_NOT_FOUND;
5995 	}
5996 
5997 	RecursiveLocker mountOpLocker(sMountOpLock);
5998 
5999 	mount = find_mount(path != NULL ? vnode->device : mountID);
6000 	if (mount == NULL) {
6001 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
6002 			vnode);
6003 	}
6004 
6005 	if (path != NULL) {
6006 		put_vnode(vnode);
6007 
6008 		if (mount->root_vnode != vnode) {
6009 			// not a mount point
6010 			return B_BAD_VALUE;
6011 		}
6012 	}
6013 
6014 	// if the volume is associated with a partition, lock the device of the
6015 	// partition as long as we are unmounting
6016 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
6017 	KPartition *partition = mount->partition;
6018 	KDiskDevice *diskDevice = NULL;
6019 	if (partition) {
6020 		if (partition->Device() == NULL) {
6021 			dprintf("fs_unmount(): There is no device!\n");
6022 			return B_ERROR;
6023 		}
6024 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
6025 		if (!diskDevice) {
6026 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
6027 			return B_ERROR;
6028 		}
6029 	}
6030 	DeviceWriteLocker writeLocker(diskDevice, true);
6031 
6032 	// make sure that the partition is not busy
6033 	if (partition) {
6034 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
6035 			TRACE(("fs_unmount(): Partition is busy.\n"));
6036 			return B_BUSY;
6037 		}
6038 	}
6039 
6040 	// grab the vnode master mutex to keep someone from creating
6041 	// a vnode while we're figuring out if we can continue
6042 	mutex_lock(&sVnodeMutex);
6043 
6044 	bool disconnectedDescriptors = false;
6045 
6046 	while (true) {
6047 		bool busy = false;
6048 
6049 		// cycle through the list of vnodes associated with this mount and
6050 		// make sure none of them is busy or still referenced
6051 		vnode = NULL;
6052 		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
6053 				vnode)) != NULL) {
6054 			// The root vnode ref_count needs to be 1 here (the mount has a
6055 			// reference).
6056 			if (vnode->busy
6057 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
6058 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
6059 				// there are still vnodes in use on this mount, so we cannot
6060 				// unmount yet
6061 				busy = true;
6062 				break;
6063 			}
6064 		}
6065 
6066 		if (!busy)
6067 			break;
6068 
6069 		if ((flags & B_FORCE_UNMOUNT) == 0) {
6070 			mutex_unlock(&sVnodeMutex);
6071 			put_vnode(mount->root_vnode);
6072 
6073 			return B_BUSY;
6074 		}
6075 
6076 		if (disconnectedDescriptors) {
6077 			// wait a bit until the last access is finished, and then try again
6078 			mutex_unlock(&sVnodeMutex);
6079 			snooze(100000);
6080 			// TODO: if there is some kind of bug that prevents the ref counts
6081 			//	from getting back to zero, this will fall into an endless loop...
6082 			mutex_lock(&sVnodeMutex);
6083 			continue;
6084 		}
6085 
6086 		// the file system is still busy - but we're forced to unmount it,
6087 		// so let's disconnect all open file descriptors
6088 
6089 		mount->unmounting = true;
6090 			// prevent new vnodes from being created
6091 
6092 		mutex_unlock(&sVnodeMutex);
6093 
6094 		disconnect_mount_or_vnode_fds(mount, NULL);
6095 		disconnectedDescriptors = true;
6096 
6097 		mutex_lock(&sVnodeMutex);
6098 	}
6099 
6100 	// we can safely continue; mark all of the vnodes busy and put this
6101 	// mount structure into unmounting state
6102 	mount->unmounting = true;
6103 
6104 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
6105 		vnode->busy = true;
6106 
6107 		if (vnode->ref_count == 0) {
6108 			// this vnode has been unused before
6109 			list_remove_item(&sUnusedVnodeList, vnode);
6110 			sUnusedVnodes--;
6111 		}
6112 	}
6113 
6114 	// The ref_count of the root node is 1 at this point; see above for why
6115 	mount->root_vnode->ref_count--;
6116 
6117 	mutex_unlock(&sVnodeMutex);
6118 
6119 	mutex_lock(&sVnodeCoveredByMutex);
6120 	mount->covers_vnode->covered_by = NULL;
6121 	mutex_unlock(&sVnodeCoveredByMutex);
6122 	put_vnode(mount->covers_vnode);
6123 
6124 	// Free all vnodes associated with this mount.
6125 	// They will be removed from the mount list by free_vnode(), so
6126 	// we don't have to remove them ourselves.
6127 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes))
6128 			!= NULL) {
6129 		free_vnode(vnode, false);
6130 	}
6131 
6132 	// remove the mount structure from the hash table
6133 	mutex_lock(&sMountMutex);
6134 	hash_remove(sMountsTable, mount);
6135 	mutex_unlock(&sMountMutex);
6136 
6137 	mountOpLocker.Unlock();
6138 
6139 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
6140 	notify_unmount(mount->id);
6141 
6142 	// release the file system
6143 	put_file_system(mount->fs);
6144 
6145 	// dereference the partition and mark it unmounted
6146 	if (partition) {
6147 		partition->SetVolumeID(-1);
6148 		partition->SetMountCookie(NULL);
6149 
6150 		if (mount->owns_file_device)
6151 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
6152 		partition->Unregister();
6153 	}
6154 
6155 	free(mount->device_name);
6156 	free(mount->fs_name);
6157 	free(mount);
6158 
6159 	return B_OK;
6160 }
6161 
6162 
6163 static status_t
6164 fs_sync(dev_t device)
6165 {
6166 	struct fs_mount *mount;
6167 	status_t status = get_mount(device, &mount);
6168 	if (status < B_OK)
6169 		return status;
6170 
6171 	// First, synchronize all file caches
6172 
6173 	struct vnode *previousVnode = NULL;
6174 	while (true) {
6175 		// synchronize access to vnode list
6176 		recursive_lock_lock(&mount->rlock);
6177 
6178 		struct vnode *vnode = previousVnode;
6179 		do {
6180 			// TODO: we could track writes (and writable mapped vnodes)
6181 			//	and have a simple flag that we could test for here
6182 			vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode);
6183 		} while (vnode != NULL && vnode->cache == NULL);
6184 
6185 		ino_t id = -1;
6186 		if (vnode != NULL)
6187 			id = vnode->id;
6188 
6189 		recursive_lock_unlock(&mount->rlock);
6190 
6191 		if (vnode == NULL)
6192 			break;
6193 
6194 		// acquire a reference to the vnode
6195 
6196 		if (get_vnode(mount->id, id, &vnode, true, false) == B_OK) {
6197 			if (previousVnode != NULL)
6198 				put_vnode(previousVnode);
6199 
6200 			if (vnode->cache != NULL)
6201 				vm_cache_write_modified(vnode->cache, false);
6202 
6203 			// the next vnode might change until we lock the vnode list again,
6204 			// but this vnode won't go away since we keep a reference to it.
6205 			previousVnode = vnode;
6206 		} else {
6207 			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n",
6208 				mount->id, id);
6209 			break;
6210 		}
6211 	}
6212 
6213 	if (previousVnode != NULL)
6214 		put_vnode(previousVnode);
6215 
6216 	// And then, let the file systems do their synchronizing work
6217 
6218 	mutex_lock(&sMountMutex);
6219 
6220 	if (FS_MOUNT_CALL(mount, sync))
6221 		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);
6222 
6223 	mutex_unlock(&sMountMutex);
6224 
6225 	put_mount(mount);
6226 	return status;
6227 }
6228 
6229 
6230 static status_t
6231 fs_read_info(dev_t device, struct fs_info *info)
6232 {
6233 	struct fs_mount *mount;
6234 	status_t status = get_mount(device, &mount);
6235 	if (status < B_OK)
6236 		return status;
6237 
6238 	memset(info, 0, sizeof(struct fs_info));
6239 
6240 	if (FS_MOUNT_CALL(mount, read_fs_info))
6241 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
6242 
6243 	// fill in info the file system doesn't (have to) know about
6244 	if (status == B_OK) {
6245 		info->dev = mount->id;
6246 		info->root = mount->root_vnode->id;
6247 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
6248 		if (mount->device_name != NULL) {
6249 			strlcpy(info->device_name, mount->device_name,
6250 				sizeof(info->device_name));
6251 		}
6252 	}
6253 
6254 	// if the call is not supported by the file system, there are still
6255 	// the parts that we filled out ourselves
6256 
6257 	put_mount(mount);
6258 	return status;
6259 }
6260 
6261 
6262 static status_t
6263 fs_write_info(dev_t device, const struct fs_info *info, int mask)
6264 {
6265 	struct fs_mount *mount;
6266 	status_t status = get_mount(device, &mount);
6267 	if (status < B_OK)
6268 		return status;
6269 
6270 	if (FS_MOUNT_CALL(mount, write_fs_info))
6271 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
6272 	else
6273 		status = EROFS;
6274 
6275 	put_mount(mount);
6276 	return status;
6277 }
6278 
6279 
6280 static dev_t
6281 fs_next_device(int32 *_cookie)
6282 {
6283 	struct fs_mount *mount = NULL;
6284 	dev_t device = *_cookie;
6285 
6286 	mutex_lock(&sMountMutex);
6287 
6288 	// Since device IDs are assigned sequentially, this algorithm
6289 	// works well enough. It makes sure that the device list
6290 	// returned is sorted, and that no device is skipped when an
6291 	// already visited device gets unmounted.
6292 
6293 	while (device < sNextMountID) {
6294 		mount = find_mount(device++);
6295 		if (mount != NULL && mount->cookie != NULL)
6296 			break;
6297 	}
6298 
6299 	*_cookie = device;
6300 
6301 	if (mount != NULL)
6302 		device = mount->id;
6303 	else
6304 		device = B_BAD_VALUE;
6305 
6306 	mutex_unlock(&sMountMutex);
6307 
6308 	return device;
6309 }
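

/*	Illustrative sketch: the cookie contract of fs_next_device() (exposed
	through _kern_next_device() below). The cookie starts at 0 and is
	advanced by the function itself; a negative return value ends the
	iteration:

		int32 cookie = 0;
		dev_t device;
		while ((device = fs_next_device(&cookie)) >= 0) {
			// "device" is the ID of the next mounted volume,
			// in ascending order
		}
*/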
6310 
6311 
6312 static status_t
6313 get_cwd(char *buffer, size_t size, bool kernel)
6314 {
6315 	// Get current working directory from io context
6316 	struct io_context *context = get_current_io_context(kernel);
6317 	status_t status;
6318 
6319 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
6320 
6321 	mutex_lock(&context->io_mutex);
6322 
6323 	if (context->cwd)
6324 		status = dir_vnode_to_path(context->cwd, buffer, size);
6325 	else
6326 		status = B_ERROR;
6327 
6328 	mutex_unlock(&context->io_mutex);
6329 	return status;
6330 }
6331 
6332 
6333 static status_t
6334 set_cwd(int fd, char *path, bool kernel)
6335 {
6336 	struct io_context *context;
6337 	struct vnode *vnode = NULL;
6338 	struct vnode *oldDirectory;
6339 	struct stat stat;
6340 	status_t status;
6341 
6342 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
6343 
6344 	// Get vnode for passed path, and bail if it failed
6345 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6346 	if (status < 0)
6347 		return status;
6348 
6349 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
6350 	if (status < 0)
6351 		goto err;
6352 
6353 	if (!S_ISDIR(stat.st_mode)) {
6354 		// nope, can't cwd to here
6355 		status = B_NOT_A_DIRECTORY;
6356 		goto err;
6357 	}
6358 
6359 	// Get current io context and lock
6360 	context = get_current_io_context(kernel);
6361 	mutex_lock(&context->io_mutex);
6362 
6363 	// save the old current working directory first
6364 	oldDirectory = context->cwd;
6365 	context->cwd = vnode;
6366 
6367 	mutex_unlock(&context->io_mutex);
6368 
6369 	if (oldDirectory)
6370 		put_vnode(oldDirectory);
6371 
6372 	return B_NO_ERROR;
6373 
6374 err:
6375 	put_vnode(vnode);
6376 	return status;
6377 }
6378 
6379 
6380 //	#pragma mark - kernel mirrored syscalls
6381 
6382 
6383 dev_t
6384 _kern_mount(const char *path, const char *device, const char *fsName,
6385 	uint32 flags, const char *args, size_t argsLength)
6386 {
6387 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6388 	if (pathBuffer.InitCheck() != B_OK)
6389 		return B_NO_MEMORY;
6390 
6391 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
6392 }
6393 
6394 
6395 status_t
6396 _kern_unmount(const char *path, uint32 flags)
6397 {
6398 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6399 	if (pathBuffer.InitCheck() != B_OK)
6400 		return B_NO_MEMORY;
6401 
6402 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
6403 }
6404 
6405 
6406 status_t
6407 _kern_read_fs_info(dev_t device, struct fs_info *info)
6408 {
6409 	if (info == NULL)
6410 		return B_BAD_VALUE;
6411 
6412 	return fs_read_info(device, info);
6413 }
6414 
6415 
6416 status_t
6417 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
6418 {
6419 	if (info == NULL)
6420 		return B_BAD_VALUE;
6421 
6422 	return fs_write_info(device, info, mask);
6423 }
6424 
6425 
6426 status_t
6427 _kern_sync(void)
6428 {
6429 	// Note: _kern_sync() is also called from _user_sync()
6430 	int32 cookie = 0;
6431 	dev_t device;
6432 	while ((device = next_dev(&cookie)) >= 0) {
6433 		status_t status = fs_sync(device);
6434 		if (status != B_OK && status != B_BAD_VALUE)
6435 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
6436 	}
6437 
6438 	return B_OK;
6439 }
6440 
6441 
6442 dev_t
6443 _kern_next_device(int32 *_cookie)
6444 {
6445 	return fs_next_device(_cookie);
6446 }
6447 
6448 
6449 status_t
6450 _kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
6451 	size_t infoSize)
6452 {
6453 	if (infoSize != sizeof(fd_info))
6454 		return B_BAD_VALUE;
6455 
6456 	struct io_context *context = NULL;
6457 	sem_id contextMutex = -1;
6458 	struct team *team = NULL;
6459 
6460 	cpu_status state = disable_interrupts();
6461 	GRAB_TEAM_LOCK();
6462 
6463 	team = team_get_team_struct_locked(teamID);
6464 	if (team) {
6465 		context = (io_context *)team->io_context;
6466 		contextMutex = context->io_mutex.sem;
6467 	}
6468 
6469 	RELEASE_TEAM_LOCK();
6470 	restore_interrupts(state);
6471 
6472 	// we now have a context - since we couldn't lock it while having
6473 	// safe access to the team structure, we need to lock the mutex
6474 	// manually
6475 
6476 	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
6477 		// team doesn't exist or seems to be gone
6478 		return B_BAD_TEAM_ID;
6479 	}
6480 
6481 	// the team cannot be deleted completely while we're owning its
6482 	// io_context mutex, so we can safely play with it now
6483 
6484 	context->io_mutex.holder = thread_get_current_thread_id();
6485 
6486 	uint32 slot = *_cookie;
6487 
6488 	struct file_descriptor *descriptor;
6489 	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
6490 		slot++;
6491 
6492 	if (slot >= context->table_size) {
6493 		mutex_unlock(&context->io_mutex);
6494 		return B_ENTRY_NOT_FOUND;
6495 	}
6496 
6497 	info->number = slot;
6498 	info->open_mode = descriptor->open_mode;
6499 
6500 	struct vnode *vnode = fd_vnode(descriptor);
6501 	if (vnode != NULL) {
6502 		info->device = vnode->device;
6503 		info->node = vnode->id;
6504 	} else if (descriptor->u.mount != NULL) {
6505 		info->device = descriptor->u.mount->id;
6506 		info->node = -1;
6507 	}
6508 
6509 	mutex_unlock(&context->io_mutex);
6510 
6511 	*_cookie = slot + 1;
6512 	return B_OK;
6513 }
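

/*	Illustrative sketch: enumerating the open file descriptors of a team
	with this function. The cookie is simply the next FD slot to inspect,
	and B_ENTRY_NOT_FOUND ends the iteration ("teamID" is assumed to be a
	valid team ID):

		uint32 cookie = 0;
		fd_info info;
		while (_kern_get_next_fd_info(teamID, &cookie, &info,
				sizeof(fd_info)) == B_OK) {
			dprintf("fd %ld: device %ld, node %Ld\n", info.number,
				info.device, info.node);
		}
*/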
6514 
6515 
6516 int
6517 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
6518 {
6519 	if (openMode & O_CREAT)
6520 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
6521 
6522 	return file_open_entry_ref(device, inode, name, openMode, true);
6523 }
6524 
6525 
6526 /*!	\brief Opens a node specified by a FD + path pair.
6527 
6528 	At least one of \a fd and \a path must be specified.
6529 	If only \a fd is given, the function opens the node identified by this
6530 	FD. If only a path is given, this path is opened. If both are given and
6531 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6532 	of the directory (!) identified by \a fd.
6533 
6534 	\param fd The FD. May be < 0.
6535 	\param path The absolute or relative path. May be \c NULL.
6536 	\param openMode The open mode.
6537 	\return A FD referring to the newly opened node, or an error code,
6538 			if an error occurs.
6539 */
6540 int
6541 _kern_open(int fd, const char *path, int openMode, int perms)
6542 {
6543 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6544 	if (pathBuffer.InitCheck() != B_OK)
6545 		return B_NO_MEMORY;
6546 
6547 	if (openMode & O_CREAT)
6548 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
6549 
6550 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
6551 }
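

/*	Illustrative sketch of the FD + path semantics described above: both
	calls open the same node, assuming "dirFD" is a (hypothetical) open FD
	referring to the directory "/boot/home":

		int fd1 = _kern_open(-1, "/boot/home/test.txt", O_RDONLY, 0);
		int fd2 = _kern_open(dirFD, "test.txt", O_RDONLY, 0);
			// relative path, reckoned off of the directory dirFD
		if (fd1 >= 0)
			_kern_close(fd1);
		if (fd2 >= 0)
			_kern_close(fd2);
*/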
6552 
6553 
6554 /*!	\brief Opens a directory specified by entry_ref or node_ref.
6555 
6556 	The supplied name may be \c NULL, in which case directory identified
6557 	by \a device and \a inode will be opened. Otherwise \a device and
6558 	\a inode identify the parent directory of the directory to be opened
6559 	and \a name its entry name.
6560 
6561 	\param device If \a name is specified the ID of the device the parent
6562 		   directory of the directory to be opened resides on, otherwise
6563 		   the device of the directory itself.
6564 	\param inode If \a name is specified the node ID of the parent
6565 		   directory of the directory to be opened, otherwise node ID of the
6566 		   directory itself.
6567 	\param name The entry name of the directory to be opened. If \c NULL,
6568 		   the \a device + \a inode pair identify the node to be opened.
6569 	\return The FD of the newly opened directory or an error code, if
6570 			something went wrong.
6571 */
6572 int
6573 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
6574 {
6575 	return dir_open_entry_ref(device, inode, name, true);
6576 }
6577 
6578 
6579 /*!	\brief Opens a directory specified by a FD + path pair.
6580 
6581 	At least one of \a fd and \a path must be specified.
6582 	If only \a fd is given, the function opens the directory identified by this
6583 	FD. If only a path is given, this path is opened. If both are given and
6584 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6585 	of the directory (!) identified by \a fd.
6586 
6587 	\param fd The FD. May be < 0.
6588 	\param path The absolute or relative path. May be \c NULL.
6589 	\return A FD referring to the newly opened directory, or an error code,
6590 			if an error occurs.
6591 */
6592 int
6593 _kern_open_dir(int fd, const char *path)
6594 {
6595 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6596 	if (pathBuffer.InitCheck() != B_OK)
6597 		return B_NO_MEMORY;
6598 
6599 	return dir_open(fd, pathBuffer.LockBuffer(), true);
6600 }
6601 
6602 
6603 status_t
6604 _kern_fcntl(int fd, int op, uint32 argument)
6605 {
6606 	return common_fcntl(fd, op, argument, true);
6607 }
6608 
6609 
6610 status_t
6611 _kern_fsync(int fd)
6612 {
6613 	return common_sync(fd, true);
6614 }
6615 
6616 
6617 status_t
6618 _kern_lock_node(int fd)
6619 {
6620 	return common_lock_node(fd, true);
6621 }
6622 
6623 
6624 status_t
6625 _kern_unlock_node(int fd)
6626 {
6627 	return common_unlock_node(fd, true);
6628 }
6629 
6630 
6631 status_t
6632 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
6633 {
6634 	return dir_create_entry_ref(device, inode, name, perms, true);
6635 }
6636 
6637 
6638 /*!	\brief Creates a directory specified by a FD + path pair.
6639 
6640 	\a path must always be specified (it contains the name of the new directory
6641 	at least). If only a path is given, this path identifies the location at
6642 	which the directory shall be created. If both \a fd and \a path are given and
6643 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6644 	of the directory (!) identified by \a fd.
6645 
6646 	\param fd The FD. May be < 0.
6647 	\param path The absolute or relative path. Must not be \c NULL.
6648 	\param perms The access permissions the new directory shall have.
6649 	\return \c B_OK, if the directory has been created successfully, another
6650 			error code otherwise.
6651 */
6652 status_t
6653 _kern_create_dir(int fd, const char *path, int perms)
6654 {
6655 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6656 	if (pathBuffer.InitCheck() != B_OK)
6657 		return B_NO_MEMORY;
6658 
6659 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6660 }
6661 
6662 
6663 status_t
6664 _kern_remove_dir(int fd, const char *path)
6665 {
6666 	if (path) {
6667 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6668 		if (pathBuffer.InitCheck() != B_OK)
6669 			return B_NO_MEMORY;
6670 
6671 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6672 	}
6673 
6674 	return dir_remove(fd, NULL, true);
6675 }
6676 
6677 
6678 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
6679 
6680 	At least one of \a fd and \a path must be specified.
6681 	If only \a fd is given, the symlink to be read is the node
6682 	identified by this FD. If only a path is given, this path identifies the
6683 	symlink to be read. If both are given and the path is absolute, \a fd is
6684 	ignored; a relative path is reckoned off of the directory (!) identified
6685 	by \a fd.
6686 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6687 	will still be updated to reflect the required buffer size.
6688 
6689 	\param fd The FD. May be < 0.
6690 	\param path The absolute or relative path. May be \c NULL.
6691 	\param buffer The buffer into which the contents of the symlink shall be
6692 		   written.
6693 	\param _bufferSize A pointer to the size of the supplied buffer.
6694 	\return The length of the link on success or an appropriate error code
6695 */
6696 status_t
6697 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6698 {
6699 	status_t status;
6700 
6701 	if (path) {
6702 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6703 		if (pathBuffer.InitCheck() != B_OK)
6704 			return B_NO_MEMORY;
6705 
6706 		return common_read_link(fd, pathBuffer.LockBuffer(),
6707 			buffer, _bufferSize, true);
6708 	}
6709 
6710 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6711 }
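

/*	Illustrative sketch: reading a symlink with the in/out \a _bufferSize
	parameter. Since B_BUFFER_OVERFLOW still updates the size, a caller can
	find out how much space is actually needed:

		char buffer[B_PATH_NAME_LENGTH];
		size_t size = sizeof(buffer);
		status_t status = _kern_read_link(-1, "/boot/home/link", buffer,
			&size);
		if (status == B_BUFFER_OVERFLOW) {
			// "size" now holds the required buffer size -- allocate a
			// larger buffer and call again
		}
*/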
6712 
6713 
6714 /*!	\brief Creates a symlink specified by a FD + path pair.
6715 
6716 	\a path must always be specified (it contains the name of the new symlink
6717 	at least). If only a path is given, this path identifies the location at
6718 	which the symlink shall be created. If both \a fd and \a path are given and
6719 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6720 	of the directory (!) identified by \a fd.
6721 
6722 	\param fd The FD. May be < 0.
6723 	\param toPath The absolute or relative path. Must not be \c NULL.
6724 	\param mode The access permissions the new symlink shall have.
6725 	\return \c B_OK, if the symlink has been created successfully, another
6726 			error code otherwise.
6727 */
6728 status_t
6729 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6730 {
6731 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6732 	if (pathBuffer.InitCheck() != B_OK)
6733 		return B_NO_MEMORY;
6734 
6735 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6736 		toPath, mode, true);
6737 }
6738 
6739 
6740 status_t
6741 _kern_create_link(const char *path, const char *toPath)
6742 {
6743 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6744 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6745 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6746 		return B_NO_MEMORY;
6747 
6748 	return common_create_link(pathBuffer.LockBuffer(),
6749 		toPathBuffer.LockBuffer(), true);
6750 }
6751 
6752 
6753 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
6754 
6755 	\a path must always be specified (it contains at least the name of the entry
6756 	to be deleted). If only a path is given, this path identifies the entry
6757 	directly. If both \a fd and \a path are given and the path is absolute,
6758 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6759 	identified by \a fd.
6760 
6761 	\param fd The FD. May be < 0.
6762 	\param path The absolute or relative path. Must not be \c NULL.
6763 	\return \c B_OK, if the entry has been removed successfully, another
6764 			error code otherwise.
6765 */
6766 status_t
6767 _kern_unlink(int fd, const char *path)
6768 {
6769 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6770 	if (pathBuffer.InitCheck() != B_OK)
6771 		return B_NO_MEMORY;
6772 
6773 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6774 }
6775 
6776 
6777 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
6778 		   by another FD + path pair.
6779 
6780 	\a oldPath and \a newPath must always be specified (they contain at least
6781 	the name of the entry). If only a path is given, this path identifies the
6782 	entry directly. If both a FD and a path are given and the path is absolute,
6783 	the FD is ignored; a relative path is reckoned off of the directory (!)
6784 	identified by the respective FD.
6785 
6786 	\param oldFD The FD of the old location. May be < 0.
6787 	\param oldPath The absolute or relative path of the old location. Must not
6788 		   be \c NULL.
6789 	\param newFD The FD of the new location. May be < 0.
6790 	\param newPath The absolute or relative path of the new location. Must not
6791 		   be \c NULL.
6792 	\return \c B_OK, if the entry has been moved successfully, another
6793 			error code otherwise.
6794 */
6795 status_t
6796 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6797 {
6798 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6799 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6800 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6801 		return B_NO_MEMORY;
6802 
6803 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6804 		newFD, newPathBuffer.LockBuffer(), true);
6805 }
6806 
6807 
6808 status_t
6809 _kern_access(const char *path, int mode)
6810 {
6811 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6812 	if (pathBuffer.InitCheck() != B_OK)
6813 		return B_NO_MEMORY;
6814 
6815 	return common_access(pathBuffer.LockBuffer(), mode, true);
6816 }
6817 
6818 
6819 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
6820 
6821 	If only \a fd is given, the stat operation associated with the type
6822 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6823 	given, this path identifies the entry for whose node to retrieve the
6824 	stat data. If both \a fd and \a path are given and the path is absolute,
6825 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6826 	identified by \a fd and specifies the entry whose stat data shall be
6827 	retrieved.
6828 
6829 	\param fd The FD. May be < 0.
6830 	\param path The absolute or relative path. May be \c NULL.
6831 	\param traverseLeafLink If \a path is given, \c true specifies that the
6832 		   function shall not stick to symlinks, but traverse them.
6833 	\param stat The buffer the stat data shall be written into.
6834 	\param statSize The size of the supplied stat buffer.
6835 	\return \c B_OK, if the stat data have been read successfully, another
6836 			error code otherwise.
6837 */
6838 status_t
6839 _kern_read_stat(int fd, const char *path, bool traverseLeafLink,
6840 	struct stat *stat, size_t statSize)
6841 {
6842 	struct stat completeStat;
6843 	struct stat *originalStat = NULL;
6844 	status_t status;
6845 
6846 	if (statSize > sizeof(struct stat))
6847 		return B_BAD_VALUE;
6848 
6849 	// this supports different stat extensions
6850 	if (statSize < sizeof(struct stat)) {
6851 		originalStat = stat;
6852 		stat = &completeStat;
6853 	}
6854 
6855 	if (path) {
6856 		// path given: get the stat of the node referred to by (fd, path)
6857 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6858 		if (pathBuffer.InitCheck() != B_OK)
6859 			return B_NO_MEMORY;
6860 
6861 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
6862 			traverseLeafLink, stat, true);
6863 	} else {
6864 		// no path given: get the FD and use the FD operation
6865 		struct file_descriptor *descriptor
6866 			= get_fd(get_current_io_context(true), fd);
6867 		if (descriptor == NULL)
6868 			return B_FILE_ERROR;
6869 
6870 		if (descriptor->ops->fd_read_stat)
6871 			status = descriptor->ops->fd_read_stat(descriptor, stat);
6872 		else
6873 			status = EOPNOTSUPP;
6874 
6875 		put_fd(descriptor);
6876 	}
6877 
6878 	if (status == B_OK && originalStat != NULL)
6879 		memcpy(originalStat, stat, statSize);
6880 
6881 	return status;
6882 }
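

/*	Illustrative sketch: the statSize mechanism above lets callers that were
	compiled against a smaller (older) struct stat still get correct data --
	only the first statSize bytes are copied back. A hypothetical legacy
	caller might look like this (old_stat is assumed to be a prefix of the
	current struct stat):

		struct old_stat {
			dev_t	st_dev;
			ino_t	st_ino;
			// ... older, shorter layout
		} oldStat;

		status_t status = _kern_read_stat(fd, NULL, false,
			(struct stat *)&oldStat, sizeof(oldStat));
*/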
6883 
6884 
6885 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
6886 
6887 	If only \a fd is given, the stat operation associated with the type
6888 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6889 	given, this path identifies the entry for whose node to write the
6890 	stat data. If both \a fd and \a path are given and the path is absolute,
6891 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6892 	identified by \a fd and specifies the entry whose stat data shall be
6893 	written.
6894 
6895 	\param fd The FD. May be < 0.
6896 	\param path The absolute or relative path. May be \c NULL.
6897 	\param traverseLeafLink If \a path is given, \c true specifies that the
6898 		   function shall not stick to symlinks, but traverse them.
6899 	\param stat The buffer containing the stat data to be written.
6900 	\param statSize The size of the supplied stat buffer.
6901 	\param statMask A mask specifying which parts of the stat data shall be
6902 		   written.
6903 	\return \c B_OK, if the stat data have been written successfully,
6904 			another error code otherwise.
6905 */
6906 status_t
6907 _kern_write_stat(int fd, const char *path, bool traverseLeafLink,
6908 	const struct stat *stat, size_t statSize, int statMask)
6909 {
6910 	struct stat completeStat;
6911 
6912 	if (statSize > sizeof(struct stat))
6913 		return B_BAD_VALUE;
6914 
6915 	// this supports different stat extensions
6916 	if (statSize < sizeof(struct stat)) {
6917 		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
6918 		memcpy(&completeStat, stat, statSize);
6919 		stat = &completeStat;
6920 	}
6921 
6922 	status_t status;
6923 
6924 	if (path) {
6925 		// path given: write the stat of the node referred to by (fd, path)
6926 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6927 		if (pathBuffer.InitCheck() != B_OK)
6928 			return B_NO_MEMORY;
6929 
6930 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
6931 			traverseLeafLink, stat, statMask, true);
6932 	} else {
6933 		// no path given: get the FD and use the FD operation
6934 		struct file_descriptor *descriptor
6935 			= get_fd(get_current_io_context(true), fd);
6936 		if (descriptor == NULL)
6937 			return B_FILE_ERROR;
6938 
6939 		if (descriptor->ops->fd_write_stat)
6940 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
6941 		else
6942 			status = EOPNOTSUPP;
6943 
6944 		put_fd(descriptor);
6945 	}
6946 
6947 	return status;
6948 }
6949 
6950 
6951 int
6952 _kern_open_attr_dir(int fd, const char *path)
6953 {
6954 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6955 	if (pathBuffer.InitCheck() != B_OK)
6956 		return B_NO_MEMORY;
6957 
6958 	if (path != NULL)
6959 		pathBuffer.SetTo(path);
6960 
6961 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6962 }
6963 
6964 
6965 int
6966 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
6967 {
6968 	return attr_create(fd, name, type, openMode, true);
6969 }
6970 
6971 
6972 int
6973 _kern_open_attr(int fd, const char *name, int openMode)
6974 {
6975 	return attr_open(fd, name, openMode, true);
6976 }
6977 
6978 
6979 status_t
6980 _kern_remove_attr(int fd, const char *name)
6981 {
6982 	return attr_remove(fd, name, true);
6983 }
6984 
6985 
6986 status_t
6987 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
6988 {
6989 	return attr_rename(fromFile, fromName, toFile, toName, true);
6990 }
6991 
6992 
6993 int
6994 _kern_open_index_dir(dev_t device)
6995 {
6996 	return index_dir_open(device, true);
6997 }
6998 
6999 
7000 status_t
7001 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
7002 {
7003 	return index_create(device, name, type, flags, true);
7004 }
7005 
7006 
7007 status_t
7008 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
7009 {
7010 	return index_name_read_stat(device, name, stat, true);
7011 }
7012 
7013 
7014 status_t
7015 _kern_remove_index(dev_t device, const char *name)
7016 {
7017 	return index_remove(device, name, true);
7018 }
7019 
7020 
7021 status_t
7022 _kern_getcwd(char *buffer, size_t size)
7023 {
7024 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
7025 
7026 	// Call vfs to get current working directory
7027 	return get_cwd(buffer, size, true);
7028 }
7029 
7030 
7031 status_t
7032 _kern_setcwd(int fd, const char *path)
7033 {
7034 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7035 	if (pathBuffer.InitCheck() != B_OK)
7036 		return B_NO_MEMORY;
7037 
7038 	if (path != NULL)
7039 		pathBuffer.SetTo(path);
7040 
7041 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
7042 }
7043 
7044 
7045 //	#pragma mark - userland syscalls
7046 
7047 
7048 dev_t
7049 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
7050 	uint32 flags, const char *userArgs, size_t argsLength)
7051 {
7052 	char fileSystem[B_OS_NAME_LENGTH];
7053 	KPath path, device;
7054 	char *args = NULL;
7055 	status_t status;
7056 
7057 	if (!IS_USER_ADDRESS(userPath)
7058 		|| !IS_USER_ADDRESS(userFileSystem)
7059 		|| !IS_USER_ADDRESS(userDevice))
7060 		return B_BAD_ADDRESS;
7061 
7062 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
7063 		return B_NO_MEMORY;
7064 
7065 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
7066 		return B_BAD_ADDRESS;
7067 
7068 	if (userFileSystem != NULL
7069 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
7070 		return B_BAD_ADDRESS;
7071 
7072 	if (userDevice != NULL
7073 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
7074 		return B_BAD_ADDRESS;
7075 
7076 	if (userArgs != NULL && argsLength > 0) {
7077 		// this is a safety restriction
7078 		if (argsLength >= 65536)
7079 			return B_NAME_TOO_LONG;
7080 
7081 		args = (char *)malloc(argsLength + 1);
7082 		if (args == NULL)
7083 			return B_NO_MEMORY;
7084 
7085 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
7086 			free(args);
7087 			return B_BAD_ADDRESS;
7088 		}
7089 	}
7090 	path.UnlockBuffer();
7091 	device.UnlockBuffer();
7092 
7093 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
7094 		userFileSystem ? fileSystem : NULL, flags, args, false);
7095 
7096 	free(args);
7097 	return status;
7098 }
7099 
7100 
7101 status_t
7102 _user_unmount(const char *userPath, uint32 flags)
7103 {
7104 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7105 	if (pathBuffer.InitCheck() != B_OK)
7106 		return B_NO_MEMORY;
7107 
7108 	char *path = pathBuffer.LockBuffer();
7109 
7110 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7111 		return B_BAD_ADDRESS;
7112 
7113 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
7114 }
7115 
7116 
7117 status_t
7118 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
7119 {
7120 	struct fs_info info;
7121 	status_t status;
7122 
7123 	if (userInfo == NULL)
7124 		return B_BAD_VALUE;
7125 
7126 	if (!IS_USER_ADDRESS(userInfo))
7127 		return B_BAD_ADDRESS;
7128 
7129 	status = fs_read_info(device, &info);
7130 	if (status != B_OK)
7131 		return status;
7132 
7133 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
7134 		return B_BAD_ADDRESS;
7135 
7136 	return B_OK;
7137 }
7138 
7139 
7140 status_t
7141 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
7142 {
7143 	struct fs_info info;
7144 
7145 	if (userInfo == NULL)
7146 		return B_BAD_VALUE;
7147 
7148 	if (!IS_USER_ADDRESS(userInfo)
7149 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
7150 		return B_BAD_ADDRESS;
7151 
7152 	return fs_write_info(device, &info, mask);
7153 }
7154 
7155 
7156 dev_t
7157 _user_next_device(int32 *_userCookie)
7158 {
7159 	int32 cookie;
7160 	dev_t device;
7161 
7162 	if (!IS_USER_ADDRESS(_userCookie)
7163 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
7164 		return B_BAD_ADDRESS;
7165 
7166 	device = fs_next_device(&cookie);
7167 
7168 	if (device >= B_OK) {
7169 		// update user cookie
7170 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
7171 			return B_BAD_ADDRESS;
7172 	}
7173 
7174 	return device;
7175 }
7176 
7177 
7178 status_t
7179 _user_sync(void)
7180 {
7181 	return _kern_sync();
7182 }
7183 
7184 
7185 status_t
7186 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
7187 	size_t infoSize)
7188 {
7189 	struct fd_info info;
7190 	uint32 cookie;
7191 
7192 	// only root can do this (or should root's group be enough?)
7193 	if (geteuid() != 0)
7194 		return B_NOT_ALLOWED;
7195 
7196 	if (infoSize != sizeof(fd_info))
7197 		return B_BAD_VALUE;
7198 
7199 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
7200 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
7201 		return B_BAD_ADDRESS;
7202 
7203 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
7204 	if (status < B_OK)
7205 		return status;
7206 
7207 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
7208 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
7209 		return B_BAD_ADDRESS;
7210 
7211 	return status;
7212 }
7213 
7214 
7215 status_t
7216 _user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
7217 	char *userPath, size_t pathLength)
7218 {
7219 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7220 	if (pathBuffer.InitCheck() != B_OK)
7221 		return B_NO_MEMORY;
7222 
7223 	struct vnode *vnode;
7224 	status_t status;
7225 
7226 	if (!IS_USER_ADDRESS(userPath))
7227 		return B_BAD_ADDRESS;
7228 
7229 	// copy the leaf name onto the stack
7230 	char stackLeaf[B_FILE_NAME_LENGTH];
7231 	if (leaf) {
7232 		if (!IS_USER_ADDRESS(leaf))
7233 			return B_BAD_ADDRESS;
7234 
7235 		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
7236 		if (len < 0)
7237 			return len;
7238 		if (len >= B_FILE_NAME_LENGTH)
7239 			return B_NAME_TOO_LONG;
7240 		leaf = stackLeaf;
7241 
7242 		// filter invalid leaf names
7243 		if (leaf[0] == '\0' || strchr(leaf, '/'))
7244 			return B_BAD_VALUE;
7245 	}
7246 
7247 	// get the vnode matching the dir's node_ref
7248 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
7249 		// special cases "." and "..": we can directly get the vnode of the
7250 		// referenced directory
7251 		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
7252 		leaf = NULL;
7253 	} else
7254 		status = get_vnode(device, inode, &vnode, true, false);
7255 	if (status < B_OK)
7256 		return status;
7257 
7258 	char *path = pathBuffer.LockBuffer();
7259 
7260 	// get the directory path
7261 	status = dir_vnode_to_path(vnode, path, pathBuffer.BufferSize());
7262 	put_vnode(vnode);
7263 		// we don't need the vnode anymore
7264 	if (status < B_OK)
7265 		return status;
7266 
7267 	// append the leaf name
7268 	if (leaf) {
7269 		// insert a directory separator if this is not the file system root
7270 		if ((strcmp(path, "/") && strlcat(path, "/", pathBuffer.BufferSize())
7271 				>= pathBuffer.BufferSize())
7272 			|| strlcat(path, leaf, pathBuffer.BufferSize()) >= pathBuffer.BufferSize()) {
7273 			return B_NAME_TOO_LONG;
7274 		}
7275 	}
7276 
7277 	int len = user_strlcpy(userPath, path, pathLength);
7278 	if (len < 0)
7279 		return len;
7280 	if (len >= (int)pathLength)
7281 		return B_BUFFER_OVERFLOW;
7282 
7283 	return B_OK;
7284 }
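

/*	Illustrative sketch: this is the syscall behind entry_ref -> path
	conversion in userland (e.g. when a BPath is constructed from an
	entry_ref). Assuming a valid entry_ref "ref":

		char path[B_PATH_NAME_LENGTH];
		status_t status = _kern_entry_ref_to_path(ref.device, ref.directory,
			ref.name, path, sizeof(path));
*/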
7285 
7286 
7287 status_t
7288 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
7289 {
7290 	if (userPath == NULL || buffer == NULL)
7291 		return B_BAD_VALUE;
7292 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
7293 		return B_BAD_ADDRESS;
7294 
7295 	// copy path from userland
7296 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7297 	if (pathBuffer.InitCheck() != B_OK)
7298 		return B_NO_MEMORY;
7299 	char* path = pathBuffer.LockBuffer();
7300 
7301 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7302 		return B_BAD_ADDRESS;
7303 
7304 	// buffer for the leaf part
7305 	KPath leafBuffer(B_PATH_NAME_LENGTH + 1);
7306 	if (leafBuffer.InitCheck() != B_OK)
7307 		return B_NO_MEMORY;
7308 	char* leaf = leafBuffer.LockBuffer();
7309 
7310 	VNodePutter dirPutter;
7311 	struct vnode* dir = NULL;
7312 	status_t error;
7313 
7314 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
7315 		// get dir vnode + leaf name
7316 		struct vnode* nextDir;
7317 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, false);
7318 		if (error != B_OK)
7319 			return error;
7320 
7321 		dir = nextDir;
7322 		strcpy(path, leaf);
7323 		dirPutter.SetTo(dir);
7324 
7325 		// get file vnode
7326 		inc_vnode_ref_count(dir);
7327 		struct vnode* fileVnode;
7328 		int type;
7329 		error = vnode_path_to_vnode(dir, path, false, 0, &fileVnode, NULL,
7330 			&type);
7331 		if (error != B_OK)
7332 			return error;
7333 		VNodePutter fileVnodePutter(fileVnode);
7334 
7335 		if (!traverseLink || !S_ISLNK(type)) {
7336 			// we're done -- construct the path
7337 			bool hasLeaf = true;
7338 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
7339 				// special cases "." and ".." -- get the dir, forget the leaf
7340 				inc_vnode_ref_count(dir);
7341 				error = vnode_path_to_vnode(dir, leaf, false, 0, &nextDir, NULL,
7342 					NULL);
7343 				if (error != B_OK)
7344 					return error;
7345 				dir = nextDir;
7346 				dirPutter.SetTo(dir);
7347 				hasLeaf = false;
7348 			}
7349 
7350 			// get the directory path
7351 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH);
7352 			if (error != B_OK)
7353 				return error;
7354 
7355 			// append the leaf name
7356 			if (hasLeaf) {
7357 				// insert a directory separator if this is not the file system
7358 				// root
7359 				if ((strcmp(path, "/") != 0
7360 					&& strlcat(path, "/", pathBuffer.BufferSize())
7361 						>= pathBuffer.BufferSize())
7362 					|| strlcat(path, leaf, pathBuffer.BufferSize())
7363 						>= pathBuffer.BufferSize()) {
7364 					return B_NAME_TOO_LONG;
7365 				}
7366 			}
7367 
7368 			// copy back to userland
7369 			int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
7370 			if (len < 0)
7371 				return len;
7372 			if (len >= B_PATH_NAME_LENGTH)
7373 				return B_BUFFER_OVERFLOW;
7374 
7375 			return B_OK;
7376 		}
7377 
7378 		// read link
7379 		struct stat st;
7380 		if (FS_CALL(fileVnode, read_symlink) != NULL) {
7381 			size_t bufferSize = B_PATH_NAME_LENGTH;
7382 			error = FS_CALL(fileVnode, read_symlink)(fileVnode->mount->cookie,
7383 				fileVnode->private_node, path, &bufferSize);
7384 			if (error != B_OK)
7385 				return error;
7386 		} else
7387 			return B_BAD_VALUE;
7388 	}
7389 
7390 	return B_LINK_LIMIT;
7391 }
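

/*	Illustrative sketch: from userland this is reached through the
	corresponding _kern_normalize_path() syscall. The output buffer must be
	able to hold at least B_PATH_NAME_LENGTH bytes:

		char normalized[B_PATH_NAME_LENGTH];
		status_t status = _kern_normalize_path("/boot/home/../home/./Desktop",
			true, normalized);
		// on success, "normalized" holds e.g. "/boot/home/Desktop"
*/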
7392 
7393 
7394 int
7395 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
7396 	int openMode, int perms)
7397 {
7398 	char name[B_FILE_NAME_LENGTH];
7399 
7400 	if (userName == NULL || device < 0 || inode < 0)
7401 		return B_BAD_VALUE;
7402 	if (!IS_USER_ADDRESS(userName)
7403 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7404 		return B_BAD_ADDRESS;
7405 
7406 	if (openMode & O_CREAT)
7407 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
7408 
7409 	return file_open_entry_ref(device, inode, name, openMode, false);
7410 }
7411 
7412 
7413 int
7414 _user_open(int fd, const char *userPath, int openMode, int perms)
7415 {
7416 	KPath path(B_PATH_NAME_LENGTH + 1);
7417 	if (path.InitCheck() != B_OK)
7418 		return B_NO_MEMORY;
7419 
7420 	char *buffer = path.LockBuffer();
7421 
7422 	if (!IS_USER_ADDRESS(userPath)
7423 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7424 		return B_BAD_ADDRESS;
7425 
7426 	if (openMode & O_CREAT)
7427 		return file_create(fd, buffer, openMode, perms, false);
7428 
7429 	return file_open(fd, buffer, openMode, false);
7430 }
7431 
7432 
7433 int
7434 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
7435 {
7436 	if (userName != NULL) {
7437 		char name[B_FILE_NAME_LENGTH];
7438 
7439 		if (!IS_USER_ADDRESS(userName)
7440 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7441 			return B_BAD_ADDRESS;
7442 
7443 		return dir_open_entry_ref(device, inode, name, false);
7444 	}
7445 	return dir_open_entry_ref(device, inode, NULL, false);
7446 }
7447 
7448 
7449 int
7450 _user_open_dir(int fd, const char *userPath)
7451 {
7452 	KPath path(B_PATH_NAME_LENGTH + 1);
7453 	if (path.InitCheck() != B_OK)
7454 		return B_NO_MEMORY;
7455 
7456 	char *buffer = path.LockBuffer();
7457 
7458 	if (!IS_USER_ADDRESS(userPath)
7459 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7460 		return B_BAD_ADDRESS;
7461 
7462 	return dir_open(fd, buffer, false);
7463 }
7464 
7465 
7466 /*!	\brief Opens a directory's parent directory and returns the entry name
7467 		   of the former.
7468 
7469 	Aside from the fact that it returns the directory's entry name, this method is
7470 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
7471 	equivalent, if \a userName is \c NULL.
7472 
7473 	If a name buffer is supplied and the name does not fit the buffer, the
7474 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
7475 
7476 	\param fd A FD referring to a directory.
7477 	\param userName Buffer the directory's entry name shall be written into.
7478 		   May be \c NULL.
7479 	\param nameLength Size of the name buffer.
7480 	\return The file descriptor of the opened parent directory, if everything
7481 			went fine, an error code otherwise.
7482 */
7483 int
7484 _user_open_parent_dir(int fd, char *userName, size_t nameLength)
7485 {
7486 	bool kernel = false;
7487 
7488 	if (userName && !IS_USER_ADDRESS(userName))
7489 		return B_BAD_ADDRESS;
7490 
7491 	// open the parent dir
7492 	int parentFD = dir_open(fd, "..", kernel);
7493 	if (parentFD < 0)
7494 		return parentFD;
7495 	FDCloser fdCloser(parentFD, kernel);
7496 
7497 	if (userName) {
7498 		// get the vnodes
7499 		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
7500 		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
7501 		VNodePutter parentVNodePutter(parentVNode);
7502 		VNodePutter dirVNodePutter(dirVNode);
7503 		if (!parentVNode || !dirVNode)
7504 			return B_FILE_ERROR;
7505 
7506 		// get the vnode name
7507 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
7508 		struct dirent *buffer = (struct dirent*)_buffer;
7509 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
7510 			sizeof(_buffer));
7511 		if (status != B_OK)
7512 			return status;
7513 
7514 		// copy the name to the userland buffer
7515 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
7516 		if (len < 0)
7517 			return len;
7518 		if (len >= (int)nameLength)
7519 			return B_BUFFER_OVERFLOW;
7520 	}
7521 
7522 	return fdCloser.Detach();
7523 }
7524 
7525 
7526 status_t
7527 _user_fcntl(int fd, int op, uint32 argument)
7528 {
7529 	status_t status = common_fcntl(fd, op, argument, false);
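	// F_SETLKW is the only operation that can block; tell the syscall
	// restart machinery whether the call has to be restarted after a signal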
7530 	if (op == F_SETLKW)
7531 		syscall_restart_handle_post(status);
7532 
7533 	return status;
7534 }
7535 
7536 
7537 status_t
7538 _user_fsync(int fd)
7539 {
7540 	return common_sync(fd, false);
7541 }
7542 
7543 
7544 status_t
7545 _user_flock(int fd, int op)
7546 {
7547 	struct file_descriptor *descriptor;
7548 	struct vnode *vnode;
7549 	struct flock flock;
7550 	status_t status;
7551 
7552 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, op));
7553 
7554 	descriptor = get_fd_and_vnode(fd, &vnode, false);
7555 	if (descriptor == NULL)
7556 		return B_FILE_ERROR;
7557 
7558 	if (descriptor->type != FDTYPE_FILE) {
7559 		put_fd(descriptor);
7560 		return B_BAD_VALUE;
7561 	}
7562 
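	// flock() style locks always cover the whole file; LOCK_SH maps to a
	// shared (read) lock, everything else to an exclusive (write) lock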
7563 	flock.l_start = 0;
7564 	flock.l_len = OFF_MAX;
7565 	flock.l_whence = SEEK_SET;
7566 	flock.l_type = (op & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
7567 
7568 	if ((op & LOCK_UN) != 0)
7569 		status = release_advisory_lock(descriptor->u.vnode, &flock);
7570 	else {
7571 		status = acquire_advisory_lock(descriptor->u.vnode,
7572 			thread_get_current_thread()->team->session_id, &flock,
7573 			(op & LOCK_NB) == 0);
7574 	}
7575 
7576 	syscall_restart_handle_post(status);
7577 
7578 	put_fd(descriptor);
7579 	return status;
7580 }
7581 
7582 
7583 status_t
7584 _user_lock_node(int fd)
7585 {
7586 	return common_lock_node(fd, false);
7587 }
7588 
7589 
7590 status_t
7591 _user_unlock_node(int fd)
7592 {
7593 	return common_unlock_node(fd, false);
7594 }
7595 
7596 
7597 status_t
7598 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName,
	int perms)
7599 {
7600 	char name[B_FILE_NAME_LENGTH];
7601 	status_t status;
7602 
7603 	if (!IS_USER_ADDRESS(userName))
7604 		return B_BAD_ADDRESS;
7605 
7606 	status = user_strlcpy(name, userName, sizeof(name));
7607 	if (status < 0)
7608 		return status;
7609 
7610 	return dir_create_entry_ref(device, inode, name, perms, false);
7611 }
7612 
7613 
7614 status_t
7615 _user_create_dir(int fd, const char *userPath, int perms)
7616 {
7617 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7618 	if (pathBuffer.InitCheck() != B_OK)
7619 		return B_NO_MEMORY;
7620 
7621 	char *path = pathBuffer.LockBuffer();
7622 
7623 	if (!IS_USER_ADDRESS(userPath)
7624 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7625 		return B_BAD_ADDRESS;
7626 
7627 	return dir_create(fd, path, perms, false);
7628 }
7629 
7630 
7631 status_t
7632 _user_remove_dir(int fd, const char *userPath)
7633 {
7634 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7635 	if (pathBuffer.InitCheck() != B_OK)
7636 		return B_NO_MEMORY;
7637 
7638 	char *path = pathBuffer.LockBuffer();
7639 
7640 	if (userPath != NULL) {
7641 		if (!IS_USER_ADDRESS(userPath)
7642 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7643 			return B_BAD_ADDRESS;
7644 	}
7645 
7646 	return dir_remove(fd, userPath ? path : NULL, false);
7647 }
7648 
7649 
7650 status_t
7651 _user_read_link(int fd, const char *userPath, char *userBuffer,
	size_t *userBufferSize)
7652 {
7653 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
7654 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
7655 		return B_NO_MEMORY;
7656 
7657 	size_t bufferSize;
7658 
7659 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
7660 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
7661 		return B_BAD_ADDRESS;
7662 
7663 	char *path = pathBuffer.LockBuffer();
7664 	char *buffer = linkBuffer.LockBuffer();
7665 
7666 	if (userPath) {
7667 		if (!IS_USER_ADDRESS(userPath)
7668 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7669 			return B_BAD_ADDRESS;
7670 	}
7671 
	// cap the size at our kernel buffer, regardless of how the node is given
7672 	if (bufferSize > B_PATH_NAME_LENGTH)
7673 		bufferSize = B_PATH_NAME_LENGTH;
7674 
7675 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
7676 		&bufferSize, false);
7677 
7678 	// we also update the bufferSize in case of errors
7679 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
7680 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
7681 		return B_BAD_ADDRESS;
7682 
7683 	if (status < B_OK)
7684 		return status;
7685 
7686 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
7687 		return B_BAD_ADDRESS;
7688 
7689 	return B_OK;
7690 }
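
// A userland sketch of the in/out size protocol (illustrative; it assumes
// the syscall reaches userland as _kern_read_link()):
//	size_t size = B_PATH_NAME_LENGTH;
//	char buffer[B_PATH_NAME_LENGTH];
//	status_t error = _kern_read_link(fd, NULL, buffer, &size);
//	// even on B_BUFFER_OVERFLOW, "size" is updated to the length needed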
7691 
7692 
7693 status_t
7694 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
7695 	int mode)
7696 {
7697 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7698 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7699 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7700 		return B_NO_MEMORY;
7701 
7702 	char *path = pathBuffer.LockBuffer();
7703 	char *toPath = toPathBuffer.LockBuffer();
7704 
7705 	if (!IS_USER_ADDRESS(userPath)
7706 		|| !IS_USER_ADDRESS(userToPath)
7707 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7708 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7709 		return B_BAD_ADDRESS;
7710 
7711 	return common_create_symlink(fd, path, toPath, mode, false);
7712 }
7713 
7714 
7715 status_t
7716 _user_create_link(const char *userPath, const char *userToPath)
7717 {
7718 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7719 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7720 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7721 		return B_NO_MEMORY;
7722 
7723 	char *path = pathBuffer.LockBuffer();
7724 	char *toPath = toPathBuffer.LockBuffer();
7725 
7726 	if (!IS_USER_ADDRESS(userPath)
7727 		|| !IS_USER_ADDRESS(userToPath)
7728 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7729 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7730 		return B_BAD_ADDRESS;
7731 
7732 	status_t status = check_path(toPath);
7733 	if (status < B_OK)
7734 		return status;
7735 
7736 	return common_create_link(path, toPath, false);
7737 }
7738 
7739 
7740 status_t
7741 _user_unlink(int fd, const char *userPath)
7742 {
7743 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7744 	if (pathBuffer.InitCheck() != B_OK)
7745 		return B_NO_MEMORY;
7746 
7747 	char *path = pathBuffer.LockBuffer();
7748 
7749 	if (!IS_USER_ADDRESS(userPath)
7750 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7751 		return B_BAD_ADDRESS;
7752 
7753 	return common_unlink(fd, path, false);
7754 }
7755 
7756 
7757 status_t
7758 _user_rename(int oldFD, const char *userOldPath, int newFD,
7759 	const char *userNewPath)
7760 {
7761 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7762 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7763 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7764 		return B_NO_MEMORY;
7765 
7766 	char *oldPath = oldPathBuffer.LockBuffer();
7767 	char *newPath = newPathBuffer.LockBuffer();
7768 
7769 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7770 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7771 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7772 		return B_BAD_ADDRESS;
7773 
7774 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7775 }
7776 
7777 
7778 status_t
7779 _user_access(const char *userPath, int mode)
7780 {
7781 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7782 	if (pathBuffer.InitCheck() != B_OK)
7783 		return B_NO_MEMORY;
7784 
7785 	char *path = pathBuffer.LockBuffer();
7786 
7787 	if (!IS_USER_ADDRESS(userPath)
7788 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7789 		return B_BAD_ADDRESS;
7790 
7791 	return common_access(path, mode, false);
7792 }
7793 
7794 
7795 status_t
7796 _user_read_stat(int fd, const char *userPath, bool traverseLink,
7797 	struct stat *userStat, size_t statSize)
7798 {
7799 	struct stat stat;
7800 	status_t status;
7801 
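	// the caller's struct stat may be smaller than the kernel's (e.g. an
	// older binary), but never larger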
7802 	if (statSize > sizeof(struct stat))
7803 		return B_BAD_VALUE;
7804 
7805 	if (!IS_USER_ADDRESS(userStat))
7806 		return B_BAD_ADDRESS;
7807 
7808 	if (userPath) {
7809 		// path given: get the stat of the node referred to by (fd, path)
7810 		if (!IS_USER_ADDRESS(userPath))
7811 			return B_BAD_ADDRESS;
7812 
7813 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7814 		if (pathBuffer.InitCheck() != B_OK)
7815 			return B_NO_MEMORY;
7816 
7817 		char *path = pathBuffer.LockBuffer();
7818 
7819 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7820 		if (length < B_OK)
7821 			return length;
7822 		if (length >= B_PATH_NAME_LENGTH)
7823 			return B_NAME_TOO_LONG;
7824 
7825 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
7826 	} else {
7827 		// no path given: get the FD and use the FD operation
7828 		struct file_descriptor *descriptor
7829 			= get_fd(get_current_io_context(false), fd);
7830 		if (descriptor == NULL)
7831 			return B_FILE_ERROR;
7832 
7833 		if (descriptor->ops->fd_read_stat)
7834 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
7835 		else
7836 			status = EOPNOTSUPP;
7837 
7838 		put_fd(descriptor);
7839 	}
7840 
7841 	if (status < B_OK)
7842 		return status;
7843 
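	// copy only as many bytes back as the caller's struct stat can hold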
7844 	return user_memcpy(userStat, &stat, statSize);
7845 }
7846 
7847 
7848 status_t
7849 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
7850 	const struct stat *userStat, size_t statSize, int statMask)
7851 {
7852 	if (statSize > sizeof(struct stat))
7853 		return B_BAD_VALUE;
7854 
7855 	struct stat stat;
7856 
7857 	if (!IS_USER_ADDRESS(userStat)
7858 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
7859 		return B_BAD_ADDRESS;
7860 
7861 	// Clear any fields the caller's (possibly older, smaller) struct stat
	// does not cover, so that the FS hooks see fully initialized data.
7862 	if (statSize < sizeof(struct stat))
7863 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
7864 
7865 	status_t status;
7866 
7867 	if (userPath) {
7868 		// path given: write the stat of the node referred to by (fd, path)
7869 		if (!IS_USER_ADDRESS(userPath))
7870 			return B_BAD_ADDRESS;
7871 
7872 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7873 		if (pathBuffer.InitCheck() != B_OK)
7874 			return B_NO_MEMORY;
7875 
7876 		char *path = pathBuffer.LockBuffer();
7877 
7878 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
7879 		if (length < B_OK)
7880 			return length;
7881 		if (length >= B_PATH_NAME_LENGTH)
7882 			return B_NAME_TOO_LONG;
7883 
7884 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
7885 			statMask, false);
7886 	} else {
7887 		// no path given: get the FD and use the FD operation
7888 		struct file_descriptor *descriptor
7889 			= get_fd(get_current_io_context(false), fd);
7890 		if (descriptor == NULL)
7891 			return B_FILE_ERROR;
7892 
7893 		if (descriptor->ops->fd_write_stat)
7894 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
7895 		else
7896 			status = EOPNOTSUPP;
7897 
7898 		put_fd(descriptor);
7899 	}
7900 
7901 	return status;
7902 }
7903 
7904 
7905 int
7906 _user_open_attr_dir(int fd, const char *userPath)
7907 {
7908 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7909 	if (pathBuffer.InitCheck() != B_OK)
7910 		return B_NO_MEMORY;
7911 
7912 	char *path = pathBuffer.LockBuffer();
7913 
7914 	if (userPath != NULL) {
7915 		if (!IS_USER_ADDRESS(userPath)
7916 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7917 			return B_BAD_ADDRESS;
7918 	}
7919 
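	// a NULL path opens the attribute directory of the node fd refers to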
7920 	return attr_dir_open(fd, userPath ? path : NULL, false);
7921 }
7922 
7923 
7924 int
7925 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7926 {
7927 	char name[B_FILE_NAME_LENGTH];
7928 
7929 	if (!IS_USER_ADDRESS(userName)
7930 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7931 		return B_BAD_ADDRESS;
7932 
7933 	return attr_create(fd, name, type, openMode, false);
7934 }
7935 
7936 
7937 int
7938 _user_open_attr(int fd, const char *userName, int openMode)
7939 {
7940 	char name[B_FILE_NAME_LENGTH];
7941 
7942 	if (!IS_USER_ADDRESS(userName)
7943 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7944 		return B_BAD_ADDRESS;
7945 
7946 	return attr_open(fd, name, openMode, false);
7947 }
7948 
7949 
7950 status_t
7951 _user_remove_attr(int fd, const char *userName)
7952 {
7953 	char name[B_FILE_NAME_LENGTH];
7954 
7955 	if (!IS_USER_ADDRESS(userName)
7956 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7957 		return B_BAD_ADDRESS;
7958 
7959 	return attr_remove(fd, name, false);
7960 }
7961 
7962 
7963 status_t
7964 _user_rename_attr(int fromFile, const char *userFromName, int toFile,
	const char *userToName)
7965 {
7966 	if (!IS_USER_ADDRESS(userFromName)
7967 		|| !IS_USER_ADDRESS(userToName))
7968 		return B_BAD_ADDRESS;
7969 
7970 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
7971 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
7972 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
7973 		return B_NO_MEMORY;
7974 
7975 	char *fromName = fromNameBuffer.LockBuffer();
7976 	char *toName = toNameBuffer.LockBuffer();
7977 
7978 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
7979 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
7980 		return B_BAD_ADDRESS;
7981 
7982 	return attr_rename(fromFile, fromName, toFile, toName, false);
7983 }
7984 
7985 
7986 int
7987 _user_open_index_dir(dev_t device)
7988 {
7989 	return index_dir_open(device, false);
7990 }
7991 
7992 
7993 status_t
7994 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7995 {
7996 	char name[B_FILE_NAME_LENGTH];
7997 
7998 	if (!IS_USER_ADDRESS(userName)
7999 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8000 		return B_BAD_ADDRESS;
8001 
8002 	return index_create(device, name, type, flags, false);
8003 }
8004 
8005 
8006 status_t
8007 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
8008 {
8009 	char name[B_FILE_NAME_LENGTH];
8010 	struct stat stat;
8011 	status_t status;
8012 
8013 	if (!IS_USER_ADDRESS(userName)
8014 		|| !IS_USER_ADDRESS(userStat)
8015 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8016 		return B_BAD_ADDRESS;
8017 
8018 	status = index_name_read_stat(device, name, &stat, false);
8019 	if (status == B_OK) {
8020 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
8021 			return B_BAD_ADDRESS;
8022 	}
8023 
8024 	return status;
8025 }
8026 
8027 
8028 status_t
8029 _user_remove_index(dev_t device, const char *userName)
8030 {
8031 	char name[B_FILE_NAME_LENGTH];
8032 
8033 	if (!IS_USER_ADDRESS(userName)
8034 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
8035 		return B_BAD_ADDRESS;
8036 
8037 	return index_remove(device, name, false);
8038 }
8039 
8040 
8041 status_t
8042 _user_getcwd(char *userBuffer, size_t size)
8043 {
8044 	if (!IS_USER_ADDRESS(userBuffer))
8045 		return B_BAD_ADDRESS;
8046 
8047 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8048 	if (pathBuffer.InitCheck() != B_OK)
8049 		return B_NO_MEMORY;
8050 
8051 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
8052 
8053 	if (size > B_PATH_NAME_LENGTH)
8054 		size = B_PATH_NAME_LENGTH;
8055 
8056 	char *path = pathBuffer.LockBuffer();
8057 
8058 	status_t status = get_cwd(path, size, false);
8059 	if (status < B_OK)
8060 		return status;
8061 
8062 	// Copy back the result
8063 	if (user_strlcpy(userBuffer, path, size) < B_OK)
8064 		return B_BAD_ADDRESS;
8065 
8066 	return status;
8067 }
8068 
8069 
8070 status_t
8071 _user_setcwd(int fd, const char *userPath)
8072 {
8073 	TRACE(("user_setcwd: path = %p\n", userPath));
8074 
8075 	KPath pathBuffer(B_PATH_NAME_LENGTH);
8076 	if (pathBuffer.InitCheck() != B_OK)
8077 		return B_NO_MEMORY;
8078 
8079 	char *path = pathBuffer.LockBuffer();
8080 
8081 	if (userPath != NULL) {
8082 		if (!IS_USER_ADDRESS(userPath)
8083 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8084 			return B_BAD_ADDRESS;
8085 	}
8086 
8087 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
8088 }
8089 
8090 
8091 int
8092 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
8093 	uint32 flags, port_id port, int32 token)
8094 {
8095 	char *query;
8096 
8097 	if (device < 0 || userQuery == NULL || queryLength == 0)
8098 		return B_BAD_VALUE;
8099 
8100 	// this is a safety restriction only; it keeps userland from forcing
	// a huge kernel allocation
8101 	if (queryLength >= 65536)
8102 		return B_NAME_TOO_LONG;
8103 
8104 	query = (char *)malloc(queryLength + 1);
8105 	if (query == NULL)
8106 		return B_NO_MEMORY;
8107 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
8108 		free(query);
8109 		return B_BAD_ADDRESS;
8110 	}
8111 
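	// The query uses the file system's query predicate syntax, e.g.
	// (name=="*.cpp")&&(size>1024) on BFS; port and token identify the
	// userland target that receives live query update messages.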
8112 	int fd = query_open(device, query, flags, port, token, false);
8113 
8114 	free(query);
8115 	return fd;
8116 }
8117