xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 020cbad9d40235a2c50a81a42d69912a5ff8fbc4)
1 /*
2  * Copyright 2002-2008, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /*! Virtual File System and File System Interface Layer */
10 
11 
12 #include <ctype.h>
13 #include <fcntl.h>
14 #include <limits.h>
15 #include <stddef.h>
16 #include <stdio.h>
17 #include <string.h>
18 #include <sys/resource.h>
19 #include <sys/stat.h>
20 #include <unistd.h>
21 
22 #include <fs_info.h>
23 #include <fs_interface.h>
24 #include <fs_volume.h>
25 #include <OS.h>
26 #include <StorageDefs.h>
27 
28 #include <util/AutoLock.h>
29 
30 #include <block_cache.h>
31 #include <fd.h>
32 #include <file_cache.h>
33 #include <khash.h>
34 #include <KPath.h>
35 #include <lock.h>
36 #include <syscalls.h>
37 #include <vfs.h>
38 #include <vm.h>
39 #include <vm_cache.h>
40 #include <vm_low_memory.h>
41 
42 #include <boot/kernel_args.h>
43 #include <disk_device_manager/KDiskDevice.h>
44 #include <disk_device_manager/KDiskDeviceManager.h>
45 #include <disk_device_manager/KDiskDeviceUtils.h>
46 #include <disk_device_manager/KDiskSystem.h>
47 #include <fs/node_monitor.h>
48 
49 
50 //#define TRACE_VFS
51 #ifdef TRACE_VFS
52 #	define TRACE(x) dprintf x
53 #	define FUNCTION(x) dprintf x
54 #else
55 #	define TRACE(x) ;
56 #	define FUNCTION(x) ;
57 #endif
58 
59 #define ADD_DEBUGGER_COMMANDS
60 
61 const static uint32 kMaxUnusedVnodes = 8192;
62 	// This is the maximum number of unused vnodes that the system
63 	// will keep around (weak limit, if there is enough memory left,
64 	// they won't get flushed even when hitting that limit).
65 	// It may be chosen with respect to the available memory or enhanced
66 	// by some timestamp/frequency heuristic.
67 
struct vnode {
	struct vnode	*next;			// presumably the sVnodeTable chain link — TODO confirm against khash
	vm_cache		*cache;			// file cache; released in free_vnode()
	dev_t			device;			// ID of the mount this node belongs to
	list_link		mount_link;		// link in fs_mount::vnodes
	list_link		unused_link;	// presumably used for sUnusedVnodeList membership — TODO confirm
	ino_t			id;				// node ID, unique within its mount
	fs_vnode		private_node;	// the file system's private node handle
	struct fs_mount	*mount;			// the mount this node belongs to
	struct vnode	*covered_by;	// vnode of a mount covering this one, or NULL
	int32			ref_count;		// modified via atomic_add() only
	uint8			remove : 1;		// delete the node once the last reference is gone
	uint8			busy : 1;		// node is being constructed or torn down;
									// waiters poll (cf. get_vnode())
	uint8			unpublished : 1;	// the FS was never told about this node;
									// free_vnode() won't call back into the FS
	struct advisory_locking	*advisory_locking;	// advisory locks, or NULL
	struct file_descriptor *mandatory_locked_by;
};
85 
86 struct vnode_hash_key {
87 	dev_t	device;
88 	ino_t	vnode;
89 };
90 
91 #define FS_CALL(vnode, op) (vnode->mount->fs->op)
92 #define FS_MOUNT_CALL(mount, op) (mount->fs->op)
93 
94 /*!	\brief Structure to manage a mounted file system
95 
96 	Note: The root_vnode and covers_vnode fields (what others?) are
97 	initialized in fs_mount() and not changed afterwards. That is as soon
98 	as the mount is mounted and it is made sure it won't be unmounted
99 	(e.g. by holding a reference to a vnode of that mount) (read) access
100 	to those fields is always safe, even without additional locking. Moreover,
101 	while mounted the mount holds a reference to the covers_vnode, and thus
102 	making the access path vnode->mount->covers_vnode->mount->... safe if a
103 	reference to vnode is held (note that for the root mount covers_vnode
104 	is NULL, though).
105 */
struct fs_mount {
	struct fs_mount	*next;			// presumably the sMountsTable chain link — TODO confirm
	file_system_module_info *fs;	// module backing this mount (cf. FS_MOUNT_CALL)
	dev_t			id;				// mount ID (allocated from sNextMountID)
	void			*cookie;		// the FS's volume cookie, passed to its hooks
	char			*device_name;
	char			*fs_name;
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;	// set in fs_mount(), immutable afterwards (see above)
	struct vnode	*covers_vnode;	// vnode this mount is mounted on; NULL for the
									// root mount (see comment above)
	KPartition		*partition;
	struct list		vnodes;			// all vnodes of this mount; guarded by rlock
	bool			unmounting;		// fs_unmount() in progress
	bool			owns_file_device;
};
121 
// Per-vnode advisory locking state; attached lazily by
// create_advisory_locking().
struct advisory_locking {
	sem_id			lock;		// grants exclusive access to this structure
								// (acquired/released via get/put_advisory_locking())
	sem_id			wait_sem;	// released (B_RELEASE_ALL) to wake up lock waiters
	struct list		locks;		// list of advisory_lock entries
};
127 
// A single advisory lock held on a file.
struct advisory_lock {
	list_link		link;		// link in advisory_locking::locks
	team_id			team;		// team owning this lock
	off_t			offset;		// start of the locked range (maps to flock::l_start)
	off_t			length;		// length of the locked range (maps to flock::l_len)
	bool			shared;		// shared rather than exclusive lock
};
135 
136 static mutex sFileSystemsMutex;
137 
138 /*!	\brief Guards sMountsTable.
139 
140 	The holder is allowed to read/write access the sMountsTable.
141 	Manipulation of the fs_mount structures themselves
142 	(and their destruction) requires different locks though.
143 */
144 static mutex sMountMutex;
145 
146 /*!	\brief Guards mount/unmount operations.
147 
148 	The fs_mount() and fs_unmount() hold the lock during their whole operation.
149 	That is locking the lock ensures that no FS is mounted/unmounted. In
150 	particular this means that
151 	- sMountsTable will not be modified,
152 	- the fields immutable after initialization of the fs_mount structures in
153 	  sMountsTable will not be modified,
154 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
155 
156 	The thread trying to lock the lock must not hold sVnodeMutex or
157 	sMountMutex.
158 */
159 static recursive_lock sMountOpLock;
160 
161 /*!	\brief Guards the vnode::covered_by field of any vnode
162 
163 	The holder is allowed to read access the vnode::covered_by field of any
164 	vnode. Additionally holding sMountOpLock allows for write access.
165 
166 	The thread trying to lock the mutex must not hold sVnodeMutex.
167 */
168 static mutex sVnodeCoveredByMutex;
169 
170 /*!	\brief Guards sVnodeTable.
171 
172 	The holder is allowed to read/write access sVnodeTable and to
173 	any unbusy vnode in that table, save to the immutable fields (device, id,
174 	private_node, mount) to which
175 	only read-only access is allowed, and to the field covered_by, which is
176 	guarded by sMountOpLock and sVnodeCoveredByMutex.
177 
178 	The thread trying to lock the mutex must not hold sMountMutex.
179 	You must not have this mutex held when calling create_sem(), as this
180 	might call vfs_free_unused_vnodes().
181 */
182 static mutex sVnodeMutex;
183 
184 #define VNODE_HASH_TABLE_SIZE 1024
185 static hash_table *sVnodeTable;
186 static list sUnusedVnodeList;
187 static uint32 sUnusedVnodes = 0;
188 static struct vnode *sRoot;
189 
190 #define MOUNTS_HASH_TABLE_SIZE 16
191 static hash_table *sMountsTable;
192 static dev_t sNextMountID = 1;
193 
194 #define MAX_TEMP_IO_VECS 8
195 
196 mode_t __gUmask = 022;
197 
198 /* function declarations */
199 
200 // file descriptor operation prototypes
201 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
202 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
203 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
204 static void file_free_fd(struct file_descriptor *);
205 static status_t file_close(struct file_descriptor *);
206 static status_t file_select(struct file_descriptor *, uint8 event,
207 	struct selectsync *sync);
208 static status_t file_deselect(struct file_descriptor *, uint8 event,
209 	struct selectsync *sync);
210 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
211 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
212 static status_t dir_rewind(struct file_descriptor *);
213 static void dir_free_fd(struct file_descriptor *);
214 static status_t dir_close(struct file_descriptor *);
215 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
216 static status_t attr_dir_rewind(struct file_descriptor *);
217 static void attr_dir_free_fd(struct file_descriptor *);
218 static status_t attr_dir_close(struct file_descriptor *);
219 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
220 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
221 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
222 static void attr_free_fd(struct file_descriptor *);
223 static status_t attr_close(struct file_descriptor *);
224 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
225 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
226 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
227 static status_t index_dir_rewind(struct file_descriptor *);
228 static void index_dir_free_fd(struct file_descriptor *);
229 static status_t index_dir_close(struct file_descriptor *);
230 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
231 static status_t query_rewind(struct file_descriptor *);
232 static void query_free_fd(struct file_descriptor *);
233 static status_t query_close(struct file_descriptor *);
234 
235 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
236 static status_t common_read_stat(struct file_descriptor *, struct stat *);
237 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
238 
239 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
240 	bool traverseLeafLink, int count, struct vnode **_vnode, ino_t *_parentID, int *_type);
241 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
242 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
243 	struct vnode **_vnode, ino_t *_parentID, bool kernel);
244 static void inc_vnode_ref_count(struct vnode *vnode);
245 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
246 static inline void put_vnode(struct vnode *vnode);
247 static status_t fs_unmount(char *path, dev_t mountID, uint32 flags,
248 	bool kernel);
249 
250 
251 static struct fd_ops sFileOps = {
252 	file_read,
253 	file_write,
254 	file_seek,
255 	common_ioctl,
256 	file_select,
257 	file_deselect,
258 	NULL,		// read_dir()
259 	NULL,		// rewind_dir()
260 	common_read_stat,
261 	common_write_stat,
262 	file_close,
263 	file_free_fd
264 };
265 
266 static struct fd_ops sDirectoryOps = {
267 	NULL,		// read()
268 	NULL,		// write()
269 	NULL,		// seek()
270 	common_ioctl,
271 	NULL,		// select()
272 	NULL,		// deselect()
273 	dir_read,
274 	dir_rewind,
275 	common_read_stat,
276 	common_write_stat,
277 	dir_close,
278 	dir_free_fd
279 };
280 
281 static struct fd_ops sAttributeDirectoryOps = {
282 	NULL,		// read()
283 	NULL,		// write()
284 	NULL,		// seek()
285 	common_ioctl,
286 	NULL,		// select()
287 	NULL,		// deselect()
288 	attr_dir_read,
289 	attr_dir_rewind,
290 	common_read_stat,
291 	common_write_stat,
292 	attr_dir_close,
293 	attr_dir_free_fd
294 };
295 
296 static struct fd_ops sAttributeOps = {
297 	attr_read,
298 	attr_write,
299 	attr_seek,
300 	common_ioctl,
301 	NULL,		// select()
302 	NULL,		// deselect()
303 	NULL,		// read_dir()
304 	NULL,		// rewind_dir()
305 	attr_read_stat,
306 	attr_write_stat,
307 	attr_close,
308 	attr_free_fd
309 };
310 
311 static struct fd_ops sIndexDirectoryOps = {
312 	NULL,		// read()
313 	NULL,		// write()
314 	NULL,		// seek()
315 	NULL,		// ioctl()
316 	NULL,		// select()
317 	NULL,		// deselect()
318 	index_dir_read,
319 	index_dir_rewind,
320 	NULL,		// read_stat()
321 	NULL,		// write_stat()
322 	index_dir_close,
323 	index_dir_free_fd
324 };
325 
326 #if 0
327 static struct fd_ops sIndexOps = {
328 	NULL,		// read()
329 	NULL,		// write()
330 	NULL,		// seek()
331 	NULL,		// ioctl()
332 	NULL,		// select()
333 	NULL,		// deselect()
334 	NULL,		// dir_read()
335 	NULL,		// dir_rewind()
336 	index_read_stat,	// read_stat()
337 	NULL,		// write_stat()
338 	NULL,		// dir_close()
339 	NULL		// free_fd()
340 };
341 #endif
342 
343 static struct fd_ops sQueryOps = {
344 	NULL,		// read()
345 	NULL,		// write()
346 	NULL,		// seek()
347 	NULL,		// ioctl()
348 	NULL,		// select()
349 	NULL,		// deselect()
350 	query_read,
351 	query_rewind,
352 	NULL,		// read_stat()
353 	NULL,		// write_stat()
354 	query_close,
355 	query_free_fd
356 };
357 
358 
359 // VNodePutter
360 class VNodePutter {
361 public:
362 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
363 
364 	~VNodePutter()
365 	{
366 		Put();
367 	}
368 
369 	void SetTo(struct vnode *vnode)
370 	{
371 		Put();
372 		fVNode = vnode;
373 	}
374 
375 	void Put()
376 	{
377 		if (fVNode) {
378 			put_vnode(fVNode);
379 			fVNode = NULL;
380 		}
381 	}
382 
383 	struct vnode *Detach()
384 	{
385 		struct vnode *vnode = fVNode;
386 		fVNode = NULL;
387 		return vnode;
388 	}
389 
390 private:
391 	struct vnode *fVNode;
392 };
393 
394 
395 class FDCloser {
396 public:
397 	FDCloser() : fFD(-1), fKernel(true) {}
398 
399 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
400 
401 	~FDCloser()
402 	{
403 		Close();
404 	}
405 
406 	void SetTo(int fd, bool kernel)
407 	{
408 		Close();
409 		fFD = fd;
410 		fKernel = kernel;
411 	}
412 
413 	void Close()
414 	{
415 		if (fFD >= 0) {
416 			if (fKernel)
417 				_kern_close(fFD);
418 			else
419 				_user_close(fFD);
420 			fFD = -1;
421 		}
422 	}
423 
424 	int Detach()
425 	{
426 		int fd = fFD;
427 		fFD = -1;
428 		return fd;
429 	}
430 
431 private:
432 	int		fFD;
433 	bool	fKernel;
434 };
435 
436 
437 static int
438 mount_compare(void *_m, const void *_key)
439 {
440 	struct fs_mount *mount = (fs_mount *)_m;
441 	const dev_t *id = (dev_t *)_key;
442 
443 	if (mount->id == *id)
444 		return 0;
445 
446 	return -1;
447 }
448 
449 
450 static uint32
451 mount_hash(void *_m, const void *_key, uint32 range)
452 {
453 	struct fs_mount *mount = (fs_mount *)_m;
454 	const dev_t *id = (dev_t *)_key;
455 
456 	if (mount)
457 		return mount->id % range;
458 
459 	return (uint32)*id % range;
460 }
461 
462 
/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
*/
static struct fs_mount *
find_mount(dev_t id)
{
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}
473 
474 
475 static status_t
476 get_mount(dev_t id, struct fs_mount **_mount)
477 {
478 	struct fs_mount *mount;
479 	status_t status;
480 
481 	MutexLocker nodeLocker(sVnodeMutex);
482 	MutexLocker mountLocker(sMountMutex);
483 
484 	mount = find_mount(id);
485 	if (mount == NULL)
486 		return B_BAD_VALUE;
487 
488 	struct vnode* rootNode = mount->root_vnode;
489 	if (rootNode == NULL || rootNode->busy || rootNode->ref_count == 0) {
490 		// might have been called during a mount/unmount operation
491 		return B_BUSY;
492 	}
493 
494 	inc_vnode_ref_count(mount->root_vnode);
495 	*_mount = mount;
496 	return B_OK;
497 }
498 
499 
500 static void
501 put_mount(struct fs_mount *mount)
502 {
503 	if (mount)
504 		put_vnode(mount->root_vnode);
505 }
506 
507 
/*!	Releases the module reference to the file system module that was
	acquired by get_file_system().
*/
static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}
513 
514 
515 /*!	Tries to open the specified file system module.
516 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
517 	Returns a pointer to file system module interface, or NULL if it
518 	could not open the module.
519 */
520 static file_system_module_info *
521 get_file_system(const char *fsName)
522 {
523 	char name[B_FILE_NAME_LENGTH];
524 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
525 		// construct module name if we didn't get one
526 		// (we currently support only one API)
527 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
528 		fsName = NULL;
529 	}
530 
531 	file_system_module_info *info;
532 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
533 		return NULL;
534 
535 	return info;
536 }
537 
538 
/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char *
get_file_system_name(const char *fsName)
{
	const size_t prefixLength = strlen("file_systems/");
	const char *start = fsName;
	size_t nameLength;

	if (strncmp(fsName, "file_systems/", prefixLength) != 0) {
		// the name already seems to be the module's file name
		nameLength = strlen(fsName);
	} else {
		// skip the prefix and cut off the trailing "/v1"
		start = fsName + prefixLength;
		const char *end = strchr(start, '/');
		nameLength = end != NULL ? (size_t)(end - start) : strlen(start);
			// if there is no slash, this doesn't seem to be a valid
			// name, but well...
	}

	char *name = (char *)malloc(nameLength + 1);
	if (name == NULL)
		return NULL;

	memcpy(name, start, nameLength);
	name[nameLength] = '\0';
	return name;
}
571 
572 
573 static int
574 vnode_compare(void *_vnode, const void *_key)
575 {
576 	struct vnode *vnode = (struct vnode *)_vnode;
577 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
578 
579 	if (vnode->device == key->device && vnode->id == key->vnode)
580 		return 0;
581 
582 	return -1;
583 }
584 
585 
586 static uint32
587 vnode_hash(void *_vnode, const void *_key, uint32 range)
588 {
589 	struct vnode *vnode = (struct vnode *)_vnode;
590 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
591 
592 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
593 
594 	if (vnode != NULL)
595 		return VHASH(vnode->device, vnode->id) % range;
596 
597 	return VHASH(key->device, key->vnode) % range;
598 
599 #undef VHASH
600 }
601 
602 
/*!	Adds \a vnode to the \a mount's vnode list; the list is guarded by the
	mount's \c rlock.
*/
static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}
612 
613 
/*!	Removes \a vnode from the \a mount's vnode list (guarded by the mount's
	\c rlock) and clears the link pointers so a stale link cannot be
	followed afterwards.
*/
static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}
624 
625 
/*!	Allocates a new vnode for the given (mount ID, node ID) pair, inserts it
	into the global vnode hash and its mount's vnode list, and returns it
	with a reference count of 1.
	The caller is expected to hold sVnodeMutex (cf. get_vnode(), which calls
	this with the mutex held), since sVnodeTable is modified here.
	Returns \c B_ENTRY_NOT_FOUND if the mount doesn't exist (anymore) or is
	currently being unmounted.
*/
static status_t
create_new_vnode(struct vnode **_vnode, dev_t mountID, ino_t vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}
659 
660 
661 /*!	Frees the vnode and all resources it has acquired, and removes
662 	it from the vnode hash as well as from its mount structure.
663 	Will also make sure that any cache modifications are written back.
664 */
static void
free_vnode(struct vnode *vnode, bool reenter)
{
	// The node must be unreferenced and marked busy, so no one else can
	// obtain it while we tear it down.
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->busy, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->remove && FS_CALL(vnode, fsync) != NULL)
		FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now, will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// TODO: Usually, when the vnode is unreferenced, no one can get hold of the
	// cache either (i.e. no one can get a cache reference while we're deleting
	// the vnode). This is, however, not the case for the page daemon. It gets
	// its cache references via the pages it scans, so it can in fact get a
	// vnode reference while we're deleting the vnode.

	if (!vnode->unpublished) {
		// let the file system clean up: remove_vnode() if the node was
		// marked for deletion, put_vnode() otherwise
		if (vnode->remove) {
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie,
				vnode->private_node, reenter);
		} else {
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node,
				reenter);
		}
	}

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}
721 
722 
723 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
724 	if the counter dropped to 0.
725 
726 	The caller must, of course, own a reference to the vnode to call this
727 	function.
728 	The caller must not hold the sVnodeMutex or the sMountMutex.
729 
730 	\param vnode the vnode.
731 	\param reenter \c true, if this function is called (indirectly) from within
732 		   a file system.
733 	\return \c B_OK, if everything went fine, an error code otherwise.
734 */
static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	mutex_lock(&sVnodeMutex);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		// we just dropped the last reference; since we hold sVnodeMutex,
		// no one can revive the node behind our back right now
		if (vnode->busy)
			panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes
				&& vm_low_memory_state() != B_NO_LOW_MEMORY) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		mutex_unlock(&sVnodeMutex);

		// free_vnode() expects an unreferenced, busy node -- which is what
		// we arranged above before dropping the mutex
		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}
779 
780 
781 /*!	\brief Increments the reference counter of the given vnode.
782 
783 	The caller must either already have a reference to the vnode or hold
784 	the sVnodeMutex.
785 
786 	\param vnode the vnode.
787 */
static void
inc_vnode_ref_count(struct vnode *vnode)
{
	// atomic, so holders of an existing reference need no lock for this
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}
794 
795 
796 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
797 
798 	The caller must hold the sVnodeMutex.
799 
800 	\param mountID the mount ID.
801 	\param vnodeID the node ID.
802 
803 	\return The vnode structure, if it was found in the hash table, \c NULL
804 			otherwise.
805 */
806 static struct vnode *
807 lookup_vnode(dev_t mountID, ino_t vnodeID)
808 {
809 	struct vnode_hash_key key;
810 
811 	key.device = mountID;
812 	key.vnode = vnodeID;
813 
814 	return (vnode *)hash_lookup(sVnodeTable, &key);
815 }
816 
817 
/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeMutex or the sMountMutex.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait whether to wait (up to 10 seconds) for a busy vnode to
		   become available, instead of failing with \c B_BUSY immediately.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

	int32 tries = 1000;
		// try for 10 secs
restart:
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		// someone else is currently constructing or destroying the node;
		// poll (10 ms at a time) until it becomes unbusy, if allowed to
		mutex_unlock(&sVnodeMutex);
		if (!canWait || --tries < 0) {
			// vnode doesn't seem to become unbusy
			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID, vnodeID);
			return B_BUSY;
		}
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		// mark the new node busy while we drop sVnodeMutex to call the FS
		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID,
			&vnode->private_node, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	// the FS couldn't provide the node -- undo what create_new_vnode() did
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}
906 
907 
908 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
909 	if the counter dropped to 0.
910 
911 	The caller must, of course, own a reference to the vnode to call this
912 	function.
913 	The caller must not hold the sVnodeMutex or the sMountMutex.
914 
915 	\param vnode the vnode.
916 */
static inline void
put_vnode(struct vnode *vnode)
{
	// convenience wrapper: drop one reference, not reentering the FS
	dec_vnode_ref_count(vnode, false);
}
922 
923 
/*!	Low memory handler for vnodes: frees a portion of the unused vnodes,
	first writing back their modified cache pages. The fraction freed grows
	with the severity of the low memory \a level (1% on a note, 10% on a
	warning, all of them when critical).
*/
static void
vnode_low_memory_handler(void */*data*/, int32 level)
{
	TRACE(("vnode_low_memory_handler(level = %ld)\n", level));

	uint32 count = 1;
	switch (level) {
		case B_NO_LOW_MEMORY:
			return;
		case B_LOW_MEMORY_NOTE:
			count = sUnusedVnodes / 100;
			break;
		case B_LOW_MEMORY_WARNING:
			count = sUnusedVnodes / 10;
			break;
		case B_LOW_MEMORY_CRITICAL:
			count = sUnusedVnodes;
			break;
	}

	if (count > sUnusedVnodes)
		count = sUnusedVnodes;

	// first, write back the modified pages of some unused vnodes

	uint32 freeCount = count;

	for (uint32 i = 0; i < count; i++) {
		mutex_lock(&sVnodeMutex);
		struct vnode *vnode = (struct vnode *)list_remove_head_item(
			&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}

		// hold a reference while writing back, so the node stays alive
		// without the mutex; the put below re-inserts it into the list
		inc_vnode_ref_count(vnode);
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		if (vnode->cache != NULL)
			vm_cache_write_modified(vnode->cache, false);

		dec_vnode_ref_count(vnode, false);
	}

	// and then free them

	for (uint32 i = 0; i < freeCount; i++) {
		mutex_lock(&sVnodeMutex);

		// We're removing vnodes from the tail of the list - hoping it's
		// one of those we have just written back; otherwise we'll write
		// back the vnode with the busy flag turned on, and that might
		// take some time.
		struct vnode *vnode = (struct vnode *)list_remove_tail_item(
			&sUnusedVnodeList);
		if (vnode == NULL) {
			mutex_unlock(&sVnodeMutex);
			break;
		}
		TRACE(("  free vnode %ld:%Ld (%p)\n", vnode->device, vnode->id, vnode));

		// mark busy so no one else grabs it while we free it
		vnode->busy = true;
		sUnusedVnodes--;

		mutex_unlock(&sVnodeMutex);

		free_vnode(vnode, false);
	}
}
996 
997 
/*!	Releases the exclusive access to the given advisory locking structure
	that was acquired via get_advisory_locking().
*/
static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}
1003 
1004 
1005 /*!	Returns the advisory_locking object of the \a vnode in case it
1006 	has one, and locks it.
1007 	You have to call put_advisory_locking() when you're done with
1008 	it.
1009 	Note, you must not have the vnode mutex locked when calling
1010 	this function.
1011 */
static struct advisory_locking *
get_advisory_locking(struct vnode *vnode)
{
	mutex_lock(&sVnodeMutex);

	// read pointer and semaphore ID while the vnode's field can't change
	struct advisory_locking *locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	mutex_unlock(&sVnodeMutex);

	// acquire the structure's lock outside of sVnodeMutex; a failing
	// acquire_sem() means the semaphore was deleted under us
	if (lock >= B_OK)
		lock = acquire_sem(lock);
	if (lock < B_OK) {
		// This means the locking has been deleted in the mean time
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
1033 
1034 
1035 /*!	Creates a locked advisory_locking object, and attaches it to the
1036 	given \a vnode.
1037 	Returns B_OK in case of success - also if the vnode got such an
1038 	object from someone else in the mean time, you'll still get this
1039 	one locked then.
1040 */
static status_t
create_advisory_locking(struct vnode *vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	struct advisory_locking *locking = (struct advisory_locking *)malloc(
		sizeof(struct advisory_locking));
	if (locking == NULL)
		return B_NO_MEMORY;

	status_t status;

	locking->wait_sem = create_sem(0, "advisory lock");
	if (locking->wait_sem < B_OK) {
		status = locking->wait_sem;
		goto err1;
	}

	// created with count 0, i.e. the new structure starts out "locked"
	// for the caller (see the function comment above)
	locking->lock = create_sem(0, "advisory locking");
	if (locking->lock < B_OK) {
		status = locking->lock;
		goto err2;
	}

	list_init(&locking->locks);

	// We need to set the locking structure atomically - someone
	// else might set one at the same time
	do {
		if (atomic_test_and_set((vint32 *)&vnode->advisory_locking, (addr_t)locking,
				NULL) == NULL)
			return B_OK;
	} while (get_advisory_locking(vnode) == NULL);

	status = B_OK;
		// we delete the one we've just created, but nevertheless, the vnode
		// does have a locking structure now

	delete_sem(locking->lock);
err2:
	delete_sem(locking->wait_sem);
err1:
	free(locking);
	return status;
}
1087 
1088 
/*!	Retrieves the first lock that has been set by the current team.
	Only \c l_start and \c l_len of \a flock are filled in.
	Returns \c B_BAD_VALUE if the vnode has no advisory locking structure,
	or if no lock owned by the calling team exists.
*/

static status_t
get_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	struct advisory_locking *locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_BAD_VALUE;

	// TODO: this should probably get the flock by its file descriptor!
	team_id team = team_get_current_team_id();
	status_t status = B_BAD_VALUE;

	struct advisory_lock *lock = NULL;
	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
		if (lock->team == team) {
			flock->l_start = lock->offset;
			flock->l_len = lock->length;
			status = B_OK;
			break;
		}
	}

	put_advisory_locking(locking);
	return status;
}
1116 
1117 
1118 /*!	Removes the specified lock, or all locks of the calling team
1119 	if \a flock is NULL.
1120 */
1121 static status_t
1122 release_advisory_lock(struct vnode *vnode, struct flock *flock)
1123 {
1124 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1125 
1126 	struct advisory_locking *locking = get_advisory_locking(vnode);
1127 	if (locking == NULL)
1128 		return flock != NULL ? B_BAD_VALUE : B_OK;
1129 
1130 	team_id team = team_get_current_team_id();
1131 
1132 	// find matching lock entry
1133 
1134 	status_t status = B_BAD_VALUE;
1135 	struct advisory_lock *lock = NULL;
1136 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1137 		if (lock->team == team && (flock == NULL || (flock != NULL
1138 			&& lock->offset == flock->l_start
1139 			&& lock->length == flock->l_len))) {
1140 			// we found our lock, free it
1141 			list_remove_item(&locking->locks, lock);
1142 			free(lock);
1143 			status = B_OK;
1144 			break;
1145 		}
1146 	}
1147 
1148 	bool removeLocking = list_is_empty(&locking->locks);
1149 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1150 
1151 	put_advisory_locking(locking);
1152 
1153 	if (status < B_OK)
1154 		return status;
1155 
1156 	if (removeLocking) {
1157 		// we can remove the whole advisory locking structure; it's no longer used
1158 		locking = get_advisory_locking(vnode);
1159 		if (locking != NULL) {
1160 			// the locking could have been changed in the mean time
1161 			if (list_is_empty(&locking->locks)) {
1162 				vnode->advisory_locking = NULL;
1163 
1164 				// we've detached the locking from the vnode, so we can safely delete it
1165 				delete_sem(locking->lock);
1166 				delete_sem(locking->wait_sem);
1167 				free(locking);
1168 			} else {
1169 				// the locking is in use again
1170 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1171 			}
1172 		}
1173 	}
1174 
1175 	return B_OK;
1176 }
1177 
1178 
/*!	Tries to install a new advisory lock (described by the already
	normalized \a flock) on \a vnode for the calling team.
	If \a wait is \c true, the function blocks until all colliding locks
	have been released; otherwise a collision yields
	\c B_PERMISSION_DENIED.
*/
static status_t
acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	// shared (read) locks may coexist; exclusive ones may not
	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding file lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	sem_id waitForLock = -1;

	if (locking != NULL) {
		// test for collisions
		struct advisory_lock *lock = NULL;
		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
			// NOTE(review): with half-open ranges one would expect '<' here
			// instead of '<=' - as written, a lock starting exactly at
			// l_start + l_len (i.e. merely adjacent) also counts as an
			// overlap; confirm whether this is intended
			if (lock->offset <= flock->l_start + flock->l_len
				&& lock->offset + lock->length > flock->l_start) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		// keep the locking structure locked only if we're going to block on it
		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = B_PERMISSION_DENIED;
		else {
			// atomically release the locking object and block on its wait_sem;
			// a release of any lock wakes us up to re-scan from the start
			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install new lock

	locking = get_advisory_locking(vnode);
	if (locking == NULL) {
		// we need to create a new locking object
		status = create_advisory_locking(vnode);
		if (status < B_OK)
			return status;

		locking = vnode->advisory_locking;
			// we own the locking object, so it can't go away
	}

	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
	if (lock == NULL) {
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	// values must already be normalized when getting here
	lock->offset = flock->l_start;
	lock->length = flock->l_len;
	lock->shared = shared;

	list_add_item(&locking->locks, lock);
	put_advisory_locking(locking);

	return status;
}
1262 
1263 
1264 static status_t
1265 normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
1266 {
1267 	switch (flock->l_whence) {
1268 		case SEEK_SET:
1269 			break;
1270 		case SEEK_CUR:
1271 			flock->l_start += descriptor->pos;
1272 			break;
1273 		case SEEK_END:
1274 		{
1275 			struct vnode *vnode = descriptor->u.vnode;
1276 			struct stat stat;
1277 			status_t status;
1278 
1279 			if (FS_CALL(vnode, read_stat) == NULL)
1280 				return EOPNOTSUPP;
1281 
1282 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
1283 			if (status < B_OK)
1284 				return status;
1285 
1286 			flock->l_start += stat.st_size;
1287 			break;
1288 		}
1289 		default:
1290 			return B_BAD_VALUE;
1291 	}
1292 
1293 	if (flock->l_start < 0)
1294 		flock->l_start = 0;
1295 	if (flock->l_len == 0)
1296 		flock->l_len = OFF_MAX;
1297 
1298 	// don't let the offset and length overflow
1299 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1300 		flock->l_len = OFF_MAX - flock->l_start;
1301 
1302 	if (flock->l_len < 0) {
1303 		// a negative length reverses the region
1304 		flock->l_start += flock->l_len;
1305 		flock->l_len = -flock->l_len;
1306 	}
1307 
1308 	return B_OK;
1309 }
1310 
1311 
1312 /*!	Disconnects all file descriptors that are associated with the
1313 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1314 	\a mount object.
1315 
1316 	Note, after you've called this function, there might still be ongoing
1317 	accesses - they won't be interrupted if they already happened before.
1318 	However, any subsequent access will fail.
1319 
1320 	This is not a cheap function and should be used with care and rarely.
1321 	TODO: there is currently no means to stop a blocking read/write!
1322 */
1323 void
1324 disconnect_mount_or_vnode_fds(struct fs_mount *mount,
1325 	struct vnode *vnodeToDisconnect)
1326 {
1327 	// iterate over all teams and peek into their file descriptors
1328 	int32 nextTeamID = 0;
1329 
1330 	while (true) {
1331 		struct io_context *context = NULL;
1332 		sem_id contextMutex = -1;
1333 		struct team *team = NULL;
1334 		team_id lastTeamID;
1335 
1336 		cpu_status state = disable_interrupts();
1337 		GRAB_TEAM_LOCK();
1338 
1339 		lastTeamID = peek_next_thread_id();
1340 		if (nextTeamID < lastTeamID) {
1341 			// get next valid team
1342 			while (nextTeamID < lastTeamID
1343 				&& !(team = team_get_team_struct_locked(nextTeamID))) {
1344 				nextTeamID++;
1345 			}
1346 
1347 			if (team) {
1348 				context = (io_context *)team->io_context;
1349 				contextMutex = context->io_mutex.sem;
1350 				nextTeamID++;
1351 			}
1352 		}
1353 
1354 		RELEASE_TEAM_LOCK();
1355 		restore_interrupts(state);
1356 
1357 		if (context == NULL)
1358 			break;
1359 
1360 		// we now have a context - since we couldn't lock it while having
1361 		// safe access to the team structure, we now need to lock the mutex
1362 		// manually
1363 
1364 		if (acquire_sem(contextMutex) != B_OK) {
1365 			// team seems to be gone, go over to the next team
1366 			continue;
1367 		}
1368 
1369 		// the team cannot be deleted completely while we're owning its
1370 		// io_context mutex, so we can safely play with it now
1371 
1372 		context->io_mutex.holder = thread_get_current_thread_id();
1373 
1374 		if (context->cwd != NULL && context->cwd->mount == mount
1375 			&& (vnodeToDisconnect == NULL
1376 				|| vnodeToDisconnect == context->cwd)) {
1377 			put_vnode(context->cwd);
1378 				// Note: We're only accessing the pointer, not the vnode itself
1379 				// in the lines below.
1380 
1381 			if (context->cwd == mount->root_vnode) {
1382 				// redirect the current working directory to the covered vnode
1383 				context->cwd = mount->covers_vnode;
1384 				inc_vnode_ref_count(context->cwd);
1385 			} else
1386 				context->cwd = NULL;
1387 		}
1388 
1389 		for (uint32 i = 0; i < context->table_size; i++) {
1390 			if (struct file_descriptor *descriptor = context->fds[i]) {
1391 				inc_fd_ref_count(descriptor);
1392 
1393 				// if this descriptor points at this mount, we
1394 				// need to disconnect it to be able to unmount
1395 				struct vnode *vnode = fd_vnode(descriptor);
1396 				if (vnodeToDisconnect != NULL) {
1397 					if (vnode == vnodeToDisconnect)
1398 						disconnect_fd(descriptor);
1399 				} else if (vnode != NULL && vnode->mount == mount
1400 					|| vnode == NULL && descriptor->u.mount == mount)
1401 					disconnect_fd(descriptor);
1402 
1403 				put_fd(descriptor);
1404 			}
1405 		}
1406 
1407 		mutex_unlock(&context->io_mutex);
1408 	}
1409 }
1410 
1411 
1412 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1413 		   by.
1414 
1415 	Given an arbitrary vnode, the function checks, whether the node is covered
1416 	by the root of a volume. If it is the function obtains a reference to the
1417 	volume root node and returns it.
1418 
1419 	\param vnode The vnode in question.
1420 	\return The volume root vnode the vnode cover is covered by, if it is
1421 			indeed a mount point, or \c NULL otherwise.
1422 */
1423 static struct vnode *
1424 resolve_mount_point_to_volume_root(struct vnode *vnode)
1425 {
1426 	if (!vnode)
1427 		return NULL;
1428 
1429 	struct vnode *volumeRoot = NULL;
1430 
1431 	mutex_lock(&sVnodeCoveredByMutex);
1432 	if (vnode->covered_by) {
1433 		volumeRoot = vnode->covered_by;
1434 		inc_vnode_ref_count(volumeRoot);
1435 	}
1436 	mutex_unlock(&sVnodeCoveredByMutex);
1437 
1438 	return volumeRoot;
1439 }
1440 
1441 
1442 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1443 		   by.
1444 
1445 	Given an arbitrary vnode (identified by mount and node ID), the function
1446 	checks, whether the node is covered by the root of a volume. If it is the
1447 	function returns the mount and node ID of the volume root node. Otherwise
1448 	it simply returns the supplied mount and node ID.
1449 
1450 	In case of error (e.g. the supplied node could not be found) the variables
1451 	for storing the resolved mount and node ID remain untouched and an error
1452 	code is returned.
1453 
1454 	\param mountID The mount ID of the vnode in question.
1455 	\param nodeID The node ID of the vnode in question.
1456 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1457 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1458 	\return
1459 	- \c B_OK, if everything went fine,
1460 	- another error code, if something went wrong.
1461 */
1462 status_t
1463 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1464 	dev_t *resolvedMountID, ino_t *resolvedNodeID)
1465 {
1466 	// get the node
1467 	struct vnode *node;
1468 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1469 	if (error != B_OK)
1470 		return error;
1471 
1472 	// resolve the node
1473 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1474 	if (resolvedNode) {
1475 		put_vnode(node);
1476 		node = resolvedNode;
1477 	}
1478 
1479 	// set the return values
1480 	*resolvedMountID = node->device;
1481 	*resolvedNodeID = node->id;
1482 
1483 	put_vnode(node);
1484 
1485 	return B_OK;
1486 }
1487 
1488 
1489 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1490 
1491 	Given an arbitrary vnode, the function checks, whether the node is the
1492 	root of a volume. If it is (and if it is not "/"), the function obtains
1493 	a reference to the underlying mount point node and returns it.
1494 
1495 	\param vnode The vnode in question (caller must have a reference).
1496 	\return The mount point vnode the vnode covers, if it is indeed a volume
1497 			root and not "/", or \c NULL otherwise.
1498 */
1499 static struct vnode *
1500 resolve_volume_root_to_mount_point(struct vnode *vnode)
1501 {
1502 	if (!vnode)
1503 		return NULL;
1504 
1505 	struct vnode *mountPoint = NULL;
1506 
1507 	struct fs_mount *mount = vnode->mount;
1508 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1509 		mountPoint = mount->covers_vnode;
1510 		inc_vnode_ref_count(mountPoint);
1511 	}
1512 
1513 	return mountPoint;
1514 }
1515 
1516 
1517 /*!	\brief Gets the directory path and leaf name for a given path.
1518 
1519 	The supplied \a path is transformed to refer to the directory part of
1520 	the entry identified by the original path, and into the buffer \a filename
1521 	the leaf name of the original entry is written.
1522 	Neither the returned path nor the leaf name can be expected to be
1523 	canonical.
1524 
1525 	\param path The path to be analyzed. Must be able to store at least one
1526 		   additional character.
1527 	\param filename The buffer into which the leaf name will be written.
1528 		   Must be of size B_FILE_NAME_LENGTH at least.
1529 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1530 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
1531 		   if the given path name is empty.
1532 */
1533 static status_t
1534 get_dir_path_and_leaf(char *path, char *filename)
1535 {
1536 	if (*path == '\0')
1537 		return B_ENTRY_NOT_FOUND;
1538 
1539 	char *p = strrchr(path, '/');
1540 		// '/' are not allowed in file names!
1541 
1542 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1543 
1544 	if (!p) {
1545 		// this path is single segment with no '/' in it
1546 		// ex. "foo"
1547 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1548 			return B_NAME_TOO_LONG;
1549 		strcpy(path, ".");
1550 	} else {
1551 		p++;
1552 		if (*p == '\0') {
1553 			// special case: the path ends in '/'
1554 			strcpy(filename, ".");
1555 		} else {
1556 			// normal leaf: replace the leaf portion of the path with a '.'
1557 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1558 				>= B_FILE_NAME_LENGTH) {
1559 				return B_NAME_TOO_LONG;
1560 			}
1561 		}
1562 		p[0] = '.';
1563 		p[1] = '\0';
1564 	}
1565 	return B_OK;
1566 }
1567 
1568 
1569 static status_t
1570 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char *name,
1571 	struct vnode **_vnode)
1572 {
1573 	char clonedName[B_FILE_NAME_LENGTH + 1];
1574 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1575 		return B_NAME_TOO_LONG;
1576 
1577 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1578 	struct vnode *directory;
1579 
1580 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
1581 	if (status < 0)
1582 		return status;
1583 
1584 	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL,
1585 		NULL);
1586 }
1587 
1588 
/*!	Returns the vnode for the relative path starting at the specified \a vnode.
	\a path must not be NULL.
	If it returns successfully, \a path contains the name of the last path
	component.
	Note, this reduces the ref_count of the starting \a vnode, no matter if
	it is successful or not!

	\param vnode Directory to start at; its reference is always consumed.
	\param path In/out path buffer; modified in place while walking.
	\param traverseLeafLink Whether a symlink as last component is followed.
	\param count Current symlink recursion depth (B_MAX_SYMLINKS limit).
	\param _vnode Receives the resolved vnode (with a reference).
	\param _parentID Optionally receives the ID of the resolved node's parent.
	\param _type Optionally receives the stat type of the resolved node.
*/
static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, ino_t *_parentID, int *_type)
{
	status_t status = 0;
	ino_t lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL) {
		put_vnode(vnode);
		return B_BAD_VALUE;
	}

	if (*path == '\0') {
		put_vnode(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	while (true) {
		struct vnode *nextVnode;
		ino_t vnodeID;
		char *nextPath;

		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			// terminate the current component and skip the slash run
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying filesystem
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the filesystem to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
		// from inside the filesystem, thus the vnode would have to be in the list and it's
		// ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			TRACE(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			if (count + 1 > B_MAX_SYMLINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			if (FS_CALL(nextVnode, read_symlink) != NULL) {
				status = FS_CALL(nextVnode, read_symlink)(
					nextVnode->mount->cookie, nextVnode->private_node, buffer,
					&bufferSize);
			} else
				status = B_BAD_VALUE;

			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			bool absoluteSymlink = false;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);

				absoluteSymlink = true;
			}

			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the
				// ref_count of the vnode, no matter if we succeeded or not

			if (absoluteSymlink && *path == '\0') {
				// symlink was just "/"
				nextVnode = vnode;
			} else {
				status = vnode_path_to_vnode(vnode, path, traverseLeafLink,
					count + 1, &nextVnode, &lastParentID, _type);
			}

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}
1778 
1779 
1780 static status_t
1781 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1782 	ino_t *_parentID, bool kernel)
1783 {
1784 	struct vnode *start = NULL;
1785 
1786 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1787 
1788 	if (!path)
1789 		return B_BAD_VALUE;
1790 
1791 	if (*path == '\0')
1792 		return B_ENTRY_NOT_FOUND;
1793 
1794 	// figure out if we need to start at root or at cwd
1795 	if (*path == '/') {
1796 		if (sRoot == NULL) {
1797 			// we're a bit early, aren't we?
1798 			return B_ERROR;
1799 		}
1800 
1801 		while (*++path == '/')
1802 			;
1803 		start = sRoot;
1804 		inc_vnode_ref_count(start);
1805 
1806 		if (*path == '\0') {
1807 			*_vnode = start;
1808 			return B_OK;
1809 		}
1810 
1811 	} else {
1812 		struct io_context *context = get_current_io_context(kernel);
1813 
1814 		mutex_lock(&context->io_mutex);
1815 		start = context->cwd;
1816 		if (start != NULL)
1817 			inc_vnode_ref_count(start);
1818 		mutex_unlock(&context->io_mutex);
1819 
1820 		if (start == NULL)
1821 			return B_ERROR;
1822 	}
1823 
1824 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1825 }
1826 
1827 
1828 /*! Returns the vnode in the next to last segment of the path, and returns
1829 	the last portion in filename.
1830 	The path buffer must be able to store at least one additional character.
1831 */
1832 static status_t
1833 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1834 {
1835 	status_t status = get_dir_path_and_leaf(path, filename);
1836 	if (status != B_OK)
1837 		return status;
1838 
1839 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1840 }
1841 
1842 
1843 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
1844 		   to by a FD + path pair.
1845 
1846 	\a path must be given in either case. \a fd might be omitted, in which
1847 	case \a path is either an absolute path or one relative to the current
1848 	directory. If both a supplied and \a path is relative it is reckoned off
1849 	of the directory referred to by \a fd. If \a path is absolute \a fd is
1850 	ignored.
1851 
1852 	The caller has the responsibility to call put_vnode() on the returned
1853 	directory vnode.
1854 
1855 	\param fd The FD. May be < 0.
1856 	\param path The absolute or relative path. Must not be \c NULL. The buffer
1857 	       is modified by this function. It must have at least room for a
1858 	       string one character longer than the path it contains.
1859 	\param _vnode A pointer to a variable the directory vnode shall be written
1860 		   into.
1861 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1862 		   the leaf name of the specified entry will be written.
1863 	\param kernel \c true, if invoked from inside the kernel, \c false if
1864 		   invoked from userland.
1865 	\return \c B_OK, if everything went fine, another error code otherwise.
1866 */
1867 static status_t
1868 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1869 	char *filename, bool kernel)
1870 {
1871 	if (!path)
1872 		return B_BAD_VALUE;
1873 	if (*path == '\0')
1874 		return B_ENTRY_NOT_FOUND;
1875 	if (fd < 0)
1876 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1877 
1878 	status_t status = get_dir_path_and_leaf(path, filename);
1879 	if (status != B_OK)
1880 		return status;
1881 
1882 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1883 }
1884 
1885 
1886 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
1887 */
1888 static status_t
1889 get_vnode_name(struct vnode *vnode, struct vnode *parent, struct dirent *buffer,
1890 	size_t bufferSize)
1891 {
1892 	if (bufferSize < sizeof(struct dirent))
1893 		return B_BAD_VALUE;
1894 
1895 	// See if vnode is the root of a mount and move to the covered
1896 	// vnode so we get the underlying file system
1897 	VNodePutter vnodePutter;
1898 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
1899 		vnode = vnode->mount->covers_vnode;
1900 		inc_vnode_ref_count(vnode);
1901 		vnodePutter.SetTo(vnode);
1902 	}
1903 
1904 	if (FS_CALL(vnode, get_vnode_name)) {
1905 		// The FS supports getting the name of a vnode.
1906 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
1907 			vnode->private_node, buffer->d_name,
1908 			(char*)buffer + bufferSize - buffer->d_name);
1909 	}
1910 
1911 	// The FS doesn't support getting the name of a vnode. So we search the
1912 	// parent directory for the vnode, if the caller let us.
1913 
1914 	if (parent == NULL)
1915 		return EOPNOTSUPP;
1916 
1917 	fs_cookie cookie;
1918 
1919 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
1920 		parent->private_node, &cookie);
1921 	if (status >= B_OK) {
1922 		while (true) {
1923 			uint32 num = 1;
1924 			status = dir_read(parent, cookie, buffer, bufferSize, &num);
1925 			if (status < B_OK)
1926 				break;
1927 			if (num == 0) {
1928 				status = B_ENTRY_NOT_FOUND;
1929 				break;
1930 			}
1931 
1932 			if (vnode->id == buffer->d_ino) {
1933 				// found correct entry!
1934 				break;
1935 			}
1936 		}
1937 
1938 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node,
1939 			cookie);
1940 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie,
1941 			vnode->private_node, cookie);
1942 	}
1943 	return status;
1944 }
1945 
1946 
1947 static status_t
1948 get_vnode_name(struct vnode *vnode, struct vnode *parent, char *name,
1949 	size_t nameSize)
1950 {
1951 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1952 	struct dirent *dirent = (struct dirent *)buffer;
1953 
1954 	status_t status = get_vnode_name(vnode, parent, buffer, sizeof(buffer));
1955 	if (status != B_OK)
1956 		return status;
1957 
1958 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
1959 		return B_BUFFER_OVERFLOW;
1960 
1961 	return B_OK;
1962 }
1963 
1964 
/*!	Gets the full path to a given directory vnode.
	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
	file system doesn't support this call, it will fall back to iterating
	through the parent directory to get the name of the child.

	To protect against circular loops, it supports a maximum tree depth
	of 256 levels.

	Note that the path may not be correct the time this function returns!
	It doesn't use any locking to prevent returning the correct path, as
	paths aren't safe anyway: the path to a file can change at any time.

	It might be a good idea, though, to check if the returned path exists
	in the calling function (it's not done here because of efficiency)

	\param vnode The directory vnode; the caller keeps its reference.
	\param buffer Receives the null-terminated absolute path.
	\param bufferSize Size of \a buffer; ENOBUFS is returned if too small.
*/
static status_t
dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
{
	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));

	if (vnode == NULL || buffer == NULL)
		return B_BAD_VALUE;

	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
	KPath pathBuffer;
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	// the path is built backwards from the end of the temporary buffer;
	// "insert" is the index where the next component is prepended
	char *path = pathBuffer.LockBuffer();
	int32 insert = pathBuffer.BufferSize();
	int32 maxLevel = 256;
	int32 length;
	status_t status;

	// we don't use get_vnode() here because this call is more
	// efficient and does all we need from get_vnode()
	inc_vnode_ref_count(vnode);

	// resolve a volume root to its mount point
	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
	if (mountPoint) {
		put_vnode(vnode);
		vnode = mountPoint;
	}

	path[--insert] = '\0';

	while (true) {
		// the name buffer is also used for fs_read_dir()
		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		char *name = &((struct dirent *)nameBuffer)->d_name[0];
		struct vnode *parentVnode;
		ino_t parentID;
		int type;

		// lookup the parent vnode
		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..",
			&parentID, &type);
		if (status < B_OK)
			goto out;

		// the lookup above took out a reference via get_vnode() in the FS,
		// so the parent must be present in the hash table
		mutex_lock(&sVnodeMutex);
		parentVnode = lookup_vnode(vnode->device, parentID);
		mutex_unlock(&sVnodeMutex);

		if (parentVnode == NULL) {
			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, parentID);
			status = B_ENTRY_NOT_FOUND;
			goto out;
		}

		// get the node's name
		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
			sizeof(nameBuffer));

		// resolve a volume root to its mount point
		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
		if (mountPoint) {
			put_vnode(parentVnode);
			parentVnode = mountPoint;
			parentID = parentVnode->id;
		}

		// ".." resolving to the node itself means we reached the root
		bool hitRoot = (parentVnode == vnode);

		// release the current vnode, we only need its parent from now on
		put_vnode(vnode);
		vnode = parentVnode;

		if (status < B_OK)
			goto out;

		if (hitRoot) {
			// we have reached "/", which means we have constructed the full
			// path
			break;
		}

		// ToDo: add an explicit check for loops in about 10 levels to do
		// real loop detection

		// don't go deeper as 'maxLevel' to prevent circular loops
		if (maxLevel-- < 0) {
			status = ELOOP;
			goto out;
		}

		// add the name in front of the current path
		name[B_FILE_NAME_LENGTH - 1] = '\0';
		length = strlen(name);
		insert -= length;
		if (insert <= 0) {
			status = ENOBUFS;
			goto out;
		}
		memcpy(path + insert, name, length);
		path[--insert] = '/';
	}

	// the root dir will result in an empty path: fix it
	if (path[insert] == '\0')
		path[--insert] = '/';

	TRACE(("  path is: %s\n", path + insert));

	// copy the path to the output buffer
	length = pathBuffer.BufferSize() - insert;
	if (length <= (int)bufferSize)
		memcpy(buffer, path + insert, length);
	else
		status = ENOBUFS;

out:
	put_vnode(vnode);
	return status;
}
2102 
2103 
2104 /*!	Checks the length of every path component, and adds a '.'
2105 	if the path ends in a slash.
2106 	The given path buffer must be able to store at least one
2107 	additional character.
2108 */
2109 static status_t
2110 check_path(char *to)
2111 {
2112 	int32 length = 0;
2113 
2114 	// check length of every path component
2115 
2116 	while (*to) {
2117 		char *begin;
2118 		if (*to == '/')
2119 			to++, length++;
2120 
2121 		begin = to;
2122 		while (*to != '/' && *to)
2123 			to++, length++;
2124 
2125 		if (to - begin > B_FILE_NAME_LENGTH)
2126 			return B_NAME_TOO_LONG;
2127 	}
2128 
2129 	if (length == 0)
2130 		return B_ENTRY_NOT_FOUND;
2131 
2132 	// complete path if there is a slash at the end
2133 
2134 	if (*(to - 1) == '/') {
2135 		if (length > B_PATH_NAME_LENGTH - 2)
2136 			return B_NAME_TOO_LONG;
2137 
2138 		to[0] = '.';
2139 		to[1] = '\0';
2140 	}
2141 
2142 	return B_OK;
2143 }
2144 
2145 
2146 static struct file_descriptor *
2147 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
2148 {
2149 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
2150 	if (descriptor == NULL)
2151 		return NULL;
2152 
2153 	if (fd_vnode(descriptor) == NULL) {
2154 		put_fd(descriptor);
2155 		return NULL;
2156 	}
2157 
2158 	// ToDo: when we can close a file descriptor at any point, investigate
2159 	//	if this is still valid to do (accessing the vnode without ref_count
2160 	//	or locking)
2161 	*_vnode = descriptor->u.vnode;
2162 	return descriptor;
2163 }
2164 
2165 
2166 static struct vnode *
2167 get_vnode_from_fd(int fd, bool kernel)
2168 {
2169 	struct file_descriptor *descriptor;
2170 	struct vnode *vnode;
2171 
2172 	descriptor = get_fd(get_current_io_context(kernel), fd);
2173 	if (descriptor == NULL)
2174 		return NULL;
2175 
2176 	vnode = fd_vnode(descriptor);
2177 	if (vnode != NULL)
2178 		inc_vnode_ref_count(vnode);
2179 
2180 	put_fd(descriptor);
2181 	return vnode;
2182 }
2183 
2184 
2185 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2186 	only the path will be considered. In this case, the \a path must not be
2187 	NULL.
2188 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2189 	and should be NULL for files.
2190 */
2191 static status_t
2192 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
2193 	struct vnode **_vnode, ino_t *_parentID, bool kernel)
2194 {
2195 	if (fd < 0 && !path)
2196 		return B_BAD_VALUE;
2197 
2198 	if (path != NULL && *path == '\0')
2199 		return B_ENTRY_NOT_FOUND;
2200 
2201 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2202 		// no FD or absolute path
2203 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2204 	}
2205 
2206 	// FD only, or FD + relative path
2207 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
2208 	if (!vnode)
2209 		return B_FILE_ERROR;
2210 
2211 	if (path != NULL) {
2212 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2213 			_vnode, _parentID, NULL);
2214 	}
2215 
2216 	// there is no relative path to take into account
2217 
2218 	*_vnode = vnode;
2219 	if (_parentID)
2220 		*_parentID = -1;
2221 
2222 	return B_OK;
2223 }
2224 
2225 
2226 static int
2227 get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
2228 	fs_cookie cookie, int openMode, bool kernel)
2229 {
2230 	struct file_descriptor *descriptor;
2231 	int fd;
2232 
2233 	// if the vnode is locked, we don't allow creating a new file descriptor for it
2234 	if (vnode && vnode->mandatory_locked_by != NULL)
2235 		return B_BUSY;
2236 
2237 	descriptor = alloc_fd();
2238 	if (!descriptor)
2239 		return B_NO_MEMORY;
2240 
2241 	if (vnode)
2242 		descriptor->u.vnode = vnode;
2243 	else
2244 		descriptor->u.mount = mount;
2245 	descriptor->cookie = cookie;
2246 
2247 	switch (type) {
2248 		// vnode types
2249 		case FDTYPE_FILE:
2250 			descriptor->ops = &sFileOps;
2251 			break;
2252 		case FDTYPE_DIR:
2253 			descriptor->ops = &sDirectoryOps;
2254 			break;
2255 		case FDTYPE_ATTR:
2256 			descriptor->ops = &sAttributeOps;
2257 			break;
2258 		case FDTYPE_ATTR_DIR:
2259 			descriptor->ops = &sAttributeDirectoryOps;
2260 			break;
2261 
2262 		// mount types
2263 		case FDTYPE_INDEX_DIR:
2264 			descriptor->ops = &sIndexDirectoryOps;
2265 			break;
2266 		case FDTYPE_QUERY:
2267 			descriptor->ops = &sQueryOps;
2268 			break;
2269 
2270 		default:
2271 			panic("get_new_fd() called with unknown type %d\n", type);
2272 			break;
2273 	}
2274 	descriptor->type = type;
2275 	descriptor->open_mode = openMode;
2276 
2277 	fd = new_fd(get_current_io_context(kernel), descriptor);
2278 	if (fd < 0) {
2279 		free(descriptor);
2280 		return B_NO_MORE_FDS;
2281 	}
2282 
2283 	return fd;
2284 }
2285 
2286 #ifdef ADD_DEBUGGER_COMMANDS
2287 
2288 
2289 static void
2290 _dump_advisory_locking(advisory_locking *locking)
2291 {
2292 	if (locking == NULL)
2293 		return;
2294 
2295 	kprintf("   lock:        %ld", locking->lock);
2296 	kprintf("   wait_sem:    %ld", locking->wait_sem);
2297 
2298 	struct advisory_lock *lock = NULL;
2299 	int32 index = 0;
2300 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
2301 		kprintf("   [%2ld] team:   %ld\n", index, lock->team);
2302 		kprintf("        offset: %Ld\n", lock->offset);
2303 		kprintf("        length: %Ld\n", lock->length);
2304 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2305 	}
2306 }
2307 
2308 
// Kernel debugger helper: prints the fields of an fs_mount and exports
// the interesting pointers as debugger variables for follow-up commands.
static void
_dump_mount(struct fs_mount *mount)
{
	kprintf("MOUNT: %p\n", mount);
	kprintf(" id:            %ld\n", mount->id);
	kprintf(" device_name:   %s\n", mount->device_name);
	kprintf(" fs_name:       %s\n", mount->fs_name);
	kprintf(" cookie:        %p\n", mount->cookie);
	kprintf(" root_vnode:    %p\n", mount->root_vnode);
	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
	kprintf(" partition:     %p\n", mount->partition);
	kprintf(" lock:          %ld\n", mount->rlock.sem);
	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
		mount->owns_file_device ? " owns_file_device" : "");

	// make the pointers available as $_cookie etc. in the debugger
	set_debug_variable("_cookie", (addr_t)mount->cookie);
	set_debug_variable("_root", (addr_t)mount->root_vnode);
	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
	set_debug_variable("_partition", (addr_t)mount->partition);
}
2329 
2330 
// Kernel debugger helper: prints the fields of a vnode (including its
// advisory locks) and exports the interesting pointers as debugger
// variables for follow-up commands.
static void
_dump_vnode(struct vnode *vnode)
{
	kprintf("VNODE: %p\n", vnode);
	kprintf(" device:        %ld\n", vnode->device);
	kprintf(" id:            %Ld\n", vnode->id);
	kprintf(" ref_count:     %ld\n", vnode->ref_count);
	kprintf(" private_node:  %p\n", vnode->private_node);
	kprintf(" mount:         %p\n", vnode->mount);
	kprintf(" covered_by:    %p\n", vnode->covered_by);
	kprintf(" cache:         %p\n", vnode->cache);
	// flags: r = marked for removal, b = busy, u = unpublished
	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);

	_dump_advisory_locking(vnode->advisory_locking);

	// make the pointers available as $_node etc. in the debugger
	set_debug_variable("_node", (addr_t)vnode->private_node);
	set_debug_variable("_mount", (addr_t)vnode->mount);
	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
}
2353 
2354 
2355 static int
2356 dump_mount(int argc, char **argv)
2357 {
2358 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2359 		kprintf("usage: %s [id|address]\n", argv[0]);
2360 		return 0;
2361 	}
2362 
2363 	uint32 id = parse_expression(argv[1]);
2364 	struct fs_mount *mount = NULL;
2365 
2366 	mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2367 	if (mount == NULL) {
2368 		if (IS_USER_ADDRESS(id)) {
2369 			kprintf("fs_mount not found\n");
2370 			return 0;
2371 		}
2372 		mount = (fs_mount *)id;
2373 	}
2374 
2375 	_dump_mount(mount);
2376 	return 0;
2377 }
2378 
2379 
2380 static int
2381 dump_mounts(int argc, char **argv)
2382 {
2383 	if (argc != 1) {
2384 		kprintf("usage: %s\n", argv[0]);
2385 		return 0;
2386 	}
2387 
2388 	kprintf("address     id root       covers     cookie     fs_name\n");
2389 
2390 	struct hash_iterator iterator;
2391 	struct fs_mount *mount;
2392 
2393 	hash_open(sMountsTable, &iterator);
2394 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2395 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
2396 			mount->covers_vnode, mount->cookie, mount->fs_name);
2397 	}
2398 
2399 	hash_close(sMountsTable, &iterator, false);
2400 	return 0;
2401 }
2402 
2403 
2404 static int
2405 dump_vnode(int argc, char **argv)
2406 {
2407 	if (argc < 2 || argc > 3 || !strcmp(argv[1], "--help")) {
2408 		kprintf("usage: %s <device> <id>\n"
2409 			"   or: %s <address>\n", argv[0], argv[0]);
2410 		return 0;
2411 	}
2412 
2413 	struct vnode *vnode = NULL;
2414 
2415 	if (argc == 2) {
2416 		vnode = (struct vnode *)parse_expression(argv[1]);
2417 		if (IS_USER_ADDRESS(vnode)) {
2418 			kprintf("invalid vnode address\n");
2419 			return 0;
2420 		}
2421 		_dump_vnode(vnode);
2422 		return 0;
2423 	}
2424 
2425 	struct hash_iterator iterator;
2426 	dev_t device = parse_expression(argv[1]);
2427 	ino_t id = atoll(argv[2]);
2428 
2429 	hash_open(sVnodeTable, &iterator);
2430 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2431 		if (vnode->id != id || vnode->device != device)
2432 			continue;
2433 
2434 		_dump_vnode(vnode);
2435 	}
2436 
2437 	hash_close(sVnodeTable, &iterator, false);
2438 	return 0;
2439 }
2440 
2441 
2442 static int
2443 dump_vnodes(int argc, char **argv)
2444 {
2445 	if (argc != 2 || !strcmp(argv[1], "--help")) {
2446 		kprintf("usage: %s [device]\n", argv[0]);
2447 		return 0;
2448 	}
2449 
2450 	// restrict dumped nodes to a certain device if requested
2451 	dev_t device = parse_expression(argv[1]);
2452 
2453 	struct hash_iterator iterator;
2454 	struct vnode *vnode;
2455 
2456 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
2457 		"flags\n");
2458 
2459 	hash_open(sVnodeTable, &iterator);
2460 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2461 		if (vnode->device != device)
2462 			continue;
2463 
2464 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
2465 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
2466 			vnode->advisory_locking, vnode->remove ? "r" : "-",
2467 			vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
2468 	}
2469 
2470 	hash_close(sVnodeTable, &iterator, false);
2471 	return 0;
2472 }
2473 
2474 
2475 static int
2476 dump_vnode_caches(int argc, char **argv)
2477 {
2478 	struct hash_iterator iterator;
2479 	struct vnode *vnode;
2480 
2481 	if (argc > 2 || !strcmp(argv[1], "--help")) {
2482 		kprintf("usage: %s [device]\n", argv[0]);
2483 		return 0;
2484 	}
2485 
2486 	// restrict dumped nodes to a certain device if requested
2487 	dev_t device = -1;
2488 	if (argc > 1)
2489 		device = atoi(argv[1]);
2490 
2491 	kprintf("address    dev     inode cache          size   pages\n");
2492 
2493 	hash_open(sVnodeTable, &iterator);
2494 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2495 		if (vnode->cache == NULL)
2496 			continue;
2497 		if (device != -1 && vnode->device != device)
2498 			continue;
2499 
2500 		// count pages in cache
2501 		size_t numPages = 0;
2502 		for (struct vm_page *page = vnode->cache->page_list;
2503 				page != NULL; page = page->cache_next) {
2504 			numPages++;
2505 		}
2506 
2507 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
2508 			vnode->cache, (vnode->cache->virtual_size + B_PAGE_SIZE - 1)
2509 				/ B_PAGE_SIZE, numPages);
2510 	}
2511 
2512 	hash_close(sVnodeTable, &iterator, false);
2513 	return 0;
2514 }
2515 
2516 
2517 int
2518 dump_io_context(int argc, char **argv)
2519 {
2520 	if (argc > 2 || !strcmp(argv[1], "--help")) {
2521 		kprintf("usage: %s [team-id|address]\n", argv[0]);
2522 		return 0;
2523 	}
2524 
2525 	struct io_context *context = NULL;
2526 
2527 	if (argc > 1) {
2528 		uint32 num = parse_expression(argv[1]);
2529 		if (IS_KERNEL_ADDRESS(num))
2530 			context = (struct io_context *)num;
2531 		else {
2532 			struct team *team = team_get_team_struct_locked(num);
2533 			if (team == NULL) {
2534 				kprintf("could not find team with ID %ld\n", num);
2535 				return 0;
2536 			}
2537 			context = (struct io_context *)team->io_context;
2538 		}
2539 	} else
2540 		context = get_current_io_context(true);
2541 
2542 	kprintf("I/O CONTEXT: %p\n", context);
2543 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2544 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2545 	kprintf(" max fds:\t%lu\n", context->table_size);
2546 
2547 	if (context->num_used_fds)
2548 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2549 
2550 	for (uint32 i = 0; i < context->table_size; i++) {
2551 		struct file_descriptor *fd = context->fds[i];
2552 		if (fd == NULL)
2553 			continue;
2554 
2555 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2556 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2557 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2558 			fd->u.vnode);
2559 	}
2560 
2561 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2562 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2563 
2564 	set_debug_variable("_cwd", (addr_t)context->cwd);
2565 
2566 	return 0;
2567 }
2568 
2569 
2570 int
2571 dump_vnode_usage(int argc, char **argv)
2572 {
2573 	if (argc != 1) {
2574 		kprintf("usage: %s\n", argv[0]);
2575 		return 0;
2576 	}
2577 
2578 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
2579 		kMaxUnusedVnodes);
2580 
2581 	struct hash_iterator iterator;
2582 	hash_open(sVnodeTable, &iterator);
2583 
2584 	uint32 count = 0;
2585 	struct vnode *vnode;
2586 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2587 		count++;
2588 	}
2589 
2590 	hash_close(sVnodeTable, &iterator, false);
2591 
2592 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
2593 	return 0;
2594 }
2595 
2596 #endif	// ADD_DEBUGGER_COMMANDS
2597 
2598 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
2599 	and calls the file system hooks to read/write the request to disk.
2600 */
static status_t
common_file_io_vec_pages(struct vnode *vnode, void *cookie,
	const file_io_vec *fileVecs, size_t fileVecCount, const iovec *vecs,
	size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset, size_t *_numBytes,
	bool doWrite)
{
	if (fileVecCount == 0) {
		// There are no file vecs at this offset, so we're obviously trying
		// to access the file outside of its bounds
		return B_BAD_VALUE;
	}

	// running state: _vecIndex/_vecOffset are in/out — where to continue in
	// the iovec array; _numBytes is in (requested) / out (transferred)
	size_t numBytes = *_numBytes;
	uint32 fileVecIndex;
	size_t vecOffset = *_vecOffset;
	uint32 vecIndex = *_vecIndex;
	status_t status;
	size_t size;

	if (!doWrite && vecOffset == 0) {
		// now directly read the data from the device
		// the first file_io_vec can be read directly

		size = fileVecs[0].length;
		if (size > numBytes)
			size = numBytes;

		status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
			vnode->private_node, cookie, fileVecs[0].offset, &vecs[vecIndex],
			vecCount - vecIndex, &size, false);
		if (status < B_OK)
			return status;

		// TODO: this is a work-around for buggy device drivers!
		//	When our own drivers honour the length, we can:
		//	a) also use this direct I/O for writes (otherwise, it would
		//	   overwrite precious data)
		//	b) panic if the term below is true (at least for writes)
		if (size > fileVecs[0].length) {
			//dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
			size = fileVecs[0].length;
		}

		ASSERT(size <= fileVecs[0].length);

		// If the file portion was contiguous, we're already done now
		if (size == numBytes)
			return B_OK;

		// if we reached the end of the file, we can return as well
		if (size != fileVecs[0].length) {
			*_numBytes = size;
			return B_OK;
		}

		fileVecIndex = 1;

		// first, find out where we have to continue in our iovecs
		// (skip the iovecs the direct read above fully consumed)
		for (; vecIndex < vecCount; vecIndex++) {
			if (size < vecs[vecIndex].iov_len)
				break;

			size -= vecs[vecIndex].iov_len;
		}

		vecOffset = size;
	} else {
		fileVecIndex = 0;
		size = 0;
	}

	// Too bad, let's process the rest of the file_io_vecs

	size_t totalSize = size;
	size_t bytesLeft = numBytes - size;

	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
		const file_io_vec &fileVec = fileVecs[fileVecIndex];
		off_t fileOffset = fileVec.offset;
		// NOTE(review): min_c() compares off_t against size_t here — relies
		// on bytesLeft fitting into off_t
		off_t fileLeft = min_c(fileVec.length, bytesLeft);

		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));

		// process the complete fileVec
		while (fileLeft > 0) {
			iovec tempVecs[MAX_TEMP_IO_VECS];
			uint32 tempCount = 0;

			// size tracks how much of what is left of the current fileVec
			// (fileLeft) has been assigned to tempVecs
			size = 0;

			// assign what is left of the current fileVec to the tempVecs
			for (size = 0; size < fileLeft && vecIndex < vecCount
					&& tempCount < MAX_TEMP_IO_VECS;) {
				// try to satisfy one iovec per iteration (or as much as
				// possible)

				// bytes left of the current iovec
				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
				if (vecLeft == 0) {
					vecOffset = 0;
					vecIndex++;
					continue;
				}

				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
					vecIndex, vecOffset, size));

				// actually available bytes
				size_t tempVecSize = min_c(vecLeft, fileLeft - size);

				tempVecs[tempCount].iov_base
					= (void *)((addr_t)vecs[vecIndex].iov_base + vecOffset);
				tempVecs[tempCount].iov_len = tempVecSize;
				tempCount++;

				size += tempVecSize;
				vecOffset += tempVecSize;
			}

			// issue one FS call for the batch of tempVecs just assembled
			size_t bytes = size;
			if (doWrite) {
				status = FS_CALL(vnode, write_pages)(vnode->mount->cookie,
					vnode->private_node, cookie, fileOffset, tempVecs,
					tempCount, &bytes, false);
			} else {
				status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
					vnode->private_node, cookie, fileOffset, tempVecs,
					tempCount, &bytes, false);
			}
			if (status < B_OK)
				return status;

			totalSize += bytes;
			bytesLeft -= size;
			fileOffset += size;
			fileLeft -= size;
			//dprintf("-> file left = %Lu\n", fileLeft);

			// a short transfer (bytes != size) means the FS couldn't do more
			if (size != bytes || vecIndex >= vecCount) {
				// there are no more bytes or iovecs, let's bail out
				*_numBytes = totalSize;
				return B_OK;
			}
		}
	}

	// report where a follow-up call has to continue
	*_vecIndex = vecIndex;
	*_vecOffset = vecOffset;
	*_numBytes = totalSize;
	return B_OK;
}
2754 
2755 
2756 //	#pragma mark - public API for file systems
2757 
2758 
2759 extern "C" status_t
2760 new_vnode(dev_t mountID, ino_t vnodeID, fs_vnode privateNode)
2761 {
2762 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2763 		mountID, vnodeID, privateNode));
2764 
2765 	if (privateNode == NULL)
2766 		return B_BAD_VALUE;
2767 
2768 	mutex_lock(&sVnodeMutex);
2769 
2770 	// file system integrity check:
2771 	// test if the vnode already exists and bail out if this is the case!
2772 
2773 	// ToDo: the R5 implementation obviously checks for a different cookie
2774 	//	and doesn't panic if they are equal
2775 
2776 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2777 	if (vnode != NULL)
2778 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2779 
2780 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2781 	if (status == B_OK) {
2782 		vnode->private_node = privateNode;
2783 		vnode->busy = true;
2784 		vnode->unpublished = true;
2785 	}
2786 
2787 	TRACE(("returns: %s\n", strerror(status)));
2788 
2789 	mutex_unlock(&sVnodeMutex);
2790 	return status;
2791 }
2792 
2793 
2794 extern "C" status_t
2795 publish_vnode(dev_t mountID, ino_t vnodeID, fs_vnode privateNode)
2796 {
2797 	FUNCTION(("publish_vnode()\n"));
2798 
2799 	mutex_lock(&sVnodeMutex);
2800 
2801 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2802 	status_t status = B_OK;
2803 
2804 	if (vnode != NULL && vnode->busy && vnode->unpublished
2805 		&& vnode->private_node == privateNode) {
2806 		vnode->busy = false;
2807 		vnode->unpublished = false;
2808 	} else if (vnode == NULL && privateNode != NULL) {
2809 		status = create_new_vnode(&vnode, mountID, vnodeID);
2810 		if (status == B_OK)
2811 			vnode->private_node = privateNode;
2812 	} else
2813 		status = B_BAD_VALUE;
2814 
2815 	TRACE(("returns: %s\n", strerror(status)));
2816 
2817 	mutex_unlock(&sVnodeMutex);
2818 	return status;
2819 }
2820 
2821 
2822 extern "C" status_t
2823 get_vnode(dev_t mountID, ino_t vnodeID, fs_vnode *_fsNode)
2824 {
2825 	struct vnode *vnode;
2826 
2827 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
2828 	if (status < B_OK)
2829 		return status;
2830 
2831 	*_fsNode = vnode->private_node;
2832 	return B_OK;
2833 }
2834 
2835 
2836 extern "C" status_t
2837 put_vnode(dev_t mountID, ino_t vnodeID)
2838 {
2839 	struct vnode *vnode;
2840 
2841 	mutex_lock(&sVnodeMutex);
2842 	vnode = lookup_vnode(mountID, vnodeID);
2843 	mutex_unlock(&sVnodeMutex);
2844 
2845 	if (vnode)
2846 		dec_vnode_ref_count(vnode, true);
2847 
2848 	return B_OK;
2849 }
2850 
2851 
2852 extern "C" status_t
2853 remove_vnode(dev_t mountID, ino_t vnodeID)
2854 {
2855 	struct vnode *vnode;
2856 	bool remove = false;
2857 
2858 	MutexLocker locker(sVnodeMutex);
2859 
2860 	vnode = lookup_vnode(mountID, vnodeID);
2861 	if (vnode == NULL)
2862 		return B_ENTRY_NOT_FOUND;
2863 
2864 	if (vnode->covered_by != NULL) {
2865 		// this vnode is in use
2866 		mutex_unlock(&sVnodeMutex);
2867 		return B_BUSY;
2868 	}
2869 
2870 	vnode->remove = true;
2871 	if (vnode->unpublished) {
2872 		// prepare the vnode for deletion
2873 		vnode->busy = true;
2874 		remove = true;
2875 	}
2876 
2877 	locker.Unlock();
2878 
2879 	if (remove) {
2880 		// if the vnode hasn't been published yet, we delete it here
2881 		atomic_add(&vnode->ref_count, -1);
2882 		free_vnode(vnode, true);
2883 	}
2884 
2885 	return B_OK;
2886 }
2887 
2888 
2889 extern "C" status_t
2890 unremove_vnode(dev_t mountID, ino_t vnodeID)
2891 {
2892 	struct vnode *vnode;
2893 
2894 	mutex_lock(&sVnodeMutex);
2895 
2896 	vnode = lookup_vnode(mountID, vnodeID);
2897 	if (vnode)
2898 		vnode->remove = false;
2899 
2900 	mutex_unlock(&sVnodeMutex);
2901 	return B_OK;
2902 }
2903 
2904 
2905 extern "C" status_t
2906 get_vnode_removed(dev_t mountID, ino_t vnodeID, bool* removed)
2907 {
2908 	mutex_lock(&sVnodeMutex);
2909 
2910 	status_t result;
2911 
2912 	if (struct vnode* vnode = lookup_vnode(mountID, vnodeID)) {
2913 		if (removed)
2914 			*removed = vnode->remove;
2915 		result = B_OK;
2916 	} else
2917 		result = B_BAD_VALUE;
2918 
2919 	mutex_unlock(&sVnodeMutex);
2920 	return result;
2921 }
2922 
2923 
2924 extern "C" status_t
2925 read_pages(int fd, off_t pos, const iovec *vecs, size_t count,
2926 	size_t *_numBytes, bool fsReenter)
2927 {
2928 	struct file_descriptor *descriptor;
2929 	struct vnode *vnode;
2930 
2931 	descriptor = get_fd_and_vnode(fd, &vnode, true);
2932 	if (descriptor == NULL)
2933 		return B_FILE_ERROR;
2934 
2935 	status_t status = FS_CALL(vnode, read_pages)(vnode->mount->cookie,
2936 		vnode->private_node, descriptor->cookie, pos, vecs, count, _numBytes,
2937 		fsReenter);
2938 
2939 	put_fd(descriptor);
2940 	return status;
2941 }
2942 
2943 
2944 extern "C" status_t
2945 write_pages(int fd, off_t pos, const iovec *vecs, size_t count,
2946 	size_t *_numBytes, bool fsReenter)
2947 {
2948 	struct file_descriptor *descriptor;
2949 	struct vnode *vnode;
2950 
2951 	descriptor = get_fd_and_vnode(fd, &vnode, true);
2952 	if (descriptor == NULL)
2953 		return B_FILE_ERROR;
2954 
2955 	status_t status = FS_CALL(vnode, write_pages)(vnode->mount->cookie,
2956 		vnode->private_node, descriptor->cookie, pos, vecs, count, _numBytes,
2957 		fsReenter);
2958 
2959 	put_fd(descriptor);
2960 	return status;
2961 }
2962 
2963 
2964 extern "C" status_t
2965 read_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
2966 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
2967 	size_t *_bytes)
2968 {
2969 	struct file_descriptor *descriptor;
2970 	struct vnode *vnode;
2971 
2972 	descriptor = get_fd_and_vnode(fd, &vnode, true);
2973 	if (descriptor == NULL)
2974 		return B_FILE_ERROR;
2975 
2976 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
2977 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
2978 		false);
2979 
2980 	put_fd(descriptor);
2981 	return status;
2982 }
2983 
2984 
2985 extern "C" status_t
2986 write_file_io_vec_pages(int fd, const file_io_vec *fileVecs, size_t fileVecCount,
2987 	const iovec *vecs, size_t vecCount, uint32 *_vecIndex, size_t *_vecOffset,
2988 	size_t *_bytes)
2989 {
2990 	struct file_descriptor *descriptor;
2991 	struct vnode *vnode;
2992 
2993 	descriptor = get_fd_and_vnode(fd, &vnode, true);
2994 	if (descriptor == NULL)
2995 		return B_FILE_ERROR;
2996 
2997 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
2998 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
2999 		true);
3000 
3001 	put_fd(descriptor);
3002 	return status;
3003 }
3004 
3005 
3006 //	#pragma mark - private VFS API
3007 //	Functions the VFS exports for other parts of the kernel
3008 
3009 
3010 /*! Acquires another reference to the vnode that has to be released
3011 	by calling vfs_put_vnode().
3012 */
void
vfs_acquire_vnode(struct vnode *vnode)
{
	// simply bumps the reference count; vfs_put_vnode() releases it again
	inc_vnode_ref_count(vnode);
}
3018 
3019 
3020 /*! This is currently called from file_cache_create() only.
3021 	It's probably a temporary solution as long as devfs requires that
3022 	fs_read_pages()/fs_write_pages() are called with the standard
3023 	open cookie and not with a device cookie.
3024 	If that's done differently, remove this call; it has no other
3025 	purpose.
3026 */
extern "C" status_t
vfs_get_cookie_from_fd(int fd, void **_cookie)
{
	struct file_descriptor *descriptor;

	// always uses the kernel I/O context (see the comment above: this is
	// only meant for file_cache_create())
	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// NOTE(review): the descriptor reference acquired by get_fd() is never
	// released here (no put_fd()) — presumably intentional so the cookie
	// stays valid for the caller, but verify against file_cache_create()
	*_cookie = descriptor->cookie;
	return B_OK;
}
3039 
3040 
3041 extern "C" int
3042 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode **vnode)
3043 {
3044 	*vnode = get_vnode_from_fd(fd, kernel);
3045 
3046 	if (*vnode == NULL)
3047 		return B_FILE_ERROR;
3048 
3049 	return B_NO_ERROR;
3050 }
3051 
3052 
3053 extern "C" status_t
3054 vfs_get_vnode_from_path(const char *path, bool kernel, struct vnode **_vnode)
3055 {
3056 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
3057 		path, kernel));
3058 
3059 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3060 	if (pathBuffer.InitCheck() != B_OK)
3061 		return B_NO_MEMORY;
3062 
3063 	char *buffer = pathBuffer.LockBuffer();
3064 	strlcpy(buffer, path, pathBuffer.BufferSize());
3065 
3066 	struct vnode *vnode;
3067 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
3068 	if (status < B_OK)
3069 		return status;
3070 
3071 	*_vnode = vnode;
3072 	return B_OK;
3073 }
3074 
3075 
3076 extern "C" status_t
3077 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode **_vnode)
3078 {
3079 	struct vnode *vnode;
3080 
3081 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
3082 	if (status < B_OK)
3083 		return status;
3084 
3085 	*_vnode = vnode;
3086 	return B_OK;
3087 }
3088 
3089 
extern "C" status_t
vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
	const char *name, struct vnode **_vnode)
{
	// thin wrapper: resolves an entry ref (device, directory, name) to a vnode
	return entry_ref_to_vnode(mountID, directoryID, name, _vnode);
}
3096 
3097 
extern "C" void
vfs_vnode_to_node_ref(struct vnode *vnode, dev_t *_mountID, ino_t *_vnodeID)
{
	// extract the (device, inode) pair that identifies this vnode
	*_mountID = vnode->device;
	*_vnodeID = vnode->id;
}
3104 
3105 
3106 /*!	Looks up a vnode with the given mount and vnode ID.
3107 	Must only be used with "in-use" vnodes as it doesn't grab a reference
3108 	to the node.
3109 	It's currently only be used by file_cache_create().
3110 */
3111 extern "C" status_t
3112 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode **_vnode)
3113 {
3114 	mutex_lock(&sVnodeMutex);
3115 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
3116 	mutex_unlock(&sVnodeMutex);
3117 
3118 	if (vnode == NULL)
3119 		return B_ERROR;
3120 
3121 	*_vnode = vnode;
3122 	return B_OK;
3123 }
3124 
3125 
3126 extern "C" status_t
3127 vfs_get_fs_node_from_path(dev_t mountID, const char *path, bool kernel,
3128 	void **_node)
3129 {
3130 	TRACE(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n",
3131 		mountID, path, kernel));
3132 
3133 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
3134 	if (pathBuffer.InitCheck() != B_OK)
3135 		return B_NO_MEMORY;
3136 
3137 	fs_mount *mount;
3138 	status_t status = get_mount(mountID, &mount);
3139 	if (status < B_OK)
3140 		return status;
3141 
3142 	char *buffer = pathBuffer.LockBuffer();
3143 	strlcpy(buffer, path, pathBuffer.BufferSize());
3144 
3145 	struct vnode *vnode = mount->root_vnode;
3146 
3147 	if (buffer[0] == '/')
3148 		status = path_to_vnode(buffer, true, &vnode, NULL, true);
3149 	else {
3150 		inc_vnode_ref_count(vnode);
3151 			// vnode_path_to_vnode() releases a reference to the starting vnode
3152 		status = vnode_path_to_vnode(vnode, buffer, true, 0, &vnode, NULL, NULL);
3153 	}
3154 
3155 	put_mount(mount);
3156 
3157 	if (status < B_OK)
3158 		return status;
3159 
3160 	if (vnode->device != mountID) {
3161 		// wrong mount ID - must not gain access on foreign file system nodes
3162 		put_vnode(vnode);
3163 		return B_BAD_VALUE;
3164 	}
3165 
3166 	*_node = vnode->private_node;
3167 	return B_OK;
3168 }
3169 
3170 
3171 /*!	Finds the full path to the file that contains the module \a moduleName,
3172 	puts it into \a pathBuffer, and returns B_OK for success.
3173 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
3174 	\c B_ENTRY_NOT_FOUNT if no file could be found.
3175 	\a pathBuffer is clobbered in any case and must not be relied on if this
3176 	functions returns unsuccessfully.
3177 */
status_t
vfs_get_module_path(const char *basePath, const char *moduleName,
	char *pathBuffer, size_t bufferSize)
{
	struct vnode *dir, *file;
	status_t status;
	size_t length;
	char *path;

	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
		return B_BUFFER_OVERFLOW;

	// resolve the base directory; we hold a reference to "dir" from here on
	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
	if (status < B_OK)
		return status;

	// the path buffer had been clobbered by the above call
	length = strlcpy(pathBuffer, basePath, bufferSize);
	if (pathBuffer[length - 1] != '/')
		pathBuffer[length++] = '/';

	// "path" points at the spot where each module name component is appended
	path = pathBuffer + length;
	bufferSize -= length;

	// walk moduleName component by component, descending directories until
	// we hit a regular file (the module image) or run out of components
	while (moduleName) {
		int type;

		char *nextPath = strchr(moduleName, '/');
		if (nextPath == NULL)
			length = strlen(moduleName);
		else {
			length = nextPath - moduleName;
			nextPath++;
		}

		if (length + 1 >= bufferSize) {
			status = B_BUFFER_OVERFLOW;
			goto err;
		}

		memcpy(path, moduleName, length);
		path[length] = '\0';
		moduleName = nextPath;

		// note: this consumes our reference to "dir" and gives us one to
		// "file" on success
		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
		if (status < B_OK) {
			// vnode_path_to_vnode() has already released the reference to dir
			return status;
		}

		if (S_ISDIR(type)) {
			// goto the next directory
			path[length] = '/';
			path[length + 1] = '\0';
			path += length + 1;
			bufferSize -= length + 1;

			dir = file;
		} else if (S_ISREG(type)) {
			// it's a file so it should be what we've searched for
			put_vnode(file);

			return B_OK;
		} else {
			TRACE(("vfs_get_module_path(): something is strange here: %d...\n", type));
			status = B_ERROR;
			// let the cleanup below release the reference we hold on "file"
			dir = file;
			goto err;
		}
	}

	// if we got here, the moduleName just pointed to a directory, not to
	// a real module - what should we do in this case?
	status = B_ENTRY_NOT_FOUND;

err:
	put_vnode(dir);
	return status;
}
3257 
3258 
3259 /*!	\brief Normalizes a given path.
3260 
3261 	The path must refer to an existing or non-existing entry in an existing
3262 	directory, that is chopping off the leaf component the remaining path must
3263 	refer to an existing directory.
3264 
	The returned path will be canonical in that it will be absolute, will not
	contain any "." or ".." components or duplicate occurrences of '/'s,
	and none of the directory components will be symbolic links.
3268 
3269 	Any two paths referring to the same entry, will result in the same
3270 	normalized path (well, that is pretty much the definition of `normalized',
3271 	isn't it :-).
3272 
3273 	\param path The path to be normalized.
3274 	\param buffer The buffer into which the normalized path will be written.
3275 	\param bufferSize The size of \a buffer.
3276 	\param kernel \c true, if the IO context of the kernel shall be used,
3277 		   otherwise that of the team this thread belongs to. Only relevant,
3278 		   if the path is relative (to get the CWD).
3279 	\return \c B_OK if everything went fine, another error code otherwise.
3280 */
status_t
vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
	bool kernel)
{
	if (!path || !buffer || bufferSize < 1)
		return B_BAD_VALUE;

	TRACE(("vfs_normalize_path(`%s')\n", path));

	// copy the supplied path to the stack, so it can be modified
	KPath mutablePathBuffer(B_PATH_NAME_LENGTH + 1);
	if (mutablePathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	char *mutablePath = mutablePathBuffer.LockBuffer();
	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the dir vnode and the leaf name
	struct vnode *dirNode;
	char leaf[B_FILE_NAME_LENGTH];
	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
		return error;
	}

	// if the leaf is "." or "..", we directly get the correct directory
	// vnode and ignore the leaf later
	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
	if (isDir)
		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
			// consumes the previous dirNode reference, even on failure
	if (error != B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n",
			strerror(error)));
		return error;
	}

	// get the directory path
	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
	put_vnode(dirNode);
	if (error < B_OK) {
		TRACE(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
		return error;
	}

	// append the leaf name
	if (!isDir) {
		// insert a directory separator only if this is not the file system root
		if ((strcmp(buffer, "/") != 0
			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
			return B_NAME_TOO_LONG;
		}
	}

	TRACE(("vfs_normalize_path() -> `%s'\n", buffer));
	return B_OK;
}
3340 
3341 
/*!	Releases one reference to the given vnode; exported for file systems. */
extern "C" void
vfs_put_vnode(struct vnode *vnode)
{
	put_vnode(vnode);
}
3347 
3348 
3349 extern "C" status_t
3350 vfs_get_cwd(dev_t *_mountID, ino_t *_vnodeID)
3351 {
3352 	// Get current working directory from io context
3353 	struct io_context *context = get_current_io_context(false);
3354 	status_t status = B_OK;
3355 
3356 	mutex_lock(&context->io_mutex);
3357 
3358 	if (context->cwd != NULL) {
3359 		*_mountID = context->cwd->device;
3360 		*_vnodeID = context->cwd->id;
3361 	} else
3362 		status = B_ERROR;
3363 
3364 	mutex_unlock(&context->io_mutex);
3365 	return status;
3366 }
3367 
3368 
/*!	Kernel-internal convenience wrapper: unmounts the volume with the given
	ID via fs_unmount(), acting on behalf of the kernel.
*/
status_t
vfs_unmount(dev_t mountID, uint32 flags)
{
	return fs_unmount(NULL, mountID, flags, true);
}
3374 
3375 
/*!	Disconnects all file descriptors that refer to the given vnode, so that
	subsequent operations on them fail.
*/
extern "C" status_t
vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
{
	struct vnode *vnode;

	// NOTE(review): the two trailing bool arguments to get_vnode() are not
	// self-describing here — confirm their meaning against its definition
	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
	if (status < B_OK)
		return status;

	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
	put_vnode(vnode);
	return B_OK;
}
3389 
3390 
/*!	Frees unused vnodes as if the low memory handler had been invoked with
	the given \a level.
*/
extern "C" void
vfs_free_unused_vnodes(int32 level)
{
	vnode_low_memory_handler(NULL, level);
}
3396 
3397 
3398 extern "C" bool
3399 vfs_can_page(struct vnode *vnode, void *cookie)
3400 {
3401 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
3402 
3403 	if (FS_CALL(vnode, can_page)) {
3404 		return FS_CALL(vnode, can_page)(vnode->mount->cookie,
3405 			vnode->private_node, cookie);
3406 	}
3407 	return false;
3408 }
3409 
3410 
/*!	Forwards a paged read to the file system's read_pages() hook.
	The hook is called unconditionally — the file system must provide it.
*/
extern "C" status_t
vfs_read_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
	size_t count, size_t *_numBytes, bool fsReenter)
{
	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));

	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node,
		cookie, pos, vecs, count, _numBytes, fsReenter);
}
3420 
3421 
/*!	Forwards a paged write to the file system's write_pages() hook.
	The hook is called unconditionally — the file system must provide it.
*/
extern "C" status_t
vfs_write_pages(struct vnode *vnode, void *cookie, off_t pos, const iovec *vecs,
	size_t count, size_t *_numBytes, bool fsReenter)
{
	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));

	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node,
		cookie, pos, vecs, count, _numBytes, fsReenter);
}
3431 
3432 
3433 /*!	Gets the vnode's vm_cache object. If it didn't have one, it will be
3434 	created if \a allocate is \c true.
3435 	In case it's successful, it will also grab a reference to the cache
3436 	it returns.
3437 */
extern "C" status_t
vfs_get_vnode_cache(struct vnode *vnode, vm_cache **_cache, bool allocate)
{
	// unlocked fast path: once set, the cache pointer is only read here
	if (vnode->cache != NULL) {
		vm_cache_acquire_ref(vnode->cache);
		*_cache = vnode->cache;
		return B_OK;
	}

	mutex_lock(&sVnodeMutex);

	status_t status = B_OK;

	// The cache could have been created in the meantime
	if (vnode->cache == NULL) {
		if (allocate) {
			// TODO: actually the vnode need to be busy already here, or
			//	else this won't work...
			bool wasBusy = vnode->busy;
			vnode->busy = true;
			mutex_unlock(&sVnodeMutex);

			// create the cache with sVnodeMutex dropped; the busy flag is
			// meant to keep concurrent users away meanwhile
			status = vm_create_vnode_cache(vnode, &vnode->cache);

			mutex_lock(&sVnodeMutex);
			vnode->busy = wasBusy;
		} else
			status = B_BAD_VALUE;
	}

	// hand out a referenced cache pointer on success
	if (status == B_OK) {
		vm_cache_acquire_ref(vnode->cache);
		*_cache = vnode->cache;
	}

	mutex_unlock(&sVnodeMutex);
	return status;
}
3476 
3477 
/*!	Retrieves the on-disk extents backing the range [offset, offset + size)
	of the given node via the file system's get_file_map() hook.
	The hook is called unconditionally — the file system must provide it.
*/
status_t
vfs_get_file_map(struct vnode *vnode, off_t offset, size_t size,
	file_io_vec *vecs, size_t *_count)
{
	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));

	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie,
		vnode->private_node, offset, size, vecs, _count);
}
3487 
3488 
3489 status_t
3490 vfs_stat_vnode(struct vnode *vnode, struct stat *stat)
3491 {
3492 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
3493 		vnode->private_node, stat);
3494 
3495 	// fill in the st_dev and st_ino fields
3496 	if (status == B_OK) {
3497 		stat->st_dev = vnode->device;
3498 		stat->st_ino = vnode->id;
3499 	}
3500 
3501 	return status;
3502 }
3503 
3504 
/*!	Retrieves the name of the given vnode into \a name; convenience wrapper
	around get_vnode_name() without a parent directory.
*/
status_t
vfs_get_vnode_name(struct vnode *vnode, char *name, size_t nameSize)
{
	return get_vnode_name(vnode, NULL, name, nameSize);
}
3510 
3511 
3512 /*!	If the given descriptor locked its vnode, that lock will be released. */
3513 void
3514 vfs_unlock_vnode_if_locked(struct file_descriptor *descriptor)
3515 {
3516 	struct vnode *vnode = fd_vnode(descriptor);
3517 
3518 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
3519 		vnode->mandatory_locked_by = NULL;
3520 }
3521 
3522 
3523 /*!	Closes all file descriptors of the specified I/O context that
3524 	have the O_CLOEXEC flag set.
3525 */
void
vfs_exec_io_context(void *_context)
{
	struct io_context *context = (struct io_context *)_context;
	uint32 i;

	for (i = 0; i < context->table_size; i++) {
		// the lock is taken per iteration so that close_fd()/put_fd() below
		// run without the context locked
		mutex_lock(&context->io_mutex);

		struct file_descriptor *descriptor = context->fds[i];
		bool remove = false;

		if (descriptor != NULL && fd_close_on_exec(context, i)) {
			// detach the descriptor from the table while holding the lock
			context->fds[i] = NULL;
			context->num_used_fds--;

			remove = true;
		}

		mutex_unlock(&context->io_mutex);

		// actually close and release the descriptor outside the lock
		if (remove) {
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}
}
3553 
3554 
/*! Sets up a new io_context structure, and inherits the properties
	of the parent io_context if it is given.
*/
void *
vfs_new_io_context(void *_parentContext)
{
	size_t tableSize;
	struct io_context *context;
	struct io_context *parentContext;

	context = (io_context *)malloc(sizeof(struct io_context));
	if (context == NULL)
		return NULL;

	memset(context, 0, sizeof(struct io_context));

	parentContext = (struct io_context *)_parentContext;
	if (parentContext)
		tableSize = parentContext->table_size;
			// NOTE(review): table_size is read before parentContext->io_mutex
			// is locked below — confirm the parent cannot be resized
			// concurrently while a child context is being created
	else
		tableSize = DEFAULT_FD_TABLE_SIZE;

	// allocate space for FDs and their close-on-exec flag
	// (one allocation: FD table, then select-info table, then the bitmap)
	context->fds = (file_descriptor**)malloc(
		sizeof(struct file_descriptor*) * tableSize
		+ sizeof(struct select_sync*) * tableSize
		+ (tableSize + 7) / 8);
	if (context->fds == NULL) {
		free(context);
		return NULL;
	}

	context->select_infos = (select_info**)(context->fds + tableSize);
	context->fds_close_on_exec = (uint8 *)(context->select_infos + tableSize);

	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
		+ sizeof(struct select_sync*) * tableSize
		+ (tableSize + 7) / 8);

	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
		free(context->fds);
		free(context);
		return NULL;
	}

	// Copy all parent file descriptors

	if (parentContext) {
		size_t i;

		mutex_lock(&parentContext->io_mutex);

		// inherit the parent's CWD, taking our own vnode reference
		context->cwd = parentContext->cwd;
		if (context->cwd)
			inc_vnode_ref_count(context->cwd);

		// share the parent's descriptors; select infos are deliberately
		// not inherited (they were zeroed above)
		for (i = 0; i < tableSize; i++) {
			struct file_descriptor *descriptor = parentContext->fds[i];

			if (descriptor != NULL) {
				context->fds[i] = descriptor;
				context->num_used_fds++;
				atomic_add(&descriptor->ref_count, 1);
				atomic_add(&descriptor->open_count, 1);
			}
		}

		mutex_unlock(&parentContext->io_mutex);
	} else {
		// no parent: start out at the file system root
		context->cwd = sRoot;

		if (context->cwd)
			inc_vnode_ref_count(context->cwd);
	}

	context->table_size = tableSize;

	list_init(&context->node_monitors);
	context->max_monitors = DEFAULT_NODE_MONITORS;

	return context;
}
3637 
3638 
/*!	Tears down an io_context: releases the CWD reference, closes and
	releases all remaining file descriptors, removes node monitors, and
	frees the context's memory.
*/
status_t
vfs_free_io_context(void *_ioContext)
{
	struct io_context *context = (struct io_context *)_ioContext;
	uint32 i;

	if (context->cwd)
		dec_vnode_ref_count(context->cwd, false);

	mutex_lock(&context->io_mutex);

	for (i = 0; i < context->table_size; i++) {
		if (struct file_descriptor *descriptor = context->fds[i]) {
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}

	mutex_destroy(&context->io_mutex);

	remove_node_monitors(context);
	// select_infos and the close-on-exec bitmap live in the fds allocation
	free(context->fds);
	free(context);

	return B_OK;
}
3665 
3666 
3667 static status_t
3668 vfs_resize_fd_table(struct io_context *context, const int newSize)
3669 {
3670 	struct file_descriptor **fds;
3671 
3672 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
3673 		return EINVAL;
3674 
3675 	MutexLocker(context->io_mutex);
3676 
3677 	int oldSize = context->table_size;
3678 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
3679 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
3680 
3681 	// If the tables shrink, make sure none of the fds being dropped are in use.
3682 	if (newSize < oldSize) {
3683 		for (int i = oldSize; i-- > newSize;) {
3684 			if (context->fds[i])
3685 				return EBUSY;
3686 		}
3687 	}
3688 
3689 	// store pointers to the old tables
3690 	file_descriptor** oldFDs = context->fds;
3691 	select_info** oldSelectInfos = context->select_infos;
3692 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
3693 
3694 	// allocate new tables
3695 	file_descriptor** newFDs = (file_descriptor**)malloc(
3696 		sizeof(struct file_descriptor*) * newSize
3697 		+ sizeof(struct select_sync*) * newSize
3698 		+ newCloseOnExitBitmapSize);
3699 	if (newFDs == NULL)
3700 		return ENOMEM;
3701 
3702 	context->fds = newFDs;
3703 	context->select_infos = (select_info**)(context->fds + newSize);
3704 	context->fds_close_on_exec = (uint8 *)(context->select_infos + newSize);
3705 	context->table_size = newSize;
3706 
3707 	// copy entries from old tables
3708 	int toCopy = min_c(oldSize, newSize);
3709 
3710 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
3711 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
3712 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
3713 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
3714 
3715 	// clear additional entries, if the tables grow
3716 	if (newSize > oldSize) {
3717 		memset(context->fds + oldSize, 0, sizeof(void *) * (newSize - oldSize));
3718 		memset(context->select_infos + oldSize, 0,
3719 			sizeof(void *) * (newSize - oldSize));
3720 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
3721 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
3722 	}
3723 
3724 	free(oldFDs);
3725 
3726 	return B_OK;
3727 }
3728 
3729 
3730 static status_t
3731 vfs_resize_monitor_table(struct io_context *context, const int newSize)
3732 {
3733 	void *fds;
3734 	int	status = B_OK;
3735 
3736 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3737 		return EINVAL;
3738 
3739 	mutex_lock(&context->io_mutex);
3740 
3741 	if ((size_t)newSize < context->num_monitors) {
3742 		status = EBUSY;
3743 		goto out;
3744 	}
3745 	context->max_monitors = newSize;
3746 
3747 out:
3748 	mutex_unlock(&context->io_mutex);
3749 	return status;
3750 }
3751 
3752 
3753 int
3754 vfs_getrlimit(int resource, struct rlimit * rlp)
3755 {
3756 	if (!rlp)
3757 		return B_BAD_ADDRESS;
3758 
3759 	switch (resource) {
3760 		case RLIMIT_NOFILE:
3761 		{
3762 			struct io_context *ioctx = get_current_io_context(false);
3763 
3764 			mutex_lock(&ioctx->io_mutex);
3765 
3766 			rlp->rlim_cur = ioctx->table_size;
3767 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3768 
3769 			mutex_unlock(&ioctx->io_mutex);
3770 
3771 			return 0;
3772 		}
3773 
3774 		case RLIMIT_NOVMON:
3775 		{
3776 			struct io_context *ioctx = get_current_io_context(false);
3777 
3778 			mutex_lock(&ioctx->io_mutex);
3779 
3780 			rlp->rlim_cur = ioctx->max_monitors;
3781 			rlp->rlim_max = MAX_NODE_MONITORS;
3782 
3783 			mutex_unlock(&ioctx->io_mutex);
3784 
3785 			return 0;
3786 		}
3787 
3788 		default:
3789 			return EINVAL;
3790 	}
3791 }
3792 
3793 
3794 int
3795 vfs_setrlimit(int resource, const struct rlimit * rlp)
3796 {
3797 	if (!rlp)
3798 		return B_BAD_ADDRESS;
3799 
3800 	switch (resource) {
3801 		case RLIMIT_NOFILE:
3802 			/* TODO: check getuid() */
3803 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3804 			    rlp->rlim_max != MAX_FD_TABLE_SIZE)
3805 				return EPERM;
3806 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3807 
3808 		case RLIMIT_NOVMON:
3809 			/* TODO: check getuid() */
3810 			if (rlp->rlim_max != RLIM_SAVED_MAX &&
3811 			    rlp->rlim_max != MAX_NODE_MONITORS)
3812 				return EPERM;
3813 			return vfs_resize_monitor_table(get_current_io_context(false), rlp->rlim_cur);
3814 
3815 		default:
3816 			return EINVAL;
3817 	}
3818 }
3819 
3820 
/*!	Initializes the VFS: vnode and mount hash tables, the unused-vnode list,
	node monitoring, all global VFS locks, the block cache, debugger
	commands, the low-memory handler, and finally the file cache.
	Failures of the core structures are fatal (panic).
*/
status_t
vfs_init(kernel_args *args)
{
	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
		&vnode_compare, &vnode_hash);
	if (sVnodeTable == NULL)
		panic("vfs_init: error creating vnode hash table\n");

	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));

	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
		&mount_compare, &mount_hash);
	if (sMountsTable == NULL)
		panic("vfs_init: error creating mounts hash table\n");

	node_monitor_init();

	sRoot = NULL;

	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
		panic("vfs_init: error allocating file systems lock\n");

	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
		panic("vfs_init: error allocating mount op lock\n");

	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
		panic("vfs_init: error allocating mount lock\n");

	if (mutex_init(&sVnodeCoveredByMutex, "vfs_vnode_covered_by_lock") < 0)
		panic("vfs_init: error allocating vnode::covered_by lock\n");

	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
		panic("vfs_init: error allocating vnode lock\n");

	if (block_cache_init() != B_OK)
		return B_ERROR;

#ifdef ADD_DEBUGGER_COMMANDS
	// add some debugger commands
	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
	add_debugger_command("vnode_usage", &dump_vnode_usage, "info about vnode usage");
#endif

	register_low_memory_handler(&vnode_low_memory_handler, NULL, 0);

	return file_cache_init();
}
3873 
3874 
3875 //	#pragma mark - fd_ops implementations
3876 
3877 
3878 /*!
3879 	Calls fs_open() on the given vnode and returns a new
3880 	file descriptor for it
3881 */
static int
create_vnode(struct vnode *directory, const char *name, int openMode,
	int perms, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	ino_t newID;
	int status;

	// no create() hook means the volume is read-only
	if (FS_CALL(directory, create) == NULL)
		return EROFS;

	status = FS_CALL(directory, create)(directory->mount->cookie,
		directory->private_node, name, openMode, perms, &cookie, &newID);
	if (status < B_OK)
		return status;

	// the file system must have published the new vnode by now
	mutex_lock(&sVnodeMutex);
	vnode = lookup_vnode(directory->device, newID);
	mutex_unlock(&sVnodeMutex);

	if (vnode == NULL) {
		panic("vfs: fs_create() returned success but there is no vnode, mount ID %ld!\n",
			directory->device);
		return B_BAD_VALUE;
	}

	// on success the vnode reference and cookie are owned by the new FD
	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// something went wrong, clean up

	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
	put_vnode(vnode);

	// undo the creation as well, so no orphaned entry is left behind
	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);

	return status;
}
3922 
3923 
3924 /*!
3925 	Calls fs_open() on the given vnode and returns a new
3926 	file descriptor for it
3927 */
3928 static int
3929 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3930 {
3931 	fs_cookie cookie;
3932 	int status;
3933 
3934 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3935 	if (status < 0)
3936 		return status;
3937 
3938 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3939 	if (status < 0) {
3940 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3941 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3942 	}
3943 	return status;
3944 }
3945 
3946 
3947 /*! Calls fs open_dir() on the given vnode and returns a new
3948 	file descriptor for it
3949 */
3950 static int
3951 open_dir_vnode(struct vnode *vnode, bool kernel)
3952 {
3953 	fs_cookie cookie;
3954 	int status;
3955 
3956 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3957 	if (status < B_OK)
3958 		return status;
3959 
3960 	// file is opened, create a fd
3961 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3962 	if (status >= 0)
3963 		return status;
3964 
3965 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3966 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3967 
3968 	return status;
3969 }
3970 
3971 
3972 /*! Calls fs open_attr_dir() on the given vnode and returns a new
3973 	file descriptor for it.
3974 	Used by attr_dir_open(), and attr_dir_open_fd().
3975 */
3976 static int
3977 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3978 {
3979 	fs_cookie cookie;
3980 	int status;
3981 
3982 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3983 		return EOPNOTSUPP;
3984 
3985 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3986 	if (status < 0)
3987 		return status;
3988 
3989 	// file is opened, create a fd
3990 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3991 	if (status >= 0)
3992 		return status;
3993 
3994 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3995 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3996 
3997 	return status;
3998 }
3999 
4000 
4001 static int
4002 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char *name,
4003 	int openMode, int perms, bool kernel)
4004 {
4005 	struct vnode *directory;
4006 	int status;
4007 
4008 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
4009 
4010 	// get directory to put the new file in
4011 	status = get_vnode(mountID, directoryID, &directory, true, false);
4012 	if (status < B_OK)
4013 		return status;
4014 
4015 	status = create_vnode(directory, name, openMode, perms, kernel);
4016 	put_vnode(directory);
4017 
4018 	return status;
4019 }
4020 
4021 
4022 static int
4023 file_create(int fd, char *path, int openMode, int perms, bool kernel)
4024 {
4025 	char name[B_FILE_NAME_LENGTH];
4026 	struct vnode *directory;
4027 	int status;
4028 
4029 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
4030 
4031 	// get directory to put the new file in
4032 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
4033 	if (status < 0)
4034 		return status;
4035 
4036 	status = create_vnode(directory, name, openMode, perms, kernel);
4037 
4038 	put_vnode(directory);
4039 	return status;
4040 }
4041 
4042 
4043 static int
4044 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char *name, int openMode, bool kernel)
4045 {
4046 	struct vnode *vnode;
4047 	int status;
4048 
4049 	if (name == NULL || *name == '\0')
4050 		return B_BAD_VALUE;
4051 
4052 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
4053 		mountID, directoryID, name, openMode));
4054 
4055 	// get the vnode matching the entry_ref
4056 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
4057 	if (status < B_OK)
4058 		return status;
4059 
4060 	status = open_vnode(vnode, openMode, kernel);
4061 	if (status < B_OK)
4062 		put_vnode(vnode);
4063 
4064 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
4065 	return status;
4066 }
4067 
4068 
4069 static int
4070 file_open(int fd, char *path, int openMode, bool kernel)
4071 {
4072 	int status = B_OK;
4073 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
4074 
4075 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
4076 		fd, path, openMode, kernel));
4077 
4078 	// get the vnode matching the vnode + path combination
4079 	struct vnode *vnode = NULL;
4080 	ino_t parentID;
4081 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
4082 	if (status != B_OK)
4083 		return status;
4084 
4085 	// open the vnode
4086 	status = open_vnode(vnode, openMode, kernel);
4087 	// put only on error -- otherwise our reference was transferred to the FD
4088 	if (status < B_OK)
4089 		put_vnode(vnode);
4090 
4091 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
4092 		vnode->device, parentID, vnode->id, NULL);
4093 
4094 	return status;
4095 }
4096 
4097 
/*!	fd_ops close hook for files: notifies the file cache, calls the file
	system's close() hook (if any), and drops this team's advisory locks.
*/
static status_t
file_close(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;
	status_t status = B_OK;

	FUNCTION(("file_close(descriptor = %p)\n", descriptor));

	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
	if (FS_CALL(vnode, close))
		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);

	if (status == B_OK) {
		// remove all outstanding locks for this team
		release_advisory_lock(vnode, NULL);
	}
	return status;
}
4116 
4117 
4118 static void
4119 file_free_fd(struct file_descriptor *descriptor)
4120 {
4121 	struct vnode *vnode = descriptor->u.vnode;
4122 
4123 	if (vnode != NULL) {
4124 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4125 		put_vnode(vnode);
4126 	}
4127 }
4128 
4129 
/*!	fd_ops read hook for files: forwards to the file system's read() hook.
	\a length is in/out: bytes requested in, bytes actually read out.
*/
static status_t
file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
{
	struct vnode *vnode = descriptor->u.vnode;

	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
}
4138 
4139 
/*!	fd_ops write hook for files: forwards to the file system's write() hook.
	\a length is in/out: bytes requested in, bytes actually written out.
*/
static status_t
file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
{
	struct vnode *vnode = descriptor->u.vnode;

	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
}
4148 
4149 
/*!	fd_ops seek hook: computes and stores the new descriptor position.
	Pipes are rejected with \c ESPIPE; the node is stat()ed to determine
	its type and (for SEEK_END) its size.
	\return the new position, or a negative error code.
*/
static off_t
file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
{
	off_t offset;

	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));

	// stat() the node
	struct vnode *vnode = descriptor->u.vnode;
	if (FS_CALL(vnode, read_stat) == NULL)
		return EOPNOTSUPP;

	struct stat stat;
	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
		vnode->private_node, &stat);
	if (status < B_OK)
		return status;

	// some kinds of files are not seekable
	switch (stat.st_mode & S_IFMT) {
		case S_IFIFO:
			return ESPIPE;
// TODO: We don't catch sockets here, but they are not seekable either (ESPIPE)!
		// The Open Group Base Specs don't mention any file types besides pipes,
		// fifos, and sockets specially, so we allow seeking them.
		case S_IFREG:
		case S_IFBLK:
		case S_IFDIR:
		case S_IFLNK:
		case S_IFCHR:
			break;
	}

	// determine the base offset the relative position is applied to
	switch (seekType) {
		case SEEK_SET:
			offset = 0;
			break;
		case SEEK_CUR:
			offset = descriptor->pos;
			break;
		case SEEK_END:
			offset = stat.st_size;
			break;
		default:
			return B_BAD_VALUE;
	}

	// assumes off_t is 64 bits wide
	if (offset > 0 && LONGLONG_MAX - offset < pos)
		return EOVERFLOW;

	pos += offset;
	if (pos < 0)
		return B_BAD_VALUE;

	// NOTE(review): descriptor->pos is updated without a lock here — confirm
	// concurrent seeks/reads on the same descriptor are handled elsewhere
	return descriptor->pos = pos;
}
4207 
4208 
4209 static status_t
4210 file_select(struct file_descriptor *descriptor, uint8 event,
4211 	struct selectsync *sync)
4212 {
4213 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
4214 
4215 	struct vnode *vnode = descriptor->u.vnode;
4216 
4217 	// If the FS has no select() hook, notify select() now.
4218 	if (FS_CALL(vnode, select) == NULL)
4219 		return notify_select_event(sync, event);
4220 
4221 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
4222 		descriptor->cookie, event, 0, sync);
4223 }
4224 
4225 
4226 static status_t
4227 file_deselect(struct file_descriptor *descriptor, uint8 event,
4228 	struct selectsync *sync)
4229 {
4230 	struct vnode *vnode = descriptor->u.vnode;
4231 
4232 	if (FS_CALL(vnode, deselect) == NULL)
4233 		return B_OK;
4234 
4235 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
4236 		descriptor->cookie, event, sync);
4237 }
4238 
4239 
4240 static status_t
4241 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char *name, int perms, bool kernel)
4242 {
4243 	struct vnode *vnode;
4244 	ino_t newID;
4245 	status_t status;
4246 
4247 	if (name == NULL || *name == '\0')
4248 		return B_BAD_VALUE;
4249 
4250 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
4251 
4252 	status = get_vnode(mountID, parentID, &vnode, true, false);
4253 	if (status < B_OK)
4254 		return status;
4255 
4256 	if (FS_CALL(vnode, create_dir))
4257 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
4258 	else
4259 		status = EROFS;
4260 
4261 	put_vnode(vnode);
4262 	return status;
4263 }
4264 
4265 
4266 static status_t
4267 dir_create(int fd, char *path, int perms, bool kernel)
4268 {
4269 	char filename[B_FILE_NAME_LENGTH];
4270 	struct vnode *vnode;
4271 	ino_t newID;
4272 	status_t status;
4273 
4274 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
4275 
4276 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4277 	if (status < 0)
4278 		return status;
4279 
4280 	if (FS_CALL(vnode, create_dir))
4281 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
4282 	else
4283 		status = EROFS;
4284 
4285 	put_vnode(vnode);
4286 	return status;
4287 }
4288 
4289 
4290 static int
4291 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char *name, bool kernel)
4292 {
4293 	struct vnode *vnode;
4294 	int status;
4295 
4296 	FUNCTION(("dir_open_entry_ref()\n"));
4297 
4298 	if (name && *name == '\0')
4299 		return B_BAD_VALUE;
4300 
4301 	// get the vnode matching the entry_ref/node_ref
4302 	if (name)
4303 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
4304 	else
4305 		status = get_vnode(mountID, parentID, &vnode, true, false);
4306 	if (status < B_OK)
4307 		return status;
4308 
4309 	status = open_dir_vnode(vnode, kernel);
4310 	if (status < B_OK)
4311 		put_vnode(vnode);
4312 
4313 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
4314 	return status;
4315 }
4316 
4317 
4318 static int
4319 dir_open(int fd, char *path, bool kernel)
4320 {
4321 	int status = B_OK;
4322 
4323 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
4324 
4325 	// get the vnode matching the vnode + path combination
4326 	struct vnode *vnode = NULL;
4327 	ino_t parentID;
4328 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
4329 	if (status != B_OK)
4330 		return status;
4331 
4332 	// open the dir
4333 	status = open_dir_vnode(vnode, kernel);
4334 	if (status < B_OK)
4335 		put_vnode(vnode);
4336 
4337 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
4338 	return status;
4339 }
4340 
4341 
4342 static status_t
4343 dir_close(struct file_descriptor *descriptor)
4344 {
4345 	struct vnode *vnode = descriptor->u.vnode;
4346 
4347 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
4348 
4349 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
4350 	if (FS_CALL(vnode, close_dir))
4351 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4352 
4353 	return B_OK;
4354 }
4355 
4356 
4357 static void
4358 dir_free_fd(struct file_descriptor *descriptor)
4359 {
4360 	struct vnode *vnode = descriptor->u.vnode;
4361 
4362 	if (vnode != NULL) {
4363 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4364 		put_vnode(vnode);
4365 	}
4366 }
4367 
4368 
static status_t
dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
{
	// Thin fd-based wrapper: delegates to the vnode/cookie flavour of
	// dir_read() defined below.
	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
}
4374 
4375 
4376 static void
4377 fix_dirent(struct vnode *parent, struct dirent *entry)
4378 {
4379 	// set d_pdev and d_pino
4380 	entry->d_pdev = parent->device;
4381 	entry->d_pino = parent->id;
4382 
4383 	// If this is the ".." entry and the directory is the root of a FS,
4384 	// we need to replace d_dev and d_ino with the actual values.
4385 	if (strcmp(entry->d_name, "..") == 0
4386 		&& parent->mount->root_vnode == parent
4387 		&& parent->mount->covers_vnode) {
4388 		inc_vnode_ref_count(parent);
4389 			// vnode_path_to_vnode() puts the node
4390 
4391 		struct vnode *vnode;
4392 		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
4393 			NULL, NULL);
4394 
4395 		if (status == B_OK) {
4396 			entry->d_dev = vnode->device;
4397 			entry->d_ino = vnode->id;
4398 		}
4399 	} else {
4400 		// resolve mount points
4401 		struct vnode *vnode = NULL;
4402 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, true,
4403 			false);
4404 		if (status != B_OK)
4405 			return;
4406 
4407 		mutex_lock(&sVnodeCoveredByMutex);
4408 		if (vnode->covered_by) {
4409 			entry->d_dev = vnode->covered_by->device;
4410 			entry->d_ino = vnode->covered_by->id;
4411 		}
4412 		mutex_unlock(&sVnodeCoveredByMutex);
4413 
4414 		put_vnode(vnode);
4415 	}
4416 }
4417 
4418 
4419 static status_t
4420 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4421 {
4422 	if (!FS_CALL(vnode, read_dir))
4423 		return EOPNOTSUPP;
4424 
4425 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie,vnode->private_node,cookie,buffer,bufferSize,_count);
4426 	if (error != B_OK)
4427 		return error;
4428 
4429 	// we need to adjust the read dirents
4430 	if (*_count > 0) {
4431 		// XXX: Currently reading only one dirent is supported. Make this a loop!
4432 		fix_dirent(vnode, buffer);
4433 	}
4434 
4435 	return error;
4436 }
4437 
4438 
4439 static status_t
4440 dir_rewind(struct file_descriptor *descriptor)
4441 {
4442 	struct vnode *vnode = descriptor->u.vnode;
4443 
4444 	if (FS_CALL(vnode, rewind_dir))
4445 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie,vnode->private_node,descriptor->cookie);
4446 
4447 	return EOPNOTSUPP;
4448 }
4449 
4450 
/*!	Removes the directory named by the fd/path pair. The path is normalized
	in place first so that trailing "/" and "/." are stripped, while ".."
	as the leaf is rejected outright (removing the parent through its child
	is not allowed).
*/
static status_t
dir_remove(int fd, char *path, bool kernel)
{
	char name[B_FILE_NAME_LENGTH];
	struct vnode *directory;
	status_t status;

	if (path != NULL) {
		// we need to make sure our path name doesn't stop with "/", ".", or ".."
		char *lastSlash = strrchr(path, '/');
		if (lastSlash != NULL) {
			char *leaf = lastSlash + 1;
			if (!strcmp(leaf, ".."))
				return B_NOT_ALLOWED;

			// omit multiple slashes
			while (lastSlash > path && lastSlash[-1] == '/') {
				lastSlash--;
			}

			if (!leaf[0]
				|| !strcmp(leaf, ".")) {
				// "name/" -> "name", or "name/." -> "name"
				lastSlash[0] = '\0';
			}
		} else if (!strcmp(path, ".."))
			return B_NOT_ALLOWED;
	}

	// resolve the parent directory; the leaf to remove ends up in name
	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
	if (status < B_OK)
		return status;

	// a file system without a remove_dir() hook is treated as read-only
	if (FS_CALL(directory, remove_dir)) {
		status = FS_CALL(directory, remove_dir)(directory->mount->cookie,
			directory->private_node, name);
	} else
		status = EROFS;

	put_vnode(directory);
	return status;
}
4493 
4494 
4495 static status_t
4496 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
4497 {
4498 	struct vnode *vnode = descriptor->u.vnode;
4499 
4500 	if (FS_CALL(vnode, ioctl)) {
4501 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
4502 			descriptor->cookie, op, buffer, length);
4503 	}
4504 
4505 	return EOPNOTSUPP;
4506 }
4507 
4508 
4509 static status_t
4510 common_fcntl(int fd, int op, uint32 argument, bool kernel)
4511 {
4512 	struct file_descriptor *descriptor;
4513 	struct vnode *vnode;
4514 	struct flock flock;
4515 	status_t status;
4516 
4517 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
4518 		fd, op, argument, kernel ? "kernel" : "user"));
4519 
4520 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4521 	if (descriptor == NULL)
4522 		return B_FILE_ERROR;
4523 
4524 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
4525 		if (descriptor->type != FDTYPE_FILE)
4526 			return B_BAD_VALUE;
4527 		if (user_memcpy(&flock, (struct flock *)argument, sizeof(struct flock)) < B_OK)
4528 			return B_BAD_ADDRESS;
4529 	}
4530 
4531 	switch (op) {
4532 		case F_SETFD:
4533 		{
4534 			struct io_context *context = get_current_io_context(kernel);
4535 			// Set file descriptor flags
4536 
4537 			// O_CLOEXEC is the only flag available at this time
4538 			mutex_lock(&context->io_mutex);
4539 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
4540 			mutex_unlock(&context->io_mutex);
4541 
4542 			status = B_OK;
4543 			break;
4544 		}
4545 
4546 		case F_GETFD:
4547 		{
4548 			struct io_context *context = get_current_io_context(kernel);
4549 
4550 			// Get file descriptor flags
4551 			mutex_lock(&context->io_mutex);
4552 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
4553 			mutex_unlock(&context->io_mutex);
4554 			break;
4555 		}
4556 
4557 		case F_SETFL:
4558 			// Set file descriptor open mode
4559 			if (FS_CALL(vnode, set_flags)) {
4560 				// we only accept changes to O_APPEND and O_NONBLOCK
4561 				argument &= O_APPEND | O_NONBLOCK;
4562 
4563 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie,
4564 					vnode->private_node, descriptor->cookie, (int)argument);
4565 				if (status == B_OK) {
4566 					// update this descriptor's open_mode field
4567 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK))
4568 						| argument;
4569 				}
4570 			} else
4571 				status = EOPNOTSUPP;
4572 			break;
4573 
4574 		case F_GETFL:
4575 			// Get file descriptor open mode
4576 			status = descriptor->open_mode;
4577 			break;
4578 
4579 		case F_DUPFD:
4580 		{
4581 			struct io_context *context = get_current_io_context(kernel);
4582 
4583 			status = new_fd_etc(context, descriptor, (int)argument);
4584 			if (status >= 0) {
4585 				mutex_lock(&context->io_mutex);
4586 				fd_set_close_on_exec(context, fd, false);
4587 				mutex_unlock(&context->io_mutex);
4588 
4589 				atomic_add(&descriptor->ref_count, 1);
4590 			}
4591 			break;
4592 		}
4593 
4594 		case F_GETLK:
4595 			status = get_advisory_lock(descriptor->u.vnode, &flock);
4596 			if (status == B_OK) {
4597 				// copy back flock structure
4598 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
4599 			}
4600 			break;
4601 
4602 		case F_SETLK:
4603 		case F_SETLKW:
4604 			status = normalize_flock(descriptor, &flock);
4605 			if (status < B_OK)
4606 				break;
4607 
4608 			if (flock.l_type == F_UNLCK)
4609 				status = release_advisory_lock(descriptor->u.vnode, &flock);
4610 			else {
4611 				// the open mode must match the lock type
4612 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK
4613 					|| (descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK)
4614 					status = B_FILE_ERROR;
4615 				else
4616 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
4617 			}
4618 			break;
4619 
4620 		// ToDo: add support for more ops?
4621 
4622 		default:
4623 			status = B_BAD_VALUE;
4624 	}
4625 
4626 	put_fd(descriptor);
4627 	return status;
4628 }
4629 
4630 
4631 static status_t
4632 common_sync(int fd, bool kernel)
4633 {
4634 	struct file_descriptor *descriptor;
4635 	struct vnode *vnode;
4636 	status_t status;
4637 
4638 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
4639 
4640 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4641 	if (descriptor == NULL)
4642 		return B_FILE_ERROR;
4643 
4644 	if (FS_CALL(vnode, fsync) != NULL)
4645 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
4646 	else
4647 		status = EOPNOTSUPP;
4648 
4649 	put_fd(descriptor);
4650 	return status;
4651 }
4652 
4653 
/*!	Installs a mandatory lock on the node referred to by \a fd on behalf of
	the descriptor. Returns B_BUSY if another descriptor already holds the
	lock.
*/
static status_t
common_lock_node(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = B_OK;

	// We need to set the locking atomically - someone
	// else might set one at the same time
	// (atomic_test_and_set() stores our descriptor only if the field is
	// still NULL; a non-NULL previous value means somebody beat us to it)
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			(addr_t)descriptor, NULL) != NULL)
		status = B_BUSY;

	put_fd(descriptor);
	return status;
}
4675 
4676 
/*!	Releases the mandatory lock on the node referred to by \a fd. Fails with
	B_BAD_VALUE if this descriptor is not the current lock holder.
*/
static status_t
common_unlock_node(int fd, bool kernel)
{
	struct file_descriptor *descriptor;
	struct vnode *vnode;

	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	status_t status = B_OK;

	// We need to set the locking atomically - someone
	// else might set one at the same time
	// (clear the field only if it still holds our descriptor; the returned
	// previous value tells us whether we actually were the lock owner)
	if (atomic_test_and_set((vint32 *)&vnode->mandatory_locked_by,
			NULL, (addr_t)descriptor) != (int32)descriptor)
		status = B_BAD_VALUE;

	put_fd(descriptor);
	return status;
}
4698 
4699 
4700 static status_t
4701 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
4702 	bool kernel)
4703 {
4704 	struct vnode *vnode;
4705 	status_t status;
4706 
4707 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
4708 	if (status < B_OK)
4709 		return status;
4710 
4711 	if (FS_CALL(vnode, read_symlink) != NULL) {
4712 		status = FS_CALL(vnode, read_symlink)(vnode->mount->cookie,
4713 			vnode->private_node, buffer, _bufferSize);
4714 	} else
4715 		status = B_BAD_VALUE;
4716 
4717 	put_vnode(vnode);
4718 	return status;
4719 }
4720 
4721 
4722 static status_t
4723 common_create_symlink(int fd, char *path, const char *toPath, int mode,
4724 	bool kernel)
4725 {
4726 	// path validity checks have to be in the calling function!
4727 	char name[B_FILE_NAME_LENGTH];
4728 	struct vnode *vnode;
4729 	status_t status;
4730 
4731 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
4732 
4733 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
4734 	if (status < B_OK)
4735 		return status;
4736 
4737 	if (FS_CALL(vnode, create_symlink) != NULL)
4738 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
4739 	else
4740 		status = EROFS;
4741 
4742 	put_vnode(vnode);
4743 
4744 	return status;
4745 }
4746 
4747 
4748 static status_t
4749 common_create_link(char *path, char *toPath, bool kernel)
4750 {
4751 	// path validity checks have to be in the calling function!
4752 	char name[B_FILE_NAME_LENGTH];
4753 	struct vnode *directory, *vnode;
4754 	status_t status;
4755 
4756 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));
4757 
4758 	status = path_to_dir_vnode(path, &directory, name, kernel);
4759 	if (status < B_OK)
4760 		return status;
4761 
4762 	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
4763 	if (status < B_OK)
4764 		goto err;
4765 
4766 	if (directory->mount != vnode->mount) {
4767 		status = B_CROSS_DEVICE_LINK;
4768 		goto err1;
4769 	}
4770 
4771 	if (FS_CALL(vnode, link) != NULL)
4772 		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
4773 	else
4774 		status = EROFS;
4775 
4776 err1:
4777 	put_vnode(vnode);
4778 err:
4779 	put_vnode(directory);
4780 
4781 	return status;
4782 }
4783 
4784 
4785 static status_t
4786 common_unlink(int fd, char *path, bool kernel)
4787 {
4788 	char filename[B_FILE_NAME_LENGTH];
4789 	struct vnode *vnode;
4790 	status_t status;
4791 
4792 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
4793 
4794 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
4795 	if (status < 0)
4796 		return status;
4797 
4798 	if (FS_CALL(vnode, unlink) != NULL)
4799 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
4800 	else
4801 		status = EROFS;
4802 
4803 	put_vnode(vnode);
4804 
4805 	return status;
4806 }
4807 
4808 
4809 static status_t
4810 common_access(char *path, int mode, bool kernel)
4811 {
4812 	struct vnode *vnode;
4813 	status_t status;
4814 
4815 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4816 	if (status < B_OK)
4817 		return status;
4818 
4819 	if (FS_CALL(vnode, access) != NULL)
4820 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4821 	else
4822 		status = B_OK;
4823 
4824 	put_vnode(vnode);
4825 
4826 	return status;
4827 }
4828 
4829 
4830 static status_t
4831 common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
4832 {
4833 	struct vnode *fromVnode, *toVnode;
4834 	char fromName[B_FILE_NAME_LENGTH];
4835 	char toName[B_FILE_NAME_LENGTH];
4836 	status_t status;
4837 
4838 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));
4839 
4840 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
4841 	if (status < 0)
4842 		return status;
4843 
4844 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
4845 	if (status < 0)
4846 		goto err;
4847 
4848 	if (fromVnode->device != toVnode->device) {
4849 		status = B_CROSS_DEVICE_LINK;
4850 		goto err1;
4851 	}
4852 
4853 	if (FS_CALL(fromVnode, rename) != NULL)
4854 		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4855 	else
4856 		status = EROFS;
4857 
4858 err1:
4859 	put_vnode(toVnode);
4860 err:
4861 	put_vnode(fromVnode);
4862 
4863 	return status;
4864 }
4865 
4866 
/*!	stat() backend for open file descriptors: asks the FS for the node's
	stat data and fills in the VFS-maintained st_dev/st_ino fields.
*/
static status_t
common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
{
	struct vnode *vnode = descriptor->u.vnode;

	FUNCTION(("common_read_stat: stat %p\n", stat));

	// NOTE(review): unlike most hooks here, read_stat is invoked without a
	// NULL check - presumably it is mandatory for all file systems; confirm.
	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
		vnode->private_node, stat);

	// fill in the st_dev and st_ino fields
	if (status == B_OK) {
		stat->st_dev = vnode->device;
		stat->st_ino = vnode->id;
	}

	return status;
}
4885 
4886 
4887 static status_t
4888 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4889 {
4890 	struct vnode *vnode = descriptor->u.vnode;
4891 
4892 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4893 	if (!FS_CALL(vnode, write_stat))
4894 		return EROFS;
4895 
4896 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4897 }
4898 
4899 
4900 static status_t
4901 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4902 	struct stat *stat, bool kernel)
4903 {
4904 	struct vnode *vnode;
4905 	status_t status;
4906 
4907 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4908 
4909 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4910 	if (status < 0)
4911 		return status;
4912 
4913 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4914 
4915 	// fill in the st_dev and st_ino fields
4916 	if (status == B_OK) {
4917 		stat->st_dev = vnode->device;
4918 		stat->st_ino = vnode->id;
4919 	}
4920 
4921 	put_vnode(vnode);
4922 	return status;
4923 }
4924 
4925 
4926 static status_t
4927 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4928 	const struct stat *stat, int statMask, bool kernel)
4929 {
4930 	struct vnode *vnode;
4931 	status_t status;
4932 
4933 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4934 
4935 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4936 	if (status < 0)
4937 		return status;
4938 
4939 	if (FS_CALL(vnode, write_stat))
4940 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4941 	else
4942 		status = EROFS;
4943 
4944 	put_vnode(vnode);
4945 
4946 	return status;
4947 }
4948 
4949 
4950 static int
4951 attr_dir_open(int fd, char *path, bool kernel)
4952 {
4953 	struct vnode *vnode;
4954 	int status;
4955 
4956 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4957 
4958 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4959 	if (status < B_OK)
4960 		return status;
4961 
4962 	status = open_attr_dir_vnode(vnode, kernel);
4963 	if (status < 0)
4964 		put_vnode(vnode);
4965 
4966 	return status;
4967 }
4968 
4969 
4970 static status_t
4971 attr_dir_close(struct file_descriptor *descriptor)
4972 {
4973 	struct vnode *vnode = descriptor->u.vnode;
4974 
4975 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4976 
4977 	if (FS_CALL(vnode, close_attr_dir))
4978 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4979 
4980 	return B_OK;
4981 }
4982 
4983 
4984 static void
4985 attr_dir_free_fd(struct file_descriptor *descriptor)
4986 {
4987 	struct vnode *vnode = descriptor->u.vnode;
4988 
4989 	if (vnode != NULL) {
4990 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4991 		put_vnode(vnode);
4992 	}
4993 }
4994 
4995 
4996 static status_t
4997 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4998 {
4999 	struct vnode *vnode = descriptor->u.vnode;
5000 
5001 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
5002 
5003 	if (FS_CALL(vnode, read_attr_dir))
5004 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
5005 
5006 	return EOPNOTSUPP;
5007 }
5008 
5009 
5010 static status_t
5011 attr_dir_rewind(struct file_descriptor *descriptor)
5012 {
5013 	struct vnode *vnode = descriptor->u.vnode;
5014 
5015 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
5016 
5017 	if (FS_CALL(vnode, rewind_attr_dir))
5018 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5019 
5020 	return EOPNOTSUPP;
5021 }
5022 
5023 
/*!	Creates the attribute \a name on the node referred to by \a fd and
	returns a new file descriptor for it, or an error code. On failure
	after creation, the attribute is closed, its cookie freed, and the
	attribute itself removed again - in that exact order.
*/
static int
attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	int status;

	if (name == NULL || *name == '\0')
		return B_BAD_VALUE;

	vnode = get_vnode_from_fd(fd, kernel);
	if (vnode == NULL)
		return B_FILE_ERROR;

	// a file system without a create_attr() hook is treated as read-only
	if (FS_CALL(vnode, create_attr) == NULL) {
		status = EROFS;
		goto err;
	}

	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
	if (status < B_OK)
		goto err;

	// on success the new descriptor takes over our vnode reference
	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// fd allocation failed - undo the creation step by step
	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);

	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);

err:
	put_vnode(vnode);

	return status;
}
5060 
5061 
5062 static int
5063 attr_open(int fd, const char *name, int openMode, bool kernel)
5064 {
5065 	struct vnode *vnode;
5066 	fs_cookie cookie;
5067 	int status;
5068 
5069 	if (name == NULL || *name == '\0')
5070 		return B_BAD_VALUE;
5071 
5072 	vnode = get_vnode_from_fd(fd, kernel);
5073 	if (vnode == NULL)
5074 		return B_FILE_ERROR;
5075 
5076 	if (FS_CALL(vnode, open_attr) == NULL) {
5077 		status = EOPNOTSUPP;
5078 		goto err;
5079 	}
5080 
5081 	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
5082 	if (status < B_OK)
5083 		goto err;
5084 
5085 	// now we only need a file descriptor for this attribute and we're done
5086 	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
5087 		return status;
5088 
5089 	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
5090 	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
5091 
5092 err:
5093 	put_vnode(vnode);
5094 
5095 	return status;
5096 }
5097 
5098 
5099 static status_t
5100 attr_close(struct file_descriptor *descriptor)
5101 {
5102 	struct vnode *vnode = descriptor->u.vnode;
5103 
5104 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
5105 
5106 	if (FS_CALL(vnode, close_attr))
5107 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5108 
5109 	return B_OK;
5110 }
5111 
5112 
5113 static void
5114 attr_free_fd(struct file_descriptor *descriptor)
5115 {
5116 	struct vnode *vnode = descriptor->u.vnode;
5117 
5118 	if (vnode != NULL) {
5119 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
5120 		put_vnode(vnode);
5121 	}
5122 }
5123 
5124 
5125 static status_t
5126 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
5127 {
5128 	struct vnode *vnode = descriptor->u.vnode;
5129 
5130 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
5131 	if (!FS_CALL(vnode, read_attr))
5132 		return EOPNOTSUPP;
5133 
5134 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
5135 }
5136 
5137 
5138 static status_t
5139 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
5140 {
5141 	struct vnode *vnode = descriptor->u.vnode;
5142 
5143 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5144 	if (!FS_CALL(vnode, write_attr))
5145 		return EOPNOTSUPP;
5146 
5147 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
5148 }
5149 
5150 
5151 static off_t
5152 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
5153 {
5154 	off_t offset;
5155 
5156 	switch (seekType) {
5157 		case SEEK_SET:
5158 			offset = 0;
5159 			break;
5160 		case SEEK_CUR:
5161 			offset = descriptor->pos;
5162 			break;
5163 		case SEEK_END:
5164 		{
5165 			struct vnode *vnode = descriptor->u.vnode;
5166 			struct stat stat;
5167 			status_t status;
5168 
5169 			if (FS_CALL(vnode, read_stat) == NULL)
5170 				return EOPNOTSUPP;
5171 
5172 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
5173 			if (status < B_OK)
5174 				return status;
5175 
5176 			offset = stat.st_size;
5177 			break;
5178 		}
5179 		default:
5180 			return B_BAD_VALUE;
5181 	}
5182 
5183 	// assumes off_t is 64 bits wide
5184 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5185 		return EOVERFLOW;
5186 
5187 	pos += offset;
5188 	if (pos < 0)
5189 		return B_BAD_VALUE;
5190 
5191 	return descriptor->pos = pos;
5192 }
5193 
5194 
5195 static status_t
5196 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
5197 {
5198 	struct vnode *vnode = descriptor->u.vnode;
5199 
5200 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
5201 
5202 	if (!FS_CALL(vnode, read_attr_stat))
5203 		return EOPNOTSUPP;
5204 
5205 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
5206 }
5207 
5208 
5209 static status_t
5210 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
5211 {
5212 	struct vnode *vnode = descriptor->u.vnode;
5213 
5214 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
5215 
5216 	if (!FS_CALL(vnode, write_attr_stat))
5217 		return EROFS;
5218 
5219 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
5220 }
5221 
5222 
5223 static status_t
5224 attr_remove(int fd, const char *name, bool kernel)
5225 {
5226 	struct file_descriptor *descriptor;
5227 	struct vnode *vnode;
5228 	status_t status;
5229 
5230 	if (name == NULL || *name == '\0')
5231 		return B_BAD_VALUE;
5232 
5233 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
5234 
5235 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5236 	if (descriptor == NULL)
5237 		return B_FILE_ERROR;
5238 
5239 	if (FS_CALL(vnode, remove_attr))
5240 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
5241 	else
5242 		status = EROFS;
5243 
5244 	put_fd(descriptor);
5245 
5246 	return status;
5247 }
5248 
5249 
/*!	Renames an attribute, possibly moving it from the node referred to by
	\a fromfd to the one referred to by \a tofd. Both nodes must live on
	the same volume.
*/
static status_t
attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
{
	struct file_descriptor *fromDescriptor, *toDescriptor;
	struct vnode *fromVnode, *toVnode;
	status_t status;

	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
		return B_BAD_VALUE;

	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));

	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
	if (fromDescriptor == NULL)
		return B_FILE_ERROR;

	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
	if (toDescriptor == NULL) {
		status = B_FILE_ERROR;
		goto err;
	}

	// are the files on the same volume?
	if (fromVnode->device != toVnode->device) {
		status = B_CROSS_DEVICE_LINK;
		goto err1;
	}

	// a file system without a rename_attr() hook is treated as read-only
	if (FS_CALL(fromVnode, rename_attr))
		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
	else
		status = EROFS;

	// release the descriptor references in reverse acquisition order
err1:
	put_fd(toDescriptor);
err:
	put_fd(fromDescriptor);

	return status;
}
5290 
5291 
5292 static status_t
5293 index_dir_open(dev_t mountID, bool kernel)
5294 {
5295 	struct fs_mount *mount;
5296 	fs_cookie cookie;
5297 
5298 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
5299 
5300 	status_t status = get_mount(mountID, &mount);
5301 	if (status < B_OK)
5302 		return status;
5303 
5304 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
5305 		status = EOPNOTSUPP;
5306 		goto out;
5307 	}
5308 
5309 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
5310 	if (status < B_OK)
5311 		goto out;
5312 
5313 	// get fd for the index directory
5314 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
5315 	if (status >= 0)
5316 		goto out;
5317 
5318 	// something went wrong
5319 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
5320 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
5321 
5322 out:
5323 	put_mount(mount);
5324 	return status;
5325 }
5326 
5327 
5328 static status_t
5329 index_dir_close(struct file_descriptor *descriptor)
5330 {
5331 	struct fs_mount *mount = descriptor->u.mount;
5332 
5333 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
5334 
5335 	if (FS_MOUNT_CALL(mount, close_index_dir))
5336 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
5337 
5338 	return B_OK;
5339 }
5340 
5341 
/*!	Releases the FS resources held by an index directory descriptor. Note
	that unlike the vnode-based free_fd hooks there is no reference to put
	on the mount - see the ToDo below.
*/
static void
index_dir_free_fd(struct file_descriptor *descriptor)
{
	struct fs_mount *mount = descriptor->u.mount;

	if (mount != NULL) {
		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
		// ToDo: find a replacement ref_count object - perhaps the root dir?
		//put_vnode(vnode);
	}
}
5353 
5354 
5355 static status_t
5356 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5357 {
5358 	struct fs_mount *mount = descriptor->u.mount;
5359 
5360 	if (FS_MOUNT_CALL(mount, read_index_dir))
5361 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5362 
5363 	return EOPNOTSUPP;
5364 }
5365 
5366 
5367 static status_t
5368 index_dir_rewind(struct file_descriptor *descriptor)
5369 {
5370 	struct fs_mount *mount = descriptor->u.mount;
5371 
5372 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
5373 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
5374 
5375 	return EOPNOTSUPP;
5376 }
5377 
5378 
5379 static status_t
5380 index_create(dev_t mountID, const char *name, uint32 type, uint32 flags, bool kernel)
5381 {
5382 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5383 
5384 	struct fs_mount *mount;
5385 	status_t status = get_mount(mountID, &mount);
5386 	if (status < B_OK)
5387 		return status;
5388 
5389 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
5390 		status = EROFS;
5391 		goto out;
5392 	}
5393 
5394 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
5395 
5396 out:
5397 	put_mount(mount);
5398 	return status;
5399 }
5400 
5401 
// NOTE(review): the two functions below are compiled out. They sketch
// per-index file descriptor support (stat on / freeing an open index),
// which is not implemented yet - kept for reference only.
#if 0
static status_t
index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
{
	struct vnode *vnode = descriptor->u.vnode;

	// ToDo: currently unused!
	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
	if (!FS_CALL(vnode, read_index_stat))
		return EOPNOTSUPP;

	// always unsupported for now, even if the FS provides the hook
	return EOPNOTSUPP;
	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
}


static void
index_free_fd(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;

	if (vnode != NULL) {
		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
		put_vnode(vnode);
	}
}
#endif
5429 
5430 
5431 static status_t
5432 index_name_read_stat(dev_t mountID, const char *name, struct stat *stat, bool kernel)
5433 {
5434 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5435 
5436 	struct fs_mount *mount;
5437 	status_t status = get_mount(mountID, &mount);
5438 	if (status < B_OK)
5439 		return status;
5440 
5441 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
5442 		status = EOPNOTSUPP;
5443 		goto out;
5444 	}
5445 
5446 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
5447 
5448 out:
5449 	put_mount(mount);
5450 	return status;
5451 }
5452 
5453 
5454 static status_t
5455 index_remove(dev_t mountID, const char *name, bool kernel)
5456 {
5457 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
5458 
5459 	struct fs_mount *mount;
5460 	status_t status = get_mount(mountID, &mount);
5461 	if (status < B_OK)
5462 		return status;
5463 
5464 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
5465 		status = EROFS;
5466 		goto out;
5467 	}
5468 
5469 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
5470 
5471 out:
5472 	put_mount(mount);
5473 	return status;
5474 }
5475 
5476 
/*!	ToDo: the query FS API is still pretty much the same as in R5.
		It would be nice if the file systems would get some more kernel
		support for queries.
		For example, query parsing should be moved into the kernel.
*/
/*!	Opens a (live) query \a query on the volume \a device. For live queries
	\a port and \a token identify where update messages are sent. On success
	a new FD of type FDTYPE_QUERY is returned, otherwise an error code.
*/
static int
query_open(dev_t device, const char *query, uint32 flags,
	port_id port, int32 token, bool kernel)
{
	struct fs_mount *mount;
	fs_cookie cookie;

	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));

	status_t status = get_mount(device, &mount);
	if (status < B_OK)
		return status;

	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
		status = EOPNOTSUPP;
		goto out;
	}

	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
	if (status < B_OK)
		goto out;

	// get an fd for the query
	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
	if (status >= 0)
		goto out;

	// something went wrong - close the query and free its cookie again
	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);

out:
	put_mount(mount);
	return status;
}
5517 
5518 
5519 static status_t
5520 query_close(struct file_descriptor *descriptor)
5521 {
5522 	struct fs_mount *mount = descriptor->u.mount;
5523 
5524 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
5525 
5526 	if (FS_MOUNT_CALL(mount, close_query))
5527 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
5528 
5529 	return B_OK;
5530 }
5531 
5532 
5533 static void
5534 query_free_fd(struct file_descriptor *descriptor)
5535 {
5536 	struct fs_mount *mount = descriptor->u.mount;
5537 
5538 	if (mount != NULL) {
5539 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
5540 		// ToDo: find a replacement ref_count object - perhaps the root dir?
5541 		//put_vnode(vnode);
5542 	}
5543 }
5544 
5545 
5546 static status_t
5547 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
5548 {
5549 	struct fs_mount *mount = descriptor->u.mount;
5550 
5551 	if (FS_MOUNT_CALL(mount, read_query))
5552 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
5553 
5554 	return EOPNOTSUPP;
5555 }
5556 
5557 
5558 static status_t
5559 query_rewind(struct file_descriptor *descriptor)
5560 {
5561 	struct fs_mount *mount = descriptor->u.mount;
5562 
5563 	if (FS_MOUNT_CALL(mount, rewind_query))
5564 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
5565 
5566 	return EOPNOTSUPP;
5567 }
5568 
5569 
5570 //	#pragma mark - General File System functions
5571 
5572 
5573 static dev_t
5574 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
5575 	const char *args, bool kernel)
5576 {
5577 	struct fs_mount *mount;
5578 	status_t status = 0;
5579 
5580 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
5581 
5582 	// The path is always safe, we just have to make sure that fsName is
5583 	// almost valid - we can't make any assumptions about args, though.
5584 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
5585 	// We'll get it from the DDM later.
5586 	if (fsName == NULL) {
5587 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
5588 			return B_BAD_VALUE;
5589 	} else if (fsName[0] == '\0')
5590 		return B_BAD_VALUE;
5591 
5592 	RecursiveLocker mountOpLocker(sMountOpLock);
5593 
5594 	// Helper to delete a newly created file device on failure.
5595 	// Not exactly beautiful, but helps to keep the code below cleaner.
5596 	struct FileDeviceDeleter {
5597 		FileDeviceDeleter() : id(-1) {}
5598 		~FileDeviceDeleter()
5599 		{
5600 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
5601 		}
5602 
5603 		partition_id id;
5604 	} fileDeviceDeleter;
5605 
5606 	// If the file system is not a "virtual" one, the device argument should
5607 	// point to a real file/device (if given at all).
5608 	// get the partition
5609 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
5610 	KPartition *partition = NULL;
5611 	KPath normalizedDevice;
5612 	bool newlyCreatedFileDevice = false;
5613 
5614 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
5615 		// normalize the device path
5616 		status = normalizedDevice.SetTo(device, true);
5617 		if (status != B_OK)
5618 			return status;
5619 
5620 		// get a corresponding partition from the DDM
5621 		partition = ddm->RegisterPartition(normalizedDevice.Path());
5622 
5623 		if (!partition) {
5624 			// Partition not found: This either means, the user supplied
5625 			// an invalid path, or the path refers to an image file. We try
5626 			// to let the DDM create a file device for the path.
5627 			partition_id deviceID = ddm->CreateFileDevice(normalizedDevice.Path(),
5628 				&newlyCreatedFileDevice);
5629 			if (deviceID >= 0) {
5630 				partition = ddm->RegisterPartition(deviceID);
5631 				if (newlyCreatedFileDevice)
5632 					fileDeviceDeleter.id = deviceID;
5633 			}
5634 		}
5635 
5636 		if (!partition) {
5637 			TRACE(("fs_mount(): Partition `%s' not found.\n",
5638 				normalizedDevice.Path()));
5639 			return B_ENTRY_NOT_FOUND;
5640 		}
5641 
5642 		device = normalizedDevice.Path();
5643 			// correct path to file device
5644 	}
5645 	PartitionRegistrar partitionRegistrar(partition, true);
5646 
5647 	// Write lock the partition's device. For the time being, we keep the lock
5648 	// until we're done mounting -- not nice, but ensure, that no-one is
5649 	// interfering.
5650 	// TODO: Find a better solution.
5651 	KDiskDevice *diskDevice = NULL;
5652 	if (partition) {
5653 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5654 		if (!diskDevice) {
5655 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
5656 			return B_ERROR;
5657 		}
5658 	}
5659 
5660 	DeviceWriteLocker writeLocker(diskDevice, true);
5661 		// this takes over the write lock acquired before
5662 
5663 	if (partition) {
5664 		// make sure, that the partition is not busy
5665 		if (partition->IsBusy()) {
5666 			TRACE(("fs_mount(): Partition is busy.\n"));
5667 			return B_BUSY;
5668 		}
5669 
5670 		// if no FS name had been supplied, we get it from the partition
5671 		if (!fsName) {
5672 			KDiskSystem *diskSystem = partition->DiskSystem();
5673 			if (!diskSystem) {
5674 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
5675 					"recognize it.\n"));
5676 				return B_BAD_VALUE;
5677 			}
5678 
5679 			if (!diskSystem->IsFileSystem()) {
5680 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
5681 					"partitioning system.\n"));
5682 				return B_BAD_VALUE;
5683 			}
5684 
5685 			// The disk system name will not change, and the KDiskSystem
5686 			// object will not go away while the disk device is locked (and
5687 			// the partition has a reference to it), so this is safe.
5688 			fsName = diskSystem->Name();
5689 		}
5690 	}
5691 
5692 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
5693 	if (mount == NULL)
5694 		return B_NO_MEMORY;
5695 
5696 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
5697 
5698 	mount->fs_name = get_file_system_name(fsName);
5699 	if (mount->fs_name == NULL) {
5700 		status = B_NO_MEMORY;
5701 		goto err1;
5702 	}
5703 
5704 	mount->device_name = strdup(device);
5705 		// "device" can be NULL
5706 
5707 	mount->fs = get_file_system(fsName);
5708 	if (mount->fs == NULL) {
5709 		status = ENODEV;
5710 		goto err3;
5711 	}
5712 
5713 	status = recursive_lock_init(&mount->rlock, "mount rlock");
5714 	if (status < B_OK)
5715 		goto err4;
5716 
5717 	// initialize structure
5718 	mount->id = sNextMountID++;
5719 	mount->partition = NULL;
5720 	mount->root_vnode = NULL;
5721 	mount->covers_vnode = NULL;
5722 	mount->cookie = NULL;
5723 	mount->unmounting = false;
5724 	mount->owns_file_device = false;
5725 
5726 	// insert mount struct into list before we call FS's mount() function
5727 	// so that vnodes can be created for this mount
5728 	mutex_lock(&sMountMutex);
5729 	hash_insert(sMountsTable, mount);
5730 	mutex_unlock(&sMountMutex);
5731 
5732 	ino_t rootID;
5733 
5734 	if (!sRoot) {
5735 		// we haven't mounted anything yet
5736 		if (strcmp(path, "/") != 0) {
5737 			status = B_ERROR;
5738 			goto err5;
5739 		}
5740 
5741 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5742 		if (status < 0) {
5743 			// ToDo: why should we hide the error code from the file system here?
5744 			//status = ERR_VFS_GENERAL;
5745 			goto err5;
5746 		}
5747 	} else {
5748 		struct vnode *coveredVnode;
5749 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
5750 		if (status < B_OK)
5751 			goto err5;
5752 
5753 		// make sure covered_vnode is a DIR
5754 		struct stat coveredNodeStat;
5755 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
5756 			coveredVnode->private_node, &coveredNodeStat);
5757 		if (status < B_OK)
5758 			goto err5;
5759 
5760 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
5761 			status = B_NOT_A_DIRECTORY;
5762 			goto err5;
5763 		}
5764 
5765 		if (coveredVnode->mount->root_vnode == coveredVnode) {
5766 			// this is already a mount point
5767 			status = B_BUSY;
5768 			goto err5;
5769 		}
5770 
5771 		mount->covers_vnode = coveredVnode;
5772 
5773 		// mount it
5774 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
5775 		if (status < B_OK)
5776 			goto err6;
5777 	}
5778 
5779 	// the root node is supposed to be owned by the file system - it must
5780 	// exist at this point
5781 	mount->root_vnode = lookup_vnode(mount->id, rootID);
5782 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
5783 		panic("fs_mount: file system does not own its root node!\n");
5784 		status = B_ERROR;
5785 		goto err7;
5786 	}
5787 
5788 	// No race here, since fs_mount() is the only function changing
5789 	// covers_vnode (and holds sMountOpLock at that time).
5790 	mutex_lock(&sVnodeCoveredByMutex);
5791 	if (mount->covers_vnode)
5792 		mount->covers_vnode->covered_by = mount->root_vnode;
5793 	mutex_unlock(&sVnodeCoveredByMutex);
5794 
5795 	if (!sRoot)
5796 		sRoot = mount->root_vnode;
5797 
5798 	// supply the partition (if any) with the mount cookie and mark it mounted
5799 	if (partition) {
5800 		partition->SetMountCookie(mount->cookie);
5801 		partition->SetVolumeID(mount->id);
5802 
5803 		// keep a partition reference as long as the partition is mounted
5804 		partitionRegistrar.Detach();
5805 		mount->partition = partition;
5806 		mount->owns_file_device = newlyCreatedFileDevice;
5807 		fileDeviceDeleter.id = -1;
5808 	}
5809 
5810 	notify_mount(mount->id, mount->covers_vnode ? mount->covers_vnode->device : -1,
5811 		mount->covers_vnode ? mount->covers_vnode->id : -1);
5812 
5813 	return mount->id;
5814 
5815 err7:
5816 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5817 err6:
5818 	if (mount->covers_vnode)
5819 		put_vnode(mount->covers_vnode);
5820 err5:
5821 	mutex_lock(&sMountMutex);
5822 	hash_remove(sMountsTable, mount);
5823 	mutex_unlock(&sMountMutex);
5824 
5825 	recursive_lock_destroy(&mount->rlock);
5826 err4:
5827 	put_file_system(mount->fs);
5828 	free(mount->device_name);
5829 err3:
5830 	free(mount->fs_name);
5831 err1:
5832 	free(mount);
5833 
5834 	return status;
5835 }
5836 
5837 
/*!	Unmounts the volume identified either by \a path (its mount point) or,
	if \a path is NULL, by \a mountID. With B_FORCE_UNMOUNT set, open file
	descriptors on the volume are forcibly disconnected; otherwise a busy
	volume yields B_BUSY. Frees the fs_mount structure on success.
*/
static status_t
fs_unmount(char *path, dev_t mountID, uint32 flags, bool kernel)
{
	struct vnode *vnode = NULL;
	struct fs_mount *mount;
	status_t err;

	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d\n", path, mountID,
		kernel));

	if (path != NULL) {
		err = path_to_vnode(path, true, &vnode, NULL, kernel);
		if (err != B_OK)
			return B_ENTRY_NOT_FOUND;
	}

	RecursiveLocker mountOpLocker(sMountOpLock);

	mount = find_mount(path != NULL ? vnode->device : mountID);
	if (mount == NULL) {
		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
			vnode);
		// NOTE(review): if panic() returns (debugger continue), mount is
		// still NULL and is dereferenced below - verify intended behavior
	}

	if (path != NULL) {
		// the vnode reference is dropped here; the comparison below only
		// uses the pointer value, which stays valid for that purpose
		put_vnode(vnode);

		if (mount->root_vnode != vnode) {
			// not mountpoint
			return B_BAD_VALUE;
		}
	}

	// if the volume is associated with a partition, lock the device of the
	// partition as long as we are unmounting
	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
	KPartition *partition = mount->partition;
	KDiskDevice *diskDevice = NULL;
	if (partition) {
		if (partition->Device() == NULL) {
			dprintf("fs_unmount(): There is no device!\n");
			return B_ERROR;
		}
		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
		if (!diskDevice) {
			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
			return B_ERROR;
		}
	}
	DeviceWriteLocker writeLocker(diskDevice, true);
		// the locker takes over the write lock acquired above

	// make sure, that the partition is not busy
	if (partition) {
		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
			TRACE(("fs_unmount(): Partition is busy.\n"));
			return B_BUSY;
		}
	}

	// grab the vnode master mutex to keep someone from creating
	// a vnode while we're figuring out if we can continue
	mutex_lock(&sVnodeMutex);

	bool disconnectedDescriptors = false;

	while (true) {
		bool busy = false;

		// cycle through the list of vnodes associated with this mount and
		// make sure all of them are not busy or have refs on them
		vnode = NULL;
		while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes,
				vnode)) != NULL) {
			// The root vnode ref_count needs to be 1 here (the mount has a
			// reference).
			if (vnode->busy
				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
				// there are still vnodes in use on this mount, so we cannot
				// unmount yet
				busy = true;
				break;
			}
		}

		if (!busy)
			break;

		if ((flags & B_FORCE_UNMOUNT) == 0) {
			// not forced: give back the root reference taken by the caller
			// (via get_mount()/path lookup) and report the volume busy
			mutex_unlock(&sVnodeMutex);
			put_vnode(mount->root_vnode);

			return B_BUSY;
		}

		if (disconnectedDescriptors) {
			// wait a bit until the last access is finished, and then try again
			mutex_unlock(&sVnodeMutex);
			snooze(100000);
			// TODO: if there is some kind of bug that prevents the ref counts
			//	from getting back to zero, this will fall into an endless loop...
			mutex_lock(&sVnodeMutex);
			continue;
		}

		// the file system is still busy - but we're forced to unmount it,
		// so let's disconnect all open file descriptors

		mount->unmounting = true;
			// prevent new vnodes from being created

		mutex_unlock(&sVnodeMutex);

		disconnect_mount_or_vnode_fds(mount, NULL);
		disconnectedDescriptors = true;

		mutex_lock(&sVnodeMutex);
	}

	// we can safely continue, mark all of the vnodes busy and this mount
	// structure in unmounting state
	mount->unmounting = true;

	// vnode is NULL here (the scan loop above completed), so this walks
	// the list from the beginning
	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
		vnode->busy = true;

		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
			sUnusedVnodes--;
		}
	}

	// The ref_count of the root node is 1 at this point, see above why this is
	mount->root_vnode->ref_count--;

	mutex_unlock(&sVnodeMutex);

	mutex_lock(&sVnodeCoveredByMutex);
	mount->covers_vnode->covered_by = NULL;
	mutex_unlock(&sVnodeCoveredByMutex);
	put_vnode(mount->covers_vnode);

	// Free all vnodes associated with this mount.
	// They will be removed from the mount list by free_vnode(), so
	// we don't have to do this.
	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes))
			!= NULL) {
		free_vnode(vnode, false);
	}

	// remove the mount structure from the hash table
	mutex_lock(&sMountMutex);
	hash_remove(sMountsTable, mount);
	mutex_unlock(&sMountMutex);

	mountOpLocker.Unlock();

	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
	notify_unmount(mount->id);

	// release the file system
	put_file_system(mount->fs);

	// dereference the partition and mark it unmounted
	if (partition) {
		partition->SetVolumeID(-1);
		partition->SetMountCookie(NULL);

		if (mount->owns_file_device)
			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
		partition->Unregister();
	}

	free(mount->device_name);
	free(mount->fs_name);
	free(mount);

	return B_OK;
}
6018 
6019 
/*!	Synchronizes the volume \a device: first writes back the modified pages
	of every vnode's file cache, then calls the file system's sync() hook.
	The vnode list is only walked under the mount's rlock; a reference is
	taken on each vnode before its cache is flushed so it cannot go away.
*/
static status_t
fs_sync(dev_t device)
{
	struct fs_mount *mount;
	status_t status = get_mount(device, &mount);
	if (status < B_OK)
		return status;

	// First, synchronize all file caches

	struct vnode *previousVnode = NULL;
	while (true) {
		// synchronize access to vnode list
		recursive_lock_lock(&mount->rlock);

		// advance from the previously processed vnode to the next one
		// that actually has a cache to flush
		struct vnode *vnode = previousVnode;
		do {
			// TODO: we could track writes (and writable mapped vnodes)
			//	and have a simple flag that we could test for here
			vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode);
		} while (vnode != NULL && vnode->cache == NULL);

		// remember the ID while still under the lock; the pointer may
		// become invalid once the lock is released
		ino_t id = -1;
		if (vnode != NULL)
			id = vnode->id;

		recursive_lock_unlock(&mount->rlock);

		if (vnode == NULL)
			break;

		// acquire a reference to the vnode

		if (get_vnode(mount->id, id, &vnode, true, false) == B_OK) {
			if (previousVnode != NULL)
				put_vnode(previousVnode);

			if (vnode->cache != NULL)
				vm_cache_write_modified(vnode->cache, false);

			// the next vnode might change until we lock the vnode list again,
			// but this vnode won't go away since we keep a reference to it.
			previousVnode = vnode;
		} else {
			dprintf("syncing of mount %ld stopped due to vnode %Ld.\n",
				mount->id, id);
			break;
		}
	}

	if (previousVnode != NULL)
		put_vnode(previousVnode);

	// And then, let the file systems do their synchronizing work

	mutex_lock(&sMountMutex);

	if (FS_MOUNT_CALL(mount, sync))
		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);

	mutex_unlock(&sMountMutex);

	put_mount(mount);
	return status;
}
6085 
6086 
6087 static status_t
6088 fs_read_info(dev_t device, struct fs_info *info)
6089 {
6090 	struct fs_mount *mount;
6091 	status_t status = get_mount(device, &mount);
6092 	if (status < B_OK)
6093 		return status;
6094 
6095 	memset(info, 0, sizeof(struct fs_info));
6096 
6097 	if (FS_MOUNT_CALL(mount, read_fs_info))
6098 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
6099 
6100 	// fill in info the file system doesn't (have to) know about
6101 	if (status == B_OK) {
6102 		info->dev = mount->id;
6103 		info->root = mount->root_vnode->id;
6104 		strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
6105 		if (mount->device_name != NULL) {
6106 			strlcpy(info->device_name, mount->device_name,
6107 				sizeof(info->device_name));
6108 		}
6109 	}
6110 
6111 	// if the call is not supported by the file system, there are still
6112 	// the parts that we filled out ourselves
6113 
6114 	put_mount(mount);
6115 	return status;
6116 }
6117 
6118 
6119 static status_t
6120 fs_write_info(dev_t device, const struct fs_info *info, int mask)
6121 {
6122 	struct fs_mount *mount;
6123 	status_t status = get_mount(device, &mount);
6124 	if (status < B_OK)
6125 		return status;
6126 
6127 	if (FS_MOUNT_CALL(mount, write_fs_info))
6128 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
6129 	else
6130 		status = EROFS;
6131 
6132 	put_mount(mount);
6133 	return status;
6134 }
6135 
6136 
6137 static dev_t
6138 fs_next_device(int32 *_cookie)
6139 {
6140 	struct fs_mount *mount = NULL;
6141 	dev_t device = *_cookie;
6142 
6143 	mutex_lock(&sMountMutex);
6144 
6145 	// Since device IDs are assigned sequentially, this algorithm
6146 	// does work good enough. It makes sure that the device list
6147 	// returned is sorted, and that no device is skipped when an
6148 	// already visited device got unmounted.
6149 
6150 	while (device < sNextMountID) {
6151 		mount = find_mount(device++);
6152 		if (mount != NULL && mount->cookie != NULL)
6153 			break;
6154 	}
6155 
6156 	*_cookie = device;
6157 
6158 	if (mount != NULL)
6159 		device = mount->id;
6160 	else
6161 		device = B_BAD_VALUE;
6162 
6163 	mutex_unlock(&sMountMutex);
6164 
6165 	return device;
6166 }
6167 
6168 
6169 static status_t
6170 get_cwd(char *buffer, size_t size, bool kernel)
6171 {
6172 	// Get current working directory from io context
6173 	struct io_context *context = get_current_io_context(kernel);
6174 	status_t status;
6175 
6176 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
6177 
6178 	mutex_lock(&context->io_mutex);
6179 
6180 	if (context->cwd)
6181 		status = dir_vnode_to_path(context->cwd, buffer, size);
6182 	else
6183 		status = B_ERROR;
6184 
6185 	mutex_unlock(&context->io_mutex);
6186 	return status;
6187 }
6188 
6189 
6190 static status_t
6191 set_cwd(int fd, char *path, bool kernel)
6192 {
6193 	struct io_context *context;
6194 	struct vnode *vnode = NULL;
6195 	struct vnode *oldDirectory;
6196 	struct stat stat;
6197 	status_t status;
6198 
6199 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
6200 
6201 	// Get vnode for passed path, and bail if it failed
6202 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6203 	if (status < 0)
6204 		return status;
6205 
6206 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
6207 	if (status < 0)
6208 		goto err;
6209 
6210 	if (!S_ISDIR(stat.st_mode)) {
6211 		// nope, can't cwd to here
6212 		status = B_NOT_A_DIRECTORY;
6213 		goto err;
6214 	}
6215 
6216 	// Get current io context and lock
6217 	context = get_current_io_context(kernel);
6218 	mutex_lock(&context->io_mutex);
6219 
6220 	// save the old current working directory first
6221 	oldDirectory = context->cwd;
6222 	context->cwd = vnode;
6223 
6224 	mutex_unlock(&context->io_mutex);
6225 
6226 	if (oldDirectory)
6227 		put_vnode(oldDirectory);
6228 
6229 	return B_NO_ERROR;
6230 
6231 err:
6232 	put_vnode(vnode);
6233 	return status;
6234 }
6235 
6236 
6237 //	#pragma mark - kernel mirrored syscalls
6238 
6239 
6240 dev_t
6241 _kern_mount(const char *path, const char *device, const char *fsName,
6242 	uint32 flags, const char *args, size_t argsLength)
6243 {
6244 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6245 	if (pathBuffer.InitCheck() != B_OK)
6246 		return B_NO_MEMORY;
6247 
6248 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
6249 }
6250 
6251 
6252 status_t
6253 _kern_unmount(const char *path, uint32 flags)
6254 {
6255 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6256 	if (pathBuffer.InitCheck() != B_OK)
6257 		return B_NO_MEMORY;
6258 
6259 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
6260 }
6261 
6262 
6263 status_t
6264 _kern_read_fs_info(dev_t device, struct fs_info *info)
6265 {
6266 	if (info == NULL)
6267 		return B_BAD_VALUE;
6268 
6269 	return fs_read_info(device, info);
6270 }
6271 
6272 
6273 status_t
6274 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
6275 {
6276 	if (info == NULL)
6277 		return B_BAD_VALUE;
6278 
6279 	return fs_write_info(device, info, mask);
6280 }
6281 
6282 
6283 status_t
6284 _kern_sync(void)
6285 {
6286 	// Note: _kern_sync() is also called from _user_sync()
6287 	int32 cookie = 0;
6288 	dev_t device;
6289 	while ((device = next_dev(&cookie)) >= 0) {
6290 		status_t status = fs_sync(device);
6291 		if (status != B_OK && status != B_BAD_VALUE)
6292 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
6293 	}
6294 
6295 	return B_OK;
6296 }
6297 
6298 
6299 dev_t
6300 _kern_next_device(int32 *_cookie)
6301 {
6302 	return fs_next_device(_cookie);
6303 }
6304 
6305 
/*!	Retrieves information about the next open file descriptor of team
	\a teamID, starting at slot \a *_cookie. The team's io_context mutex
	is acquired via its raw semaphore, because the team lock cannot be
	held while blocking on the mutex.
*/
status_t
_kern_get_next_fd_info(team_id teamID, uint32 *_cookie, fd_info *info,
	size_t infoSize)
{
	if (infoSize != sizeof(fd_info))
		return B_BAD_VALUE;

	struct io_context *context = NULL;
	sem_id contextMutex = -1;
	struct team *team = NULL;

	cpu_status state = disable_interrupts();
	GRAB_TEAM_LOCK();

	// look the team up and remember its io_context and mutex semaphore
	// while the team lock guarantees they stay valid
	team = team_get_team_struct_locked(teamID);
	if (team) {
		context = (io_context *)team->io_context;
		contextMutex = context->io_mutex.sem;
	}

	RELEASE_TEAM_LOCK();
	restore_interrupts(state);

	// we now have a context - since we couldn't lock it while having
	// safe access to the team structure, we now need to lock the mutex
	// manually

	if (context == NULL || acquire_sem(contextMutex) != B_OK) {
		// team doesn't exist or seems to be gone
		return B_BAD_TEAM_ID;
	}

	// the team cannot be deleted completely while we're owning its
	// io_context mutex, so we can safely play with it now

	// fix up the mutex bookkeeping, since we acquired the semaphore directly
	context->io_mutex.holder = thread_get_current_thread_id();

	uint32 slot = *_cookie;

	// skip empty slots to find the next open descriptor
	struct file_descriptor *descriptor;
	while (slot < context->table_size && (descriptor = context->fds[slot]) == NULL)
		slot++;

	if (slot >= context->table_size) {
		mutex_unlock(&context->io_mutex);
		return B_ENTRY_NOT_FOUND;
	}

	info->number = slot;
	info->open_mode = descriptor->open_mode;

	struct vnode *vnode = fd_vnode(descriptor);
	if (vnode != NULL) {
		info->device = vnode->device;
		info->node = vnode->id;
	} else if (descriptor->u.mount != NULL) {
		// mount-based descriptors (index dirs, queries) have no node
		info->device = descriptor->u.mount->id;
		info->node = -1;
	}

	mutex_unlock(&context->io_mutex);

	*_cookie = slot + 1;
	return B_OK;
}
6371 
6372 
6373 int
6374 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
6375 {
6376 	if (openMode & O_CREAT)
6377 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
6378 
6379 	return file_open_entry_ref(device, inode, name, openMode, true);
6380 }
6381 
6382 
6383 /*!	\brief Opens a node specified by a FD + path pair.
6384 
6385 	At least one of \a fd and \a path must be specified.
6386 	If only \a fd is given, the function opens the node identified by this
6387 	FD. If only a path is given, this path is opened. If both are given and
6388 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6389 	of the directory (!) identified by \a fd.
6390 
6391 	\param fd The FD. May be < 0.
6392 	\param path The absolute or relative path. May be \c NULL.
6393 	\param openMode The open mode.
6394 	\return A FD referring to the newly opened node, or an error code,
6395 			if an error occurs.
6396 */
6397 int
6398 _kern_open(int fd, const char *path, int openMode, int perms)
6399 {
6400 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6401 	if (pathBuffer.InitCheck() != B_OK)
6402 		return B_NO_MEMORY;
6403 
6404 	if (openMode & O_CREAT)
6405 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
6406 
6407 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
6408 }
6409 
6410 
6411 /*!	\brief Opens a directory specified by entry_ref or node_ref.
6412 
6413 	The supplied name may be \c NULL, in which case directory identified
6414 	by \a device and \a inode will be opened. Otherwise \a device and
6415 	\a inode identify the parent directory of the directory to be opened
6416 	and \a name its entry name.
6417 
6418 	\param device If \a name is specified the ID of the device the parent
6419 		   directory of the directory to be opened resides on, otherwise
6420 		   the device of the directory itself.
6421 	\param inode If \a name is specified the node ID of the parent
6422 		   directory of the directory to be opened, otherwise node ID of the
6423 		   directory itself.
6424 	\param name The entry name of the directory to be opened. If \c NULL,
6425 		   the \a device + \a inode pair identify the node to be opened.
6426 	\return The FD of the newly opened directory or an error code, if
6427 			something went wrong.
6428 */
6429 int
6430 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
6431 {
6432 	return dir_open_entry_ref(device, inode, name, true);
6433 }
6434 
6435 
6436 /*!	\brief Opens a directory specified by a FD + path pair.
6437 
6438 	At least one of \a fd and \a path must be specified.
6439 	If only \a fd is given, the function opens the directory identified by this
6440 	FD. If only a path is given, this path is opened. If both are given and
6441 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6442 	of the directory (!) identified by \a fd.
6443 
6444 	\param fd The FD. May be < 0.
6445 	\param path The absolute or relative path. May be \c NULL.
6446 	\return A FD referring to the newly opened directory, or an error code,
6447 			if an error occurs.
6448 */
6449 int
6450 _kern_open_dir(int fd, const char *path)
6451 {
6452 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6453 	if (pathBuffer.InitCheck() != B_OK)
6454 		return B_NO_MEMORY;
6455 
6456 	return dir_open(fd, pathBuffer.LockBuffer(), true);
6457 }
6458 
6459 
6460 status_t
6461 _kern_fcntl(int fd, int op, uint32 argument)
6462 {
6463 	return common_fcntl(fd, op, argument, true);
6464 }
6465 
6466 
6467 status_t
6468 _kern_fsync(int fd)
6469 {
6470 	return common_sync(fd, true);
6471 }
6472 
6473 
6474 status_t
6475 _kern_lock_node(int fd)
6476 {
6477 	return common_lock_node(fd, true);
6478 }
6479 
6480 
6481 status_t
6482 _kern_unlock_node(int fd)
6483 {
6484 	return common_unlock_node(fd, true);
6485 }
6486 
6487 
6488 status_t
6489 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
6490 {
6491 	return dir_create_entry_ref(device, inode, name, perms, true);
6492 }
6493 
6494 
6495 /*!	\brief Creates a directory specified by a FD + path pair.
6496 
6497 	\a path must always be specified (it contains the name of the new directory
6498 	at least). If only a path is given, this path identifies the location at
6499 	which the directory shall be created. If both \a fd and \a path are given and
6500 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6501 	of the directory (!) identified by \a fd.
6502 
6503 	\param fd The FD. May be < 0.
6504 	\param path The absolute or relative path. Must not be \c NULL.
6505 	\param perms The access permissions the new directory shall have.
6506 	\return \c B_OK, if the directory has been created successfully, another
6507 			error code otherwise.
6508 */
6509 status_t
6510 _kern_create_dir(int fd, const char *path, int perms)
6511 {
6512 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6513 	if (pathBuffer.InitCheck() != B_OK)
6514 		return B_NO_MEMORY;
6515 
6516 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
6517 }
6518 
6519 
6520 status_t
6521 _kern_remove_dir(int fd, const char *path)
6522 {
6523 	if (path) {
6524 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6525 		if (pathBuffer.InitCheck() != B_OK)
6526 			return B_NO_MEMORY;
6527 
6528 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
6529 	}
6530 
6531 	return dir_remove(fd, NULL, true);
6532 }
6533 
6534 
6535 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
6536 
6537 	At least one of \a fd and \a path must be specified.
	If only \a fd is given, the symlink to be read is the node identified by
	this FD. If only a path is given, this path identifies the
6540 	symlink to be read. If both are given and the path is absolute, \a fd is
6541 	ignored; a relative path is reckoned off of the directory (!) identified
6542 	by \a fd.
6543 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
6544 	will still be updated to reflect the required buffer size.
6545 
6546 	\param fd The FD. May be < 0.
6547 	\param path The absolute or relative path. May be \c NULL.
6548 	\param buffer The buffer into which the contents of the symlink shall be
6549 		   written.
6550 	\param _bufferSize A pointer to the size of the supplied buffer.
6551 	\return The length of the link on success or an appropriate error code
6552 */
6553 status_t
6554 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
6555 {
6556 	status_t status;
6557 
6558 	if (path) {
6559 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6560 		if (pathBuffer.InitCheck() != B_OK)
6561 			return B_NO_MEMORY;
6562 
6563 		return common_read_link(fd, pathBuffer.LockBuffer(),
6564 			buffer, _bufferSize, true);
6565 	}
6566 
6567 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
6568 }
6569 
6570 
6571 /*!	\brief Creates a symlink specified by a FD + path pair.
6572 
6573 	\a path must always be specified (it contains the name of the new symlink
6574 	at least). If only a path is given, this path identifies the location at
6575 	which the symlink shall be created. If both \a fd and \a path are given and
6576 	the path is absolute, \a fd is ignored; a relative path is reckoned off
6577 	of the directory (!) identified by \a fd.
6578 
6579 	\param fd The FD. May be < 0.
6580 	\param toPath The absolute or relative path. Must not be \c NULL.
6581 	\param mode The access permissions the new symlink shall have.
6582 	\return \c B_OK, if the symlink has been created successfully, another
6583 			error code otherwise.
6584 */
6585 status_t
6586 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
6587 {
6588 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6589 	if (pathBuffer.InitCheck() != B_OK)
6590 		return B_NO_MEMORY;
6591 
6592 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
6593 		toPath, mode, true);
6594 }
6595 
6596 
6597 status_t
6598 _kern_create_link(const char *path, const char *toPath)
6599 {
6600 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6601 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
6602 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
6603 		return B_NO_MEMORY;
6604 
6605 	return common_create_link(pathBuffer.LockBuffer(),
6606 		toPathBuffer.LockBuffer(), true);
6607 }
6608 
6609 
6610 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
6611 
6612 	\a path must always be specified (it contains at least the name of the entry
6613 	to be deleted). If only a path is given, this path identifies the entry
6614 	directly. If both \a fd and \a path are given and the path is absolute,
6615 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6616 	identified by \a fd.
6617 
6618 	\param fd The FD. May be < 0.
6619 	\param path The absolute or relative path. Must not be \c NULL.
6620 	\return \c B_OK, if the entry has been removed successfully, another
6621 			error code otherwise.
6622 */
6623 status_t
6624 _kern_unlink(int fd, const char *path)
6625 {
6626 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6627 	if (pathBuffer.InitCheck() != B_OK)
6628 		return B_NO_MEMORY;
6629 
6630 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
6631 }
6632 
6633 
6634 /*!	\brief Moves an entry specified by a FD + path pair to a an entry specified
6635 		   by another FD + path pair.
6636 
6637 	\a oldPath and \a newPath must always be specified (they contain at least
6638 	the name of the entry). If only a path is given, this path identifies the
6639 	entry directly. If both a FD and a path are given and the path is absolute,
6640 	the FD is ignored; a relative path is reckoned off of the directory (!)
6641 	identified by the respective FD.
6642 
6643 	\param oldFD The FD of the old location. May be < 0.
6644 	\param oldPath The absolute or relative path of the old location. Must not
6645 		   be \c NULL.
6646 	\param newFD The FD of the new location. May be < 0.
6647 	\param newPath The absolute or relative path of the new location. Must not
6648 		   be \c NULL.
6649 	\return \c B_OK, if the entry has been moved successfully, another
6650 			error code otherwise.
6651 */
6652 status_t
6653 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
6654 {
6655 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
6656 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
6657 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
6658 		return B_NO_MEMORY;
6659 
6660 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
6661 		newFD, newPathBuffer.LockBuffer(), true);
6662 }
6663 
6664 
6665 status_t
6666 _kern_access(const char *path, int mode)
6667 {
6668 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
6669 	if (pathBuffer.InitCheck() != B_OK)
6670 		return B_NO_MEMORY;
6671 
6672 	return common_access(pathBuffer.LockBuffer(), mode, true);
6673 }
6674 
6675 
6676 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
6677 
6678 	If only \a fd is given, the stat operation associated with the type
6679 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6680 	given, this path identifies the entry for whose node to retrieve the
6681 	stat data. If both \a fd and \a path are given and the path is absolute,
6682 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6683 	identified by \a fd and specifies the entry whose stat data shall be
6684 	retrieved.
6685 
6686 	\param fd The FD. May be < 0.
6687 	\param path The absolute or relative path. Must not be \c NULL.
6688 	\param traverseLeafLink If \a path is given, \c true specifies that the
6689 		   function shall not stick to symlinks, but traverse them.
6690 	\param stat The buffer the stat data shall be written into.
6691 	\param statSize The size of the supplied stat buffer.
	\return \c B_OK, if the stat data have been read successfully, another
			error code otherwise.
6694 */
status_t
_kern_read_stat(int fd, const char *path, bool traverseLeafLink,
	struct stat *stat, size_t statSize)
{
	struct stat completeStat;
	struct stat *originalStat = NULL;
	status_t status;

	// a caller's stat structure may be smaller, but never larger than ours
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions:
	// read into a full-sized kernel copy, and copy back only the leading
	// statSize bytes the caller knows about (see end of function)
	if (statSize < sizeof(struct stat)) {
		originalStat = stat;
		stat = &completeStat;
	}

	if (path) {
		// path given: get the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, stat);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
			// release the reference acquired by get_fd()
	}

	// copy the (possibly truncated) result back into the caller's buffer
	if (status == B_OK && originalStat != NULL)
		memcpy(originalStat, stat, statSize);

	return status;
}
6740 
6741 
6742 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
6743 
6744 	If only \a fd is given, the stat operation associated with the type
6745 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
6746 	given, this path identifies the entry for whose node to write the
6747 	stat data. If both \a fd and \a path are given and the path is absolute,
6748 	\a fd is ignored; a relative path is reckoned off of the directory (!)
6749 	identified by \a fd and specifies the entry whose stat data shall be
6750 	written.
6751 
6752 	\param fd The FD. May be < 0.
6753 	\param path The absolute or relative path. Must not be \c NULL.
6754 	\param traverseLeafLink If \a path is given, \c true specifies that the
6755 		   function shall not stick to symlinks, but traverse them.
6756 	\param stat The buffer containing the stat data to be written.
6757 	\param statSize The size of the supplied stat buffer.
6758 	\param statMask A mask specifying which parts of the stat data shall be
6759 		   written.
	\return \c B_OK, if the stat data have been written successfully,
			another error code otherwise.
6762 */
status_t
_kern_write_stat(int fd, const char *path, bool traverseLeafLink,
	const struct stat *stat, size_t statSize, int statMask)
{
	struct stat completeStat;

	// a caller's stat structure may be smaller, but never larger than ours
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions:
	// zero the tail the caller doesn't know about, then copy in the head
	if (statSize < sizeof(struct stat)) {
		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
		memcpy(&completeStat, stat, statSize);
		stat = &completeStat;
	}

	status_t status;

	if (path) {
		// path given: write the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, statMask, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		// the descriptor type may not support writing stat data at all
		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	return status;
}
6806 
6807 
6808 int
6809 _kern_open_attr_dir(int fd, const char *path)
6810 {
6811 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6812 	if (pathBuffer.InitCheck() != B_OK)
6813 		return B_NO_MEMORY;
6814 
6815 	if (path != NULL)
6816 		pathBuffer.SetTo(path);
6817 
6818 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
6819 }
6820 
6821 
6822 int
6823 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
6824 {
6825 	return attr_create(fd, name, type, openMode, true);
6826 }
6827 
6828 
6829 int
6830 _kern_open_attr(int fd, const char *name, int openMode)
6831 {
6832 	return attr_open(fd, name, openMode, true);
6833 }
6834 
6835 
6836 status_t
6837 _kern_remove_attr(int fd, const char *name)
6838 {
6839 	return attr_remove(fd, name, true);
6840 }
6841 
6842 
6843 status_t
6844 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
6845 {
6846 	return attr_rename(fromFile, fromName, toFile, toName, true);
6847 }
6848 
6849 
6850 int
6851 _kern_open_index_dir(dev_t device)
6852 {
6853 	return index_dir_open(device, true);
6854 }
6855 
6856 
6857 status_t
6858 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
6859 {
6860 	return index_create(device, name, type, flags, true);
6861 }
6862 
6863 
6864 status_t
6865 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
6866 {
6867 	return index_name_read_stat(device, name, stat, true);
6868 }
6869 
6870 
6871 status_t
6872 _kern_remove_index(dev_t device, const char *name)
6873 {
6874 	return index_remove(device, name, true);
6875 }
6876 
6877 
6878 status_t
6879 _kern_getcwd(char *buffer, size_t size)
6880 {
6881 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
6882 
6883 	// Call vfs to get current working directory
6884 	return get_cwd(buffer, size, true);
6885 }
6886 
6887 
6888 status_t
6889 _kern_setcwd(int fd, const char *path)
6890 {
6891 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6892 	if (pathBuffer.InitCheck() != B_OK)
6893 		return B_NO_MEMORY;
6894 
6895 	if (path != NULL)
6896 		pathBuffer.SetTo(path);
6897 
6898 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
6899 }
6900 
6901 
6902 //	#pragma mark - userland syscalls
6903 
6904 
6905 dev_t
6906 _user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
6907 	uint32 flags, const char *userArgs, size_t argsLength)
6908 {
6909 	char fileSystem[B_OS_NAME_LENGTH];
6910 	KPath path, device;
6911 	char *args = NULL;
6912 	status_t status;
6913 
6914 	if (!IS_USER_ADDRESS(userPath)
6915 		|| !IS_USER_ADDRESS(userFileSystem)
6916 		|| !IS_USER_ADDRESS(userDevice))
6917 		return B_BAD_ADDRESS;
6918 
6919 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
6920 		return B_NO_MEMORY;
6921 
6922 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
6923 		return B_BAD_ADDRESS;
6924 
6925 	if (userFileSystem != NULL
6926 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
6927 		return B_BAD_ADDRESS;
6928 
6929 	if (userDevice != NULL
6930 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
6931 		return B_BAD_ADDRESS;
6932 
6933 	if (userArgs != NULL && argsLength > 0) {
6934 		// this is a safety restriction
6935 		if (argsLength >= 65536)
6936 			return B_NAME_TOO_LONG;
6937 
6938 		args = (char *)malloc(argsLength + 1);
6939 		if (args == NULL)
6940 			return B_NO_MEMORY;
6941 
6942 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
6943 			free(args);
6944 			return B_BAD_ADDRESS;
6945 		}
6946 	}
6947 	path.UnlockBuffer();
6948 	device.UnlockBuffer();
6949 
6950 	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
6951 		userFileSystem ? fileSystem : NULL, flags, args, false);
6952 
6953 	free(args);
6954 	return status;
6955 }
6956 
6957 
6958 status_t
6959 _user_unmount(const char *userPath, uint32 flags)
6960 {
6961 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6962 	if (pathBuffer.InitCheck() != B_OK)
6963 		return B_NO_MEMORY;
6964 
6965 	char *path = pathBuffer.LockBuffer();
6966 
6967 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6968 		return B_BAD_ADDRESS;
6969 
6970 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
6971 }
6972 
6973 
6974 status_t
6975 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6976 {
6977 	struct fs_info info;
6978 	status_t status;
6979 
6980 	if (userInfo == NULL)
6981 		return B_BAD_VALUE;
6982 
6983 	if (!IS_USER_ADDRESS(userInfo))
6984 		return B_BAD_ADDRESS;
6985 
6986 	status = fs_read_info(device, &info);
6987 	if (status != B_OK)
6988 		return status;
6989 
6990 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6991 		return B_BAD_ADDRESS;
6992 
6993 	return B_OK;
6994 }
6995 
6996 
6997 status_t
6998 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6999 {
7000 	struct fs_info info;
7001 
7002 	if (userInfo == NULL)
7003 		return B_BAD_VALUE;
7004 
7005 	if (!IS_USER_ADDRESS(userInfo)
7006 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
7007 		return B_BAD_ADDRESS;
7008 
7009 	return fs_write_info(device, &info, mask);
7010 }
7011 
7012 
7013 dev_t
7014 _user_next_device(int32 *_userCookie)
7015 {
7016 	int32 cookie;
7017 	dev_t device;
7018 
7019 	if (!IS_USER_ADDRESS(_userCookie)
7020 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
7021 		return B_BAD_ADDRESS;
7022 
7023 	device = fs_next_device(&cookie);
7024 
7025 	if (device >= B_OK) {
7026 		// update user cookie
7027 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
7028 			return B_BAD_ADDRESS;
7029 	}
7030 
7031 	return device;
7032 }
7033 
7034 
7035 status_t
7036 _user_sync(void)
7037 {
7038 	return _kern_sync();
7039 }
7040 
7041 
7042 status_t
7043 _user_get_next_fd_info(team_id team, uint32 *userCookie, fd_info *userInfo,
7044 	size_t infoSize)
7045 {
7046 	struct fd_info info;
7047 	uint32 cookie;
7048 
7049 	// only root can do this (or should root's group be enough?)
7050 	if (geteuid() != 0)
7051 		return B_NOT_ALLOWED;
7052 
7053 	if (infoSize != sizeof(fd_info))
7054 		return B_BAD_VALUE;
7055 
7056 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
7057 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) < B_OK)
7058 		return B_BAD_ADDRESS;
7059 
7060 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
7061 	if (status < B_OK)
7062 		return status;
7063 
7064 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) < B_OK
7065 		|| user_memcpy(userInfo, &info, infoSize) < B_OK)
7066 		return B_BAD_ADDRESS;
7067 
7068 	return status;
7069 }
7070 
7071 
/*!	\brief Resolves an entry_ref (\a device, \a inode, \a leaf) to an absolute
	path and copies it into the userland buffer \a userPath.

	\a leaf may be \c NULL, in which case (\a device, \a inode) names the node
	itself; otherwise it names the parent directory and \a leaf the entry.
*/
status_t
_user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
	char *userPath, size_t pathLength)
{
	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
	if (pathBuffer.InitCheck() != B_OK)
		return B_NO_MEMORY;

	struct vnode *vnode;
	status_t status;

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;

	// copy the leaf name onto the stack
	char stackLeaf[B_FILE_NAME_LENGTH];
	if (leaf) {
		if (!IS_USER_ADDRESS(leaf))
			return B_BAD_ADDRESS;

		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
		if (len < 0)
			return len;
		if (len >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
		leaf = stackLeaf;

		// filter invalid leaf names
		if (leaf[0] == '\0' || strchr(leaf, '/'))
			return B_BAD_VALUE;
	}

	// get the vnode matching the dir's node_ref
	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
		// special cases "." and "..": we can directly get the vnode of the
		// referenced directory
		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
		leaf = NULL;
			// the resolved vnode already is the target; nothing to append
	} else
		status = get_vnode(device, inode, &vnode, true, false);
	if (status < B_OK)
		return status;

	char *path = pathBuffer.LockBuffer();

	// get the directory path
	status = dir_vnode_to_path(vnode, path, pathBuffer.BufferSize());
	put_vnode(vnode);
		// we don't need the vnode anymore
	if (status < B_OK)
		return status;

	// append the leaf name
	if (leaf) {
		// insert a directory separator if this is not the file system root
		if ((strcmp(path, "/") && strlcat(path, "/", pathBuffer.BufferSize())
				>= pathBuffer.BufferSize())
			|| strlcat(path, leaf, pathBuffer.BufferSize()) >= pathBuffer.BufferSize()) {
			return B_NAME_TOO_LONG;
		}
	}

	// copy the assembled path back to userland
	int len = user_strlcpy(userPath, path, pathLength);
	if (len < 0)
		return len;
	if (len >= (int)pathLength)
		return B_BUFFER_OVERFLOW;

	return B_OK;
}
7142 
7143 
7144 int
7145 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
7146 	int openMode, int perms)
7147 {
7148 	char name[B_FILE_NAME_LENGTH];
7149 
7150 	if (userName == NULL || device < 0 || inode < 0)
7151 		return B_BAD_VALUE;
7152 	if (!IS_USER_ADDRESS(userName)
7153 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7154 		return B_BAD_ADDRESS;
7155 
7156 	if (openMode & O_CREAT)
7157 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
7158 
7159 	return file_open_entry_ref(device, inode, name, openMode, false);
7160 }
7161 
7162 
7163 int
7164 _user_open(int fd, const char *userPath, int openMode, int perms)
7165 {
7166 	KPath path(B_PATH_NAME_LENGTH + 1);
7167 	if (path.InitCheck() != B_OK)
7168 		return B_NO_MEMORY;
7169 
7170 	char *buffer = path.LockBuffer();
7171 
7172 	if (!IS_USER_ADDRESS(userPath)
7173 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7174 		return B_BAD_ADDRESS;
7175 
7176 	if (openMode & O_CREAT)
7177 		return file_create(fd, buffer, openMode, perms, false);
7178 
7179 	return file_open(fd, buffer, openMode, false);
7180 }
7181 
7182 
7183 int
7184 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *userName)
7185 {
7186 	if (userName != NULL) {
7187 		char name[B_FILE_NAME_LENGTH];
7188 
7189 		if (!IS_USER_ADDRESS(userName)
7190 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
7191 			return B_BAD_ADDRESS;
7192 
7193 		return dir_open_entry_ref(device, inode, name, false);
7194 	}
7195 	return dir_open_entry_ref(device, inode, NULL, false);
7196 }
7197 
7198 
7199 int
7200 _user_open_dir(int fd, const char *userPath)
7201 {
7202 	KPath path(B_PATH_NAME_LENGTH + 1);
7203 	if (path.InitCheck() != B_OK)
7204 		return B_NO_MEMORY;
7205 
7206 	char *buffer = path.LockBuffer();
7207 
7208 	if (!IS_USER_ADDRESS(userPath)
7209 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
7210 		return B_BAD_ADDRESS;
7211 
7212 	return dir_open(fd, buffer, false);
7213 }
7214 
7215 
7216 /*!	\brief Opens a directory's parent directory and returns the entry name
7217 		   of the former.
7218 
	Aside from the fact that it returns the directory's entry name, this
	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It really
	is
7221 	equivalent, if \a userName is \c NULL.
7222 
7223 	If a name buffer is supplied and the name does not fit the buffer, the
7224 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
7225 
7226 	\param fd A FD referring to a directory.
7227 	\param userName Buffer the directory's entry name shall be written into.
7228 		   May be \c NULL.
7229 	\param nameLength Size of the name buffer.
7230 	\return The file descriptor of the opened parent directory, if everything
7231 			went fine, an error code otherwise.
7232 */
int
_user_open_parent_dir(int fd, char *userName, size_t nameLength)
{
	bool kernel = false;
		// the resulting FD is a userland FD; all lookups run as user calls

	if (userName && !IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	// open the parent dir
	int parentFD = dir_open(fd, "..", kernel);
	if (parentFD < 0)
		return parentFD;
	FDCloser fdCloser(parentFD, kernel);
		// closes parentFD automatically on every early-return path below

	if (userName) {
		// get the vnodes
		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
		VNodePutter parentVNodePutter(parentVNode);
		VNodePutter dirVNodePutter(dirVNode);
			// both putters release their vnode reference on scope exit
		if (!parentVNode || !dirVNode)
			return B_FILE_ERROR;

		// get the vnode name: get_vnode_name() fills a dirent, so the buffer
		// must hold a dirent plus the maximal name length
		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		struct dirent *buffer = (struct dirent*)_buffer;
		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
			sizeof(_buffer));
		if (status != B_OK)
			return status;

		// copy the name to the userland buffer
		int len = user_strlcpy(userName, buffer->d_name, nameLength);
		if (len < 0)
			return len;
		if (len >= (int)nameLength)
			return B_BUFFER_OVERFLOW;
	}

	return fdCloser.Detach();
		// success: hand ownership of parentFD to the caller
}
7274 
7275 
7276 status_t
7277 _user_fcntl(int fd, int op, uint32 argument)
7278 {
7279 	return common_fcntl(fd, op, argument, false);
7280 }
7281 
7282 
7283 status_t
7284 _user_fsync(int fd)
7285 {
7286 	return common_sync(fd, false);
7287 }
7288 
7289 
7290 status_t
7291 _user_lock_node(int fd)
7292 {
7293 	return common_lock_node(fd, false);
7294 }
7295 
7296 
7297 status_t
7298 _user_unlock_node(int fd)
7299 {
7300 	return common_unlock_node(fd, false);
7301 }
7302 
7303 
7304 status_t
7305 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
7306 {
7307 	char name[B_FILE_NAME_LENGTH];
7308 	status_t status;
7309 
7310 	if (!IS_USER_ADDRESS(userName))
7311 		return B_BAD_ADDRESS;
7312 
7313 	status = user_strlcpy(name, userName, sizeof(name));
7314 	if (status < 0)
7315 		return status;
7316 
7317 	return dir_create_entry_ref(device, inode, name, perms, false);
7318 }
7319 
7320 
7321 status_t
7322 _user_create_dir(int fd, const char *userPath, int perms)
7323 {
7324 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7325 	if (pathBuffer.InitCheck() != B_OK)
7326 		return B_NO_MEMORY;
7327 
7328 	char *path = pathBuffer.LockBuffer();
7329 
7330 	if (!IS_USER_ADDRESS(userPath)
7331 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7332 		return B_BAD_ADDRESS;
7333 
7334 	return dir_create(fd, path, perms, false);
7335 }
7336 
7337 
7338 status_t
7339 _user_remove_dir(int fd, const char *userPath)
7340 {
7341 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7342 	if (pathBuffer.InitCheck() != B_OK)
7343 		return B_NO_MEMORY;
7344 
7345 	char *path = pathBuffer.LockBuffer();
7346 
7347 	if (userPath != NULL) {
7348 		if (!IS_USER_ADDRESS(userPath)
7349 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7350 			return B_BAD_ADDRESS;
7351 	}
7352 
7353 	return dir_remove(fd, userPath ? path : NULL, false);
7354 }
7355 
7356 
7357 status_t
7358 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
7359 {
7360 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
7361 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
7362 		return B_NO_MEMORY;
7363 
7364 	size_t bufferSize;
7365 
7366 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
7367 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
7368 		return B_BAD_ADDRESS;
7369 
7370 	char *path = pathBuffer.LockBuffer();
7371 	char *buffer = linkBuffer.LockBuffer();
7372 
7373 	if (userPath) {
7374 		if (!IS_USER_ADDRESS(userPath)
7375 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7376 			return B_BAD_ADDRESS;
7377 
7378 		if (bufferSize > B_PATH_NAME_LENGTH)
7379 			bufferSize = B_PATH_NAME_LENGTH;
7380 	}
7381 
7382 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
7383 		&bufferSize, false);
7384 
7385 	// we also update the bufferSize in case of errors
7386 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
7387 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
7388 		return B_BAD_ADDRESS;
7389 
7390 	if (status < B_OK)
7391 		return status;
7392 
7393 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
7394 		return B_BAD_ADDRESS;
7395 
7396 	return B_OK;
7397 }
7398 
7399 
7400 status_t
7401 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
7402 	int mode)
7403 {
7404 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7405 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7406 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7407 		return B_NO_MEMORY;
7408 
7409 	char *path = pathBuffer.LockBuffer();
7410 	char *toPath = toPathBuffer.LockBuffer();
7411 
7412 	if (!IS_USER_ADDRESS(userPath)
7413 		|| !IS_USER_ADDRESS(userToPath)
7414 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7415 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7416 		return B_BAD_ADDRESS;
7417 
7418 	return common_create_symlink(fd, path, toPath, mode, false);
7419 }
7420 
7421 
7422 status_t
7423 _user_create_link(const char *userPath, const char *userToPath)
7424 {
7425 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7426 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
7427 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
7428 		return B_NO_MEMORY;
7429 
7430 	char *path = pathBuffer.LockBuffer();
7431 	char *toPath = toPathBuffer.LockBuffer();
7432 
7433 	if (!IS_USER_ADDRESS(userPath)
7434 		|| !IS_USER_ADDRESS(userToPath)
7435 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
7436 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
7437 		return B_BAD_ADDRESS;
7438 
7439 	status_t status = check_path(toPath);
7440 	if (status < B_OK)
7441 		return status;
7442 
7443 	return common_create_link(path, toPath, false);
7444 }
7445 
7446 
7447 status_t
7448 _user_unlink(int fd, const char *userPath)
7449 {
7450 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7451 	if (pathBuffer.InitCheck() != B_OK)
7452 		return B_NO_MEMORY;
7453 
7454 	char *path = pathBuffer.LockBuffer();
7455 
7456 	if (!IS_USER_ADDRESS(userPath)
7457 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7458 		return B_BAD_ADDRESS;
7459 
7460 	return common_unlink(fd, path, false);
7461 }
7462 
7463 
7464 status_t
7465 _user_rename(int oldFD, const char *userOldPath, int newFD,
7466 	const char *userNewPath)
7467 {
7468 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
7469 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
7470 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
7471 		return B_NO_MEMORY;
7472 
7473 	char *oldPath = oldPathBuffer.LockBuffer();
7474 	char *newPath = newPathBuffer.LockBuffer();
7475 
7476 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
7477 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
7478 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
7479 		return B_BAD_ADDRESS;
7480 
7481 	return common_rename(oldFD, oldPath, newFD, newPath, false);
7482 }
7483 
7484 
7485 status_t
7486 _user_access(const char *userPath, int mode)
7487 {
7488 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7489 	if (pathBuffer.InitCheck() != B_OK)
7490 		return B_NO_MEMORY;
7491 
7492 	char *path = pathBuffer.LockBuffer();
7493 
7494 	if (!IS_USER_ADDRESS(userPath)
7495 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7496 		return B_BAD_ADDRESS;
7497 
7498 	return common_access(path, mode, false);
7499 }
7500 
7501 
status_t
_user_read_stat(int fd, const char *userPath, bool traverseLink,
	struct stat *userStat, size_t statSize)
{
	// Reads the stat of either the node referred to by (fd, userPath) or,
	// if userPath is NULL, of the descriptor fd itself. The caller passes
	// the size of its own struct stat; only that many bytes are copied
	// back, which keeps the syscall compatible with userland binaries
	// compiled against a smaller stat structure.
	struct stat stat;
	status_t status;

	// a caller's stat must not be larger than ours, or we'd copy garbage
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	if (!IS_USER_ADDRESS(userStat))
		return B_BAD_ADDRESS;

	if (userPath) {
		// path given: get the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char *path = pathBuffer.LockBuffer();

		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
		if (length < B_OK)
			return length;
			// propagate the user_strlcpy() error code
		if (length >= B_PATH_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, &stat);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	if (status < B_OK)
		return status;

	// copy only as much as the caller's stat structure can hold
	return user_memcpy(userStat, &stat, statSize);
}
7553 
7554 
status_t
_user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
	const struct stat *userStat, size_t statSize, int statMask)
{
	// Writes (parts of, selected by statMask) the given stat to either the
	// node referred to by (fd, userPath) or, if userPath is NULL, to the
	// descriptor fd itself. statSize may be smaller than our struct stat
	// for compatibility with older userland stat layouts.
	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	struct stat stat;

	if (!IS_USER_ADDRESS(userStat)
		|| user_memcpy(&stat, userStat, statSize) < B_OK)
		return B_BAD_ADDRESS;

	// clear additional stat fields
	// (anything the caller's smaller structure didn't provide)
	if (statSize < sizeof(struct stat))
		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);

	status_t status;

	if (userPath) {
		// path given: write the stat of the node referred to by (fd, path)
		if (!IS_USER_ADDRESS(userPath))
			return B_BAD_ADDRESS;

		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		char *path = pathBuffer.LockBuffer();

		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
		if (length < B_OK)
			return length;
			// propagate the user_strlcpy() error code
		if (length >= B_PATH_NAME_LENGTH)
			return B_NAME_TOO_LONG;

		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
			statMask, false);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(false), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	return status;
}
7610 
7611 
7612 int
7613 _user_open_attr_dir(int fd, const char *userPath)
7614 {
7615 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7616 	if (pathBuffer.InitCheck() != B_OK)
7617 		return B_NO_MEMORY;
7618 
7619 	char *path = pathBuffer.LockBuffer();
7620 
7621 	if (userPath != NULL) {
7622 		if (!IS_USER_ADDRESS(userPath)
7623 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7624 			return B_BAD_ADDRESS;
7625 	}
7626 
7627 	return attr_dir_open(fd, userPath ? path : NULL, false);
7628 }
7629 
7630 
7631 int
7632 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
7633 {
7634 	char name[B_FILE_NAME_LENGTH];
7635 
7636 	if (!IS_USER_ADDRESS(userName)
7637 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7638 		return B_BAD_ADDRESS;
7639 
7640 	return attr_create(fd, name, type, openMode, false);
7641 }
7642 
7643 
7644 int
7645 _user_open_attr(int fd, const char *userName, int openMode)
7646 {
7647 	char name[B_FILE_NAME_LENGTH];
7648 
7649 	if (!IS_USER_ADDRESS(userName)
7650 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7651 		return B_BAD_ADDRESS;
7652 
7653 	return attr_open(fd, name, openMode, false);
7654 }
7655 
7656 
7657 status_t
7658 _user_remove_attr(int fd, const char *userName)
7659 {
7660 	char name[B_FILE_NAME_LENGTH];
7661 
7662 	if (!IS_USER_ADDRESS(userName)
7663 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7664 		return B_BAD_ADDRESS;
7665 
7666 	return attr_remove(fd, name, false);
7667 }
7668 
7669 
7670 status_t
7671 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
7672 {
7673 	if (!IS_USER_ADDRESS(userFromName)
7674 		|| !IS_USER_ADDRESS(userToName))
7675 		return B_BAD_ADDRESS;
7676 
7677 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
7678 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
7679 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
7680 		return B_NO_MEMORY;
7681 
7682 	char *fromName = fromNameBuffer.LockBuffer();
7683 	char *toName = toNameBuffer.LockBuffer();
7684 
7685 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
7686 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
7687 		return B_BAD_ADDRESS;
7688 
7689 	return attr_rename(fromFile, fromName, toFile, toName, false);
7690 }
7691 
7692 
int
_user_open_index_dir(dev_t device)
{
	// Syscall entry: no user memory involved, just forward to the common
	// implementation (kernel == false).
	return index_dir_open(device, false);
}
7698 
7699 
7700 status_t
7701 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
7702 {
7703 	char name[B_FILE_NAME_LENGTH];
7704 
7705 	if (!IS_USER_ADDRESS(userName)
7706 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7707 		return B_BAD_ADDRESS;
7708 
7709 	return index_create(device, name, type, flags, false);
7710 }
7711 
7712 
7713 status_t
7714 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
7715 {
7716 	char name[B_FILE_NAME_LENGTH];
7717 	struct stat stat;
7718 	status_t status;
7719 
7720 	if (!IS_USER_ADDRESS(userName)
7721 		|| !IS_USER_ADDRESS(userStat)
7722 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7723 		return B_BAD_ADDRESS;
7724 
7725 	status = index_name_read_stat(device, name, &stat, false);
7726 	if (status == B_OK) {
7727 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
7728 			return B_BAD_ADDRESS;
7729 	}
7730 
7731 	return status;
7732 }
7733 
7734 
7735 status_t
7736 _user_remove_index(dev_t device, const char *userName)
7737 {
7738 	char name[B_FILE_NAME_LENGTH];
7739 
7740 	if (!IS_USER_ADDRESS(userName)
7741 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
7742 		return B_BAD_ADDRESS;
7743 
7744 	return index_remove(device, name, false);
7745 }
7746 
7747 
7748 status_t
7749 _user_getcwd(char *userBuffer, size_t size)
7750 {
7751 	if (!IS_USER_ADDRESS(userBuffer))
7752 		return B_BAD_ADDRESS;
7753 
7754 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
7755 	if (pathBuffer.InitCheck() != B_OK)
7756 		return B_NO_MEMORY;
7757 
7758 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
7759 
7760 	if (size > B_PATH_NAME_LENGTH)
7761 		size = B_PATH_NAME_LENGTH;
7762 
7763 	char *path = pathBuffer.LockBuffer();
7764 
7765 	status_t status = get_cwd(path, size, false);
7766 	if (status < B_OK)
7767 		return status;
7768 
7769 	// Copy back the result
7770 	if (user_strlcpy(userBuffer, path, size) < B_OK)
7771 		return B_BAD_ADDRESS;
7772 
7773 	return status;
7774 }
7775 
7776 
7777 status_t
7778 _user_setcwd(int fd, const char *userPath)
7779 {
7780 	TRACE(("user_setcwd: path = %p\n", userPath));
7781 
7782 	KPath pathBuffer(B_PATH_NAME_LENGTH);
7783 	if (pathBuffer.InitCheck() != B_OK)
7784 		return B_NO_MEMORY;
7785 
7786 	char *path = pathBuffer.LockBuffer();
7787 
7788 	if (userPath != NULL) {
7789 		if (!IS_USER_ADDRESS(userPath)
7790 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
7791 			return B_BAD_ADDRESS;
7792 	}
7793 
7794 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
7795 }
7796 
7797 
7798 int
7799 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
7800 	uint32 flags, port_id port, int32 token)
7801 {
7802 	char *query;
7803 
7804 	if (device < 0 || userQuery == NULL || queryLength == 0)
7805 		return B_BAD_VALUE;
7806 
7807 	// this is a safety restriction
7808 	if (queryLength >= 65536)
7809 		return B_NAME_TOO_LONG;
7810 
7811 	query = (char *)malloc(queryLength + 1);
7812 	if (query == NULL)
7813 		return B_NO_MEMORY;
7814 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
7815 		free(query);
7816 		return B_BAD_ADDRESS;
7817 	}
7818 
7819 	int fd = query_open(device, query, flags, port, token, false);
7820 
7821 	free(query);
7822 	return fd;
7823 }
7824