xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 2ae568931fcac7deb9f1e6ff4e47213fbfe4029b)
1 /*
2  * Copyright 2002-2005, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  *
5  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
6  * Distributed under the terms of the NewOS License.
7  */
8 
9 /* Virtual File System and File System Interface Layer */
10 
11 #include <OS.h>
12 #include <StorageDefs.h>
13 #include <fs_info.h>
14 #include <fs_interface.h>
15 #include <fs_volume.h>
16 
17 #include <disk_device_manager/KDiskDevice.h>
18 #include <disk_device_manager/KDiskDeviceManager.h>
19 #include <disk_device_manager/KDiskDeviceUtils.h>
20 #include <disk_device_manager/KDiskSystem.h>
21 #include <KPath.h>
22 #include <syscalls.h>
23 #include <boot/kernel_args.h>
24 #include <vfs.h>
25 #include <vm.h>
26 #include <vm_cache.h>
27 #include <file_cache.h>
28 #include <block_cache.h>
29 #include <khash.h>
30 #include <lock.h>
31 #include <fd.h>
32 #include <fs/node_monitor.h>
33 #include <util/kernel_cpp.h>
34 
35 #include <string.h>
36 #include <stdio.h>
37 #include <ctype.h>
38 #include <unistd.h>
39 #include <sys/stat.h>
40 #include <sys/resource.h>
41 #include <fcntl.h>
42 #include <limits.h>
43 #include <stddef.h>
44 
45 //#define TRACE_VFS
46 #ifdef TRACE_VFS
47 #	define PRINT(x) dprintf x
48 #	define FUNCTION(x) dprintf x
49 #else
50 #	define PRINT(x) ;
51 #	define FUNCTION(x) ;
52 #endif
53 
54 #define ADD_DEBUGGER_COMMANDS
55 
56 #define MAX_SYM_LINKS SYMLINKS_MAX
57 
58 const static uint32 kMaxUnusedVnodes = 8192;
59 	// This is the maximum number of unused vnodes that the system
60 	// will keep around.
61 	// It may be chosen with respect to the available memory or enhanced
	// by some timestamp/frequency heuristic.
63 
// In-memory representation of a file system node. One instance exists per
// (mount_id, vnode_id) pair; it connects the VFS bookkeeping to the file
// system's private node handle.
struct vnode {
	struct vnode	*next;			// chain link -- presumably the sVnodeTable hash chain; confirm against khash usage
	vm_cache_ref	*cache;			// attached file cache, or NULL
	mount_id		device;			// ID of the mount this node belongs to (immutable)
	list_link		mount_link;		// link in fs_mount::vnodes
	list_link		unused_link;	// NOTE(review): presumably the sUnusedVnodeList link -- confirm
	vnode_id		id;				// node ID, unique within its mount (immutable)
	fs_vnode		private_node;	// the file system's private node handle (immutable)
	struct fs_mount	*mount;			// the mount this node lives on (immutable)
	struct vnode	*covered_by;	// root vnode of a volume mounted on this node, or NULL; guarded by sMountOpLock
	int32			ref_count;		// reference count; 0 means the node sits on the unused list
	uint8			remove : 1;		// node shall be deleted when the last reference goes away
	uint8			busy : 1;		// node is being constructed or destructed; don't touch
	uint8			unpublished : 1;	// node was never published to the FS layer's users
	struct advisory_locking	*advisory_locking;	// advisory file locks, created lazily
};
80 
// Lookup key for sVnodeTable: a vnode is identified by its mount and node ID.
struct vnode_hash_key {
	mount_id	device;		// mount ID
	vnode_id	vnode;		// node ID within that mount
};
85 
// Convenience accessors for a file system hook, reached through a vnode
// respectively a mount. The result is the hook function pointer (may be
// NULL if the FS doesn't implement the operation).
#define FS_CALL(vnode, op) (vnode->mount->fs->op)
#define FS_MOUNT_CALL(mount, op) (mount->fs->op)
88 
// Represents one mounted volume: ties together the file system module, the
// FS's private volume cookie, the partition the volume lives on, and all
// vnodes belonging to the mount.
struct fs_mount {
	struct fs_mount	*next;		// chain link -- presumably the sMountsTable hash chain; confirm against khash usage
	file_system_module_info *fs;	// the file system module backing this mount
	mount_id		id;			// unique mount ID (assigned from sNextMountID)
	void			*cookie;	// the FS's private volume handle
	char			*device_name;	// name of the device mounted from (may be NULL?) -- see fs_mount(); not visible here
	char			*fs_name;	// short file system name (cf. get_file_system_name())
	recursive_lock	rlock;	// guards the vnodes list
	struct vnode	*root_vnode;	// root node of this volume
	struct vnode	*covers_vnode;	// the vnode this volume is mounted upon (mount point)
	KPartition		*partition;	// partition the volume resides on, if any
	struct list		vnodes;		// all vnodes of this mount (guarded by rlock)
	bool			unmounting;	// set while the volume is being unmounted
	bool			owns_file_device;	// NOTE(review): presumably "mount created the file device and must clean it up" -- confirm in fs_(un)mount()
};
104 
// Per-vnode bookkeeping for advisory file locks (created lazily by
// create_advisory_locking()).
struct advisory_locking {
	sem_id			lock;		// protects the locks list
	sem_id			wait_sem;	// threads blocked on a colliding lock wait here
	struct list		locks;		// list of advisory_lock entries
};
110 
// One advisory lock held by a team on a byte region of a file.
struct advisory_lock {
	list_link		link;		// link in advisory_locking::locks
	team_id			team;		// team owning the lock
	off_t			offset;		// start of the locked region (normalized)
	off_t			length;		// length of the locked region (normalized)
	bool			shared;		// true for shared/read locks (F_RDLCK)
};
118 
119 static mutex sFileSystemsMutex;
120 
121 /**	\brief Guards sMountsTable.
122  *
123  *	The holder is allowed to read/write access the sMountsTable.
124  *	Manipulation of the fs_mount structures themselves
125  *	(and their destruction) requires different locks though.
126  */
127 static mutex sMountMutex;
128 
129 /**	\brief Guards mount/unmount operations.
130  *
131  *	The fs_mount() and fs_unmount() hold the lock during their whole operation.
132  *	That is locking the lock ensures that no FS is mounted/unmounted. In
133  *	particular this means that
134  *	- sMountsTable will not be modified,
135  *	- the fields immutable after initialization of the fs_mount structures in
136  *	  sMountsTable will not be modified,
137  *	- vnode::covered_by of any vnode in sVnodeTable will not be modified,
138  *
139  *	The thread trying to lock the lock must not hold sVnodeMutex or
140  *	sMountMutex.
141  */
142 static recursive_lock sMountOpLock;
143 
144 /**	\brief Guards sVnodeTable.
145  *
 *	The holder is allowed to read/write access sVnodeTable and
 *	any unbusy vnode in that table, save
148  *	to the immutable fields (device, id, private_node, mount) to which
149  *	only read-only access is allowed, and to the field covered_by, which is
150  *	guarded by sMountOpLock.
151  *
152  *	The thread trying to lock the mutex must not hold sMountMutex.
153  */
154 static mutex sVnodeMutex;
155 
156 #define VNODE_HASH_TABLE_SIZE 1024
157 static hash_table *sVnodeTable;
158 static list sUnusedVnodeList;
159 static uint32 sUnusedVnodes = 0;
160 static struct vnode *sRoot;
161 
162 #define MOUNTS_HASH_TABLE_SIZE 16
163 static hash_table *sMountsTable;
164 static mount_id sNextMountID = 1;
165 
166 mode_t __gUmask = 022;
167 
168 /* function declarations */
169 
170 // file descriptor operation prototypes
171 static status_t file_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
172 static status_t file_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
173 static off_t file_seek(struct file_descriptor *, off_t pos, int seek_type);
174 static void file_free_fd(struct file_descriptor *);
175 static status_t file_close(struct file_descriptor *);
176 static status_t file_select(struct file_descriptor *, uint8 event, uint32 ref,
177 	struct select_sync *sync);
178 static status_t file_deselect(struct file_descriptor *, uint8 event,
179 	struct select_sync *sync);
180 static status_t dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
181 static status_t dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count);
182 static status_t dir_rewind(struct file_descriptor *);
183 static void dir_free_fd(struct file_descriptor *);
184 static status_t dir_close(struct file_descriptor *);
185 static status_t attr_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
186 static status_t attr_dir_rewind(struct file_descriptor *);
187 static void attr_dir_free_fd(struct file_descriptor *);
188 static status_t attr_dir_close(struct file_descriptor *);
189 static status_t attr_read(struct file_descriptor *, off_t pos, void *buffer, size_t *);
190 static status_t attr_write(struct file_descriptor *, off_t pos, const void *buffer, size_t *);
191 static off_t attr_seek(struct file_descriptor *, off_t pos, int seek_type);
192 static void attr_free_fd(struct file_descriptor *);
193 static status_t attr_close(struct file_descriptor *);
194 static status_t attr_read_stat(struct file_descriptor *, struct stat *);
195 static status_t attr_write_stat(struct file_descriptor *, const struct stat *, int statMask);
196 static status_t index_dir_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
197 static status_t index_dir_rewind(struct file_descriptor *);
198 static void index_dir_free_fd(struct file_descriptor *);
199 static status_t index_dir_close(struct file_descriptor *);
200 static status_t query_read(struct file_descriptor *, struct dirent *buffer, size_t bufferSize, uint32 *_count);
201 static status_t query_rewind(struct file_descriptor *);
202 static void query_free_fd(struct file_descriptor *);
203 static status_t query_close(struct file_descriptor *);
204 
205 static status_t common_ioctl(struct file_descriptor *, ulong, void *buf, size_t len);
206 static status_t common_read_stat(struct file_descriptor *, struct stat *);
207 static status_t common_write_stat(struct file_descriptor *, const struct stat *, int statMask);
208 
209 static status_t vnode_path_to_vnode(struct vnode *vnode, char *path,
210 	bool traverseLeafLink, int count, struct vnode **_vnode, vnode_id *_parentID, int *_type);
211 static status_t dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize);
212 static status_t fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
213 	struct vnode **_vnode, vnode_id *_parentID, bool kernel);
214 static void inc_vnode_ref_count(struct vnode *vnode);
215 static status_t dec_vnode_ref_count(struct vnode *vnode, bool reenter);
216 static inline void put_vnode(struct vnode *vnode);
217 
// Descriptor operations for regular files.
static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};
232 
// Descriptor operations for directories.
static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};
247 
// Descriptor operations for attribute directories.
static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};
262 
// Descriptor operations for file attributes.
static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};
277 
// Descriptor operations for index directories.
static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};
292 
#if 0
// Descriptor operations for indices -- currently disabled/unused.
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif
309 
// Descriptor operations for queries.
static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};
324 
325 
326 // VNodePutter
327 class VNodePutter {
328 public:
329 	VNodePutter(struct vnode *vnode = NULL) : fVNode(vnode) {}
330 
331 	~VNodePutter()
332 	{
333 		Put();
334 	}
335 
336 	void SetTo(struct vnode *vnode)
337 	{
338 		Put();
339 		fVNode = vnode;
340 	}
341 
342 	void Put()
343 	{
344 		if (fVNode) {
345 			put_vnode(fVNode);
346 			fVNode = NULL;
347 		}
348 	}
349 
350 	struct vnode *Detach()
351 	{
352 		struct vnode *vnode = fVNode;
353 		fVNode = NULL;
354 		return vnode;
355 	}
356 
357 private:
358 	struct vnode *fVNode;
359 };
360 
361 
362 class FDCloser {
363 public:
364 	FDCloser() : fFD(-1), fKernel(true) {}
365 
366 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
367 
368 	~FDCloser()
369 	{
370 		Close();
371 	}
372 
373 	void SetTo(int fd, bool kernel)
374 	{
375 		Close();
376 		fFD = fd;
377 		fKernel = kernel;
378 	}
379 
380 	void Close()
381 	{
382 		if (fFD >= 0) {
383 			if (fKernel)
384 				_kern_close(fFD);
385 			else
386 				_user_close(fFD);
387 			fFD = -1;
388 		}
389 	}
390 
391 	int Detach()
392 	{
393 		int fd = fFD;
394 		fFD = -1;
395 		return fd;
396 	}
397 
398 private:
399 	int		fFD;
400 	bool	fKernel;
401 };
402 
403 
404 static int
405 mount_compare(void *_m, const void *_key)
406 {
407 	struct fs_mount *mount = (fs_mount *)_m;
408 	const mount_id *id = (mount_id *)_key;
409 
410 	if (mount->id == *id)
411 		return 0;
412 
413 	return -1;
414 }
415 
416 
417 static uint32
418 mount_hash(void *_m, const void *_key, uint32 range)
419 {
420 	struct fs_mount *mount = (fs_mount *)_m;
421 	const mount_id *id = (mount_id *)_key;
422 
423 	if (mount)
424 		return mount->id % range;
425 
426 	return (uint32)*id % range;
427 }
428 
429 
430 /** Finds the mounted device (the fs_mount structure) with the given ID.
 *	Note, you must hold the sMountMutex lock when you call this function.
432  */
433 
static struct fs_mount *
find_mount(mount_id id)
{
	// the table lookup is only safe while sMountMutex is held
	ASSERT_LOCKED_MUTEX(&sMountMutex);

	return (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
}
441 
442 
443 static status_t
444 get_mount(mount_id id, struct fs_mount **_mount)
445 {
446 	struct fs_mount *mount;
447 	status_t status;
448 
449 	mutex_lock(&sMountMutex);
450 
451 	mount = find_mount(id);
452 	if (mount) {
453 		// ToDo: the volume is locked (against removal) by locking
454 		//	its root node - investigate if that's a good idea
455 		if (mount->root_vnode)
456 			inc_vnode_ref_count(mount->root_vnode);
457 		else {
458 			// might have been called during a mount operation in which
459 			// case the root node may still be NULL
460 			mount = NULL;
461 		}
462 	} else
463 		status = B_BAD_VALUE;
464 
465 	mutex_unlock(&sMountMutex);
466 
467 	if (mount == NULL)
468 		return B_BUSY;
469 
470 	*_mount = mount;
471 	return B_OK;
472 }
473 
474 
/**	Releases the reference to the mount's root vnode that get_mount()
 *	acquired. A NULL \a mount is tolerated.
 */

static void
put_mount(struct fs_mount *mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}
481 
482 
/**	Releases the module reference acquired by get_file_system(). */

static status_t
put_file_system(file_system_module_info *fs)
{
	return put_module(fs->info.name);
}
488 
489 
490 /**	Tries to open the specified file system module.
491  *	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
492  *	Returns a pointer to file system module interface, or NULL if it
493  *	could not open the module.
494  */
495 
496 static file_system_module_info *
497 get_file_system(const char *fsName)
498 {
499 	char name[B_FILE_NAME_LENGTH];
500 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
501 		// construct module name if we didn't get one
502 		// (we currently support only one API)
503 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
504 		fsName = NULL;
505 	}
506 
507 	file_system_module_info *info;
508 	if (get_module(fsName ? fsName : name, (module_info **)&info) != B_OK)
509 		return NULL;
510 
511 	return info;
512 }
513 
514 
515 /**	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
516  *	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
517  *	The name is allocated for you, and you have to free() it when you're
518  *	done with it.
 *	Returns NULL if the required memory is not available.
520  */
521 
/**	Duplicates a string on the heap; returns NULL on allocation failure. */
static char *
duplicate_string(const char *string)
{
	size_t size = strlen(string) + 1;
	char *copy = (char *)malloc(size);
	if (copy != NULL)
		memcpy(copy, string, size);
	return copy;
}


static char *
get_file_system_name(const char *fsName)
{
	const size_t prefixLength = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", prefixLength) != 0) {
		// the name already seems to be the module's file name
		return duplicate_string(fsName);
	}

	const char *shortName = fsName + prefixLength;
	const char *slash = strchr(shortName, '/');
	if (slash == NULL) {
		// this doesn't seem to be a valid name, but well...
		return duplicate_string(shortName);
	}

	// cut off the trailing /v1: copy the part up to (excluding) the slash
	size_t nameLength = slash - shortName;
	char *name = (char *)malloc(nameLength + 1);
	if (name == NULL)
		return NULL;

	memcpy(name, shortName, nameLength);
	name[nameLength] = '\0';
	return name;
}
548 
549 
550 static int
551 vnode_compare(void *_vnode, const void *_key)
552 {
553 	struct vnode *vnode = (struct vnode *)_vnode;
554 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
555 
556 	if (vnode->device == key->device && vnode->id == key->vnode)
557 		return 0;
558 
559 	return -1;
560 }
561 
562 
563 static uint32
564 vnode_hash(void *_vnode, const void *_key, uint32 range)
565 {
566 	struct vnode *vnode = (struct vnode *)_vnode;
567 	const struct vnode_hash_key *key = (vnode_hash_key *)_key;
568 
569 #define VHASH(mountid, vnodeid) (((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
570 
571 	if (vnode != NULL)
572 		return VHASH(vnode->device, vnode->id) % range;
573 
574 	return VHASH(key->device, key->vnode) % range;
575 
576 #undef VHASH
577 }
578 
579 
/**	Adds \a vnode to its mount's list of vnodes; the list is guarded by the
 *	mount's recursive lock.
 */

static void
add_vnode_to_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_add_link_to_head(&mount->vnodes, &vnode->mount_link);

	recursive_lock_unlock(&mount->rlock);
}
589 
590 
/**	Removes \a vnode from its mount's vnode list (counterpart to
 *	add_vnode_to_mount_list()).
 */

static void
remove_vnode_from_mount_list(struct vnode *vnode, struct fs_mount *mount)
{
	recursive_lock_lock(&mount->rlock);

	list_remove_link(&vnode->mount_link);
	// clear the link so a stale pointer can't be followed later
	vnode->mount_link.next = vnode->mount_link.prev = NULL;

	recursive_lock_unlock(&mount->rlock);
}
601 
602 
/**	Allocates a new vnode for the given (mountID, vnodeID) pair, inserts it
 *	into the vnode hash table and its mount's vnode list, and returns it via
 *	\a _vnode with a reference count of 1.
 *	NOTE(review): the sVnodeTable insert appears to rely on the caller
 *	already holding sVnodeMutex (as get_vnode() does) -- confirm before
 *	calling from elsewhere.
 *	\return \c B_NO_MEMORY if allocation fails, \c B_ENTRY_NOT_FOUND if the
 *			mount doesn't exist (anymore) or is being unmounted.
 */

static status_t
create_new_vnode(struct vnode **_vnode, mount_id mountID, vnode_id vnodeID)
{
	FUNCTION(("create_new_vnode()\n"));

	struct vnode *vnode = (struct vnode *)malloc(sizeof(struct vnode));
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;

	// add the vnode to the mount structure
	mutex_lock(&sMountMutex);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		// the mount is gone or going away -- bail out
		mutex_unlock(&sMountMutex);
		free(vnode);
		return B_ENTRY_NOT_FOUND;
	}

	hash_insert(sVnodeTable, vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	mutex_unlock(&sMountMutex);

	// the initial reference belongs to the caller
	vnode->ref_count = 1;
	*_vnode = vnode;

	return B_OK;
}
636 
637 
638 /**	Frees the vnode and all resources it has acquired, and removes
639  *	it from the vnode hash as well as from its mount structure.
640  *	Will also make sure that any cache modifications are written back.
641  */
642 
static void
free_vnode(struct vnode *vnode, bool reenter)
{
	// must only be called for an unreferenced vnode that is marked busy
	ASSERT(vnode->ref_count == 0 && vnode->busy);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (vnode->cache && !vnode->remove)
		vm_cache_write_modified(vnode->cache);

	// Let the file system dispose of its private node: remove_vnode() if the
	// node is marked for deletion, put_vnode() otherwise. Unpublished nodes
	// are not announced to the FS at all.
	if (!vnode->unpublished) {
		if (vnode->remove)
			FS_CALL(vnode, remove_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
		else
			FS_CALL(vnode, put_vnode)(vnode->mount->cookie, vnode->private_node, reenter);
	}

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (and remove the busy vnode from the hash)
	mutex_lock(&sVnodeMutex);
	hash_remove(sVnodeTable, vnode);
	mutex_unlock(&sVnodeMutex);

	// if we have a vm_cache attached, remove it
	if (vnode->cache)
		vm_cache_release_ref(vnode->cache);

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	free(vnode);
}
678 
679 
680 /**	\brief Decrements the reference counter of the given vnode and deletes it,
681  *	if the counter dropped to 0.
682  *
683  *	The caller must, of course, own a reference to the vnode to call this
684  *	function.
685  *	The caller must not hold the sVnodeMutex or the sMountMutex.
686  *
687  *	\param vnode the vnode.
688  *	\param reenter \c true, if this function is called (indirectly) from within
689  *		   a file system.
690  *	\return \c B_OK, if everything went fine, an error code otherwise.
691  */
692 
static status_t
dec_vnode_ref_count(struct vnode *vnode, bool reenter)
{
	int32 oldRefCount;

	mutex_lock(&sVnodeMutex);

	// a busy vnode must not be referenced by anyone but its constructor/destructor
	if (vnode->busy)
		panic("dec_vnode_ref_count called on vnode that was busy! vnode %p\n", vnode);

	oldRefCount = atomic_add(&vnode->ref_count, -1);

	PRINT(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));

	if (oldRefCount == 1) {
		// we just released the last reference
		bool freeNode = false;

		// Just insert the vnode into an unused list if we don't need
		// to delete it
		if (vnode->remove) {
			vnode->busy = true;
			freeNode = true;
		} else {
			list_add_item(&sUnusedVnodeList, vnode);
			if (++sUnusedVnodes > kMaxUnusedVnodes) {
				// there are too many unused vnodes so we free the oldest one
				// ToDo: evaluate this mechanism
				// NOTE: `vnode` is deliberately rebound here -- from now on it
				// refers to the evicted (oldest unused) node, not the one
				// passed in, and that is the node free_vnode() will destroy
				vnode = (struct vnode *)list_remove_head_item(&sUnusedVnodeList);
				vnode->busy = true;
				freeNode = true;
				sUnusedVnodes--;
			}
		}

		// drop the mutex before re-entering the FS via free_vnode()
		mutex_unlock(&sVnodeMutex);

		if (freeNode)
			free_vnode(vnode, reenter);
	} else
		mutex_unlock(&sVnodeMutex);

	return B_OK;
}
736 
737 
738 /**	\brief Increments the reference counter of the given vnode.
739  *
740  *	The caller must either already have a reference to the vnode or hold
741  *	the sVnodeMutex.
742  *
743  *	\param vnode the vnode.
744  */
745 
static void
inc_vnode_ref_count(struct vnode *vnode)
{
	// atomic, so existing reference holders don't need sVnodeMutex here
	atomic_add(&vnode->ref_count, 1);
	PRINT(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode, vnode->ref_count));
}
752 
753 
754 /**	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
755  *
756  *	The caller must hold the sVnodeMutex.
757  *
758  *	\param mountID the mount ID.
759  *	\param vnodeID the node ID.
760  *
761  *	\return The vnode structure, if it was found in the hash table, \c NULL
762  *			otherwise.
763  */
764 
765 static struct vnode *
766 lookup_vnode(mount_id mountID, vnode_id vnodeID)
767 {
768 	struct vnode_hash_key key;
769 
770 	key.device = mountID;
771 	key.vnode = vnodeID;
772 
773 	return (vnode *)hash_lookup(sVnodeTable, &key);
774 }
775 
776 
777 /**	\brief Retrieves a vnode for a given mount ID, node ID pair.
778  *
779  *	If the node is not yet in memory, it will be loaded.
780  *
781  *	The caller must not hold the sVnodeMutex or the sMountMutex.
782  *
783  *	\param mountID the mount ID.
784  *	\param vnodeID the node ID.
785  *	\param _vnode Pointer to a vnode* variable into which the pointer to the
786  *		   retrieved vnode structure shall be written.
787  *	\param reenter \c true, if this function is called (indirectly) from within
788  *		   a file system.
 *	\return \c B_OK, if everything went fine, an error code otherwise.
790  */
791 
static status_t
get_vnode(mount_id mountID, vnode_id vnodeID, struct vnode **_vnode, int reenter)
{
	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID, _vnode));

	mutex_lock(&sVnodeMutex);

restart:
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode && vnode->busy) {
		// someone else is constructing/destructing this node -- drop the
		// lock and poll until it becomes usable
		// ToDo: this is an endless loop if the vnode is not
		//	becoming unbusy anymore (for whatever reason)
		mutex_unlock(&sVnodeMutex);
		snooze(10000); // 10 ms
		mutex_lock(&sVnodeMutex);
		goto restart;
	}

	PRINT(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			list_remove_item(&sUnusedVnodeList, vnode);
		}
		inc_vnode_ref_count(vnode);
	} else {
		// we need to create a new vnode and read it in
		status = create_new_vnode(&vnode, mountID, vnodeID);
		if (status < B_OK)
			goto err;

		// mark it busy so concurrent lookups wait until the FS has loaded it
		vnode->busy = true;
		mutex_unlock(&sVnodeMutex);

		status = FS_CALL(vnode, get_vnode)(vnode->mount->cookie, vnodeID, &vnode->private_node, reenter);
		if (status < B_OK || vnode->private_node == NULL) {
			// treat a "successful" call that yields no private node as an error
			if (status == B_NO_ERROR)
				status = B_BAD_VALUE;
		}
		mutex_lock(&sVnodeMutex);

		if (status < B_OK)
			goto err1;

		vnode->busy = false;
	}

	mutex_unlock(&sVnodeMutex);

	PRINT(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;

err1:
	// undo the registration create_new_vnode() performed
	hash_remove(sVnodeTable, vnode);
	remove_vnode_from_mount_list(vnode, vnode->mount);
err:
	mutex_unlock(&sVnodeMutex);
	if (vnode)
		free(vnode);

	return status;
}
859 
860 
861 /**	\brief Decrements the reference counter of the given vnode and deletes it,
862  *	if the counter dropped to 0.
863  *
864  *	The caller must, of course, own a reference to the vnode to call this
865  *	function.
866  *	The caller must not hold the sVnodeMutex or the sMountMutex.
867  *
868  *	\param vnode the vnode.
869  */
870 
static inline void
put_vnode(struct vnode *vnode)
{
	// shorthand for releasing a reference from outside a file system call
	dec_vnode_ref_count(vnode, false);
}
876 
877 
878 static status_t
879 create_advisory_locking(struct vnode *vnode)
880 {
881 	status_t status;
882 
883 	struct advisory_locking *locking = (struct advisory_locking *)malloc(sizeof(struct advisory_locking));
884 	if (locking == NULL)
885 		return B_NO_MEMORY;
886 
887 	locking->wait_sem = create_sem(0, "advisory lock");
888 	if (locking->wait_sem < B_OK) {
889 		status = locking->wait_sem;
890 		goto err1;
891 	}
892 
893 	locking->lock = create_sem(1, "advisory locking");
894 	if (locking->lock < B_OK) {
895 		status = locking->lock;
896 		goto err2;
897 	}
898 
899 	list_init(&locking->locks);
900 	vnode->advisory_locking = locking;
901 	return B_OK;
902 
903 err2:
904 	delete_sem(locking->wait_sem);
905 err1:
906 	free(locking);
907 	return status;
908 }
909 
910 
/**	Releases the advisory_locking lock semaphore acquired by
 *	get_advisory_locking().
 */

static inline void
put_advisory_locking(struct advisory_locking *locking)
{
	release_sem(locking->lock);
}
916 
917 
/**	Returns the vnode's advisory_locking structure with its lock semaphore
 *	acquired, or NULL if the vnode has none. The caller must release it
 *	again via put_advisory_locking().
 */

static struct advisory_locking *
get_advisory_locking(struct vnode *vnode)
{
	// sVnodeMutex guards the vnode's advisory_locking pointer itself
	mutex_lock(&sVnodeMutex);

	struct advisory_locking *locking = vnode->advisory_locking;
	if (locking != NULL)
		acquire_sem(locking->lock);

	mutex_unlock(&sVnodeMutex);
	return locking;
}
930 
931 
/**	Presumably meant to retrieve the lock colliding with the one described
 *	by \a flock (F_GETLK-style) -- not implemented yet; always fails with
 *	\c B_ERROR.
 */

static status_t
get_advisory_lock(struct vnode *vnode, struct flock *flock)
{
	// ToDo: implement advisory lock querying
	return B_ERROR;
}
937 
938 
939 /**	Removes the specified lock, or all locks of the calling team
940  *	if \a flock is NULL.
941  */
942 
943 static status_t
944 release_advisory_lock(struct vnode *vnode, struct flock *flock)
945 {
946 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
947 
948 	struct advisory_locking *locking = get_advisory_locking(vnode);
949 	if (locking == NULL)
950 		return flock != NULL ? B_BAD_VALUE : B_OK;
951 
952 	team_id team = team_get_current_team_id();
953 
954 	// find matching lock entry
955 
956 	status_t status = B_BAD_VALUE;
957 	struct advisory_lock *lock = NULL;
958 	while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
959 		if (lock->team == team && (flock == NULL || (flock != NULL
960 			&& lock->offset == flock->l_start
961 			&& lock->length == flock->l_len))) {
962 			// we found our lock, free it
963 			list_remove_item(&locking->locks, lock);
964 			free(lock);
965 			status = B_OK;
966 			break;
967 		}
968 	}
969 
970 	bool removeLocking = list_is_empty(&locking->locks);
971 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
972 
973 	put_advisory_locking(locking);
974 
975 	if (status < B_OK)
976 		return status;
977 
978 	if (removeLocking) {
979 		// we can remove the whole advisory locking structure; it's no longer used
980 		mutex_lock(&sVnodeMutex);
981 		locking = vnode->advisory_locking;
982 		if (locking != NULL)
983 			acquire_sem(locking->lock);
984 
985 		// the locking could have been changed in the mean time
986 		if (list_is_empty(&locking->locks))
987 			vnode->advisory_locking = NULL;
988 		else {
989 			removeLocking = false;
990 			release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
991 		}
992 
993 		mutex_unlock(&sVnodeMutex);
994 	}
995 	if (removeLocking) {
996 		// we've detached the locking from the vnode, so we can safely delete it
997 		delete_sem(locking->lock);
998 		delete_sem(locking->wait_sem);
999 		free(locking);
1000 	}
1001 
1002 	return B_OK;
1003 }
1004 
1005 
/**	Acquires an advisory lock on the \a vnode as described by \a flock,
 *	which must already be normalized (see normalize_flock()).
 *	If a colliding lock exists and \a wait is \c true, the function blocks
 *	and retries; otherwise it fails with \c B_PERMISSION_DENIED.
 */

static status_t
acquire_advisory_lock(struct vnode *vnode, struct flock *flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	status_t status = B_OK;

restart:
	// if this vnode has an advisory_locking structure attached,
	// lock that one and search for any colliding lock
	struct advisory_locking *locking = get_advisory_locking(vnode);
	sem_id waitForLock = -1;

	if (locking != NULL) {
		// test for collisions
		struct advisory_lock *lock = NULL;
		while ((lock = (struct advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
			// NOTE(review): "<=" means a lock starting exactly at
			// l_start + l_len counts as overlapping -- confirm this
			// boundary is intended
			if (lock->offset <= flock->l_start + flock->l_len
				&& lock->offset + lock->length > flock->l_start) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		// keep locking->lock held if we're about to wait -- it is released
		// atomically by switch_sem_etc() below
		if (waitForLock < B_OK || !wait)
			put_advisory_locking(locking);
	}

	// wait for the lock if we have to, or else return immediately

	if (waitForLock >= B_OK) {
		if (!wait)
			status = B_PERMISSION_DENIED;
		else {
			// releases locking->lock and waits on wait_sem in one step;
			// release_advisory_lock() wakes all waiters (B_RELEASE_ALL)
			status = switch_sem_etc(locking->lock, waitForLock, 1, B_CAN_INTERRUPT, 0);
			if (status == B_OK) {
				// see if we're still colliding
				goto restart;
			}
		}
	}

	if (status < B_OK)
		return status;

	// install new lock

	mutex_lock(&sVnodeMutex);

	// lazily create the advisory_locking structure on first use
	locking = vnode->advisory_locking;
	if (locking == NULL) {
		status = create_advisory_locking(vnode);
		locking = vnode->advisory_locking;
	}

	if (locking != NULL)
		acquire_sem(locking->lock);

	mutex_unlock(&sVnodeMutex);

	if (status < B_OK)
		return status;

	struct advisory_lock *lock = (struct advisory_lock *)malloc(sizeof(struct advisory_lock));
	if (lock == NULL) {
		// let other waiters retry before bailing out
		if (waitForLock >= B_OK)
			release_sem_etc(waitForLock, 1, B_RELEASE_ALL);
		release_sem(locking->lock);
		return B_NO_MEMORY;
	}

	lock->team = team_get_current_team_id();
	// values must already be normalized when getting here
	lock->offset = flock->l_start;
	lock->length = flock->l_len;
	lock->shared = shared;

	list_add_item(&locking->locks, lock);
	release_sem(locking->lock);

	return status;
}
1094 
1095 
/**	Normalizes \a flock in place: resolves l_whence into an absolute
 *	l_start, maps a zero l_len to "until end of file" (OFF_MAX), clamps
 *	against off_t overflow, and folds a negative length into a positive one
 *	by moving the start backwards.
 *	\return \c B_OK, \c B_BAD_VALUE for an unknown l_whence, \c EOPNOTSUPP
 *			if SEEK_END is requested but the FS has no read_stat hook, or a
 *			stat error.
 */

static status_t
normalize_flock(struct file_descriptor *descriptor, struct flock *flock)
{
	switch (flock->l_whence) {
		case SEEK_SET:
			break;
		case SEEK_CUR:
			// relative to the descriptor's current position
			flock->l_start += descriptor->pos;
			break;
		case SEEK_END:
		{
			// relative to the current file size -- ask the file system
			struct vnode *vnode = descriptor->u.vnode;
			struct stat stat;
			status_t status;

			if (FS_CALL(vnode, read_stat) == NULL)
				return EOPNOTSUPP;

			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
			if (status < B_OK)
				return status;

			flock->l_start += stat.st_size;
			break;
		}
		default:
			return B_BAD_VALUE;
	}

	if (flock->l_start < 0)
		flock->l_start = 0;
	if (flock->l_len == 0)
		flock->l_len = OFF_MAX;

	// don't let the offset and length overflow
	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
		flock->l_len = OFF_MAX - flock->l_start;

	if (flock->l_len < 0) {
		// a negative length reverses the region
		flock->l_start += flock->l_len;
		flock->l_len = -flock->l_len;
	}

	return B_OK;
}
1142 
1143 
1144 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1145  *		   by.
1146  *
1147  *	Given an arbitrary vnode, the function checks, whether the node is covered
1148  *	by the root of a volume. If it is the function obtains a reference to the
1149  *	volume root node and returns it.
1150  *
1151  *	\param vnode The vnode in question.
1152  *	\return The volume root vnode the vnode cover is covered by, if it is
1153  *			indeed a mount point, or \c NULL otherwise.
1154  */
1155 
1156 static struct vnode *
1157 resolve_mount_point_to_volume_root(struct vnode *vnode)
1158 {
1159 	if (!vnode)
1160 		return NULL;
1161 
1162 	struct vnode *volumeRoot = NULL;
1163 
1164 	recursive_lock_lock(&sMountOpLock);
1165 	if (vnode->covered_by) {
1166 		volumeRoot = vnode->covered_by;
1167 		inc_vnode_ref_count(volumeRoot);
1168 	}
1169 	recursive_lock_unlock(&sMountOpLock);
1170 
1171 	return volumeRoot;
1172 }
1173 
1174 
1175 /**	\brief Resolves a mount point vnode to the volume root vnode it is covered
1176  *		   by.
1177  *
1178  *	Given an arbitrary vnode (identified by mount and node ID), the function
1179  *	checks, whether the node is covered by the root of a volume. If it is the
1180  *	function returns the mount and node ID of the volume root node. Otherwise
1181  *	it simply returns the supplied mount and node ID.
1182  *
1183  *	In case of error (e.g. the supplied node could not be found) the variables
1184  *	for storing the resolved mount and node ID remain untouched and an error
1185  *	code is returned.
1186  *
1187  *	\param mountID The mount ID of the vnode in question.
1188  *	\param nodeID The node ID of the vnode in question.
1189  *	\param resolvedMountID Pointer to storage for the resolved mount ID.
1190  *	\param resolvedNodeID Pointer to storage for the resolved node ID.
1191  *	\return
1192  *	- \c B_OK, if everything went fine,
1193  *	- another error code, if something went wrong.
1194  */
1195 
1196 status_t
1197 resolve_mount_point_to_volume_root(mount_id mountID, vnode_id nodeID,
1198 	mount_id *resolvedMountID, vnode_id *resolvedNodeID)
1199 {
1200 	// get the node
1201 	struct vnode *node;
1202 	status_t error = get_vnode(mountID, nodeID, &node, false);
1203 	if (error != B_OK)
1204 		return error;
1205 
1206 
1207 	// resolve the node
1208 	struct vnode *resolvedNode = resolve_mount_point_to_volume_root(node);
1209 	if (resolvedNode) {
1210 		put_vnode(node);
1211 		node = resolvedNode;
1212 	}
1213 
1214 	// set the return values
1215 	*resolvedMountID = node->device;
1216 	*resolvedNodeID = node->id;
1217 
1218 	put_vnode(node);
1219 
1220 	return B_OK;
1221 }
1222 
1223 
1224 /**	\brief Resolves a volume root vnode to the underlying mount point vnode.
1225  *
1226  *	Given an arbitrary vnode, the function checks, whether the node is the
1227  *	root of a volume. If it is (and if it is not "/"), the function obtains
1228  *	a reference to the underlying mount point node and returns it.
1229  *
1230  *	\param vnode The vnode in question.
1231  *	\return The mount point vnode the vnode covers, if it is indeed a volume
1232  *			root and not "/", or \c NULL otherwise.
1233  */
1234 
1235 static struct vnode *
1236 resolve_volume_root_to_mount_point(struct vnode *vnode)
1237 {
1238 	if (!vnode)
1239 		return NULL;
1240 
1241 	struct vnode *mountPoint = NULL;
1242 
1243 	recursive_lock_lock(&sMountOpLock);
1244 	struct fs_mount *mount = vnode->mount;
1245 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1246 		mountPoint = mount->covers_vnode;
1247 		inc_vnode_ref_count(mountPoint);
1248 	}
1249 	recursive_lock_unlock(&sMountOpLock);
1250 
1251 	return mountPoint;
1252 }
1253 
1254 
1255 /**	\brief Gets the directory path and leaf name for a given path.
1256  *
1257  *	The supplied \a path is transformed to refer to the directory part of
1258  *	the entry identified by the original path, and into the buffer \a filename
1259  *	the leaf name of the original entry is written.
1260  *	Neither the returned path nor the leaf name can be expected to be
1261  *	canonical.
1262  *
1263  *	\param path The path to be analyzed. Must be able to store at least one
1264  *		   additional character.
1265  *	\param filename The buffer into which the leaf name will be written.
1266  *		   Must be of size B_FILE_NAME_LENGTH at least.
1267  *	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1268  *		   name is longer than \c B_FILE_NAME_LENGTH.
1269  */
1270 
1271 static status_t
1272 get_dir_path_and_leaf(char *path, char *filename)
1273 {
1274 	char *p = strrchr(path, '/');
1275 		// '/' are not allowed in file names!
1276 
1277 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1278 
1279 	if (!p) {
1280 		// this path is single segment with no '/' in it
1281 		// ex. "foo"
1282 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1283 			return B_NAME_TOO_LONG;
1284 		strcpy(path, ".");
1285 	} else {
1286 		p++;
1287 		if (*p == '\0') {
1288 			// special case: the path ends in '/'
1289 			strcpy(filename, ".");
1290 		} else {
1291 			// normal leaf: replace the leaf portion of the path with a '.'
1292 			if (strlcpy(filename, p, B_FILE_NAME_LENGTH)
1293 				>= B_FILE_NAME_LENGTH) {
1294 				return B_NAME_TOO_LONG;
1295 			}
1296 		}
1297 		p[0] = '.';
1298 		p[1] = '\0';
1299 	}
1300 	return B_OK;
1301 }
1302 
1303 
1304 static status_t
1305 entry_ref_to_vnode(mount_id mountID, vnode_id directoryID, const char *name, struct vnode **_vnode)
1306 {
1307 	char clonedName[B_FILE_NAME_LENGTH + 1];
1308 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1309 		return B_NAME_TOO_LONG;
1310 
1311 	// get the directory vnode and let vnode_path_to_vnode() do the rest
1312 	struct vnode *directory;
1313 
1314 	status_t status = get_vnode(mountID, directoryID, &directory, false);
1315 	if (status < 0)
1316 		return status;
1317 
1318 	return vnode_path_to_vnode(directory, clonedName, false, 0, _vnode, NULL, NULL);
1319 }
1320 
1321 
/**	Returns the vnode for the relative path starting at the specified \a vnode.
 *	\a path must not be NULL.
 *	If it returns successfully, \a path contains the name of the last path
 *	component.
 *	The caller's reference to \a vnode is consumed on both success and
 *	failure; on success a reference to the resulting vnode is handed out
 *	via \a _vnode.
 *	\a count is the number of symbolic links traversed so far; it is used
 *	to enforce the MAX_SYM_LINKS limit across recursive invocations.
 */

static status_t
vnode_path_to_vnode(struct vnode *vnode, char *path, bool traverseLeafLink,
	int count, struct vnode **_vnode, vnode_id *_parentID, int *_type)
{
	status_t status = 0;
	vnode_id lastParentID = vnode->id;
	int type = 0;

	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));

	if (path == NULL)
		return B_BAD_VALUE;
		// NOTE(review): on this path the reference to \a vnode is not
		// released - confirm callers never pass a NULL path

	while (true) {
		struct vnode *nextVnode;
		vnode_id vnodeID;
		char *nextPath;

		PRINT(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path, path));

		// done?
		if (path[0] == '\0')
			break;

		// walk to find the next path component ("path" will point to a single
		// path component), and filter out multiple slashes
		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/'; nextPath++);

		if (*nextPath == '/') {
			// terminate the current component and skip consecutive slashes
			*nextPath = '\0';
			do
				nextPath++;
			while (*nextPath == '/');
		}

		// See if the '..' is at the root of a mount and move to the covered
		// vnode so we pass the '..' path to the underlying filesystem
		if (!strcmp("..", path)
			&& vnode->mount->root_vnode == vnode
			&& vnode->mount->covers_vnode) {
			nextVnode = vnode->mount->covers_vnode;
			inc_vnode_ref_count(nextVnode);
			put_vnode(vnode);
			vnode = nextVnode;
		}

		// Check if we have the right to search the current directory vnode.
		// If a file system doesn't have the access() function, we assume that
		// searching a directory is always allowed
		if (FS_CALL(vnode, access))
			status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, X_OK);

		// Tell the filesystem to get the vnode of this path component (if we got the
		// permission from the call above)
		if (status >= B_OK)
			status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, path, &vnodeID, &type);

		if (status < B_OK) {
			put_vnode(vnode);
			return status;
		}

		// Lookup the vnode, the call to fs_lookup should have caused a get_vnode to be called
		// from inside the filesystem, thus the vnode would have to be in the list and it's
		// ref count incremented at this point
		mutex_lock(&sVnodeMutex);
		nextVnode = lookup_vnode(vnode->device, vnodeID);
		mutex_unlock(&sVnodeMutex);

		if (!nextVnode) {
			// pretty screwed up here - the file system found the vnode, but the hash
			// lookup failed, so our internal structures are messed up
			panic("vnode_path_to_vnode: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n",
				vnode->device, vnodeID);
			put_vnode(vnode);
			return B_ENTRY_NOT_FOUND;
		}

		// If the new node is a symbolic link, resolve it (if we've been told to do it)
		// (a symlink leaf is only traversed when traverseLeafLink is set)
		if (S_ISLNK(type) && !(!traverseLeafLink && nextPath[0] == '\0')) {
			size_t bufferSize;
			char *buffer;

			PRINT(("traverse link\n"));

			// it's not exactly nice style using goto in this way, but hey, it works :-/
			if (count + 1 > MAX_SYM_LINKS) {
				status = B_LINK_LIMIT;
				goto resolve_link_error;
			}

			buffer = (char *)malloc(bufferSize = B_PATH_NAME_LENGTH);
			if (buffer == NULL) {
				status = B_NO_MEMORY;
				goto resolve_link_error;
			}

			status = FS_CALL(nextVnode, read_link)(nextVnode->mount->cookie,
				nextVnode->private_node, buffer, &bufferSize);
			if (status < B_OK) {
				free(buffer);

		resolve_link_error:
				// common error exit: drop both references we still hold
				put_vnode(vnode);
				put_vnode(nextVnode);

				return status;
			}
			put_vnode(nextVnode);

			// Check if we start from the root directory or the current
			// directory ("vnode" still points to that one).
			// Cut off all leading slashes if it's the root directory
			path = buffer;
			if (path[0] == '/') {
				// we don't need the old directory anymore
				put_vnode(vnode);

				while (*++path == '/')
					;
				vnode = sRoot;
				inc_vnode_ref_count(vnode);
			}
			inc_vnode_ref_count(vnode);
				// balance the next recursion - we will decrement the ref_count
				// of the vnode, no matter if we succeeded or not

			status = vnode_path_to_vnode(vnode, path, traverseLeafLink, count + 1,
				&nextVnode, &lastParentID, _type);

			free(buffer);

			if (status < B_OK) {
				put_vnode(vnode);
				return status;
			}
		} else
			lastParentID = vnode->id;

		// decrease the ref count on the old dir we just looked up into
		put_vnode(vnode);

		path = nextPath;
		vnode = nextVnode;

		// see if we hit a mount point
		struct vnode *mountPoint = resolve_mount_point_to_volume_root(vnode);
		if (mountPoint) {
			put_vnode(vnode);
			vnode = mountPoint;
		}
	}

	*_vnode = vnode;
	if (_type)
		*_type = type;
	if (_parentID)
		*_parentID = lastParentID;

	return B_OK;
}
1489 
1490 
1491 static status_t
1492 path_to_vnode(char *path, bool traverseLink, struct vnode **_vnode,
1493 	vnode_id *_parentID, bool kernel)
1494 {
1495 	struct vnode *start;
1496 
1497 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
1498 
1499 	if (!path)
1500 		return B_BAD_VALUE;
1501 
1502 	// figure out if we need to start at root or at cwd
1503 	if (*path == '/') {
1504 		if (sRoot == NULL) {
1505 			// we're a bit early, aren't we?
1506 			return B_ERROR;
1507 		}
1508 
1509 		while (*++path == '/')
1510 			;
1511 		start = sRoot;
1512 		inc_vnode_ref_count(start);
1513 	} else {
1514 		struct io_context *context = get_current_io_context(kernel);
1515 
1516 		mutex_lock(&context->io_mutex);
1517 		start = context->cwd;
1518 		inc_vnode_ref_count(start);
1519 		mutex_unlock(&context->io_mutex);
1520 	}
1521 
1522 	return vnode_path_to_vnode(start, path, traverseLink, 0, _vnode, _parentID, NULL);
1523 }
1524 
1525 
1526 /** Returns the vnode in the next to last segment of the path, and returns
1527  *	the last portion in filename.
1528  *	The path buffer must be able to store at least one additional character.
1529  */
1530 
1531 static status_t
1532 path_to_dir_vnode(char *path, struct vnode **_vnode, char *filename, bool kernel)
1533 {
1534 	status_t status = get_dir_path_and_leaf(path, filename);
1535 	if (status != B_OK)
1536 		return status;
1537 
1538 	return path_to_vnode(path, true, _vnode, NULL, kernel);
1539 }
1540 
1541 
1542 /**	\brief Retrieves the directory vnode and the leaf name of an entry referred
1543  *		   to by a FD + path pair.
1544  *
1545  *	\a path must be given in either case. \a fd might be omitted, in which
1546  *	case \a path is either an absolute path or one relative to the current
1547  *	directory. If both a supplied and \a path is relative it is reckoned off
1548  *	of the directory referred to by \a fd. If \a path is absolute \a fd is
1549  *	ignored.
1550  *
1551  *	The caller has the responsibility to call put_vnode() on the returned
1552  *	directory vnode.
1553  *
1554  *	\param fd The FD. May be < 0.
1555  *	\param path The absolute or relative path. Must not be \c NULL. The buffer
1556  *	       is modified by this function. It must have at least room for a
1557  *	       string one character longer than the path it contains.
1558  *	\param _vnode A pointer to a variable the directory vnode shall be written
1559  *		   into.
1560  *	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
1561  *		   the leaf name of the specified entry will be written.
1562  *	\param kernel \c true, if invoked from inside the kernel, \c false if
1563  *		   invoked from userland.
1564  *	\return \c B_OK, if everything went fine, another error code otherwise.
1565  */
1566 
1567 static status_t
1568 fd_and_path_to_dir_vnode(int fd, char *path, struct vnode **_vnode,
1569 	char *filename, bool kernel)
1570 {
1571 	if (!path)
1572 		return B_BAD_VALUE;
1573 	if (fd < 0)
1574 		return path_to_dir_vnode(path, _vnode, filename, kernel);
1575 
1576 	status_t status = get_dir_path_and_leaf(path, filename);
1577 	if (status != B_OK)
1578 		return status;
1579 
1580 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
1581 }
1582 
1583 
1584 static status_t
1585 get_vnode_name(struct vnode *vnode, struct vnode *parent,
1586 	char *name, size_t nameSize)
1587 {
1588 	VNodePutter vnodePutter;
1589 
1590 	// See if vnode is the root of a mount and move to the covered
1591 	// vnode so we get the underlying file system
1592 	if (vnode->mount->root_vnode == vnode && vnode->mount->covers_vnode != NULL) {
1593 		vnode = vnode->mount->covers_vnode;
1594 		inc_vnode_ref_count(vnode);
1595 		vnodePutter.SetTo(vnode);
1596 	}
1597 
1598 	if (FS_CALL(vnode, get_vnode_name)) {
1599 		// The FS supports getting the name of a vnode.
1600 		return FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie,
1601 			vnode->private_node, name, nameSize);
1602 	}
1603 
1604 	// The FS doesn't support getting the name of a vnode. So we search the
1605 	// parent directory for the vnode, if the caller let us.
1606 
1607 	if (parent == NULL)
1608 		return EOPNOTSUPP;
1609 
1610 	fs_cookie cookie;
1611 
1612 	status_t status = FS_CALL(parent, open_dir)(parent->mount->cookie,
1613 		parent->private_node, &cookie);
1614 	if (status >= B_OK) {
1615 		char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
1616 		struct dirent *dirent = (struct dirent *)buffer;
1617 		while (true) {
1618 			uint32 num = 1;
1619 			status = dir_read(parent, cookie, dirent, sizeof(buffer), &num);
1620 			if (status < B_OK)
1621 				break;
1622 
1623 			if (vnode->id == dirent->d_ino) {
1624 				// found correct entry!
1625 				if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
1626 					status = B_BUFFER_OVERFLOW;
1627 				break;
1628 			}
1629 		}
1630 		FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
1631 	}
1632 	return status;
1633 }
1634 
1635 
/**	Gets the full path to a given directory vnode.
 *	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
 *	file system doesn't support this call, it will fall back to iterating
 *	through the parent directory to get the name of the child.
 *
 *	To protect against circular loops, it supports a maximum tree depth
 *	of 256 levels.
 *
 *	Note that the path may not be correct the time this function returns!
 *	It doesn't use any locking to prevent returning the correct path, as
 *	paths aren't safe anyway: the path to a file can change at any time.
 *
 *	It might be a good idea, though, to check if the returned path exists
 *	in the calling function (it's not done here because of efficiency)
 *
 *	The path is assembled back-to-front in a local buffer and copied into
 *	\a buffer at the end; it therefore cannot be longer than
 *	B_PATH_NAME_LENGTH. Returns \c ENOBUFS if either the internal buffer
 *	or \a buffer is too small, \c ELOOP if the depth limit is exceeded.
 */

static status_t
dir_vnode_to_path(struct vnode *vnode, char *buffer, size_t bufferSize)
{
	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));

	/* this implementation is currently bound to B_PATH_NAME_LENGTH */
	char path[B_PATH_NAME_LENGTH];
	int32 insert = sizeof(path);
		// components are written right-to-left, "insert" is the write cursor
	int32 maxLevel = 256;
	int32 length;
	status_t status;

	if (vnode == NULL || buffer == NULL)
		return EINVAL;

	// we don't use get_vnode() here because this call is more
	// efficient and does all we need from get_vnode()
	inc_vnode_ref_count(vnode);

	// resolve a volume root to its mount point
	struct vnode *mountPoint = resolve_volume_root_to_mount_point(vnode);
	if (mountPoint) {
		put_vnode(vnode);
		vnode = mountPoint;
	}

	path[--insert] = '\0';

	while (true) {
		// the name buffer is also used for fs_read_dir()
		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
		char *name = &((struct dirent *)nameBuffer)->d_name[0];
		struct vnode *parentVnode;
		vnode_id parentID, id;
		int type;

		// lookup the parent vnode
		status = FS_CALL(vnode, lookup)(vnode->mount->cookie, vnode->private_node, "..", &parentID, &type);
		if (status < B_OK)
			goto out;

		mutex_lock(&sVnodeMutex);
		parentVnode = lookup_vnode(vnode->device, parentID);
		mutex_unlock(&sVnodeMutex);

		if (parentVnode == NULL) {
			panic("dir_vnode_to_path: could not lookup vnode (mountid 0x%lx vnid 0x%Lx)\n", vnode->device, parentID);
			status = B_ENTRY_NOT_FOUND;
			goto out;
		}

		// resolve a volume root to its mount point
		mountPoint = resolve_volume_root_to_mount_point(parentVnode);
		if (mountPoint) {
			put_vnode(parentVnode);
			parentVnode = mountPoint;
			parentID = parentVnode->id;
		}

		// ".." of the root directory is the root directory itself
		bool hitRoot = (parentVnode == vnode);

		// Does the file system support getting the name of a vnode?
		// If so, get it here...
		if (status == B_OK && FS_CALL(vnode, get_vnode_name))
			status = FS_CALL(vnode, get_vnode_name)(vnode->mount->cookie, vnode->private_node, name, B_FILE_NAME_LENGTH);

		// ... if not, find it out later (by iterating through
		// the parent directory, searching for the id)
		id = vnode->id;

		// release the current vnode, we only need its parent from now on
		put_vnode(vnode);
		vnode = parentVnode;

		if (status < B_OK)
			goto out;

		// ToDo: add an explicit check for loops in about 10 levels to do
		// real loop detection

		// don't go deeper as 'maxLevel' to prevent circular loops
		if (maxLevel-- < 0) {
			status = ELOOP;
			goto out;
		}

		if (hitRoot) {
			// we have reached "/", which means we have constructed the full
			// path
			break;
		}

		if (!FS_CALL(vnode, get_vnode_name)) {
			// If we haven't got the vnode's name yet, we have to search for it
			// in the parent directory now
			fs_cookie cookie;

			status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
			if (status >= B_OK) {
				struct dirent *dirent = (struct dirent *)nameBuffer;
				while (true) {
					uint32 num = 1;
					status = dir_read(vnode, cookie, dirent, sizeof(nameBuffer),
						&num);

					if (status < B_OK)
						break;

					if (id == dirent->d_ino)
						// found correct entry!
						break;
				}
				FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
			}

			if (status < B_OK)
				goto out;
		}

		// add the name infront of the current path
		name[B_FILE_NAME_LENGTH - 1] = '\0';
		length = strlen(name);
		insert -= length;
		if (insert <= 0) {
			status = ENOBUFS;
			goto out;
		}
		memcpy(path + insert, name, length);
		path[--insert] = '/';
	}

	// the root dir will result in an empty path: fix it
	if (path[insert] == '\0')
		path[--insert] = '/';

	PRINT(("  path is: %s\n", path + insert));

	// copy the path to the output buffer
	length = sizeof(path) - insert;
	if (length <= (int)bufferSize)
		memcpy(buffer, path + insert, length);
	else
		status = ENOBUFS;

out:
	put_vnode(vnode);
	return status;
}
1800 
1801 
1802 /**	Checks the length of every path component, and adds a '.'
1803  *	if the path ends in a slash.
1804  *	The given path buffer must be able to store at least one
1805  *	additional character.
1806  */
1807 
1808 static status_t
1809 check_path(char *to)
1810 {
1811 	int32 length = 0;
1812 
1813 	// check length of every path component
1814 
1815 	while (*to) {
1816 		char *begin;
1817 		if (*to == '/')
1818 			to++, length++;
1819 
1820 		begin = to;
1821 		while (*to != '/' && *to)
1822 			to++, length++;
1823 
1824 		if (to - begin > B_FILE_NAME_LENGTH)
1825 			return B_NAME_TOO_LONG;
1826 	}
1827 
1828 	if (length == 0)
1829 		return B_ENTRY_NOT_FOUND;
1830 
1831 	// complete path if there is a slash at the end
1832 
1833 	if (*(to - 1) == '/') {
1834 		if (length > B_PATH_NAME_LENGTH - 2)
1835 			return B_NAME_TOO_LONG;
1836 
1837 		to[0] = '.';
1838 		to[1] = '\0';
1839 	}
1840 
1841 	return B_OK;
1842 }
1843 
1844 
1845 static struct file_descriptor *
1846 get_fd_and_vnode(int fd, struct vnode **_vnode, bool kernel)
1847 {
1848 	struct file_descriptor *descriptor = get_fd(get_current_io_context(kernel), fd);
1849 	if (descriptor == NULL)
1850 		return NULL;
1851 
1852 	if (descriptor->u.vnode == NULL) {
1853 		put_fd(descriptor);
1854 		return NULL;
1855 	}
1856 
1857 	// ToDo: when we can close a file descriptor at any point, investigate
1858 	//	if this is still valid to do (accessing the vnode without ref_count
1859 	//	or locking)
1860 	*_vnode = descriptor->u.vnode;
1861 	return descriptor;
1862 }
1863 
1864 
1865 static struct vnode *
1866 get_vnode_from_fd(int fd, bool kernel)
1867 {
1868 	struct file_descriptor *descriptor;
1869 	struct vnode *vnode;
1870 
1871 	descriptor = get_fd(get_current_io_context(kernel), fd);
1872 	if (descriptor == NULL)
1873 		return NULL;
1874 
1875 	vnode = descriptor->u.vnode;
1876 	if (vnode != NULL)
1877 		inc_vnode_ref_count(vnode);
1878 
1879 	put_fd(descriptor);
1880 	return vnode;
1881 }
1882 
1883 
1884 /**	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
1885  *	only the path will be considered. In this case, the \a path must not be
1886  *	NULL.
1887  *	If \a fd is a valid file descriptor, \a path may be NULL for directories,
1888  *	and should be NULL for files.
1889  */
1890 
1891 static status_t
1892 fd_and_path_to_vnode(int fd, char *path, bool traverseLeafLink,
1893 	struct vnode **_vnode, vnode_id *_parentID, bool kernel)
1894 {
1895 	if (fd < 0 && !path)
1896 		return B_BAD_VALUE;
1897 
1898 	if (fd < 0 || (path != NULL && path[0] == '/')) {
1899 		// no FD or absolute path
1900 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
1901 	}
1902 
1903 	// FD only, or FD + relative path
1904 	struct vnode *vnode = get_vnode_from_fd(fd, kernel);
1905 	if (!vnode)
1906 		return B_FILE_ERROR;
1907 
1908 	if (path != NULL) {
1909 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
1910 			_vnode, _parentID, NULL);
1911 	}
1912 
1913 	// there is no relative path to take into account
1914 
1915 	*_vnode = vnode;
1916 	if (_parentID)
1917 		*_parentID = -1;
1918 
1919 	return B_OK;
1920 }
1921 
1922 
/**	Allocates a new file descriptor of the given \a type, attaches it to
 *	either \a vnode or \a mount (the vnode takes precedence if both are
 *	given) together with \a cookie, and inserts it into the calling
 *	team's I/O context.
 *	\return the new FD number, \c B_NO_MEMORY, or \c B_NO_MORE_FDS if the
 *	context's descriptor table is full.
 *	NOTE(review): on failure neither \a vnode nor \a cookie are cleaned
 *	up here - confirm that callers handle this.
 */
static int
get_new_fd(int type, struct fs_mount *mount, struct vnode *vnode,
	fs_cookie cookie, int openMode, bool kernel)
{
	struct file_descriptor *descriptor;
	int fd;

	descriptor = alloc_fd();
	if (!descriptor)
		return B_NO_MEMORY;

	// a descriptor refers to either a vnode or, e.g. for queries, a mount
	if (vnode)
		descriptor->u.vnode = vnode;
	else
		descriptor->u.mount = mount;
	descriptor->cookie = cookie;

	// select the operation vector matching the descriptor type
	switch (type) {
		case FDTYPE_FILE:
			descriptor->ops = &sFileOps;
			break;
		case FDTYPE_DIR:
			descriptor->ops = &sDirectoryOps;
			break;
		case FDTYPE_ATTR:
			descriptor->ops = &sAttributeOps;
			break;
		case FDTYPE_ATTR_DIR:
			descriptor->ops = &sAttributeDirectoryOps;
			break;
		case FDTYPE_INDEX_DIR:
			descriptor->ops = &sIndexDirectoryOps;
			break;
		case FDTYPE_QUERY:
			descriptor->ops = &sQueryOps;
			break;
		default:
			panic("get_new_fd() called with unknown type %d\n", type);
			break;
	}
	descriptor->type = type;
	descriptor->open_mode = openMode;

	fd = new_fd(get_current_io_context(kernel), descriptor);
	if (fd < 0) {
		free(descriptor);
		return B_NO_MORE_FDS;
	}

	return fd;
}
1974 
1975 #ifdef ADD_DEBUGGER_COMMANDS
1976 
1977 
1978 static void
1979 _dump_advisory_locking(advisory_locking *locking)
1980 {
1981 	if (locking == NULL)
1982 		return;
1983 
1984 	kprintf("   lock:        %ld", locking->lock);
1985 	kprintf("   wait_sem:    %ld", locking->wait_sem);
1986 
1987 	struct advisory_lock *lock = NULL;
1988 	int32 index = 0;
1989 	while ((lock = (advisory_lock *)list_get_next_item(&locking->locks, lock)) != NULL) {
1990 		kprintf("   [%2ld] team:   %ld\n", index, lock->team);
1991 		kprintf("        offset: %Ld\n", lock->offset);
1992 		kprintf("        length: %Ld\n", lock->length);
1993 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
1994 	}
1995 }
1996 
1997 
/**	Prints the interesting fields of the given fs_mount to the kernel
 *	debugger output (helper for the "mount"/"mounts" debugger commands).
 */
static void
_dump_mount(struct fs_mount *mount)
{
	kprintf("MOUNT: %p\n", mount);
	kprintf(" id:            %ld\n", mount->id);
	kprintf(" device_name:   %s\n", mount->device_name);
	kprintf(" fs_name:       %s\n", mount->fs_name);
	kprintf(" cookie:        %p\n", mount->cookie);
	kprintf(" root_vnode:    %p\n", mount->root_vnode);
	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
	kprintf(" partition:     %p\n", mount->partition);
	kprintf(" lock:          %ld\n", mount->rlock.sem);
	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
		mount->owns_file_device ? " owns_file_device" : "");
}
2013 
2014 
/**	Prints the interesting fields of the given vnode, including its
 *	advisory locks, to the kernel debugger output (helper for the
 *	"vnode"/"vnodes" debugger commands).
 */
static void
_dump_vnode(struct vnode *vnode)
{
	kprintf("VNODE: %p\n", vnode);
	kprintf(" device:        %ld\n", vnode->device);
	kprintf(" id:            %Ld\n", vnode->id);
	kprintf(" ref_count:     %ld\n", vnode->ref_count);
	kprintf(" private_node:  %p\n", vnode->private_node);
	kprintf(" mount:         %p\n", vnode->mount);
	kprintf(" covered_by:    %p\n", vnode->covered_by);
	kprintf(" cache_ref:     %p\n", vnode->cache);
	// flags: r = marked for removal, b = busy, u = unpublished
	kprintf(" flags:         %s%s%s\n", vnode->remove ? "r" : "-",
		vnode->busy ? "b" : "-", vnode->unpublished ? "u" : "-");
	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);

	_dump_advisory_locking(vnode->advisory_locking);
}
2032 
2033 
2034 static int
2035 dump_mount(int argc, char **argv)
2036 {
2037 	if (argc != 2) {
2038 		kprintf("usage: mount [id/address]\n");
2039 		return 0;
2040 	}
2041 
2042 	struct fs_mount *mount = NULL;
2043 
2044 	// if the argument looks like a hex number, treat it as such
2045 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2046 		mount = (fs_mount *)strtoul(argv[1], NULL, 16);
2047 		if (IS_USER_ADDRESS(mount)) {
2048 			kprintf("invalid fs_mount address\n");
2049 			return 0;
2050 		}
2051 	} else {
2052 		mount_id id = atoll(argv[1]);
2053 		mount = (fs_mount *)hash_lookup(sMountsTable, (void *)&id);
2054 		if (mount == NULL) {
2055 			kprintf("fs_mount not found\n");
2056 			return 0;
2057 		}
2058 	}
2059 
2060 	_dump_mount(mount);
2061 	return 0;
2062 }
2063 
2064 
2065 static int
2066 dump_mounts(int argc, char **argv)
2067 {
2068 	struct hash_iterator iterator;
2069 	struct fs_mount *mount;
2070 
2071 	kprintf("address     id root       covers     fs_name\n");
2072 
2073 	hash_open(sMountsTable, &iterator);
2074 	while ((mount = (struct fs_mount *)hash_next(sMountsTable, &iterator)) != NULL) {
2075 		kprintf("%p%4ld %p %p %s\n", mount, mount->id, mount->root_vnode,
2076 			mount->covers_vnode, mount->fs_name);
2077 	}
2078 
2079 	hash_close(sMountsTable, &iterator, false);
2080 	return 0;
2081 }
2082 
2083 
2084 static int
2085 dump_vnode(int argc, char **argv)
2086 {
2087 	if (argc < 2) {
2088 		kprintf("usage: vnode [id/device id/address]\n");
2089 		return 0;
2090 	}
2091 
2092 	struct vnode *vnode = NULL;
2093 
2094 	// if the argument looks like a hex number, treat it as such
2095 	if (strlen(argv[1]) > 2 && argv[1][0] == '0' && argv[1][1] == 'x') {
2096 		vnode = (struct vnode *)strtoul(argv[1], NULL, 16);
2097 		if (IS_USER_ADDRESS(vnode)) {
2098 			kprintf("invalid vnode address\n");
2099 			return 0;
2100 		}
2101 		_dump_vnode(vnode);
2102 		return 0;
2103 	}
2104 
2105 	struct hash_iterator iterator;
2106 	mount_id device = -1;
2107 	vnode_id id;
2108 	if (argc > 2) {
2109 		device = atoi(argv[1]);
2110 		id = atoll(argv[2]);
2111 	} else
2112 		id = atoll(argv[1]);
2113 
2114 	hash_open(sVnodeTable, &iterator);
2115 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2116 		if (vnode->id != id || device != -1 && vnode->device != device)
2117 			continue;
2118 
2119 		_dump_vnode(vnode);
2120 	}
2121 
2122 	hash_close(sVnodeTable, &iterator, false);
2123 	return 0;
2124 }
2125 
2126 
2127 static int
2128 dump_vnodes(int argc, char **argv)
2129 {
2130 	// restrict dumped nodes to a certain device if requested
2131 	mount_id device = -1;
2132 	if (argc > 1)
2133 		device = atoi(argv[1]);
2134 
2135 	struct hash_iterator iterator;
2136 	struct vnode *vnode;
2137 
2138 	kprintf("address    dev     inode  ref cache      locking    flags\n");
2139 
2140 	hash_open(sVnodeTable, &iterator);
2141 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2142 		if (device != -1 && vnode->device != device)
2143 			continue;
2144 
2145 		kprintf("%p%4ld%10Ld%5ld %p %p %s%s%s\n", vnode, vnode->device, vnode->id,
2146 			vnode->ref_count, vnode->cache, vnode->advisory_locking,
2147 			vnode->remove ? "r" : "-", vnode->busy ? "b" : "-",
2148 			vnode->unpublished ? "u" : "-");
2149 	}
2150 
2151 	hash_close(sVnodeTable, &iterator, false);
2152 	return 0;
2153 }
2154 
2155 
2156 static int
2157 dump_vnode_caches(int argc, char **argv)
2158 {
2159 	struct hash_iterator iterator;
2160 	struct vnode *vnode;
2161 
2162 	kprintf("address    dev     inode cache          size   pages\n");
2163 
2164 	hash_open(sVnodeTable, &iterator);
2165 	while ((vnode = (struct vnode *)hash_next(sVnodeTable, &iterator)) != NULL) {
2166 		if (vnode->cache == NULL)
2167 			continue;
2168 
2169 		// count pages in cache
2170 		size_t numPages = 0;
2171 		for (struct vm_page *page = vnode->cache->cache->page_list;
2172 				page != NULL; page = page->cache_next) {
2173 			numPages++;
2174 		}
2175 
2176 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id, vnode->cache,
2177 			(vnode->cache->cache->virtual_size + B_PAGE_SIZE - 1) / B_PAGE_SIZE, numPages);
2178 	}
2179 
2180 	hash_close(sVnodeTable, &iterator, false);
2181 	return 0;
2182 }
2183 
2184 
2185 int
2186 dump_io_context(int argc, char **argv)
2187 {
2188 	if (argc > 2) {
2189 		kprintf("usage: io_context [team id/address]\n");
2190 		return 0;
2191 	}
2192 
2193 	struct io_context *context = NULL;
2194 
2195 	if (argc > 1) {
2196 		uint32 num = strtoul(argv[1], NULL, 0);
2197 		if (IS_KERNEL_ADDRESS(num))
2198 			context = (struct io_context *)num;
2199 		else {
2200 			struct team *team = team_get_team_struct_locked(num);
2201 			if (team == NULL) {
2202 				kprintf("could not find team with ID %ld\n", num);
2203 				return 0;
2204 			}
2205 			context = (struct io_context *)team->io_context;
2206 		}
2207 	} else
2208 		context = get_current_io_context(true);
2209 
2210 	kprintf("I/O CONTEXT: %p\n", context);
2211 	kprintf(" cwd vnode:\t%p\n", context->cwd);
2212 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
2213 	kprintf(" max fds:\t%lu\n", context->table_size);
2214 
2215 	if (context->num_used_fds)
2216 		kprintf("   no. type     ops ref open mode        pos cookie\n");
2217 
2218 	for (uint32 i = 0; i < context->table_size; i++) {
2219 		struct file_descriptor *fd = context->fds[i];
2220 		if (fd == NULL)
2221 			continue;
2222 
2223 		kprintf("  %3lu: %ld %p %3ld %4ld %4lx %10Ld %p %s %p\n", i, fd->type, fd->ops,
2224 			fd->ref_count, fd->open_count, fd->open_mode, fd->pos, fd->cookie,
2225 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY ? "mount" : "vnode",
2226 			fd->u.vnode);
2227 	}
2228 
2229 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
2230 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
2231 
2232 	return 0;
2233 }
2234 
2235 #endif	// ADD_DEBUGGER_COMMANDS
2236 
2237 
2238 //	#pragma mark -
2239 //	Public VFS API
2240 
2241 
2242 extern "C" status_t
2243 new_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2244 {
2245 	FUNCTION(("new_vnode(mountID = %ld, vnodeID = %Ld, node = %p)\n",
2246 		mountID, vnodeID, privateNode));
2247 
2248 	if (privateNode == NULL)
2249 		return B_BAD_VALUE;
2250 
2251 	mutex_lock(&sVnodeMutex);
2252 
2253 	// file system integrity check:
2254 	// test if the vnode already exists and bail out if this is the case!
2255 
2256 	// ToDo: the R5 implementation obviously checks for a different cookie
2257 	//	and doesn't panic if they are equal
2258 
2259 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2260 	if (vnode != NULL)
2261 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!", mountID, vnodeID, privateNode, vnode->private_node);
2262 
2263 	status_t status = create_new_vnode(&vnode, mountID, vnodeID);
2264 	if (status == B_OK) {
2265 		vnode->private_node = privateNode;
2266 		vnode->busy = true;
2267 		vnode->unpublished = true;
2268 	}
2269 
2270 	PRINT(("returns: %s\n", strerror(status)));
2271 
2272 	mutex_unlock(&sVnodeMutex);
2273 	return status;
2274 }
2275 
2276 
2277 extern "C" status_t
2278 publish_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode privateNode)
2279 {
2280 	FUNCTION(("publish_vnode()\n"));
2281 
2282 	mutex_lock(&sVnodeMutex);
2283 
2284 	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
2285 	status_t status = B_OK;
2286 
2287 	if (vnode != NULL && vnode->busy && vnode->unpublished
2288 		&& vnode->private_node == privateNode) {
2289 		vnode->busy = false;
2290 		vnode->unpublished = false;
2291 	} else if (vnode == NULL && privateNode != NULL) {
2292 		status = create_new_vnode(&vnode, mountID, vnodeID);
2293 		if (status == B_OK)
2294 			vnode->private_node = privateNode;
2295 	} else
2296 		status = B_BAD_VALUE;
2297 
2298 	PRINT(("returns: %s\n", strerror(status)));
2299 
2300 	mutex_unlock(&sVnodeMutex);
2301 	return status;
2302 }
2303 
2304 
2305 extern "C" status_t
2306 get_vnode(mount_id mountID, vnode_id vnodeID, fs_vnode *_fsNode)
2307 {
2308 	struct vnode *vnode;
2309 
2310 	status_t status = get_vnode(mountID, vnodeID, &vnode, true);
2311 	if (status < B_OK)
2312 		return status;
2313 
2314 	*_fsNode = vnode->private_node;
2315 	return B_OK;
2316 }
2317 
2318 
2319 extern "C" status_t
2320 put_vnode(mount_id mountID, vnode_id vnodeID)
2321 {
2322 	struct vnode *vnode;
2323 
2324 	mutex_lock(&sVnodeMutex);
2325 	vnode = lookup_vnode(mountID, vnodeID);
2326 	mutex_unlock(&sVnodeMutex);
2327 
2328 	if (vnode)
2329 		dec_vnode_ref_count(vnode, true);
2330 
2331 	return B_OK;
2332 }
2333 
2334 
extern "C" status_t
remove_vnode(mount_id mountID, vnode_id vnodeID)
{
	// Public FS API: marks the vnode to be removed when its last
	// reference goes away. An unpublished node is taken down immediately.
	struct vnode *vnode;
	bool remove = false;

	mutex_lock(&sVnodeMutex);

	vnode = lookup_vnode(mountID, vnodeID);
	if (vnode != NULL) {
		if (vnode->covered_by != NULL) {
			// this vnode is in use (covered_by is set) — refuse removal
			mutex_unlock(&sVnodeMutex);
			return B_BUSY;
		}

		vnode->remove = true;
		if (vnode->unpublished) {
			// prepare the vnode for deletion; marking it busy keeps
			// others from picking it up while we free it below
			vnode->busy = true;
			remove = true;
		}
	}

	mutex_unlock(&sVnodeMutex);

	if (remove) {
		// if the vnode hasn't been published yet, we delete it here
		// (outside the lock; the node is flagged busy above)
		atomic_add(&vnode->ref_count, -1);
		free_vnode(vnode, true);
	}

	return B_OK;
}
2369 
2370 
2371 extern "C" status_t
2372 unremove_vnode(mount_id mountID, vnode_id vnodeID)
2373 {
2374 	struct vnode *vnode;
2375 
2376 	mutex_lock(&sVnodeMutex);
2377 
2378 	vnode = lookup_vnode(mountID, vnodeID);
2379 	if (vnode)
2380 		vnode->remove = false;
2381 
2382 	mutex_unlock(&sVnodeMutex);
2383 	return B_OK;
2384 }
2385 
2386 
2387 //	#pragma mark -
2388 //	Functions the VFS exports for other parts of the kernel
2389 
2390 
2391 /** Acquires another reference to the vnode that has to be released
2392  *	by calling vfs_put_vnode().
2393  */
2394 
2395 void
2396 vfs_acquire_vnode(void *_vnode)
2397 {
2398 	inc_vnode_ref_count((struct vnode *)_vnode);
2399 }
2400 
2401 
/** This is currently called from file_cache_create() only.
 *	It's probably a temporary solution as long as devfs requires that
 *	fs_read_pages()/fs_write_pages() are called with the standard
 *	open cookie and not with a device cookie.
 *	If that's done differently, remove this call; it has no other
 *	purpose.
 */

extern "C" status_t
vfs_get_cookie_from_fd(int fd, void **_cookie)
{
	struct file_descriptor *descriptor;

	descriptor = get_fd(get_current_io_context(true), fd);
	if (descriptor == NULL)
		return B_FILE_ERROR;

	// NOTE(review): the descriptor obtained via get_fd() is never released
	// here (no put_fd()) — presumably deliberate so the cookie stays valid
	// for the caller, but verify that the reference isn't simply leaked.
	*_cookie = descriptor->cookie;
	return B_OK;
}
2422 
2423 
2424 extern "C" int
2425 vfs_get_vnode_from_fd(int fd, bool kernel, void **vnode)
2426 {
2427 	*vnode = get_vnode_from_fd(fd, kernel);
2428 
2429 	if (*vnode == NULL)
2430 		return B_FILE_ERROR;
2431 
2432 	return B_NO_ERROR;
2433 }
2434 
2435 
2436 extern "C" status_t
2437 vfs_get_vnode_from_path(const char *path, bool kernel, void **_vnode)
2438 {
2439 	struct vnode *vnode;
2440 	status_t status;
2441 	char buffer[B_PATH_NAME_LENGTH + 1];
2442 
2443 	PRINT(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n", path, kernel));
2444 
2445 	strlcpy(buffer, path, sizeof(buffer));
2446 
2447 	status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2448 	if (status < B_OK)
2449 		return status;
2450 
2451 	*_vnode = vnode;
2452 	return B_OK;
2453 }
2454 
2455 
2456 extern "C" status_t
2457 vfs_get_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
2458 {
2459 	struct vnode *vnode;
2460 
2461 	status_t status = get_vnode(mountID, vnodeID, &vnode, false);
2462 	if (status < B_OK)
2463 		return status;
2464 
2465 	*_vnode = vnode;
2466 	return B_OK;
2467 }
2468 
2469 
2470 extern "C" status_t
2471 vfs_entry_ref_to_vnode(mount_id mountID, vnode_id directoryID,
2472 	const char *name, void **_vnode)
2473 {
2474 	return entry_ref_to_vnode(mountID, directoryID, name, (struct vnode **)_vnode);
2475 }
2476 
2477 
2478 extern "C" void
2479 vfs_vnode_to_node_ref(void *_vnode, mount_id *_mountID, vnode_id *_vnodeID)
2480 {
2481 	struct vnode *vnode = (struct vnode *)_vnode;
2482 
2483 	*_mountID = vnode->device;
2484 	*_vnodeID = vnode->id;
2485 }
2486 
2487 
extern "C" status_t
vfs_lookup_vnode(mount_id mountID, vnode_id vnodeID, void **_vnode)
{
	// Looks up a vnode WITHOUT acquiring a reference and WITHOUT
	// taking sVnodeMutex — see the ToDo below for why that is (barely) OK.
	// ToDo: this currently doesn't use the sVnodeMutex lock - that's
	//	because it's only called from file_cache_create() with that
	//	lock held anyway (as it should be called from fs_read_vnode()).
	//	Find a better solution!
	struct vnode *vnode = lookup_vnode(mountID, vnodeID);
	if (vnode == NULL)
		return B_ERROR;

	*_vnode = vnode;
	return B_OK;
}
2502 
2503 
2504 extern "C" status_t
2505 vfs_get_fs_node_from_path(mount_id mountID, const char *path, bool kernel, void **_node)
2506 {
2507 	char buffer[B_PATH_NAME_LENGTH + 1];
2508 	struct vnode *vnode;
2509 	status_t status;
2510 
2511 	PRINT(("vfs_get_fs_node_from_path(mountID = %ld, path = \"%s\", kernel %d)\n", mountID, path, kernel));
2512 
2513 	strlcpy(buffer, path, sizeof(buffer));
2514 	status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
2515 	if (status < B_OK)
2516 		return status;
2517 
2518 	if (vnode->device != mountID) {
2519 		// wrong mount ID - must not gain access on foreign file system nodes
2520 		put_vnode(vnode);
2521 		return B_BAD_VALUE;
2522 	}
2523 
2524 	*_node = vnode->private_node;
2525 	return B_OK;
2526 }
2527 
2528 
/**	Finds the full path to the file that contains the module \a moduleName,
 *	puts it into \a pathBuffer, and returns B_OK for success.
 *	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
 *	\c B_ENTRY_NOT_FOUND if no file could be found.
 *	\a pathBuffer is clobbered in any case and must not be relied on if this
 *	functions returns unsuccessfully.
 */

status_t
vfs_get_module_path(const char *basePath, const char *moduleName, char *pathBuffer,
	size_t bufferSize)
{
	struct vnode *dir, *file;
	status_t status;
	size_t length;
	char *path;

	if (bufferSize == 0 || strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
		return B_BUFFER_OVERFLOW;

	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
	if (status < B_OK)
		return status;

	// the path buffer had been clobbered by the above call
	length = strlcpy(pathBuffer, basePath, bufferSize);
	if (pathBuffer[length - 1] != '/')
		pathBuffer[length++] = '/';

	// "path" walks along behind the base path; each loop iteration below
	// appends one component of moduleName to it
	path = pathBuffer + length;
	bufferSize -= length;

	// descend the module hierarchy one '/'-separated component at a time;
	// "dir" always holds a reference to the directory we're currently in
	while (moduleName) {
		int type;

		char *nextPath = strchr(moduleName, '/');
		if (nextPath == NULL)
			length = strlen(moduleName);
		else {
			length = nextPath - moduleName;
			nextPath++;
		}

		// +1 for the terminating '\0' (and a possible trailing '/')
		if (length + 1 >= bufferSize) {
			status = B_BUFFER_OVERFLOW;
			goto err;
		}

		memcpy(path, moduleName, length);
		path[length] = '\0';
		moduleName = nextPath;

		status = vnode_path_to_vnode(dir, path, true, 0, &file, NULL, &type);
		if (status < B_OK)
			goto err;

		put_vnode(dir);

		if (S_ISDIR(type)) {
			// goto the next directory
			path[length] = '/';
			path[length + 1] = '\0';
			path += length + 1;
			bufferSize -= length + 1;

			dir = file;
		} else if (S_ISREG(type)) {
			// it's a file so it should be what we've searched for
			put_vnode(file);

			return B_OK;
		} else {
			PRINT(("vfs_get_module_path(): something is strange here: %d...\n", type));
			status = B_ERROR;
			goto err;
		}
	}

	// if we got here, the moduleName just pointed to a directory, not to
	// a real module - what should we do in this case?
	status = B_ENTRY_NOT_FOUND;

err:
	put_vnode(dir);
	return status;
}
2615 
2616 
/**	\brief Normalizes a given path.
 *
 *	The path must refer to an existing or non-existing entry in an existing
 *	directory, that is chopping off the leaf component the remaining path must
 *	refer to an existing directory.
 *
 *	The returned will be canonical in that it will be absolute, will not
 *	contain any "." or ".." components or duplicate occurrences of '/'s,
 *	and none of the directory components will by symbolic links.
 *
 *	Any two paths referring to the same entry, will result in the same
 *	normalized path (well, that is pretty much the definition of `normalized',
 *	isn't it :-).
 *
 *	\param path The path to be normalized.
 *	\param buffer The buffer into which the normalized path will be written.
 *	\param bufferSize The size of \a buffer.
 *	\param kernel \c true, if the IO context of the kernel shall be used,
 *		   otherwise that of the team this thread belongs to. Only relevant,
 *		   if the path is relative (to get the CWD).
 *	\return \c B_OK if everything went fine, another error code otherwise.
 */

status_t
vfs_normalize_path(const char *path, char *buffer, size_t bufferSize,
	bool kernel)
{
	if (!path || !buffer || bufferSize < 1)
		return B_BAD_VALUE;

	PRINT(("vfs_normalize_path(`%s')\n", path));

	// copy the supplied path to the stack, so it can be modified
	char mutablePath[B_PATH_NAME_LENGTH + 1];
	if (strlcpy(mutablePath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
		return B_NAME_TOO_LONG;

	// get the dir vnode and the leaf name
	struct vnode *dirNode;
	char leaf[B_FILE_NAME_LENGTH];
	status_t error = path_to_dir_vnode(mutablePath, &dirNode, leaf, kernel);
	if (error != B_OK) {
		PRINT(("vfs_normalize_path(): failed to get dir vnode: %s\n", strerror(error)));
		return error;
	}

	// if the leaf is "." or "..", we directly get the correct directory
	// vnode and ignore the leaf later
	bool isDir = (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0);
	if (isDir)
		error = vnode_path_to_vnode(dirNode, leaf, false, 0, &dirNode, NULL, NULL);
	if (error != B_OK) {
		// NOTE(review): no put_vnode(dirNode) on this path — presumably
		// vnode_path_to_vnode() consumes the passed-in reference even on
		// failure; verify against its implementation.
		PRINT(("vfs_normalize_path(): failed to get dir vnode for \".\" or \"..\": %s\n", strerror(error)));
		return error;
	}

	// get the directory path
	error = dir_vnode_to_path(dirNode, buffer, bufferSize);
	put_vnode(dirNode);
	if (error < B_OK) {
		PRINT(("vfs_normalize_path(): failed to get dir path: %s\n", strerror(error)));
		return error;
	}

	// append the leaf name
	if (!isDir) {
		// insert a directory separator only if this is not the file system root
		if ((strcmp(buffer, "/") != 0
			 && strlcat(buffer, "/", bufferSize) >= bufferSize)
			|| strlcat(buffer, leaf, bufferSize) >= bufferSize) {
			return B_NAME_TOO_LONG;
		}
	}

	PRINT(("vfs_normalize_path() -> `%s'\n", buffer));
	return B_OK;
}
2694 
2695 
2696 extern "C" void
2697 vfs_put_vnode(void *_vnode)
2698 {
2699 	put_vnode((struct vnode *)_vnode);
2700 }
2701 
2702 
2703 extern "C" status_t
2704 vfs_get_cwd(mount_id *_mountID, vnode_id *_vnodeID)
2705 {
2706 	// Get current working directory from io context
2707 	struct io_context *context = get_current_io_context(false);
2708 	status_t status = B_OK;
2709 
2710 	mutex_lock(&context->io_mutex);
2711 
2712 	if (context->cwd != NULL) {
2713 		*_mountID = context->cwd->device;
2714 		*_vnodeID = context->cwd->id;
2715 	} else
2716 		status = B_ERROR;
2717 
2718 	mutex_unlock(&context->io_mutex);
2719 	return status;
2720 }
2721 
2722 
2723 extern "C" bool
2724 vfs_can_page(void *_vnode, void *cookie)
2725 {
2726 	struct vnode *vnode = (struct vnode *)_vnode;
2727 
2728 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
2729 
2730 	if (FS_CALL(vnode, can_page))
2731 		return FS_CALL(vnode, can_page)(vnode->mount->cookie, vnode->private_node, cookie);
2732 
2733 	return false;
2734 }
2735 
2736 
2737 extern "C" status_t
2738 vfs_read_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count, size_t *_numBytes)
2739 {
2740 	struct vnode *vnode = (struct vnode *)_vnode;
2741 
2742 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
2743 
2744 	return FS_CALL(vnode, read_pages)(vnode->mount->cookie, vnode->private_node, cookie, pos, vecs, count, _numBytes);
2745 }
2746 
2747 
2748 extern "C" status_t
2749 vfs_write_pages(void *_vnode, void *cookie, off_t pos, const iovec *vecs, size_t count, size_t *_numBytes)
2750 {
2751 	struct vnode *vnode = (struct vnode *)_vnode;
2752 
2753 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs, pos));
2754 
2755 	return FS_CALL(vnode, write_pages)(vnode->mount->cookie, vnode->private_node, cookie, pos, vecs, count, _numBytes);
2756 }
2757 
2758 
extern "C" status_t
vfs_get_vnode_cache(void *_vnode, vm_cache_ref **_cache, bool allocate)
{
	// Returns the vnode's VM cache, creating it on demand if \a allocate
	// is true; fails with B_BAD_VALUE if there is none and we may not
	// create one.
	struct vnode *vnode = (struct vnode *)_vnode;

	// fast path without taking sVnodeMutex.
	// NOTE(review): this double-checked pattern relies on vnode->cache
	// being written atomically and never reset — verify.
	if (vnode->cache != NULL) {
		*_cache = vnode->cache;
		return B_OK;
	}

	mutex_lock(&sVnodeMutex);

	status_t status = B_OK;

	// The cache could have been created in the meantime
	if (vnode->cache == NULL) {
		if (allocate)
			status = vm_create_vnode_cache(vnode, &vnode->cache);
		else
			status = B_BAD_VALUE;
	}

	if (status == B_OK)
		*_cache = vnode->cache;

	mutex_unlock(&sVnodeMutex);
	return status;
}
2787 
2788 
2789 status_t
2790 vfs_get_file_map(void *_vnode, off_t offset, size_t size, file_io_vec *vecs, size_t *_count)
2791 {
2792 	struct vnode *vnode = (struct vnode *)_vnode;
2793 
2794 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n", vnode, vecs, offset, size));
2795 
2796 	return FS_CALL(vnode, get_file_map)(vnode->mount->cookie, vnode->private_node, offset, size, vecs, _count);
2797 }
2798 
2799 
2800 status_t
2801 vfs_stat_vnode(void *_vnode, struct stat *stat)
2802 {
2803 	struct vnode *vnode = (struct vnode *)_vnode;
2804 
2805 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
2806 		vnode->private_node, stat);
2807 
2808 	// fill in the st_dev and st_ino fields
2809 	if (status == B_OK) {
2810 		stat->st_dev = vnode->device;
2811 		stat->st_ino = vnode->id;
2812 	}
2813 
2814 	return status;
2815 }
2816 
2817 
2818 status_t
2819 vfs_get_vnode_name(void *_vnode, char *name, size_t nameSize)
2820 {
2821 	return get_vnode_name((struct vnode *)_vnode, NULL, name, nameSize);
2822 }
2823 
2824 
/**	Closes all file descriptors of the specified I/O context that
 *	don't have the O_CLOEXEC flag set.
 */

void
vfs_exec_io_context(void *_context)
{
	struct io_context *context = (struct io_context *)_context;
	uint32 i;

	for (i = 0; i < context->table_size; i++) {
		// the lock is taken per slot so that the (potentially blocking)
		// close_fd()/put_fd() below can run without holding io_mutex
		mutex_lock(&context->io_mutex);

		struct file_descriptor *descriptor = context->fds[i];
		bool remove = false;

		if (descriptor != NULL && fd_close_on_exec(context, i)) {
			// detach the descriptor from the table while locked ...
			context->fds[i] = NULL;
			context->num_used_fds--;

			remove = true;
		}

		mutex_unlock(&context->io_mutex);

		// ... and actually close it afterwards, unlocked
		if (remove) {
			close_fd(descriptor);
			put_fd(descriptor);
		}
	}
}
2856 
2857 
2858 /** Sets up a new io_control structure, and inherits the properties
2859  *	of the parent io_control if it is given.
2860  */
2861 
2862 void *
2863 vfs_new_io_context(void *_parentContext)
2864 {
2865 	size_t tableSize;
2866 	struct io_context *context;
2867 	struct io_context *parentContext;
2868 
2869 	context = (io_context *)malloc(sizeof(struct io_context));
2870 	if (context == NULL)
2871 		return NULL;
2872 
2873 	memset(context, 0, sizeof(struct io_context));
2874 
2875 	parentContext = (struct io_context *)_parentContext;
2876 	if (parentContext)
2877 		tableSize = parentContext->table_size;
2878 	else
2879 		tableSize = DEFAULT_FD_TABLE_SIZE;
2880 
2881 	// allocate space for FDs and their close-on-exec flag
2882 	context->fds = (file_descriptor **)malloc(sizeof(struct file_descriptor *) * tableSize
2883 		+ tableSize / 8);
2884 	if (context->fds == NULL) {
2885 		free(context);
2886 		return NULL;
2887 	}
2888 
2889 	memset(context->fds, 0, sizeof(struct file_descriptor *) * tableSize
2890 		+ tableSize / 8);
2891 	context->fds_close_on_exec = (uint8 *)(context->fds + tableSize);
2892 
2893 	if (mutex_init(&context->io_mutex, "I/O context") < 0) {
2894 		free(context->fds);
2895 		free(context);
2896 		return NULL;
2897 	}
2898 
2899 	// Copy all parent files which don't have the O_CLOEXEC flag set
2900 
2901 	if (parentContext) {
2902 		size_t i;
2903 
2904 		mutex_lock(&parentContext->io_mutex);
2905 
2906 		context->cwd = parentContext->cwd;
2907 		if (context->cwd)
2908 			inc_vnode_ref_count(context->cwd);
2909 
2910 		for (i = 0; i < tableSize; i++) {
2911 			struct file_descriptor *descriptor = parentContext->fds[i];
2912 
2913 			if (descriptor != NULL && !fd_close_on_exec(parentContext, i)) {
2914 				context->fds[i] = descriptor;
2915 				context->num_used_fds++;
2916 				atomic_add(&descriptor->ref_count, 1);
2917 				atomic_add(&descriptor->open_count, 1);
2918 			}
2919 		}
2920 
2921 		mutex_unlock(&parentContext->io_mutex);
2922 	} else {
2923 		context->cwd = sRoot;
2924 
2925 		if (context->cwd)
2926 			inc_vnode_ref_count(context->cwd);
2927 	}
2928 
2929 	context->table_size = tableSize;
2930 
2931 	list_init(&context->node_monitors);
2932 	context->max_monitors = MAX_NODE_MONITORS;
2933 
2934 	return context;
2935 }
2936 
2937 
2938 status_t
2939 vfs_free_io_context(void *_ioContext)
2940 {
2941 	struct io_context *context = (struct io_context *)_ioContext;
2942 	uint32 i;
2943 
2944 	if (context->cwd)
2945 		dec_vnode_ref_count(context->cwd, false);
2946 
2947 	mutex_lock(&context->io_mutex);
2948 
2949 	for (i = 0; i < context->table_size; i++) {
2950 		if (struct file_descriptor *descriptor = context->fds[i]) {
2951 			close_fd(descriptor);
2952 			put_fd(descriptor);
2953 		}
2954 	}
2955 
2956 	mutex_unlock(&context->io_mutex);
2957 
2958 	mutex_destroy(&context->io_mutex);
2959 
2960 	remove_node_monitors(context);
2961 	free(context->fds);
2962 	free(context);
2963 
2964 	return B_OK;
2965 }
2966 
2967 
2968 static status_t
2969 vfs_resize_fd_table(struct io_context *context, const int newSize)
2970 {
2971 	void *fds;
2972 	int	status = B_OK;
2973 
2974 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
2975 		return EINVAL;
2976 
2977 	mutex_lock(&context->io_mutex);
2978 
2979 	if ((size_t)newSize < context->table_size) {
2980 		// shrink the fd table
2981 		int i;
2982 
2983 		// Make sure none of the fds being dropped are in use
2984 		for(i = context->table_size; i-- > newSize;) {
2985 			if (context->fds[i]) {
2986 				status = EBUSY;
2987 				goto out;
2988 			}
2989 		}
2990 
2991 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
2992 		if (fds == NULL) {
2993 			status = ENOMEM;
2994 			goto out;
2995 		}
2996 
2997 		memcpy(fds, context->fds, sizeof(struct file_descriptor *) * newSize);
2998 	} else {
2999 		// enlarge the fd table
3000 
3001 		fds = malloc(sizeof(struct file_descriptor *) * newSize);
3002 		if (fds == NULL) {
3003 			status = ENOMEM;
3004 			goto out;
3005 		}
3006 
3007 		// copy the fd array, and zero the additional slots
3008 		memcpy(fds, context->fds, sizeof(void *) * context->table_size);
3009 		memset((char *)fds + (sizeof(void *) * context->table_size), 0,
3010 			sizeof(void *) * (newSize - context->table_size));
3011 	}
3012 
3013 	free(context->fds);
3014 	context->fds = (file_descriptor **)fds;
3015 	context->table_size = newSize;
3016 
3017 out:
3018 	mutex_unlock(&context->io_mutex);
3019 	return status;
3020 }
3021 
3022 
3023 int
3024 vfs_getrlimit(int resource, struct rlimit * rlp)
3025 {
3026 	if (!rlp)
3027 		return -1;
3028 
3029 	switch (resource) {
3030 		case RLIMIT_NOFILE:
3031 		{
3032 			struct io_context *ioctx = get_current_io_context(false);
3033 
3034 			mutex_lock(&ioctx->io_mutex);
3035 
3036 			rlp->rlim_cur = ioctx->table_size;
3037 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
3038 
3039 			mutex_unlock(&ioctx->io_mutex);
3040 
3041 			return 0;
3042 		}
3043 
3044 		default:
3045 			return -1;
3046 	}
3047 }
3048 
3049 
3050 int
3051 vfs_setrlimit(int resource, const struct rlimit * rlp)
3052 {
3053 	if (!rlp)
3054 		return -1;
3055 
3056 	switch (resource) {
3057 		case RLIMIT_NOFILE:
3058 			return vfs_resize_fd_table(get_current_io_context(false), rlp->rlim_cur);
3059 
3060 		default:
3061 			return -1;
3062 	}
3063 }
3064 
3065 
status_t
vfs_init(kernel_args *args)
{
	// One-time VFS initialization: global tables, locks, caches, and the
	// kernel debugger commands. The order below matters — the tables and
	// locks must exist before any mount/vnode operation can run.
	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE, offsetof(struct vnode, next),
		&vnode_compare, &vnode_hash);
	if (sVnodeTable == NULL)
		panic("vfs_init: error creating vnode hash table\n");

	list_init_etc(&sUnusedVnodeList, offsetof(struct vnode, unused_link));

	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE, offsetof(struct fs_mount, next),
		&mount_compare, &mount_hash);
	if (sMountsTable == NULL)
		panic("vfs_init: error creating mounts hash table\n");

	node_monitor_init();

	// no root volume is mounted yet
	sRoot = NULL;

	if (mutex_init(&sFileSystemsMutex, "vfs_lock") < 0)
		panic("vfs_init: error allocating file systems lock\n");

	if (recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock") < 0)
		panic("vfs_init: error allocating mount op lock\n");

	if (mutex_init(&sMountMutex, "vfs_mount_lock") < 0)
		panic("vfs_init: error allocating mount lock\n");

	if (mutex_init(&sVnodeMutex, "vfs_vnode_lock") < 0)
		panic("vfs_init: error allocating vnode lock\n");

	if (block_cache_init() != B_OK)
		return B_ERROR;

#ifdef ADD_DEBUGGER_COMMANDS
	// add some debugger commands
	add_debugger_command("vnode", &dump_vnode, "info about the specified vnode");
	add_debugger_command("vnodes", &dump_vnodes, "list all vnodes (from the specified device)");
	add_debugger_command("vnode_caches", &dump_vnode_caches, "list all vnode caches");
	add_debugger_command("mount", &dump_mount, "info about the specified fs_mount");
	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
	add_debugger_command("io_context", &dump_io_context, "info about the I/O context");
#endif

	return file_cache_init();
}
3112 
3113 
3114 //	#pragma mark -
3115 //	The filetype-dependent implementations (fd_ops + open/create/rename/remove, ...)
3116 
3117 
/** Calls fs_create() on the given directory and returns a new
 *	file descriptor for the created file. On failure after creation,
 *	the partially created file is closed and unlinked again.
 */

static int
create_vnode(struct vnode *directory, const char *name, int openMode, int perms, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	vnode_id newID;
	int status;

	// a file system without a create hook is treated as read-only
	if (FS_CALL(directory, create) == NULL)
		return EROFS;

	status = FS_CALL(directory, create)(directory->mount->cookie, directory->private_node, name, openMode, perms, &cookie, &newID);
	if (status < B_OK)
		return status;

	// the create hook reported a new node ID; it must be in the table now
	mutex_lock(&sVnodeMutex);
	vnode = lookup_vnode(directory->device, newID);
	mutex_unlock(&sVnodeMutex);

	if (vnode == NULL) {
		dprintf("vfs: fs_create() returned success but there is no vnode!");
		return EINVAL;
	}

	if ((status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// something went wrong, clean up

	// close the file again, release our reference, and remove the
	// just-created entry from the directory
	FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
	put_vnode(vnode);

	FS_CALL(directory, unlink)(directory->mount->cookie, directory->private_node, name);

	return status;
}
3159 
3160 
3161 /** Calls fs_open() on the given vnode and returns a new
3162  *	file descriptor for it
3163  */
3164 
3165 static int
3166 open_vnode(struct vnode *vnode, int openMode, bool kernel)
3167 {
3168 	fs_cookie cookie;
3169 	int status;
3170 
3171 	status = FS_CALL(vnode, open)(vnode->mount->cookie, vnode->private_node, openMode, &cookie);
3172 	if (status < 0)
3173 		return status;
3174 
3175 	status = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
3176 	if (status < 0) {
3177 		FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, cookie);
3178 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3179 	}
3180 	return status;
3181 }
3182 
3183 
3184 /** Calls fs open_dir() on the given vnode and returns a new
3185  *	file descriptor for it
3186  */
3187 
3188 static int
3189 open_dir_vnode(struct vnode *vnode, bool kernel)
3190 {
3191 	fs_cookie cookie;
3192 	int status;
3193 
3194 	status = FS_CALL(vnode, open_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3195 	if (status < B_OK)
3196 		return status;
3197 
3198 	// file is opened, create a fd
3199 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, 0, kernel);
3200 	if (status >= 0)
3201 		return status;
3202 
3203 	FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3204 	FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3205 
3206 	return status;
3207 }
3208 
3209 
3210 /** Calls fs open_attr_dir() on the given vnode and returns a new
3211  *	file descriptor for it.
3212  *	Used by attr_dir_open(), and attr_dir_open_fd().
3213  */
3214 
3215 static int
3216 open_attr_dir_vnode(struct vnode *vnode, bool kernel)
3217 {
3218 	fs_cookie cookie;
3219 	int status;
3220 
3221 	if (FS_CALL(vnode, open_attr_dir) == NULL)
3222 		return EOPNOTSUPP;
3223 
3224 	status = FS_CALL(vnode, open_attr_dir)(vnode->mount->cookie, vnode->private_node, &cookie);
3225 	if (status < 0)
3226 		return status;
3227 
3228 	// file is opened, create a fd
3229 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, 0, kernel);
3230 	if (status >= 0)
3231 		return status;
3232 
3233 	FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, cookie);
3234 	FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, cookie);
3235 
3236 	return status;
3237 }
3238 
3239 
3240 static int
3241 file_create_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, int perms, bool kernel)
3242 {
3243 	struct vnode *directory;
3244 	int status;
3245 
3246 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, kernel %d\n", name, openMode, perms, kernel));
3247 
3248 	// get directory to put the new file in
3249 	status = get_vnode(mountID, directoryID, &directory, false);
3250 	if (status < B_OK)
3251 		return status;
3252 
3253 	status = create_vnode(directory, name, openMode, perms, kernel);
3254 	put_vnode(directory);
3255 
3256 	return status;
3257 }
3258 
3259 
3260 static int
3261 file_create(int fd, char *path, int openMode, int perms, bool kernel)
3262 {
3263 	char name[B_FILE_NAME_LENGTH];
3264 	struct vnode *directory;
3265 	int status;
3266 
3267 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path, openMode, perms, kernel));
3268 
3269 	// get directory to put the new file in
3270 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
3271 	if (status < 0)
3272 		return status;
3273 
3274 	status = create_vnode(directory, name, openMode, perms, kernel);
3275 
3276 	put_vnode(directory);
3277 	return status;
3278 }
3279 
3280 
3281 static int
3282 file_open_entry_ref(mount_id mountID, vnode_id directoryID, const char *name, int openMode, bool kernel)
3283 {
3284 	struct vnode *vnode;
3285 	int status;
3286 
3287 	if (name == NULL || *name == '\0')
3288 		return B_BAD_VALUE;
3289 
3290 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
3291 		mountID, directoryID, name, openMode));
3292 
3293 	// get the vnode matching the entry_ref
3294 	status = entry_ref_to_vnode(mountID, directoryID, name, &vnode);
3295 	if (status < B_OK)
3296 		return status;
3297 
3298 	status = open_vnode(vnode, openMode, kernel);
3299 	if (status < B_OK)
3300 		put_vnode(vnode);
3301 
3302 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID, directoryID, vnode->id, name);
3303 	return status;
3304 }
3305 
3306 
3307 static int
3308 file_open(int fd, char *path, int openMode, bool kernel)
3309 {
3310 	int status = B_OK;
3311 	bool traverse = ((openMode & O_NOTRAVERSE) == 0);
3312 
3313 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
3314 		fd, path, openMode, kernel));
3315 
3316 	// get the vnode matching the vnode + path combination
3317 	struct vnode *vnode = NULL;
3318 	vnode_id parentID;
3319 	status = fd_and_path_to_vnode(fd, path, traverse, &vnode, &parentID, kernel);
3320 	if (status != B_OK)
3321 		return status;
3322 
3323 	// open the vnode
3324 	status = open_vnode(vnode, openMode, kernel);
3325 	// put only on error -- otherwise our reference was transferred to the FD
3326 	if (status < B_OK)
3327 		put_vnode(vnode);
3328 
3329 	cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
3330 		vnode->device, parentID, vnode->id, NULL);
3331 
3332 	return status;
3333 }
3334 
3335 
3336 static status_t
3337 file_close(struct file_descriptor *descriptor)
3338 {
3339 	struct vnode *vnode = descriptor->u.vnode;
3340 	status_t status = B_OK;
3341 
3342 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
3343 
3344 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device, vnode->id);
3345 	if (FS_CALL(vnode, close))
3346 		status = FS_CALL(vnode, close)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3347 
3348 	if (status == B_OK) {
3349 		// remove all outstanding locks for this team
3350 		release_advisory_lock(vnode, NULL);
3351 	}
3352 	return status;
3353 }
3354 
3355 
3356 static void
3357 file_free_fd(struct file_descriptor *descriptor)
3358 {
3359 	struct vnode *vnode = descriptor->u.vnode;
3360 
3361 	if (vnode != NULL) {
3362 		FS_CALL(vnode, free_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3363 		put_vnode(vnode);
3364 	}
3365 }
3366 
3367 
3368 static status_t
3369 file_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
3370 {
3371 	struct vnode *vnode = descriptor->u.vnode;
3372 
3373 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
3374 	return FS_CALL(vnode, read)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3375 }
3376 
3377 
3378 static status_t
3379 file_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
3380 {
3381 	struct vnode *vnode = descriptor->u.vnode;
3382 
3383 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
3384 	return FS_CALL(vnode, write)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
3385 }
3386 
3387 
3388 static off_t
3389 file_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
3390 {
3391 	off_t offset;
3392 
3393 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
3394 	// ToDo: seek should fail for pipes and FIFOs...
3395 
3396 	switch (seekType) {
3397 		case SEEK_SET:
3398 			offset = 0;
3399 			break;
3400 		case SEEK_CUR:
3401 			offset = descriptor->pos;
3402 			break;
3403 		case SEEK_END:
3404 		{
3405 			struct vnode *vnode = descriptor->u.vnode;
3406 			struct stat stat;
3407 			status_t status;
3408 
3409 			if (FS_CALL(vnode, read_stat) == NULL)
3410 				return EOPNOTSUPP;
3411 
3412 			status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
3413 			if (status < B_OK)
3414 				return status;
3415 
3416 			offset = stat.st_size;
3417 			break;
3418 		}
3419 		default:
3420 			return B_BAD_VALUE;
3421 	}
3422 
3423 	// assumes off_t is 64 bits wide
3424 	if (offset > 0 && LONGLONG_MAX - offset < pos)
3425 		return EOVERFLOW;
3426 
3427 	pos += offset;
3428 	if (pos < 0)
3429 		return B_BAD_VALUE;
3430 
3431 	return descriptor->pos = pos;
3432 }
3433 
3434 
3435 static status_t
3436 file_select(struct file_descriptor *descriptor, uint8 event, uint32 ref,
3437 	struct select_sync *sync)
3438 {
3439 	FUNCTION(("file_select(%p, %u, %lu, %p)\n", descriptor, event, ref, sync));
3440 
3441 	struct vnode *vnode = descriptor->u.vnode;
3442 
3443 	// If the FS has no select() hook, notify select() now.
3444 	if (FS_CALL(vnode, select) == NULL)
3445 		return notify_select_event((selectsync*)sync, ref, event);
3446 
3447 	return FS_CALL(vnode, select)(vnode->mount->cookie, vnode->private_node,
3448 		descriptor->cookie, event, ref, (selectsync*)sync);
3449 }
3450 
3451 
3452 static status_t
3453 file_deselect(struct file_descriptor *descriptor, uint8 event,
3454 	struct select_sync *sync)
3455 {
3456 	struct vnode *vnode = descriptor->u.vnode;
3457 
3458 	if (FS_CALL(vnode, deselect) == NULL)
3459 		return B_OK;
3460 
3461 	return FS_CALL(vnode, deselect)(vnode->mount->cookie, vnode->private_node,
3462 		descriptor->cookie, event, (selectsync*)sync);
3463 }
3464 
3465 
3466 static status_t
3467 dir_create_entry_ref(mount_id mountID, vnode_id parentID, const char *name, int perms, bool kernel)
3468 {
3469 	struct vnode *vnode;
3470 	vnode_id newID;
3471 	status_t status;
3472 
3473 	if (name == NULL || *name == '\0')
3474 		return B_BAD_VALUE;
3475 
3476 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', perms = %d)\n", mountID, parentID, name, perms));
3477 
3478 	status = get_vnode(mountID, parentID, &vnode, kernel);
3479 	if (status < B_OK)
3480 		return status;
3481 
3482 	if (FS_CALL(vnode, create_dir))
3483 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, name, perms, &newID);
3484 	else
3485 		status = EROFS;
3486 
3487 	put_vnode(vnode);
3488 	return status;
3489 }
3490 
3491 
3492 static status_t
3493 dir_create(int fd, char *path, int perms, bool kernel)
3494 {
3495 	char filename[B_FILE_NAME_LENGTH];
3496 	struct vnode *vnode;
3497 	vnode_id newID;
3498 	status_t status;
3499 
3500 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms, kernel));
3501 
3502 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3503 	if (status < 0)
3504 		return status;
3505 
3506 	if (FS_CALL(vnode, create_dir))
3507 		status = FS_CALL(vnode, create_dir)(vnode->mount->cookie, vnode->private_node, filename, perms, &newID);
3508 	else
3509 		status = EROFS;
3510 
3511 	put_vnode(vnode);
3512 	return status;
3513 }
3514 
3515 
3516 static int
3517 dir_open_entry_ref(mount_id mountID, vnode_id parentID, const char *name, bool kernel)
3518 {
3519 	struct vnode *vnode;
3520 	int status;
3521 
3522 	FUNCTION(("dir_open_entry_ref()\n"));
3523 
3524 	if (name && *name == '\0')
3525 		return B_BAD_VALUE;
3526 
3527 	// get the vnode matching the entry_ref/node_ref
3528 	if (name)
3529 		status = entry_ref_to_vnode(mountID, parentID, name, &vnode);
3530 	else
3531 		status = get_vnode(mountID, parentID, &vnode, false);
3532 	if (status < B_OK)
3533 		return status;
3534 
3535 	status = open_dir_vnode(vnode, kernel);
3536 	if (status < B_OK)
3537 		put_vnode(vnode);
3538 
3539 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID, vnode->id, name);
3540 	return status;
3541 }
3542 
3543 
3544 static int
3545 dir_open(int fd, char *path, bool kernel)
3546 {
3547 	int status = B_OK;
3548 
3549 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path, kernel));
3550 
3551 	// get the vnode matching the vnode + path combination
3552 	struct vnode *vnode = NULL;
3553 	vnode_id parentID;
3554 	status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID, kernel);
3555 	if (status != B_OK)
3556 		return status;
3557 
3558 	// open the dir
3559 	status = open_dir_vnode(vnode, kernel);
3560 	if (status < B_OK)
3561 		put_vnode(vnode);
3562 
3563 	cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device, parentID, vnode->id, NULL);
3564 	return status;
3565 }
3566 
3567 
3568 static status_t
3569 dir_close(struct file_descriptor *descriptor)
3570 {
3571 	struct vnode *vnode = descriptor->u.vnode;
3572 
3573 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
3574 
3575 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device, vnode->id);
3576 	if (FS_CALL(vnode, close_dir))
3577 		return FS_CALL(vnode, close_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3578 
3579 	return B_OK;
3580 }
3581 
3582 
3583 static void
3584 dir_free_fd(struct file_descriptor *descriptor)
3585 {
3586 	struct vnode *vnode = descriptor->u.vnode;
3587 
3588 	if (vnode != NULL) {
3589 		FS_CALL(vnode, free_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
3590 		put_vnode(vnode);
3591 	}
3592 }
3593 
3594 
3595 static status_t
3596 dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3597 {
3598 	return dir_read(descriptor->u.vnode, descriptor->cookie, buffer, bufferSize, _count);
3599 }
3600 
3601 
3602 static void
3603 fix_dirent(struct vnode *parent, struct dirent *entry)
3604 {
3605 	// set d_pdev and d_pino
3606 	entry->d_pdev = parent->device;
3607 	entry->d_pino = parent->id;
3608 
3609 	// If this is the ".." entry and the directory is the root of a FS,
3610 	// we need to replace d_dev and d_ino with the actual values.
3611 	if (strcmp(entry->d_name, "..") == 0
3612 		&& parent->mount->root_vnode == parent
3613 		&& parent->mount->covers_vnode) {
3614 
3615 		inc_vnode_ref_count(parent);	// vnode_path_to_vnode() puts the node
3616 
3617 		struct vnode *vnode;
3618 		status_t status = vnode_path_to_vnode(parent, "..", false, 0, &vnode,
3619 			NULL, NULL);
3620 
3621 		if (status == B_OK) {
3622 			entry->d_dev = vnode->device;
3623 			entry->d_ino = vnode->id;
3624 		}
3625 	} else {
3626 		// resolve mount points
3627 		struct vnode *vnode = NULL;
3628 		status_t status = get_vnode(entry->d_dev, entry->d_ino, &vnode, false);
3629 		if (status != B_OK)
3630 			return;
3631 
3632 		recursive_lock_lock(&sMountOpLock);
3633 		if (vnode->covered_by) {
3634 			entry->d_dev = vnode->covered_by->device;
3635 			entry->d_ino = vnode->covered_by->id;
3636 		}
3637 		recursive_lock_unlock(&sMountOpLock);
3638 
3639 		put_vnode(vnode);
3640 	}
3641 }
3642 
3643 
3644 static status_t
3645 dir_read(struct vnode *vnode, fs_cookie cookie, struct dirent *buffer, size_t bufferSize, uint32 *_count)
3646 {
3647 	if (!FS_CALL(vnode, read_dir))
3648 		return EOPNOTSUPP;
3649 
3650 	status_t error = FS_CALL(vnode, read_dir)(vnode->mount->cookie,vnode->private_node,cookie,buffer,bufferSize,_count);
3651 	if (error != B_OK)
3652 		return error;
3653 
3654 	// we need to adjust the read dirents
3655 	if (*_count > 0) {
3656 		// XXX: Currently reading only one dirent is supported. Make this a loop!
3657 		fix_dirent(vnode, buffer);
3658 	}
3659 
3660 	return error;
3661 }
3662 
3663 
3664 static status_t
3665 dir_rewind(struct file_descriptor *descriptor)
3666 {
3667 	struct vnode *vnode = descriptor->u.vnode;
3668 
3669 	if (FS_CALL(vnode, rewind_dir))
3670 		return FS_CALL(vnode, rewind_dir)(vnode->mount->cookie,vnode->private_node,descriptor->cookie);
3671 
3672 	return EOPNOTSUPP;
3673 }
3674 
3675 
3676 static status_t
3677 dir_remove(char *path, bool kernel)
3678 {
3679 	char name[B_FILE_NAME_LENGTH];
3680 	struct vnode *directory;
3681 	status_t status;
3682 
3683 	status = path_to_dir_vnode(path, &directory, name, kernel);
3684 	if (status < B_OK)
3685 		return status;
3686 
3687 	if (FS_CALL(directory, remove_dir))
3688 		status = FS_CALL(directory, remove_dir)(directory->mount->cookie, directory->private_node, name);
3689 	else
3690 		status = EROFS;
3691 
3692 	put_vnode(directory);
3693 	return status;
3694 }
3695 
3696 
3697 static status_t
3698 common_ioctl(struct file_descriptor *descriptor, ulong op, void *buffer, size_t length)
3699 {
3700 	struct vnode *vnode = descriptor->u.vnode;
3701 
3702 	if (FS_CALL(vnode, ioctl)) {
3703 		return FS_CALL(vnode, ioctl)(vnode->mount->cookie, vnode->private_node,
3704 			descriptor->cookie, op, buffer, length);
3705 	}
3706 
3707 	return EOPNOTSUPP;
3708 }
3709 
3710 
3711 static status_t
3712 common_fcntl(int fd, int op, uint32 argument, bool kernel)
3713 {
3714 	struct file_descriptor *descriptor;
3715 	struct vnode *vnode;
3716 	struct flock flock;
3717 	status_t status;
3718 
3719 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
3720 		fd, op, argument, kernel ? "kernel" : "user"));
3721 
3722 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
3723 	if (descriptor == NULL)
3724 		return B_FILE_ERROR;
3725 
3726 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
3727 		if (descriptor->type != FDTYPE_FILE)
3728 			return B_BAD_VALUE;
3729 		if (user_memcpy(&flock, (struct flock *)argument, sizeof(struct flock)) < B_OK)
3730 			return B_BAD_ADDRESS;
3731 	}
3732 
3733 	switch (op) {
3734 		case F_SETFD:
3735 		{
3736 			struct io_context *context = get_current_io_context(kernel);
3737 			// Set file descriptor flags
3738 
3739 			// O_CLOEXEC is the only flag available at this time
3740 			mutex_lock(&context->io_mutex);
3741 			fd_set_close_on_exec(context, fd, argument == FD_CLOEXEC);
3742 			mutex_unlock(&context->io_mutex);
3743 
3744 			status = B_OK;
3745 			break;
3746 		}
3747 
3748 		case F_GETFD:
3749 		{
3750 			struct io_context *context = get_current_io_context(kernel);
3751 
3752 			// Get file descriptor flags
3753 			mutex_lock(&context->io_mutex);
3754 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
3755 			mutex_unlock(&context->io_mutex);
3756 			break;
3757 		}
3758 
3759 		case F_SETFL:
3760 			// Set file descriptor open mode
3761 			if (FS_CALL(vnode, set_flags)) {
3762 				// we only accept changes to O_APPEND and O_NONBLOCK
3763 				argument &= O_APPEND | O_NONBLOCK;
3764 
3765 				status = FS_CALL(vnode, set_flags)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, (int)argument);
3766 				if (status == B_OK) {
3767 					// update this descriptor's open_mode field
3768 					descriptor->open_mode = (descriptor->open_mode & ~(O_APPEND | O_NONBLOCK)) | argument;
3769 				}
3770 			} else
3771 				status = EOPNOTSUPP;
3772 			break;
3773 
3774 		case F_GETFL:
3775 			// Get file descriptor open mode
3776 			status = descriptor->open_mode;
3777 			break;
3778 
3779 		case F_DUPFD:
3780 		{
3781 			struct io_context *context = get_current_io_context(kernel);
3782 
3783 			status = new_fd_etc(context, descriptor, (int)argument);
3784 			if (status >= 0) {
3785 				mutex_lock(&context->io_mutex);
3786 				fd_set_close_on_exec(context, fd, false);
3787 				mutex_unlock(&context->io_mutex);
3788 
3789 				atomic_add(&descriptor->ref_count, 1);
3790 			}
3791 			break;
3792 		}
3793 
3794 		case F_GETLK:
3795 			status = get_advisory_lock(descriptor->u.vnode, &flock);
3796 			if (status == B_OK) {
3797 				// copy back flock structure
3798 				status = user_memcpy((struct flock *)argument, &flock, sizeof(struct flock));
3799 			}
3800 			break;
3801 
3802 		case F_SETLK:
3803 		case F_SETLKW:
3804 			status = normalize_flock(descriptor, &flock);
3805 			if (status < B_OK)
3806 				break;
3807 
3808 			if (flock.l_type == F_UNLCK)
3809 				status = release_advisory_lock(descriptor->u.vnode, &flock);
3810 			else {
3811 				// the open mode must match the lock type
3812 				if ((descriptor->open_mode & O_RWMASK) == O_RDONLY && flock.l_type == F_WRLCK
3813 					|| (descriptor->open_mode & O_RWMASK) == O_WRONLY && flock.l_type == F_RDLCK)
3814 					status = B_FILE_ERROR;
3815 				else
3816 					status = acquire_advisory_lock(descriptor->u.vnode, &flock, op == F_SETLKW);
3817 			}
3818 			break;
3819 
3820 		// ToDo: add support for more ops?
3821 
3822 		default:
3823 			status = B_BAD_VALUE;
3824 	}
3825 
3826 	put_fd(descriptor);
3827 	return status;
3828 }
3829 
3830 
3831 static status_t
3832 common_sync(int fd, bool kernel)
3833 {
3834 	struct file_descriptor *descriptor;
3835 	struct vnode *vnode;
3836 	status_t status;
3837 
3838 	FUNCTION(("common_fsync: entry. fd %d kernel %d\n", fd, kernel));
3839 
3840 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
3841 	if (descriptor == NULL)
3842 		return B_FILE_ERROR;
3843 
3844 	if (FS_CALL(vnode, fsync) != NULL)
3845 		status = FS_CALL(vnode, fsync)(vnode->mount->cookie, vnode->private_node);
3846 	else
3847 		status = EOPNOTSUPP;
3848 
3849 	put_fd(descriptor);
3850 	return status;
3851 }
3852 
3853 
/** Stub for the BeOS-style exclusive node locking API; always reports the
 *	operation as unsupported until it is implemented.
 */

static status_t
common_lock_node(int fd, bool kernel)
{
	// TODO: Implement!
	return EOPNOTSUPP;
}
3860 
3861 
/** Stub counterpart to common_lock_node(); always reports the operation as
 *	unsupported until it is implemented.
 */

static status_t
common_unlock_node(int fd, bool kernel)
{
	// TODO: Implement!
	return EOPNOTSUPP;
}
3868 
3869 
3870 static status_t
3871 common_read_link(int fd, char *path, char *buffer, size_t *_bufferSize,
3872 	bool kernel)
3873 {
3874 	struct vnode *vnode;
3875 	status_t status;
3876 
3877 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
3878 	if (status < B_OK)
3879 		return status;
3880 
3881 	if (FS_CALL(vnode, read_link) != NULL) {
3882 		status = FS_CALL(vnode, read_link)(vnode->mount->cookie,
3883 			vnode->private_node, buffer, _bufferSize);
3884 	} else
3885 		status = B_BAD_VALUE;
3886 
3887 	put_vnode(vnode);
3888 	return status;
3889 }
3890 
3891 
3892 static status_t
3893 common_write_link(char *path, char *toPath, bool kernel)
3894 {
3895 	struct vnode *vnode;
3896 	status_t status;
3897 
3898 	status = path_to_vnode(path, false, &vnode, NULL, kernel);
3899 	if (status < B_OK)
3900 		return status;
3901 
3902 	if (FS_CALL(vnode, write_link) != NULL)
3903 		status = FS_CALL(vnode, write_link)(vnode->mount->cookie, vnode->private_node, toPath);
3904 	else
3905 		status = EOPNOTSUPP;
3906 
3907 	put_vnode(vnode);
3908 
3909 	return status;
3910 }
3911 
3912 
3913 static status_t
3914 common_create_symlink(int fd, char *path, const char *toPath, int mode,
3915 	bool kernel)
3916 {
3917 	// path validity checks have to be in the calling function!
3918 	char name[B_FILE_NAME_LENGTH];
3919 	struct vnode *vnode;
3920 	status_t status;
3921 
3922 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
3923 
3924 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
3925 	if (status < B_OK)
3926 		return status;
3927 
3928 	if (FS_CALL(vnode, create_symlink) != NULL)
3929 		status = FS_CALL(vnode, create_symlink)(vnode->mount->cookie, vnode->private_node, name, toPath, mode);
3930 	else
3931 		status = EROFS;
3932 
3933 	put_vnode(vnode);
3934 
3935 	return status;
3936 }
3937 
3938 
/** Creates a hard link at \a path pointing to the node at \a toPath.
 *	Path validity checks have to be done by the caller. Both nodes must
 *	live on the same mount; otherwise B_CROSS_DEVICE_LINK is returned.
 *	Returns EROFS if the file system provides no link() hook.
 */

static status_t
common_create_link(char *path, char *toPath, bool kernel)
{
	// path validity checks have to be in the calling function!
	char name[B_FILE_NAME_LENGTH];
	struct vnode *directory, *vnode;
	status_t status;

	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path, toPath, kernel));

	// resolve the directory the link is created in, and its leaf name
	status = path_to_dir_vnode(path, &directory, name, kernel);
	if (status < B_OK)
		return status;

	// resolve the link target (following symlinks)
	status = path_to_vnode(toPath, true, &vnode, NULL, kernel);
	if (status < B_OK)
		goto err;

	// hard links must not cross mounts
	if (directory->mount != vnode->mount) {
		status = B_CROSS_DEVICE_LINK;
		goto err1;
	}

	if (FS_CALL(vnode, link) != NULL)
		status = FS_CALL(vnode, link)(directory->mount->cookie, directory->private_node, name, vnode->private_node);
	else
		status = EROFS;

	// release references in reverse order of acquisition
err1:
	put_vnode(vnode);
err:
	put_vnode(directory);

	return status;
}
3974 
3975 
3976 static status_t
3977 common_unlink(int fd, char *path, bool kernel)
3978 {
3979 	char filename[B_FILE_NAME_LENGTH];
3980 	struct vnode *vnode;
3981 	status_t status;
3982 
3983 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path, kernel));
3984 
3985 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
3986 	if (status < 0)
3987 		return status;
3988 
3989 	if (FS_CALL(vnode, unlink) != NULL)
3990 		status = FS_CALL(vnode, unlink)(vnode->mount->cookie, vnode->private_node, filename);
3991 	else
3992 		status = EROFS;
3993 
3994 	put_vnode(vnode);
3995 
3996 	return status;
3997 }
3998 
3999 
4000 static status_t
4001 common_access(char *path, int mode, bool kernel)
4002 {
4003 	struct vnode *vnode;
4004 	status_t status;
4005 
4006 	status = path_to_vnode(path, true, &vnode, NULL, kernel);
4007 	if (status < B_OK)
4008 		return status;
4009 
4010 	if (FS_CALL(vnode, access) != NULL)
4011 		status = FS_CALL(vnode, access)(vnode->mount->cookie, vnode->private_node, mode);
4012 	else
4013 		status = B_OK;
4014 
4015 	put_vnode(vnode);
4016 
4017 	return status;
4018 }
4019 
4020 
/** Renames the entry addressed by (\a fd, \a path) to the location
 *	addressed by (\a newFD, \a newPath) via the source file system's
 *	rename() hook. Both locations must be on the same device; otherwise
 *	B_CROSS_DEVICE_LINK is returned. Returns EROFS without a rename() hook.
 */

static status_t
common_rename(int fd, char *path, int newFD, char *newPath, bool kernel)
{
	struct vnode *fromVnode, *toVnode;
	char fromName[B_FILE_NAME_LENGTH];
	char toName[B_FILE_NAME_LENGTH];
	status_t status;

	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, kernel = %d)\n", fd, path, newFD, newPath, kernel));

	// resolve the source parent directory and leaf name
	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
	if (status < 0)
		return status;

	// resolve the destination parent directory and leaf name
	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
	if (status < 0)
		goto err;

	// renaming must not cross devices
	if (fromVnode->device != toVnode->device) {
		status = B_CROSS_DEVICE_LINK;
		goto err1;
	}

	if (FS_CALL(fromVnode, rename) != NULL)
		status = FS_CALL(fromVnode, rename)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
	else
		status = EROFS;

	// release references in reverse order of acquisition
err1:
	put_vnode(toVnode);
err:
	put_vnode(fromVnode);

	return status;
}
4056 
4057 
4058 static status_t
4059 common_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4060 {
4061 	struct vnode *vnode = descriptor->u.vnode;
4062 
4063 	FUNCTION(("common_read_stat: stat %p\n", stat));
4064 
4065 	status_t status = FS_CALL(vnode, read_stat)(vnode->mount->cookie,
4066 		vnode->private_node, stat);
4067 
4068 	// fill in the st_dev and st_ino fields
4069 	if (status == B_OK) {
4070 		stat->st_dev = vnode->device;
4071 		stat->st_ino = vnode->id;
4072 	}
4073 
4074 	return status;
4075 }
4076 
4077 
4078 static status_t
4079 common_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4080 {
4081 	struct vnode *vnode = descriptor->u.vnode;
4082 
4083 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n", vnode, stat, statMask));
4084 	if (!FS_CALL(vnode, write_stat))
4085 		return EROFS;
4086 
4087 	return FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4088 }
4089 
4090 
4091 static status_t
4092 common_path_read_stat(int fd, char *path, bool traverseLeafLink,
4093 	struct stat *stat, bool kernel)
4094 {
4095 	struct vnode *vnode;
4096 	status_t status;
4097 
4098 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path, stat));
4099 
4100 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4101 	if (status < 0)
4102 		return status;
4103 
4104 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, stat);
4105 
4106 	// fill in the st_dev and st_ino fields
4107 	if (status == B_OK) {
4108 		stat->st_dev = vnode->device;
4109 		stat->st_ino = vnode->id;
4110 	}
4111 
4112 	put_vnode(vnode);
4113 	return status;
4114 }
4115 
4116 
4117 static status_t
4118 common_path_write_stat(int fd, char *path, bool traverseLeafLink,
4119 	const struct stat *stat, int statMask, bool kernel)
4120 {
4121 	struct vnode *vnode;
4122 	status_t status;
4123 
4124 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
4125 
4126 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL, kernel);
4127 	if (status < 0)
4128 		return status;
4129 
4130 	if (FS_CALL(vnode, write_stat))
4131 		status = FS_CALL(vnode, write_stat)(vnode->mount->cookie, vnode->private_node, stat, statMask);
4132 	else
4133 		status = EROFS;
4134 
4135 	put_vnode(vnode);
4136 
4137 	return status;
4138 }
4139 
4140 
4141 static int
4142 attr_dir_open(int fd, char *path, bool kernel)
4143 {
4144 	struct vnode *vnode;
4145 	int status;
4146 
4147 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path, kernel));
4148 
4149 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
4150 	if (status < B_OK)
4151 		return status;
4152 
4153 	status = open_attr_dir_vnode(vnode, kernel);
4154 	if (status < 0)
4155 		put_vnode(vnode);
4156 
4157 	return status;
4158 }
4159 
4160 
4161 static status_t
4162 attr_dir_close(struct file_descriptor *descriptor)
4163 {
4164 	struct vnode *vnode = descriptor->u.vnode;
4165 
4166 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
4167 
4168 	if (FS_CALL(vnode, close_attr_dir))
4169 		return FS_CALL(vnode, close_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4170 
4171 	return B_OK;
4172 }
4173 
4174 
4175 static void
4176 attr_dir_free_fd(struct file_descriptor *descriptor)
4177 {
4178 	struct vnode *vnode = descriptor->u.vnode;
4179 
4180 	if (vnode != NULL) {
4181 		FS_CALL(vnode, free_attr_dir_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4182 		put_vnode(vnode);
4183 	}
4184 }
4185 
4186 
4187 static status_t
4188 attr_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4189 {
4190 	struct vnode *vnode = descriptor->u.vnode;
4191 
4192 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
4193 
4194 	if (FS_CALL(vnode, read_attr_dir))
4195 		return FS_CALL(vnode, read_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, buffer, bufferSize, _count);
4196 
4197 	return EOPNOTSUPP;
4198 }
4199 
4200 
4201 static status_t
4202 attr_dir_rewind(struct file_descriptor *descriptor)
4203 {
4204 	struct vnode *vnode = descriptor->u.vnode;
4205 
4206 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
4207 
4208 	if (FS_CALL(vnode, rewind_attr_dir))
4209 		return FS_CALL(vnode, rewind_attr_dir)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4210 
4211 	return EOPNOTSUPP;
4212 }
4213 
4214 
/** Creates (and opens) an attribute named \a name of the given \a type on
 *	the node behind \a fd and returns a new file descriptor for it.
 *	On descriptor creation failure the freshly created attribute is closed,
 *	its cookie freed, and the attribute removed again.
 *	Returns EROFS if the file system cannot create attributes.
 */

static int
attr_create(int fd, const char *name, uint32 type, int openMode, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	int status;

	if (name == NULL || *name == '\0')
		return B_BAD_VALUE;

	vnode = get_vnode_from_fd(fd, kernel);
	if (vnode == NULL)
		return B_FILE_ERROR;

	if (FS_CALL(vnode, create_attr) == NULL) {
		status = EROFS;
		goto err;
	}

	status = FS_CALL(vnode, create_attr)(vnode->mount->cookie, vnode->private_node, name, type, openMode, &cookie);
	if (status < B_OK)
		goto err;

	// on success the vnode reference is transferred to the descriptor
	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// roll back: close and free the cookie, then remove the attribute again
	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);

	FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);

err:
	put_vnode(vnode);

	return status;
}
4251 
4252 
/** Opens the attribute named \a name on the node behind \a fd and returns
 *	a new file descriptor for it.
 *	On descriptor creation failure the attribute is closed and its cookie
 *	freed again. Returns EOPNOTSUPP if the file system has no open_attr()
 *	hook.
 */

static int
attr_open(int fd, const char *name, int openMode, bool kernel)
{
	struct vnode *vnode;
	fs_cookie cookie;
	int status;

	if (name == NULL || *name == '\0')
		return B_BAD_VALUE;

	vnode = get_vnode_from_fd(fd, kernel);
	if (vnode == NULL)
		return B_FILE_ERROR;

	if (FS_CALL(vnode, open_attr) == NULL) {
		status = EOPNOTSUPP;
		goto err;
	}

	status = FS_CALL(vnode, open_attr)(vnode->mount->cookie, vnode->private_node, name, openMode, &cookie);
	if (status < B_OK)
		goto err;

	// now we only need a file descriptor for this attribute and we're done
	// (on success the vnode reference is transferred to the descriptor)
	if ((status = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel)) >= 0)
		return status;

	// roll back: close and free the cookie again
	FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, cookie);
	FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, cookie);

err:
	put_vnode(vnode);

	return status;
}
4288 
4289 
4290 static status_t
4291 attr_close(struct file_descriptor *descriptor)
4292 {
4293 	struct vnode *vnode = descriptor->u.vnode;
4294 
4295 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
4296 
4297 	if (FS_CALL(vnode, close_attr))
4298 		return FS_CALL(vnode, close_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4299 
4300 	return B_OK;
4301 }
4302 
4303 
4304 static void
4305 attr_free_fd(struct file_descriptor *descriptor)
4306 {
4307 	struct vnode *vnode = descriptor->u.vnode;
4308 
4309 	if (vnode != NULL) {
4310 		FS_CALL(vnode, free_attr_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
4311 		put_vnode(vnode);
4312 	}
4313 }
4314 
4315 
4316 static status_t
4317 attr_read(struct file_descriptor *descriptor, off_t pos, void *buffer, size_t *length)
4318 {
4319 	struct vnode *vnode = descriptor->u.vnode;
4320 
4321 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length, *length));
4322 	if (!FS_CALL(vnode, read_attr))
4323 		return EOPNOTSUPP;
4324 
4325 	return FS_CALL(vnode, read_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4326 }
4327 
4328 
4329 static status_t
4330 attr_write(struct file_descriptor *descriptor, off_t pos, const void *buffer, size_t *length)
4331 {
4332 	struct vnode *vnode = descriptor->u.vnode;
4333 
4334 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
4335 	if (!FS_CALL(vnode, write_attr))
4336 		return EOPNOTSUPP;
4337 
4338 	return FS_CALL(vnode, write_attr)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, pos, buffer, length);
4339 }
4340 
4341 
4342 static off_t
4343 attr_seek(struct file_descriptor *descriptor, off_t pos, int seekType)
4344 {
4345 	off_t offset;
4346 
4347 	switch (seekType) {
4348 		case SEEK_SET:
4349 			offset = 0;
4350 			break;
4351 		case SEEK_CUR:
4352 			offset = descriptor->pos;
4353 			break;
4354 		case SEEK_END:
4355 		{
4356 			struct vnode *vnode = descriptor->u.vnode;
4357 			struct stat stat;
4358 			status_t status;
4359 
4360 			if (FS_CALL(vnode, read_stat) == NULL)
4361 				return EOPNOTSUPP;
4362 
4363 			status = FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, &stat);
4364 			if (status < B_OK)
4365 				return status;
4366 
4367 			offset = stat.st_size;
4368 			break;
4369 		}
4370 		default:
4371 			return B_BAD_VALUE;
4372 	}
4373 
4374 	// assumes off_t is 64 bits wide
4375 	if (offset > 0 && LONGLONG_MAX - offset < pos)
4376 		return EOVERFLOW;
4377 
4378 	pos += offset;
4379 	if (pos < 0)
4380 		return B_BAD_VALUE;
4381 
4382 	return descriptor->pos = pos;
4383 }
4384 
4385 
4386 static status_t
4387 attr_read_stat(struct file_descriptor *descriptor, struct stat *stat)
4388 {
4389 	struct vnode *vnode = descriptor->u.vnode;
4390 
4391 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
4392 
4393 	if (!FS_CALL(vnode, read_attr_stat))
4394 		return EOPNOTSUPP;
4395 
4396 	return FS_CALL(vnode, read_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
4397 }
4398 
4399 
4400 static status_t
4401 attr_write_stat(struct file_descriptor *descriptor, const struct stat *stat, int statMask)
4402 {
4403 	struct vnode *vnode = descriptor->u.vnode;
4404 
4405 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
4406 
4407 	if (!FS_CALL(vnode, write_attr_stat))
4408 		return EROFS;
4409 
4410 	return FS_CALL(vnode, write_attr_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat, statMask);
4411 }
4412 
4413 
4414 static status_t
4415 attr_remove(int fd, const char *name, bool kernel)
4416 {
4417 	struct file_descriptor *descriptor;
4418 	struct vnode *vnode;
4419 	status_t status;
4420 
4421 	if (name == NULL || *name == '\0')
4422 		return B_BAD_VALUE;
4423 
4424 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name, kernel));
4425 
4426 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
4427 	if (descriptor == NULL)
4428 		return B_FILE_ERROR;
4429 
4430 	if (FS_CALL(vnode, remove_attr))
4431 		status = FS_CALL(vnode, remove_attr)(vnode->mount->cookie, vnode->private_node, name);
4432 	else
4433 		status = EROFS;
4434 
4435 	put_fd(descriptor);
4436 
4437 	return status;
4438 }
4439 
4440 
4441 static status_t
4442 attr_rename(int fromfd, const char *fromName, int tofd, const char *toName, bool kernel)
4443 {
4444 	struct file_descriptor *fromDescriptor, *toDescriptor;
4445 	struct vnode *fromVnode, *toVnode;
4446 	status_t status;
4447 
4448 	if (fromName == NULL || *fromName == '\0' || toName == NULL || *toName == '\0')
4449 		return B_BAD_VALUE;
4450 
4451 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to name = \"%s\", kernel %d\n", fromfd, fromName, tofd, toName, kernel));
4452 
4453 	fromDescriptor = get_fd_and_vnode(fromfd, &fromVnode, kernel);
4454 	if (fromDescriptor == NULL)
4455 		return B_FILE_ERROR;
4456 
4457 	toDescriptor = get_fd_and_vnode(tofd, &toVnode, kernel);
4458 	if (toDescriptor == NULL) {
4459 		status = B_FILE_ERROR;
4460 		goto err;
4461 	}
4462 
4463 	// are the files on the same volume?
4464 	if (fromVnode->device != toVnode->device) {
4465 		status = B_CROSS_DEVICE_LINK;
4466 		goto err1;
4467 	}
4468 
4469 	if (FS_CALL(fromVnode, rename_attr))
4470 		status = FS_CALL(fromVnode, rename_attr)(fromVnode->mount->cookie, fromVnode->private_node, fromName, toVnode->private_node, toName);
4471 	else
4472 		status = EROFS;
4473 
4474 err1:
4475 	put_fd(toDescriptor);
4476 err:
4477 	put_fd(fromDescriptor);
4478 
4479 	return status;
4480 }
4481 
4482 
4483 static status_t
4484 index_dir_open(mount_id mountID, bool kernel)
4485 {
4486 	struct fs_mount *mount;
4487 	fs_cookie cookie;
4488 
4489 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
4490 
4491 	status_t status = get_mount(mountID, &mount);
4492 	if (status < B_OK)
4493 		return status;
4494 
4495 	if (FS_MOUNT_CALL(mount, open_index_dir) == NULL) {
4496 		status = EOPNOTSUPP;
4497 		goto out;
4498 	}
4499 
4500 	status = FS_MOUNT_CALL(mount, open_index_dir)(mount->cookie, &cookie);
4501 	if (status < B_OK)
4502 		goto out;
4503 
4504 	// get fd for the index directory
4505 	status = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, 0, kernel);
4506 	if (status >= 0)
4507 		goto out;
4508 
4509 	// something went wrong
4510 	FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, cookie);
4511 	FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, cookie);
4512 
4513 out:
4514 	put_mount(mount);
4515 	return status;
4516 }
4517 
4518 
4519 static status_t
4520 index_dir_close(struct file_descriptor *descriptor)
4521 {
4522 	struct fs_mount *mount = descriptor->u.mount;
4523 
4524 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
4525 
4526 	if (FS_MOUNT_CALL(mount, close_index_dir))
4527 		return FS_MOUNT_CALL(mount, close_index_dir)(mount->cookie, descriptor->cookie);
4528 
4529 	return B_OK;
4530 }
4531 
4532 
4533 static void
4534 index_dir_free_fd(struct file_descriptor *descriptor)
4535 {
4536 	struct fs_mount *mount = descriptor->u.mount;
4537 
4538 	if (mount != NULL) {
4539 		FS_MOUNT_CALL(mount, free_index_dir_cookie)(mount->cookie, descriptor->cookie);
4540 		// ToDo: find a replacement ref_count object - perhaps the root dir?
4541 		//put_vnode(vnode);
4542 	}
4543 }
4544 
4545 
4546 static status_t
4547 index_dir_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4548 {
4549 	struct fs_mount *mount = descriptor->u.mount;
4550 
4551 	if (FS_MOUNT_CALL(mount, read_index_dir))
4552 		return FS_MOUNT_CALL(mount, read_index_dir)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
4553 
4554 	return EOPNOTSUPP;
4555 }
4556 
4557 
4558 static status_t
4559 index_dir_rewind(struct file_descriptor *descriptor)
4560 {
4561 	struct fs_mount *mount = descriptor->u.mount;
4562 
4563 	if (FS_MOUNT_CALL(mount, rewind_index_dir))
4564 		return FS_MOUNT_CALL(mount, rewind_index_dir)(mount->cookie, descriptor->cookie);
4565 
4566 	return EOPNOTSUPP;
4567 }
4568 
4569 
4570 static status_t
4571 index_create(mount_id mountID, const char *name, uint32 type, uint32 flags, bool kernel)
4572 {
4573 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4574 
4575 	struct fs_mount *mount;
4576 	status_t status = get_mount(mountID, &mount);
4577 	if (status < B_OK)
4578 		return status;
4579 
4580 	if (FS_MOUNT_CALL(mount, create_index) == NULL) {
4581 		status = EROFS;
4582 		goto out;
4583 	}
4584 
4585 	status = FS_MOUNT_CALL(mount, create_index)(mount->cookie, name, type, flags);
4586 
4587 out:
4588 	put_mount(mount);
4589 	return status;
4590 }
4591 
4592 
#if 0
// Disabled: per-index file descriptors are not used yet; these hooks are
// kept for reference until index FDs are implemented.
static status_t
index_read_stat(struct file_descriptor *descriptor, struct stat *stat)
{
	struct vnode *vnode = descriptor->u.vnode;

	// ToDo: currently unused!
	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
	if (!FS_CALL(vnode, read_index_stat))
		return EOPNOTSUPP;

	// deliberately disabled - see ToDo above
	return EOPNOTSUPP;
	//return FS_CALL(vnode, read_index_stat)(vnode->mount->cookie, vnode->private_node, descriptor->cookie, stat);
}


// Would release the index cookie and the vnode reference when the FD is
// destroyed - mirrors attr_free_fd() above.
static void
index_free_fd(struct file_descriptor *descriptor)
{
	struct vnode *vnode = descriptor->u.vnode;

	if (vnode != NULL) {
		FS_CALL(vnode, free_index_cookie)(vnode->mount->cookie, vnode->private_node, descriptor->cookie);
		put_vnode(vnode);
	}
}
#endif
4620 
4621 
4622 static status_t
4623 index_name_read_stat(mount_id mountID, const char *name, struct stat *stat, bool kernel)
4624 {
4625 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4626 
4627 	struct fs_mount *mount;
4628 	status_t status = get_mount(mountID, &mount);
4629 	if (status < B_OK)
4630 		return status;
4631 
4632 	if (FS_MOUNT_CALL(mount, read_index_stat) == NULL) {
4633 		status = EOPNOTSUPP;
4634 		goto out;
4635 	}
4636 
4637 	status = FS_MOUNT_CALL(mount, read_index_stat)(mount->cookie, name, stat);
4638 
4639 out:
4640 	put_mount(mount);
4641 	return status;
4642 }
4643 
4644 
4645 static status_t
4646 index_remove(mount_id mountID, const char *name, bool kernel)
4647 {
4648 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID, name, kernel));
4649 
4650 	struct fs_mount *mount;
4651 	status_t status = get_mount(mountID, &mount);
4652 	if (status < B_OK)
4653 		return status;
4654 
4655 	if (FS_MOUNT_CALL(mount, remove_index) == NULL) {
4656 		status = EROFS;
4657 		goto out;
4658 	}
4659 
4660 	status = FS_MOUNT_CALL(mount, remove_index)(mount->cookie, name);
4661 
4662 out:
4663 	put_mount(mount);
4664 	return status;
4665 }
4666 
4667 
/**	ToDo: the query FS API is still pretty much the same as in R5.
 *		It would be nice if the file systems would get some more kernel
 *		support for queries.
 *		For example, query parsing should be moved into the kernel.
 */
4673 
4674 static int
4675 query_open(dev_t device, const char *query, uint32 flags,
4676 	port_id port, int32 token, bool kernel)
4677 {
4678 	struct fs_mount *mount;
4679 	fs_cookie cookie;
4680 
4681 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device, query, kernel));
4682 
4683 	status_t status = get_mount(device, &mount);
4684 	if (status < B_OK)
4685 		return status;
4686 
4687 	if (FS_MOUNT_CALL(mount, open_query) == NULL) {
4688 		status = EOPNOTSUPP;
4689 		goto out;
4690 	}
4691 
4692 	status = FS_MOUNT_CALL(mount, open_query)(mount->cookie, query, flags, port, token, &cookie);
4693 	if (status < B_OK)
4694 		goto out;
4695 
4696 	// get fd for the index directory
4697 	status = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, 0, kernel);
4698 	if (status >= 0)
4699 		goto out;
4700 
4701 	// something went wrong
4702 	FS_MOUNT_CALL(mount, close_query)(mount->cookie, cookie);
4703 	FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, cookie);
4704 
4705 out:
4706 	put_mount(mount);
4707 	return status;
4708 }
4709 
4710 
4711 static status_t
4712 query_close(struct file_descriptor *descriptor)
4713 {
4714 	struct fs_mount *mount = descriptor->u.mount;
4715 
4716 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
4717 
4718 	if (FS_MOUNT_CALL(mount, close_query))
4719 		return FS_MOUNT_CALL(mount, close_query)(mount->cookie, descriptor->cookie);
4720 
4721 	return B_OK;
4722 }
4723 
4724 
4725 static void
4726 query_free_fd(struct file_descriptor *descriptor)
4727 {
4728 	struct fs_mount *mount = descriptor->u.mount;
4729 
4730 	if (mount != NULL) {
4731 		FS_MOUNT_CALL(mount, free_query_cookie)(mount->cookie, descriptor->cookie);
4732 		// ToDo: find a replacement ref_count object - perhaps the root dir?
4733 		//put_vnode(vnode);
4734 	}
4735 }
4736 
4737 
4738 static status_t
4739 query_read(struct file_descriptor *descriptor, struct dirent *buffer, size_t bufferSize, uint32 *_count)
4740 {
4741 	struct fs_mount *mount = descriptor->u.mount;
4742 
4743 	if (FS_MOUNT_CALL(mount, read_query))
4744 		return FS_MOUNT_CALL(mount, read_query)(mount->cookie, descriptor->cookie, buffer, bufferSize, _count);
4745 
4746 	return EOPNOTSUPP;
4747 }
4748 
4749 
4750 static status_t
4751 query_rewind(struct file_descriptor *descriptor)
4752 {
4753 	struct fs_mount *mount = descriptor->u.mount;
4754 
4755 	if (FS_MOUNT_CALL(mount, rewind_query))
4756 		return FS_MOUNT_CALL(mount, rewind_query)(mount->cookie, descriptor->cookie);
4757 
4758 	return EOPNOTSUPP;
4759 }
4760 
4761 
4762 //	#pragma mark -
4763 //	General File System functions
4764 
4765 
4766 static dev_t
4767 fs_mount(char *path, const char *device, const char *fsName, uint32 flags,
4768 	const char *args, bool kernel)
4769 {
4770 	struct fs_mount *mount;
4771 	status_t status = 0;
4772 
4773 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
4774 
4775 	// The path is always safe, we just have to make sure that fsName is
4776 	// almost valid - we can't make any assumptions about args, though.
4777 	// A NULL fsName is OK, if a device was given and the FS is not virtual.
4778 	// We'll get it from the DDM later.
4779 	if (fsName == NULL) {
4780 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
4781 			return B_BAD_VALUE;
4782 	} else if (fsName[0] == '\0')
4783 		return B_BAD_VALUE;
4784 
4785 	RecursiveLocker mountOpLocker(sMountOpLock);
4786 
4787 	// Helper to delete a newly created file device on failure.
4788 	// Not exactly beautiful, but helps to keep the code below cleaner.
4789 	struct FileDeviceDeleter {
4790 		FileDeviceDeleter() : id(-1) {}
4791 		~FileDeviceDeleter()
4792 		{
4793 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
4794 		}
4795 
4796 		partition_id id;
4797 	} fileDeviceDeleter;
4798 
4799 	// If the file system is not a "virtual" one, the device argument should
4800 	// point to a real file/device (if given at all).
4801 	// get the partition
4802 	KDiskDeviceManager *ddm = KDiskDeviceManager::Default();
4803 	KPartition *partition = NULL;
4804 	bool newlyCreatedFileDevice = false;
4805 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device) {
4806 		// normalize the device path
4807 		KPath normalizedDevice;
4808 		status = normalizedDevice.SetTo(device, true);
4809 		if (status != B_OK)
4810 			return status;
4811 
4812 		// get a corresponding partition from the DDM
4813 		partition = ddm->RegisterPartition(normalizedDevice.Path(), true);
4814 
4815 		if (!partition) {
4816 			// Partition not found: This either means, the user supplied
4817 			// an invalid path, or the path refers to an image file. We try
4818 			// to let the DDM create a file device for the path.
4819 			partition_id deviceID = ddm->CreateFileDevice(
4820 				normalizedDevice.Path(), &newlyCreatedFileDevice);
4821 			if (deviceID >= 0) {
4822 				partition = ddm->RegisterPartition(deviceID, true);
4823 				if (newlyCreatedFileDevice)
4824 					fileDeviceDeleter.id = deviceID;
4825 // TODO: We must wait here, until the partition scan job is done.
4826 			}
4827 		}
4828 
4829 		if (!partition) {
4830 			PRINT(("fs_mount(): Partition `%s' not found.\n",
4831 				normalizedDevice.Path()));
4832 			return B_ENTRY_NOT_FOUND;
4833 		}
4834 	}
4835 	PartitionRegistrar partitionRegistrar(partition, true);
4836 
4837 	// Write lock the partition's device. For the time being, we keep the lock
4838 	// until we're done mounting -- not nice, but ensure, that no-one is
4839 	// interfering.
4840 	// TODO: Find a better solution.
4841 	KDiskDevice *diskDevice = NULL;
4842 	if (partition) {
4843 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
4844 		if (!diskDevice) {
4845 			PRINT(("fs_mount(): Failed to lock disk device!\n"));
4846 			return B_ERROR;
4847 		}
4848 	}
4849 	DeviceWriteLocker writeLocker(diskDevice, true);
4850 
4851 	if (partition) {
4852 		// make sure, that the partition is not busy
4853 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
4854 			PRINT(("fs_mount(): Partition is busy.\n"));
4855 			return B_BUSY;
4856 		}
4857 
4858 		// if no FS name had been supplied, we get it from the partition
4859 		if (!fsName) {
4860 			KDiskSystem *diskSystem = partition->DiskSystem();
4861 			if (!diskSystem) {
4862 				PRINT(("fs_mount(): No FS name was given, and the DDM didn't "
4863 					"recognize it.\n"));
4864 				return B_BAD_VALUE;
4865 			}
4866 
4867 			if (!diskSystem->IsFileSystem()) {
4868 				PRINT(("fs_mount(): No FS name was given, and the DDM found a "
4869 					"partitioning system.\n"));
4870 				return B_BAD_VALUE;
4871 			}
4872 
4873 			// The disk system name will not change, and the KDiskSystem
4874 			// object will not go away while the disk device is locked (and
4875 			// the partition has a reference to it), so this is safe.
4876 			fsName = diskSystem->Name();
4877 		}
4878 	}
4879 
4880 	mount = (struct fs_mount *)malloc(sizeof(struct fs_mount));
4881 	if (mount == NULL)
4882 		return B_NO_MEMORY;
4883 
4884 	list_init_etc(&mount->vnodes, offsetof(struct vnode, mount_link));
4885 
4886 	mount->fs_name = get_file_system_name(fsName);
4887 	if (mount->fs_name == NULL) {
4888 		status = B_NO_MEMORY;
4889 		goto err1;
4890 	}
4891 
4892 	mount->device_name = strdup(device);
4893 		// "device" can be NULL
4894 
4895 	mount->fs = get_file_system(fsName);
4896 	if (mount->fs == NULL) {
4897 		status = ENODEV;
4898 		goto err3;
4899 	}
4900 
4901 	status = recursive_lock_init(&mount->rlock, "mount rlock");
4902 	if (status < B_OK)
4903 		goto err4;
4904 
4905 	// initialize structure
4906 	mount->id = sNextMountID++;
4907 	mount->partition = NULL;
4908 	mount->root_vnode = NULL;
4909 	mount->covers_vnode = NULL;
4910 	mount->cookie = NULL;
4911 	mount->unmounting = false;
4912 	mount->owns_file_device = false;
4913 
4914 	// insert mount struct into list before we call FS's mount() function
4915 	// so that vnodes can be created for this mount
4916 	mutex_lock(&sMountMutex);
4917 	hash_insert(sMountsTable, mount);
4918 	mutex_unlock(&sMountMutex);
4919 
4920 	vnode_id rootID;
4921 
4922 	if (!sRoot) {
4923 		// we haven't mounted anything yet
4924 		if (strcmp(path, "/") != 0) {
4925 			status = B_ERROR;
4926 			goto err5;
4927 		}
4928 
4929 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
4930 		if (status < 0) {
4931 			// ToDo: why should we hide the error code from the file system here?
4932 			//status = ERR_VFS_GENERAL;
4933 			goto err5;
4934 		}
4935 	} else {
4936 		struct vnode *coveredVnode;
4937 		status = path_to_vnode(path, true, &coveredVnode, NULL, kernel);
4938 		if (status < B_OK)
4939 			goto err5;
4940 
4941 		// make sure covered_vnode is a DIR
4942 		struct stat coveredNodeStat;
4943 		status = FS_CALL(coveredVnode, read_stat)(coveredVnode->mount->cookie,
4944 			coveredVnode->private_node, &coveredNodeStat);
4945 		if (status < B_OK)
4946 			goto err5;
4947 
4948 		if (!S_ISDIR(coveredNodeStat.st_mode)) {
4949 			status = B_NOT_A_DIRECTORY;
4950 			goto err5;
4951 		}
4952 
4953 		if (coveredVnode->mount->root_vnode == coveredVnode) {
4954 			// this is already a mount point
4955 			status = B_BUSY;
4956 			goto err5;
4957 		}
4958 
4959 		mount->covers_vnode = coveredVnode;
4960 
4961 		// mount it
4962 		status = FS_MOUNT_CALL(mount, mount)(mount->id, device, flags, args, &mount->cookie, &rootID);
4963 		if (status < B_OK)
4964 			goto err6;
4965 	}
4966 
4967 	// the root node is supposed to be owned by the file system - it must
4968 	// exist at this point
4969 	mount->root_vnode = lookup_vnode(mount->id, rootID);
4970 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
4971 		panic("fs_mount: file system does not own its root node!\n");
4972 		status = B_ERROR;
4973 		goto err7;
4974 	}
4975 
4976 	// No race here, since fs_mount() is the only function changing
4977 	// covers_vnode (and holds sMountOpLock at that time).
4978 	if (mount->covers_vnode)
4979 		mount->covers_vnode->covered_by = mount->root_vnode;
4980 
4981 	if (!sRoot)
4982 		sRoot = mount->root_vnode;
4983 
4984 	// supply the partition (if any) with the mount cookie and mark it mounted
4985 	if (partition) {
4986 		partition->SetMountCookie(mount->cookie);
4987 		partition->SetVolumeID(mount->id);
4988 
4989 		// keep a partition reference as long as the partition is mounted
4990 		partitionRegistrar.Detach();
4991 		mount->partition = partition;
4992 		mount->owns_file_device = newlyCreatedFileDevice;
4993 		fileDeviceDeleter.id = -1;
4994 	}
4995 
4996 	return mount->id;
4997 
4998 err7:
4999 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5000 err6:
5001 	if (mount->covers_vnode)
5002 		put_vnode(mount->covers_vnode);
5003 err5:
5004 	mutex_lock(&sMountMutex);
5005 	hash_remove(sMountsTable, mount);
5006 	mutex_unlock(&sMountMutex);
5007 
5008 	recursive_lock_destroy(&mount->rlock);
5009 err4:
5010 	put_file_system(mount->fs);
5011 	free(mount->device_name);
5012 err3:
5013 	free(mount->fs_name);
5014 err1:
5015 	free(mount);
5016 
5017 	return status;
5018 }
5019 
5020 
5021 static status_t
5022 fs_unmount(char *path, uint32 flags, bool kernel)
5023 {
5024 	struct fs_mount *mount;
5025 	struct vnode *vnode;
5026 	status_t err;
5027 
5028 	FUNCTION(("vfs_unmount: entry. path = '%s', kernel %d\n", path, kernel));
5029 
5030 	err = path_to_vnode(path, true, &vnode, NULL, kernel);
5031 	if (err < 0)
5032 		return B_ENTRY_NOT_FOUND;
5033 
5034 	RecursiveLocker mountOpLocker(sMountOpLock);
5035 
5036 	mount = find_mount(vnode->device);
5037 	if (!mount)
5038 		panic("vfs_unmount: find_mount() failed on root vnode @%p of mount\n", vnode);
5039 
5040 	if (mount->root_vnode != vnode) {
5041 		// not mountpoint
5042 		put_vnode(vnode);
5043 		return B_BAD_VALUE;
5044 	}
5045 
5046 	// if the volume is associated with a partition, lock the device of the
5047 	// partition as long as we are unmounting
5048 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
5049 	KPartition *partition = mount->partition;
5050 	KDiskDevice *diskDevice = NULL;
5051 	if (partition) {
5052 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
5053 		if (!diskDevice) {
5054 			PRINT(("fs_unmount(): Failed to lock disk device!\n"));
5055 			return B_ERROR;
5056 		}
5057 	}
5058 	DeviceWriteLocker writeLocker(diskDevice, true);
5059 
5060 	// make sure, that the partition is not busy
5061 	if (partition) {
5062 		if (partition->IsBusy() || partition->IsDescendantBusy()) {
5063 			PRINT(("fs_unmount(): Partition is busy.\n"));
5064 			return B_BUSY;
5065 		}
5066 	}
5067 
5068 	// grab the vnode master mutex to keep someone from creating
5069 	// a vnode while we're figuring out if we can continue
5070 	mutex_lock(&sVnodeMutex);
5071 
5072 	// simplify the loop below: we decrement the root vnode ref_count
5073 	// by the known number of references: one for the file system, one
5074 	// from the path_to_vnode() call above
5075 	mount->root_vnode->ref_count -= 2;
5076 
5077 	// cycle through the list of vnodes associated with this mount and
5078 	// make sure all of them are not busy or have refs on them
5079 	vnode = NULL;
5080 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5081 		if (vnode->busy || vnode->ref_count != 0) {
5082 			// there are still vnodes in use on this mount, so we cannot unmount yet
5083 			// ToDo: cut read/write access file descriptors, depending on the B_FORCE_UNMOUNT flag
5084 			mount->root_vnode->ref_count += 2;
5085 			mutex_unlock(&sVnodeMutex);
5086 			put_vnode(mount->root_vnode);
5087 
5088 			return B_BUSY;
5089 		}
5090 	}
5091 
5092 	// we can safely continue, mark all of the vnodes busy and this mount
5093 	// structure in unmounting state
5094 	mount->unmounting = true;
5095 
5096 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5097 		vnode->busy = true;
5098 	}
5099 
5100 	mutex_unlock(&sVnodeMutex);
5101 
5102 	mount->covers_vnode->covered_by = NULL;
5103 	put_vnode(mount->covers_vnode);
5104 
5105 	// Free all vnodes associated with this mount.
5106 	// They will be removed from the mount list by free_vnode(), so
5107 	// we don't have to do this.
5108 	while ((vnode = (struct vnode *)list_get_first_item(&mount->vnodes)) != NULL) {
5109 		free_vnode(vnode, false);
5110 	}
5111 
5112 	// remove the mount structure from the hash table
5113 	mutex_lock(&sMountMutex);
5114 	hash_remove(sMountsTable, mount);
5115 	mutex_unlock(&sMountMutex);
5116 
5117 	mountOpLocker.Unlock();
5118 
5119 	FS_MOUNT_CALL(mount, unmount)(mount->cookie);
5120 
5121 	// release the file system
5122 	put_file_system(mount->fs);
5123 
5124 	// dereference the partition and mark it unmounted
5125 	if (partition) {
5126 		partition->SetVolumeID(-1);
5127 		partition->SetMountCookie(NULL);
5128 
5129 		if (mount->owns_file_device)
5130 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
5131 		partition->Unregister();
5132 	}
5133 
5134 	free(mount->device_name);
5135 	free(mount->fs_name);
5136 	free(mount);
5137 
5138 	return B_OK;
5139 }
5140 
5141 
5142 static status_t
5143 fs_sync(dev_t device)
5144 {
5145 	struct fs_mount *mount;
5146 	status_t status = get_mount(device, &mount);
5147 	if (status < B_OK)
5148 		return status;
5149 
5150 	mutex_lock(&sMountMutex);
5151 
5152 	if (FS_MOUNT_CALL(mount, sync))
5153 		status = FS_MOUNT_CALL(mount, sync)(mount->cookie);
5154 
5155 	mutex_unlock(&sMountMutex);
5156 
5157 	// synchronize all vnodes
5158 	recursive_lock_lock(&mount->rlock);
5159 
5160 	struct vnode *vnode = NULL;
5161 	while ((vnode = (struct vnode *)list_get_next_item(&mount->vnodes, vnode)) != NULL) {
5162 		if (vnode->cache)
5163 			vm_cache_write_modified(vnode->cache);
5164 	}
5165 
5166 	recursive_lock_unlock(&mount->rlock);
5167 	put_mount(mount);
5168 	return status;
5169 }
5170 
5171 
5172 static status_t
5173 fs_read_info(dev_t device, struct fs_info *info)
5174 {
5175 	struct fs_mount *mount;
5176 	status_t status = get_mount(device, &mount);
5177 	if (status < B_OK)
5178 		return status;
5179 
5180 	// fill in info the file system doesn't (have to) know about
5181 	memset(info, 0, sizeof(struct fs_info));
5182 	info->dev = mount->id;
5183 	info->root = mount->root_vnode->id;
5184 	strlcpy(info->fsh_name, mount->fs_name, sizeof(info->fsh_name));
5185 	if (mount->device_name != NULL)
5186 		strlcpy(info->device_name, mount->device_name, sizeof(info->device_name));
5187 
5188 	if (FS_MOUNT_CALL(mount, read_fs_info))
5189 		status = FS_MOUNT_CALL(mount, read_fs_info)(mount->cookie, info);
5190 
5191 	// if the call is not supported by the file system, there are still
5192 	// the parts that we filled out ourselves
5193 
5194 	put_mount(mount);
5195 	return status;
5196 }
5197 
5198 
5199 static status_t
5200 fs_write_info(dev_t device, const struct fs_info *info, int mask)
5201 {
5202 	struct fs_mount *mount;
5203 	status_t status = get_mount(device, &mount);
5204 	if (status < B_OK)
5205 		return status;
5206 
5207 	if (FS_MOUNT_CALL(mount, write_fs_info))
5208 		status = FS_MOUNT_CALL(mount, write_fs_info)(mount->cookie, info, mask);
5209 	else
5210 		status = EROFS;
5211 
5212 	put_mount(mount);
5213 	return status;
5214 }
5215 
5216 
5217 static dev_t
5218 fs_next_device(int32 *_cookie)
5219 {
5220 	struct fs_mount *mount = NULL;
5221 	dev_t device = *_cookie;
5222 
5223 	mutex_lock(&sMountMutex);
5224 
5225 	// Since device IDs are assigned sequentially, this algorithm
5226 	// does work good enough. It makes sure that the device list
5227 	// returned is sorted, and that no device is skipped when an
5228 	// already visited device got unmounted.
5229 
5230 	while (device < sNextMountID) {
5231 		mount = find_mount(device++);
5232 		if (mount != NULL && mount->cookie != NULL)
5233 			break;
5234 	}
5235 
5236 	*_cookie = device;
5237 
5238 	if (mount != NULL)
5239 		device = mount->id;
5240 	else
5241 		device = B_BAD_VALUE;
5242 
5243 	mutex_unlock(&sMountMutex);
5244 
5245 	return device;
5246 }
5247 
5248 
5249 static status_t
5250 get_cwd(char *buffer, size_t size, bool kernel)
5251 {
5252 	// Get current working directory from io context
5253 	struct io_context *context = get_current_io_context(kernel);
5254 	status_t status;
5255 
5256 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
5257 
5258 	mutex_lock(&context->io_mutex);
5259 
5260 	if (context->cwd)
5261 		status = dir_vnode_to_path(context->cwd, buffer, size);
5262 	else
5263 		status = B_ERROR;
5264 
5265 	mutex_unlock(&context->io_mutex);
5266 	return status;
5267 }
5268 
5269 
5270 static status_t
5271 set_cwd(int fd, char *path, bool kernel)
5272 {
5273 	struct io_context *context;
5274 	struct vnode *vnode = NULL;
5275 	struct vnode *oldDirectory;
5276 	struct stat stat;
5277 	status_t status;
5278 
5279 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
5280 
5281 	// Get vnode for passed path, and bail if it failed
5282 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
5283 	if (status < 0)
5284 		return status;
5285 
5286 	status = FS_CALL(vnode, read_stat)(vnode->mount->cookie, vnode->private_node, &stat);
5287 	if (status < 0)
5288 		goto err;
5289 
5290 	if (!S_ISDIR(stat.st_mode)) {
5291 		// nope, can't cwd to here
5292 		status = B_NOT_A_DIRECTORY;
5293 		goto err;
5294 	}
5295 
5296 	// Get current io context and lock
5297 	context = get_current_io_context(kernel);
5298 	mutex_lock(&context->io_mutex);
5299 
5300 	// save the old current working directory first
5301 	oldDirectory = context->cwd;
5302 	context->cwd = vnode;
5303 
5304 	mutex_unlock(&context->io_mutex);
5305 
5306 	if (oldDirectory)
5307 		put_vnode(oldDirectory);
5308 
5309 	return B_NO_ERROR;
5310 
5311 err:
5312 	put_vnode(vnode);
5313 	return status;
5314 }
5315 
5316 
5317 //	#pragma mark -
5318 //	Calls from within the kernel
5319 
5320 
5321 dev_t
5322 _kern_mount(const char *path, const char *device, const char *fsName,
5323 	uint32 flags, const char *args)
5324 {
5325 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5326 	if (pathBuffer.InitCheck() != B_OK)
5327 		return B_NO_MEMORY;
5328 
5329 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
5330 }
5331 
5332 
5333 status_t
5334 _kern_unmount(const char *path, uint32 flags)
5335 {
5336 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5337 	if (pathBuffer.InitCheck() != B_OK)
5338 		return B_NO_MEMORY;
5339 
5340 	return fs_unmount(pathBuffer.LockBuffer(), flags, true);
5341 }
5342 
5343 
5344 status_t
5345 _kern_read_fs_info(dev_t device, struct fs_info *info)
5346 {
5347 	if (info == NULL)
5348 		return B_BAD_VALUE;
5349 
5350 	return fs_read_info(device, info);
5351 }
5352 
5353 
5354 status_t
5355 _kern_write_fs_info(dev_t device, const struct fs_info *info, int mask)
5356 {
5357 	if (info == NULL)
5358 		return B_BAD_VALUE;
5359 
5360 	return fs_write_info(device, info, mask);
5361 }
5362 
5363 
5364 status_t
5365 _kern_sync(void)
5366 {
5367 	// Note: _kern_sync() is also called from _user_sync()
5368 	int32 cookie = 0;
5369 	dev_t device;
5370 	while ((device = next_dev(&cookie)) >= 0) {
5371 		status_t status = fs_sync(device);
5372 		if (status != B_OK && status != B_BAD_VALUE)
5373 			dprintf("sync: device %ld couldn't sync: %s\n", device, strerror(status));
5374 	}
5375 
5376 	return B_OK;
5377 }
5378 
5379 
5380 dev_t
5381 _kern_next_device(int32 *_cookie)
5382 {
5383 	return fs_next_device(_cookie);
5384 }
5385 
5386 
5387 int
5388 _kern_open_entry_ref(dev_t device, ino_t inode, const char *name, int openMode, int perms)
5389 {
5390 	if (openMode & O_CREAT)
5391 		return file_create_entry_ref(device, inode, name, openMode, perms, true);
5392 
5393 	return file_open_entry_ref(device, inode, name, openMode, true);
5394 }
5395 
5396 
5397 /**	\brief Opens a node specified by a FD + path pair.
5398  *
5399  *	At least one of \a fd and \a path must be specified.
5400  *	If only \a fd is given, the function opens the node identified by this
5401  *	FD. If only a path is given, this path is opened. If both are given and
5402  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5403  *	of the directory (!) identified by \a fd.
5404  *
5405  *	\param fd The FD. May be < 0.
5406  *	\param path The absolute or relative path. May be \c NULL.
5407  *	\param openMode The open mode.
5408  *	\return A FD referring to the newly opened node, or an error code,
5409  *			if an error occurs.
5410  */
5411 
5412 int
5413 _kern_open(int fd, const char *path, int openMode, int perms)
5414 {
5415 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5416 	if (pathBuffer.InitCheck() != B_OK)
5417 		return B_NO_MEMORY;
5418 
5419 	if (openMode & O_CREAT)
5420 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
5421 
5422 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
5423 }
5424 
5425 
5426 /**	\brief Opens a directory specified by entry_ref or node_ref.
5427  *
5428  *	The supplied name may be \c NULL, in which case directory identified
5429  *	by \a device and \a inode will be opened. Otherwise \a device and
5430  *	\a inode identify the parent directory of the directory to be opened
5431  *	and \a name its entry name.
5432  *
5433  *	\param device If \a name is specified the ID of the device the parent
5434  *		   directory of the directory to be opened resides on, otherwise
5435  *		   the device of the directory itself.
5436  *	\param inode If \a name is specified the node ID of the parent
5437  *		   directory of the directory to be opened, otherwise node ID of the
5438  *		   directory itself.
5439  *	\param name The entry name of the directory to be opened. If \c NULL,
5440  *		   the \a device + \a inode pair identify the node to be opened.
5441  *	\return The FD of the newly opened directory or an error code, if
5442  *			something went wrong.
5443  */
5444 
5445 int
5446 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char *name)
5447 {
5448 	return dir_open_entry_ref(device, inode, name, true);
5449 }
5450 
5451 
5452 /**	\brief Opens a directory specified by a FD + path pair.
5453  *
5454  *	At least one of \a fd and \a path must be specified.
5455  *	If only \a fd is given, the function opens the directory identified by this
5456  *	FD. If only a path is given, this path is opened. If both are given and
5457  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5458  *	of the directory (!) identified by \a fd.
5459  *
5460  *	\param fd The FD. May be < 0.
5461  *	\param path The absolute or relative path. May be \c NULL.
5462  *	\return A FD referring to the newly opened directory, or an error code,
5463  *			if an error occurs.
5464  */
5465 
5466 int
5467 _kern_open_dir(int fd, const char *path)
5468 {
5469 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5470 	if (pathBuffer.InitCheck() != B_OK)
5471 		return B_NO_MEMORY;
5472 
5473 	return dir_open(fd, pathBuffer.LockBuffer(), true);
5474 }
5475 
5476 
5477 status_t
5478 _kern_fcntl(int fd, int op, uint32 argument)
5479 {
5480 	return common_fcntl(fd, op, argument, true);
5481 }
5482 
5483 
5484 status_t
5485 _kern_fsync(int fd)
5486 {
5487 	return common_sync(fd, true);
5488 }
5489 
5490 
5491 status_t
5492 _kern_lock_node(int fd)
5493 {
5494 	return common_lock_node(fd, true);
5495 }
5496 
5497 
5498 status_t
5499 _kern_unlock_node(int fd)
5500 {
5501 	return common_unlock_node(fd, true);
5502 }
5503 
5504 
5505 status_t
5506 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char *name, int perms)
5507 {
5508 	return dir_create_entry_ref(device, inode, name, perms, true);
5509 }
5510 
5511 
5512 /**	\brief Creates a directory specified by a FD + path pair.
5513  *
5514  *	\a path must always be specified (it contains the name of the new directory
5515  *	at least). If only a path is given, this path identifies the location at
5516  *	which the directory shall be created. If both \a fd and \a path are given and
5517  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5518  *	of the directory (!) identified by \a fd.
5519  *
5520  *	\param fd The FD. May be < 0.
5521  *	\param path The absolute or relative path. Must not be \c NULL.
5522  *	\param perms The access permissions the new directory shall have.
5523  *	\return \c B_OK, if the directory has been created successfully, another
5524  *			error code otherwise.
5525  */
5526 
5527 status_t
5528 _kern_create_dir(int fd, const char *path, int perms)
5529 {
5530 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5531 	if (pathBuffer.InitCheck() != B_OK)
5532 		return B_NO_MEMORY;
5533 
5534 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
5535 }
5536 
5537 
5538 status_t
5539 _kern_remove_dir(const char *path)
5540 {
5541 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5542 	if (pathBuffer.InitCheck() != B_OK)
5543 		return B_NO_MEMORY;
5544 
5545 	return dir_remove(pathBuffer.LockBuffer(), true);
5546 }
5547 
5548 
5549 /**	\brief Reads the contents of a symlink referred to by a FD + path pair.
5550  *
5551  *	At least one of \a fd and \a path must be specified.
5552  *	If only \a fd is given, the function the symlink to be read is the node
5553  *	identified by this FD. If only a path is given, this path identifies the
5554  *	symlink to be read. If both are given and the path is absolute, \a fd is
5555  *	ignored; a relative path is reckoned off of the directory (!) identified
5556  *	by \a fd.
5557  *	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
5558  *	will still be updated to reflect the required buffer size.
5559  *
5560  *	\param fd The FD. May be < 0.
5561  *	\param path The absolute or relative path. May be \c NULL.
5562  *	\param buffer The buffer into which the contents of the symlink shall be
5563  *		   written.
5564  *	\param _bufferSize A pointer to the size of the supplied buffer.
5565  *	\return The length of the link on success or an appropriate error code
5566  */
5567 
5568 status_t
5569 _kern_read_link(int fd, const char *path, char *buffer, size_t *_bufferSize)
5570 {
5571 	status_t status;
5572 
5573 	if (path) {
5574 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5575 		if (pathBuffer.InitCheck() != B_OK)
5576 			return B_NO_MEMORY;
5577 
5578 		return common_read_link(fd, pathBuffer.LockBuffer(),
5579 			buffer, _bufferSize, true);
5580 	}
5581 
5582 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
5583 }
5584 
5585 
5586 status_t
5587 _kern_write_link(const char *path, const char *toPath)
5588 {
5589 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5590 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
5591 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
5592 		return B_NO_MEMORY;
5593 
5594 	char *toBuffer = toPathBuffer.LockBuffer();
5595 
5596 	status_t status = check_path(toBuffer);
5597 	if (status < B_OK)
5598 		return status;
5599 
5600 	return common_write_link(pathBuffer.LockBuffer(), toBuffer, true);
5601 }
5602 
5603 
5604 /**	\brief Creates a symlink specified by a FD + path pair.
5605  *
5606  *	\a path must always be specified (it contains the name of the new symlink
5607  *	at least). If only a path is given, this path identifies the location at
5608  *	which the symlink shall be created. If both \a fd and \a path are given and
5609  *	the path is absolute, \a fd is ignored; a relative path is reckoned off
5610  *	of the directory (!) identified by \a fd.
5611  *
5612  *	\param fd The FD. May be < 0.
5613  *	\param toPath The absolute or relative path. Must not be \c NULL.
5614  *	\param mode The access permissions the new symlink shall have.
5615  *	\return \c B_OK, if the symlink has been created successfully, another
5616  *			error code otherwise.
5617  */
5618 
5619 status_t
5620 _kern_create_symlink(int fd, const char *path, const char *toPath, int mode)
5621 {
5622 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5623 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
5624 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
5625 		return B_NO_MEMORY;
5626 
5627 	char *toBuffer = toPathBuffer.LockBuffer();
5628 
5629 	status_t status = check_path(toBuffer);
5630 	if (status < B_OK)
5631 		return status;
5632 
5633 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
5634 		toBuffer, mode, true);
5635 }
5636 
5637 
5638 status_t
5639 _kern_create_link(const char *path, const char *toPath)
5640 {
5641 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5642 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
5643 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
5644 		return B_NO_MEMORY;
5645 
5646 	return common_create_link(pathBuffer.LockBuffer(),
5647 		toPathBuffer.LockBuffer(), true);
5648 }
5649 
5650 
5651 /**	\brief Removes an entry specified by a FD + path pair from its directory.
5652  *
5653  *	\a path must always be specified (it contains at least the name of the entry
5654  *	to be deleted). If only a path is given, this path identifies the entry
5655  *	directly. If both \a fd and \a path are given and the path is absolute,
5656  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
5657  *	identified by \a fd.
5658  *
5659  *	\param fd The FD. May be < 0.
5660  *	\param path The absolute or relative path. Must not be \c NULL.
5661  *	\return \c B_OK, if the entry has been removed successfully, another
5662  *			error code otherwise.
5663  */
5664 
5665 status_t
5666 _kern_unlink(int fd, const char *path)
5667 {
5668 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5669 	if (pathBuffer.InitCheck() != B_OK)
5670 		return B_NO_MEMORY;
5671 
5672 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
5673 }
5674 
5675 
5676 /**	\brief Moves an entry specified by a FD + path pair to a an entry specified
5677  *		   by another FD + path pair.
5678  *
5679  *	\a oldPath and \a newPath must always be specified (they contain at least
5680  *	the name of the entry). If only a path is given, this path identifies the
5681  *	entry directly. If both a FD and a path are given and the path is absolute,
5682  *	the FD is ignored; a relative path is reckoned off of the directory (!)
5683  *	identified by the respective FD.
5684  *
5685  *	\param oldFD The FD of the old location. May be < 0.
5686  *	\param oldPath The absolute or relative path of the old location. Must not
5687  *		   be \c NULL.
5688  *	\param newFD The FD of the new location. May be < 0.
5689  *	\param newPath The absolute or relative path of the new location. Must not
5690  *		   be \c NULL.
5691  *	\return \c B_OK, if the entry has been moved successfully, another
5692  *			error code otherwise.
5693  */
5694 
5695 status_t
5696 _kern_rename(int oldFD, const char *oldPath, int newFD, const char *newPath)
5697 {
5698 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
5699 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
5700 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
5701 		return B_NO_MEMORY;
5702 
5703 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
5704 		newFD, newPathBuffer.LockBuffer(), true);
5705 }
5706 
5707 
5708 status_t
5709 _kern_access(const char *path, int mode)
5710 {
5711 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
5712 	if (pathBuffer.InitCheck() != B_OK)
5713 		return B_NO_MEMORY;
5714 
5715 	return common_access(pathBuffer.LockBuffer(), mode, true);
5716 }
5717 
5718 
5719 /**	\brief Reads stat data of an entity specified by a FD + path pair.
5720  *
5721  *	If only \a fd is given, the stat operation associated with the type
5722  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
5723  *	given, this path identifies the entry for whose node to retrieve the
5724  *	stat data. If both \a fd and \a path are given and the path is absolute,
5725  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
5726  *	identified by \a fd and specifies the entry whose stat data shall be
5727  *	retrieved.
5728  *
5729  *	\param fd The FD. May be < 0.
5730  *	\param path The absolute or relative path. Must not be \c NULL.
5731  *	\param traverseLeafLink If \a path is given, \c true specifies that the
5732  *		   function shall not stick to symlinks, but traverse them.
5733  *	\param stat The buffer the stat data shall be written into.
5734  *	\param statSize The size of the supplied stat buffer.
5735  *	\return \c B_OK, if the the stat data have been read successfully, another
5736  *			error code otherwise.
5737  */
5738 
status_t
_kern_read_stat(int fd, const char *path, bool traverseLeafLink,
	struct stat *stat, size_t statSize)
{
	// Reads stat data for the node given by fd and/or path. Supports
	// callers compiled against a smaller (older) struct stat: only the
	// first statSize bytes are copied back to them.
	struct stat completeStat;
	struct stat *originalStat = NULL;
	status_t status;

	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions
	// (stat into a full-sized local; remember the caller's buffer so the
	// requested prefix can be copied back at the end)
	if (statSize < sizeof(struct stat)) {
		originalStat = stat;
		stat = &completeStat;
	}

	if (path) {
		// path given: get the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		// not every descriptor type implements fd_read_stat
		if (descriptor->ops->fd_read_stat)
			status = descriptor->ops->fd_read_stat(descriptor, stat);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	// copy only the prefix the caller asked for
	if (status == B_OK && originalStat != NULL)
		memcpy(originalStat, stat, statSize);

	return status;
}
5784 
5785 
5786 /**	\brief Writes stat data of an entity specified by a FD + path pair.
5787  *
5788  *	If only \a fd is given, the stat operation associated with the type
5789  *	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
5790  *	given, this path identifies the entry for whose node to write the
5791  *	stat data. If both \a fd and \a path are given and the path is absolute,
5792  *	\a fd is ignored; a relative path is reckoned off of the directory (!)
5793  *	identified by \a fd and specifies the entry whose stat data shall be
5794  *	written.
5795  *
5796  *	\param fd The FD. May be < 0.
5797  *	\param path The absolute or relative path. Must not be \c NULL.
5798  *	\param traverseLeafLink If \a path is given, \c true specifies that the
5799  *		   function shall not stick to symlinks, but traverse them.
5800  *	\param stat The buffer containing the stat data to be written.
5801  *	\param statSize The size of the supplied stat buffer.
5802  *	\param statMask A mask specifying which parts of the stat data shall be
5803  *		   written.
5804  *	\return \c B_OK, if the the stat data have been written successfully,
5805  *			another error code otherwise.
5806  */
5807 
status_t
_kern_write_stat(int fd, const char *path, bool traverseLeafLink,
	const struct stat *stat, size_t statSize, int statMask)
{
	// Writes stat data for the node given by fd and/or path. Supports
	// callers compiled against a smaller (older) struct stat.
	struct stat completeStat;

	if (statSize > sizeof(struct stat))
		return B_BAD_VALUE;

	// this supports different stat extensions
	// (zero the tail beyond statSize, then copy the caller's prefix in, so
	// the FS layer always sees a full-sized, fully initialized struct)
	if (statSize < sizeof(struct stat)) {
		memset((uint8 *)&completeStat + statSize, 0, sizeof(struct stat) - statSize);
		memcpy(&completeStat, stat, statSize);
		stat = &completeStat;
	}

	status_t status;

	if (path) {
		// path given: write the stat of the node referred to by (fd, path)
		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
		if (pathBuffer.InitCheck() != B_OK)
			return B_NO_MEMORY;

		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
			traverseLeafLink, stat, statMask, true);
	} else {
		// no path given: get the FD and use the FD operation
		struct file_descriptor *descriptor
			= get_fd(get_current_io_context(true), fd);
		if (descriptor == NULL)
			return B_FILE_ERROR;

		// not every descriptor type implements fd_write_stat
		if (descriptor->ops->fd_write_stat)
			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
		else
			status = EOPNOTSUPP;

		put_fd(descriptor);
	}

	return status;
}
5851 
5852 
5853 int
5854 _kern_open_attr_dir(int fd, const char *path)
5855 {
5856 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
5857 	if (pathBuffer.InitCheck() != B_OK)
5858 		return B_NO_MEMORY;
5859 
5860 	if (path != NULL)
5861 		pathBuffer.SetTo(path);
5862 
5863 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
5864 }
5865 
5866 
5867 int
5868 _kern_create_attr(int fd, const char *name, uint32 type, int openMode)
5869 {
5870 	return attr_create(fd, name, type, openMode, true);
5871 }
5872 
5873 
5874 int
5875 _kern_open_attr(int fd, const char *name, int openMode)
5876 {
5877 	return attr_open(fd, name, openMode, true);
5878 }
5879 
5880 
5881 status_t
5882 _kern_remove_attr(int fd, const char *name)
5883 {
5884 	return attr_remove(fd, name, true);
5885 }
5886 
5887 
5888 status_t
5889 _kern_rename_attr(int fromFile, const char *fromName, int toFile, const char *toName)
5890 {
5891 	return attr_rename(fromFile, fromName, toFile, toName, true);
5892 }
5893 
5894 
5895 int
5896 _kern_open_index_dir(dev_t device)
5897 {
5898 	return index_dir_open(device, true);
5899 }
5900 
5901 
5902 status_t
5903 _kern_create_index(dev_t device, const char *name, uint32 type, uint32 flags)
5904 {
5905 	return index_create(device, name, type, flags, true);
5906 }
5907 
5908 
5909 status_t
5910 _kern_read_index_stat(dev_t device, const char *name, struct stat *stat)
5911 {
5912 	return index_name_read_stat(device, name, stat, true);
5913 }
5914 
5915 
5916 status_t
5917 _kern_remove_index(dev_t device, const char *name)
5918 {
5919 	return index_remove(device, name, true);
5920 }
5921 
5922 
5923 status_t
5924 _kern_getcwd(char *buffer, size_t size)
5925 {
5926 	PRINT(("_kern_getcwd: buf %p, %ld\n", buffer, size));
5927 
5928 	// Call vfs to get current working directory
5929 	return get_cwd(buffer, size, true);
5930 }
5931 
5932 
5933 status_t
5934 _kern_setcwd(int fd, const char *path)
5935 {
5936 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
5937 	if (pathBuffer.InitCheck() != B_OK)
5938 		return B_NO_MEMORY;
5939 
5940 	if (path != NULL)
5941 		pathBuffer.SetTo(path);
5942 
5943 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
5944 }
5945 
5946 
5947 //	#pragma mark -
5948 //	Calls from userland (with extra address checks)
5949 
5950 
dev_t
_user_mount(const char *userPath, const char *userDevice, const char *userFileSystem,
	uint32 flags, const char *userArgs)
{
	// Userland entry for mounting a volume: every user pointer is address-
	// checked and copied into kernel memory before fs_mount() runs.
	char fileSystem[B_OS_NAME_LENGTH];
	KPath path, device;
	char *args = NULL;
	status_t status;

	// NOTE(review): userFileSystem/userDevice may be NULL (see the copies
	// below), yet they are passed to IS_USER_ADDRESS() unconditionally —
	// presumably NULL passes that check; verify against IS_USER_ADDRESS().
	if (!IS_USER_ADDRESS(userPath)
		|| !IS_USER_ADDRESS(userFileSystem)
		|| !IS_USER_ADDRESS(userDevice))
		return B_BAD_ADDRESS;

	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
		return B_NO_MEMORY;

	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	// the FS name and device path are optional
	if (userFileSystem != NULL
		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
		return B_BAD_ADDRESS;

	if (userDevice != NULL
		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH) < B_OK)
		return B_BAD_ADDRESS;

	if (userArgs != NULL) {
		// We have no real length restriction, so we need to create
		// a buffer large enough to hold the argument string
		// ToDo: we could think about determinung the length of the string
		//	in userland :)
		// NOTE(review): args is still NULL here — this relies on
		// user_strlcpy() with size 0 only measuring the source length
		// without writing; confirm it tolerates a NULL destination.
		ssize_t length = user_strlcpy(args, userArgs, 0);
		if (length < B_OK)
			return B_BAD_ADDRESS;

		// this is a safety restriction
		if (length > 32 * 1024)
			return B_NAME_TOO_LONG;

		if (length > 0) {
			args = (char *)malloc(length + 1);
			if (args == NULL)
				return B_NO_MEMORY;

			if (user_strlcpy(args, userArgs, length + 1) < B_OK) {
				free(args);
				return B_BAD_ADDRESS;
			}
		}
	}
	// release the buffers locked by the copies above; fs_mount() re-locks
	// the path buffer itself
	path.UnlockBuffer();
	device.UnlockBuffer();

	status = fs_mount(path.LockBuffer(), userDevice != NULL ? device.Path() : NULL,
		userFileSystem ? fileSystem : NULL, flags, args, false);

	// free(NULL) is fine when no args were supplied
	free(args);
	return status;
}
6012 
6013 
6014 status_t
6015 _user_unmount(const char *userPath, uint32 flags)
6016 {
6017 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
6018 	if (pathBuffer.InitCheck() != B_OK)
6019 		return B_NO_MEMORY;
6020 
6021 	char *path = pathBuffer.LockBuffer();
6022 
6023 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6024 		return B_BAD_ADDRESS;
6025 
6026 	return fs_unmount(path, flags, false);
6027 }
6028 
6029 
6030 status_t
6031 _user_read_fs_info(dev_t device, struct fs_info *userInfo)
6032 {
6033 	struct fs_info info;
6034 	status_t status;
6035 
6036 	if (userInfo == NULL)
6037 		return B_BAD_VALUE;
6038 
6039 	if (!IS_USER_ADDRESS(userInfo))
6040 		return B_BAD_ADDRESS;
6041 
6042 	status = fs_read_info(device, &info);
6043 	if (status != B_OK)
6044 		return status;
6045 
6046 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) < B_OK)
6047 		return B_BAD_ADDRESS;
6048 
6049 	return B_OK;
6050 }
6051 
6052 
6053 status_t
6054 _user_write_fs_info(dev_t device, const struct fs_info *userInfo, int mask)
6055 {
6056 	struct fs_info info;
6057 
6058 	if (userInfo == NULL)
6059 		return B_BAD_VALUE;
6060 
6061 	if (!IS_USER_ADDRESS(userInfo)
6062 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) < B_OK)
6063 		return B_BAD_ADDRESS;
6064 
6065 	return fs_write_info(device, &info, mask);
6066 }
6067 
6068 
6069 dev_t
6070 _user_next_device(int32 *_userCookie)
6071 {
6072 	int32 cookie;
6073 	dev_t device;
6074 
6075 	if (!IS_USER_ADDRESS(_userCookie)
6076 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) < B_OK)
6077 		return B_BAD_ADDRESS;
6078 
6079 	device = fs_next_device(&cookie);
6080 
6081 	if (device >= B_OK) {
6082 		// update user cookie
6083 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) < B_OK)
6084 			return B_BAD_ADDRESS;
6085 	}
6086 
6087 	return device;
6088 }
6089 
6090 
6091 status_t
6092 _user_sync(void)
6093 {
6094 	return _kern_sync();
6095 }
6096 
6097 
status_t
_user_entry_ref_to_path(dev_t device, ino_t inode, const char *leaf,
	char *userPath, size_t pathLength)
{
	// Converts an entry_ref/node_ref to an absolute path and copies it to
	// the userland buffer. leaf may be NULL (then the node itself is meant).
	char path[B_PATH_NAME_LENGTH + 1];
	struct vnode *vnode;
	status_t status;

	if (!IS_USER_ADDRESS(userPath))
		return B_BAD_ADDRESS;

	// copy the leaf name onto the stack
	char stackLeaf[B_FILE_NAME_LENGTH];
	if (leaf) {
		if (!IS_USER_ADDRESS(leaf))
			return B_BAD_ADDRESS;

		int len = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
		if (len < 0)
			return len;
		if (len >= B_FILE_NAME_LENGTH)
			return B_NAME_TOO_LONG;
		leaf = stackLeaf;

		// filter invalid leaf names
		if (leaf[0] == '\0' || strchr(leaf, '/'))
			return B_BAD_VALUE;
	}

	// get the vnode matching the dir's node_ref
	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
		// special cases "." and "..": we can directly get the vnode of the
		// referenced directory
		// (leaf is cleared so it is not appended to the path again below)
		status = entry_ref_to_vnode(device, inode, leaf, &vnode);
		leaf = NULL;
	} else
		status = get_vnode(device, inode, &vnode, false);
	if (status < B_OK)
		return status;

	// get the directory path
	status = dir_vnode_to_path(vnode, path, sizeof(path));
	put_vnode(vnode);
		// we don't need the vnode anymore
	if (status < B_OK)
		return status;

	// append the leaf name
	if (leaf) {
		// insert a directory separator if this is not the file system root
		if ((strcmp(path, "/") && strlcat(path, "/", sizeof(path)) >= sizeof(path))
			|| strlcat(path, leaf, sizeof(path)) >= sizeof(path)) {
			return B_NAME_TOO_LONG;
		}
	}

	// copy the result back to userland, reporting truncation
	int len = user_strlcpy(userPath, path, pathLength);
	if (len < 0)
		return len;
	if (len >= (int)pathLength)
		return B_BUFFER_OVERFLOW;
	return B_OK;
}
6161 
6162 
6163 int
6164 _user_open_entry_ref(dev_t device, ino_t inode, const char *userName,
6165 	int openMode, int perms)
6166 {
6167 	char name[B_FILE_NAME_LENGTH];
6168 	int status;
6169 
6170 	if (!IS_USER_ADDRESS(userName))
6171 		return B_BAD_ADDRESS;
6172 
6173 	status = user_strlcpy(name, userName, sizeof(name));
6174 	if (status < B_OK)
6175 		return status;
6176 
6177 	if (openMode & O_CREAT)
6178 		return file_create_entry_ref(device, inode, name, openMode, perms, false);
6179 
6180 	return file_open_entry_ref(device, inode, name, openMode, false);
6181 }
6182 
6183 
6184 int
6185 _user_open(int fd, const char *userPath, int openMode, int perms)
6186 {
6187 	char path[B_PATH_NAME_LENGTH + 1];
6188 	int status;
6189 
6190 	if (!IS_USER_ADDRESS(userPath))
6191 		return B_BAD_ADDRESS;
6192 
6193 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6194 	if (status < 0)
6195 		return status;
6196 
6197 	if (openMode & O_CREAT)
6198 		return file_create(fd, path, openMode, perms, false);
6199 
6200 	return file_open(fd, path, openMode, false);
6201 }
6202 
6203 
6204 int
6205 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char *uname)
6206 {
6207 	if (uname) {
6208 		char name[B_FILE_NAME_LENGTH];
6209 
6210 		if (!IS_USER_ADDRESS(uname))
6211 			return B_BAD_ADDRESS;
6212 
6213 		int status = user_strlcpy(name, uname, sizeof(name));
6214 		if (status < B_OK)
6215 			return status;
6216 
6217 		return dir_open_entry_ref(device, inode, name, false);
6218 	}
6219 	return dir_open_entry_ref(device, inode, NULL, false);
6220 }
6221 
6222 
6223 int
6224 _user_open_dir(int fd, const char *userPath)
6225 {
6226 	char path[B_PATH_NAME_LENGTH + 1];
6227 	int status;
6228 
6229 	if (!IS_USER_ADDRESS(userPath))
6230 		return B_BAD_ADDRESS;
6231 
6232 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6233 	if (status < 0)
6234 		return status;
6235 
6236 	return dir_open(fd, path, false);
6237 }
6238 
6239 
6240 /**	\brief Opens a directory's parent directory and returns the entry name
6241  *		   of the former.
6242  *
6243  *	Aside from that is returns the directory's entry name, this method is
6244  *	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
6245  *	equivalent, if \a userName is \c NULL.
6246  *
6247  *	If a name buffer is supplied and the name does not fit the buffer, the
6248  *	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
6249  *
6250  *	\param fd A FD referring to a directory.
6251  *	\param userName Buffer the directory's entry name shall be written into.
6252  *		   May be \c NULL.
6253  *	\param nameLength Size of the name buffer.
6254  *	\return The file descriptor of the opened parent directory, if everything
6255  *			went fine, an error code otherwise.
6256  */
6257 
int
_user_open_parent_dir(int fd, char *userName, size_t nameLength)
{
	// Opens fd's parent directory ("..") and optionally reports the entry
	// name of fd's directory inside that parent.
	bool kernel = false;

	if (userName && !IS_USER_ADDRESS(userName))
		return B_BAD_ADDRESS;

	// open the parent dir
	int parentFD = dir_open(fd, "..", kernel);
	if (parentFD < 0)
		return parentFD;
	// closes parentFD automatically on every error path below
	FDCloser fdCloser(parentFD, kernel);

	if (userName) {
		// get the vnodes
		struct vnode *parentVNode = get_vnode_from_fd(parentFD, kernel);
		struct vnode *dirVNode = get_vnode_from_fd(fd, kernel);
		// release the vnode references automatically on return
		VNodePutter parentVNodePutter(parentVNode);
		VNodePutter dirVNodePutter(dirVNode);
		if (!parentVNode || !dirVNode)
			return B_FILE_ERROR;

		// get the vnode name
		char name[B_FILE_NAME_LENGTH];
		status_t status = get_vnode_name(dirVNode, parentVNode,
			name, sizeof(name));
		if (status != B_OK)
			return status;

		// copy the name to the userland buffer
		int len = user_strlcpy(userName, name, nameLength);
		if (len < 0)
			return len;
		if (len >= (int)nameLength)
			return B_BUFFER_OVERFLOW;
	}

	// success: keep the parent FD open and hand it to the caller
	return fdCloser.Detach();
}
6298 
6299 
6300 status_t
6301 _user_fcntl(int fd, int op, uint32 argument)
6302 {
6303 	return common_fcntl(fd, op, argument, false);
6304 }
6305 
6306 
6307 status_t
6308 _user_fsync(int fd)
6309 {
6310 	return common_sync(fd, false);
6311 }
6312 
6313 
6314 status_t
6315 _user_lock_node(int fd)
6316 {
6317 	return common_lock_node(fd, false);
6318 }
6319 
6320 
6321 status_t
6322 _user_unlock_node(int fd)
6323 {
6324 	return common_unlock_node(fd, false);
6325 }
6326 
6327 
6328 status_t
6329 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char *userName, int perms)
6330 {
6331 	char name[B_FILE_NAME_LENGTH];
6332 	status_t status;
6333 
6334 	if (!IS_USER_ADDRESS(userName))
6335 		return B_BAD_ADDRESS;
6336 
6337 	status = user_strlcpy(name, userName, sizeof(name));
6338 	if (status < 0)
6339 		return status;
6340 
6341 	return dir_create_entry_ref(device, inode, name, perms, false);
6342 }
6343 
6344 
6345 status_t
6346 _user_create_dir(int fd, const char *userPath, int perms)
6347 {
6348 	char path[B_PATH_NAME_LENGTH + 1];
6349 	status_t status;
6350 
6351 	if (!IS_USER_ADDRESS(userPath))
6352 		return B_BAD_ADDRESS;
6353 
6354 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6355 	if (status < 0)
6356 		return status;
6357 
6358 	return dir_create(fd, path, perms, false);
6359 }
6360 
6361 
6362 status_t
6363 _user_remove_dir(const char *userPath)
6364 {
6365 	char path[B_PATH_NAME_LENGTH + 1];
6366 	status_t status;
6367 
6368 	if (!IS_USER_ADDRESS(userPath))
6369 		return B_BAD_ADDRESS;
6370 
6371 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6372 	if (status < 0)
6373 		return status;
6374 
6375 	return dir_remove(path, false);
6376 }
6377 
6378 
6379 status_t
6380 _user_read_link(int fd, const char *userPath, char *userBuffer, size_t *userBufferSize)
6381 {
6382 	char path[B_PATH_NAME_LENGTH + 1];
6383 	char buffer[B_PATH_NAME_LENGTH];
6384 	size_t bufferSize;
6385 	status_t status;
6386 
6387 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
6388 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) < B_OK)
6389 		return B_BAD_ADDRESS;
6390 
6391 	if (userPath) {
6392 		if (!IS_USER_ADDRESS(userPath))
6393 			return B_BAD_ADDRESS;
6394 
6395 		status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6396 		if (status < 0)
6397 			return status;
6398 
6399 		if (bufferSize > B_PATH_NAME_LENGTH)
6400 			bufferSize = B_PATH_NAME_LENGTH;
6401 	}
6402 
6403 	status = common_read_link(fd, userPath ? path : NULL, buffer, &bufferSize, false);
6404 
6405 	// we also update the bufferSize in case of errors
6406 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
6407 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) < B_OK)
6408 		return B_BAD_ADDRESS;
6409 
6410 	if (status < B_OK)
6411 		return status;
6412 
6413 	if (user_strlcpy(userBuffer, buffer, bufferSize) < 0)
6414 		return B_BAD_ADDRESS;
6415 
6416 	return B_OK;
6417 }
6418 
6419 
6420 status_t
6421 _user_write_link(const char *userPath, const char *userToPath)
6422 {
6423 	char path[B_PATH_NAME_LENGTH + 1];
6424 	char toPath[B_PATH_NAME_LENGTH + 1];
6425 	status_t status;
6426 
6427 	if (!IS_USER_ADDRESS(userPath)
6428 		|| !IS_USER_ADDRESS(userToPath))
6429 		return B_BAD_ADDRESS;
6430 
6431 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6432 	if (status < 0)
6433 		return status;
6434 
6435 	status = user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH);
6436 	if (status < 0)
6437 		return status;
6438 
6439 	status = check_path(toPath);
6440 	if (status < B_OK)
6441 		return status;
6442 
6443 	return common_write_link(path, toPath, false);
6444 }
6445 
6446 
6447 status_t
6448 _user_create_symlink(int fd, const char *userPath, const char *userToPath,
6449 	int mode)
6450 {
6451 	char path[B_PATH_NAME_LENGTH + 1];
6452 	char toPath[B_PATH_NAME_LENGTH + 1];
6453 	status_t status;
6454 
6455 	if (!IS_USER_ADDRESS(userPath)
6456 		|| !IS_USER_ADDRESS(userToPath))
6457 		return B_BAD_ADDRESS;
6458 
6459 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6460 	if (status < 0)
6461 		return status;
6462 
6463 	status = user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH);
6464 	if (status < 0)
6465 		return status;
6466 
6467 	status = check_path(toPath);
6468 	if (status < B_OK)
6469 		return status;
6470 
6471 	return common_create_symlink(fd, path, toPath, mode, false);
6472 }
6473 
6474 
6475 status_t
6476 _user_create_link(const char *userPath, const char *userToPath)
6477 {
6478 	char path[B_PATH_NAME_LENGTH + 1];
6479 	char toPath[B_PATH_NAME_LENGTH + 1];
6480 	status_t status;
6481 
6482 	if (!IS_USER_ADDRESS(userPath)
6483 		|| !IS_USER_ADDRESS(userToPath))
6484 		return B_BAD_ADDRESS;
6485 
6486 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6487 	if (status < 0)
6488 		return status;
6489 
6490 	status = user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH);
6491 	if (status < 0)
6492 		return status;
6493 
6494 	status = check_path(toPath);
6495 	if (status < B_OK)
6496 		return status;
6497 
6498 	return common_create_link(path, toPath, false);
6499 }
6500 
6501 
6502 status_t
6503 _user_unlink(int fd, const char *userPath)
6504 {
6505 	char path[B_PATH_NAME_LENGTH + 1];
6506 	status_t status;
6507 
6508 	if (!IS_USER_ADDRESS(userPath))
6509 		return B_BAD_ADDRESS;
6510 
6511 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6512 	if (status < 0)
6513 		return status;
6514 
6515 	return common_unlink(fd, path, false);
6516 }
6517 
6518 
6519 status_t
6520 _user_rename(int oldFD, const char *userOldPath, int newFD,
6521 	const char *userNewPath)
6522 {
6523 	char oldPath[B_PATH_NAME_LENGTH + 1];
6524 	char newPath[B_PATH_NAME_LENGTH + 1];
6525 	status_t status;
6526 
6527 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
6528 		return B_BAD_ADDRESS;
6529 
6530 	status = user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH);
6531 	if (status < 0)
6532 		return status;
6533 
6534 	status = user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH);
6535 	if (status < 0)
6536 		return status;
6537 
6538 	return common_rename(oldFD, oldPath, newFD, newPath, false);
6539 }
6540 
6541 
6542 status_t
6543 _user_access(const char *userPath, int mode)
6544 {
6545 	char path[B_PATH_NAME_LENGTH + 1];
6546 	status_t status;
6547 
6548 	if (!IS_USER_ADDRESS(userPath))
6549 		return B_BAD_ADDRESS;
6550 
6551 	status = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6552 	if (status < 0)
6553 		return status;
6554 
6555 	return common_access(path, mode, false);
6556 }
6557 
6558 
6559 status_t
6560 _user_read_stat(int fd, const char *userPath, bool traverseLink,
6561 	struct stat *userStat, size_t statSize)
6562 {
6563 	struct stat stat;
6564 	status_t status;
6565 
6566 	if (statSize > sizeof(struct stat))
6567 		return B_BAD_VALUE;
6568 
6569 	if (!IS_USER_ADDRESS(userStat))
6570 		return B_BAD_ADDRESS;
6571 
6572 	if (userPath) {
6573 		// path given: get the stat of the node referred to by (fd, path)
6574 		char path[B_PATH_NAME_LENGTH + 1];
6575 		if (!IS_USER_ADDRESS(userPath))
6576 			return B_BAD_ADDRESS;
6577 		int len = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6578 		if (len < 0)
6579 			return len;
6580 		if (len >= B_PATH_NAME_LENGTH)
6581 			return B_NAME_TOO_LONG;
6582 
6583 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
6584 	} else {
6585 		// no path given: get the FD and use the FD operation
6586 		struct file_descriptor *descriptor
6587 			= get_fd(get_current_io_context(false), fd);
6588 		if (descriptor == NULL)
6589 			return B_FILE_ERROR;
6590 
6591 		if (descriptor->ops->fd_read_stat)
6592 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
6593 		else
6594 			status = EOPNOTSUPP;
6595 
6596 		put_fd(descriptor);
6597 	}
6598 
6599 	if (status < B_OK)
6600 		return status;
6601 
6602 	return user_memcpy(userStat, &stat, statSize);
6603 }
6604 
6605 
6606 status_t
6607 _user_write_stat(int fd, const char *userPath, bool traverseLeafLink,
6608 	const struct stat *userStat, size_t statSize, int statMask)
6609 {
6610 	char path[B_PATH_NAME_LENGTH + 1];
6611 	struct stat stat;
6612 
6613 	if (statSize > sizeof(struct stat))
6614 		return B_BAD_VALUE;
6615 
6616 	if (!IS_USER_ADDRESS(userStat)
6617 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
6618 		return B_BAD_ADDRESS;
6619 
6620 	// clear additional stat fields
6621 	if (statSize < sizeof(struct stat))
6622 		memset((uint8 *)&stat + statSize, 0, sizeof(struct stat) - statSize);
6623 
6624 	status_t status;
6625 
6626 	if (userPath) {
6627 		// path given: write the stat of the node referred to by (fd, path)
6628 		if (!IS_USER_ADDRESS(userPath))
6629 			return B_BAD_ADDRESS;
6630 		int len = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
6631 		if (len < 0)
6632 			return len;
6633 		if (len >= B_PATH_NAME_LENGTH)
6634 			return B_NAME_TOO_LONG;
6635 
6636 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
6637 			statMask, false);
6638 	} else {
6639 		// no path given: get the FD and use the FD operation
6640 		struct file_descriptor *descriptor
6641 			= get_fd(get_current_io_context(false), fd);
6642 		if (descriptor == NULL)
6643 			return B_FILE_ERROR;
6644 
6645 		if (descriptor->ops->fd_write_stat)
6646 			status = descriptor->ops->fd_write_stat(descriptor, &stat, statMask);
6647 		else
6648 			status = EOPNOTSUPP;
6649 
6650 		put_fd(descriptor);
6651 	}
6652 
6653 	return status;
6654 }
6655 
6656 
6657 int
6658 _user_open_attr_dir(int fd, const char *userPath)
6659 {
6660 	char pathBuffer[B_PATH_NAME_LENGTH + 1];
6661 
6662 	if (userPath != NULL) {
6663 		if (!IS_USER_ADDRESS(userPath)
6664 			|| user_strlcpy(pathBuffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
6665 			return B_BAD_ADDRESS;
6666 	}
6667 
6668 	return attr_dir_open(fd, userPath ? pathBuffer : NULL, false);
6669 }
6670 
6671 
6672 int
6673 _user_create_attr(int fd, const char *userName, uint32 type, int openMode)
6674 {
6675 	char name[B_FILE_NAME_LENGTH];
6676 
6677 	if (!IS_USER_ADDRESS(userName)
6678 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6679 		return B_BAD_ADDRESS;
6680 
6681 	return attr_create(fd, name, type, openMode, false);
6682 }
6683 
6684 
6685 int
6686 _user_open_attr(int fd, const char *userName, int openMode)
6687 {
6688 	char name[B_FILE_NAME_LENGTH];
6689 
6690 	if (!IS_USER_ADDRESS(userName)
6691 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6692 		return B_BAD_ADDRESS;
6693 
6694 	return attr_open(fd, name, openMode, false);
6695 }
6696 
6697 
6698 status_t
6699 _user_remove_attr(int fd, const char *userName)
6700 {
6701 	char name[B_FILE_NAME_LENGTH];
6702 
6703 	if (!IS_USER_ADDRESS(userName)
6704 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6705 		return B_BAD_ADDRESS;
6706 
6707 	return attr_remove(fd, name, false);
6708 }
6709 
6710 
6711 status_t
6712 _user_rename_attr(int fromFile, const char *userFromName, int toFile, const char *userToName)
6713 {
6714 	char fromName[B_FILE_NAME_LENGTH];
6715 	char toName[B_FILE_NAME_LENGTH];
6716 
6717 	if (!IS_USER_ADDRESS(userFromName)
6718 		|| !IS_USER_ADDRESS(userToName))
6719 		return B_BAD_ADDRESS;
6720 
6721 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
6722 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
6723 		return B_BAD_ADDRESS;
6724 
6725 	return attr_rename(fromFile, fromName, toFile, toName, false);
6726 }
6727 
6728 
/*!	Syscall: opens the index directory of the volume specified by
	\a device. Returns a new FD on success or a negative error code; no
	user memory is involved, so the call is forwarded directly (with
	kernel == false).
*/
int
_user_open_index_dir(dev_t device)
{
	return index_dir_open(device, false);
}
6734 
6735 
6736 status_t
6737 _user_create_index(dev_t device, const char *userName, uint32 type, uint32 flags)
6738 {
6739 	char name[B_FILE_NAME_LENGTH];
6740 
6741 	if (!IS_USER_ADDRESS(userName)
6742 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6743 		return B_BAD_ADDRESS;
6744 
6745 	return index_create(device, name, type, flags, false);
6746 }
6747 
6748 
6749 status_t
6750 _user_read_index_stat(dev_t device, const char *userName, struct stat *userStat)
6751 {
6752 	char name[B_FILE_NAME_LENGTH];
6753 	struct stat stat;
6754 	status_t status;
6755 
6756 	if (!IS_USER_ADDRESS(userName)
6757 		|| !IS_USER_ADDRESS(userStat)
6758 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6759 		return B_BAD_ADDRESS;
6760 
6761 	status = index_name_read_stat(device, name, &stat, false);
6762 	if (status == B_OK) {
6763 		if (user_memcpy(userStat, &stat, sizeof(stat)) < B_OK)
6764 			return B_BAD_ADDRESS;
6765 	}
6766 
6767 	return status;
6768 }
6769 
6770 
6771 status_t
6772 _user_remove_index(dev_t device, const char *userName)
6773 {
6774 	char name[B_FILE_NAME_LENGTH];
6775 
6776 	if (!IS_USER_ADDRESS(userName)
6777 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
6778 		return B_BAD_ADDRESS;
6779 
6780 	return index_remove(device, name, false);
6781 }
6782 
6783 
6784 status_t
6785 _user_getcwd(char *userBuffer, size_t size)
6786 {
6787 	char buffer[B_PATH_NAME_LENGTH];
6788 	status_t status;
6789 
6790 	PRINT(("user_getcwd: buf %p, %ld\n", userBuffer, size));
6791 
6792 	if (!IS_USER_ADDRESS(userBuffer))
6793 		return B_BAD_ADDRESS;
6794 
6795 	if (size > B_PATH_NAME_LENGTH)
6796 		size = B_PATH_NAME_LENGTH;
6797 
6798 	status = get_cwd(buffer, size, false);
6799 	if (status < 0)
6800 		return status;
6801 
6802 	// Copy back the result
6803 	if (user_strlcpy(userBuffer, buffer, size) < B_OK)
6804 		return B_BAD_ADDRESS;
6805 
6806 	return status;
6807 }
6808 
6809 
6810 status_t
6811 _user_setcwd(int fd, const char *userPath)
6812 {
6813 	char path[B_PATH_NAME_LENGTH];
6814 
6815 	PRINT(("user_setcwd: path = %p\n", userPath));
6816 
6817 	if (userPath != NULL) {
6818 		if (!IS_USER_ADDRESS(userPath)
6819 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
6820 			return B_BAD_ADDRESS;
6821 	}
6822 
6823 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
6824 }
6825 
6826 
6827 int
6828 _user_open_query(dev_t device, const char *userQuery, size_t queryLength,
6829 	uint32 flags, port_id port, int32 token)
6830 {
6831 	char *query;
6832 
6833 	if (device < 0 || userQuery == NULL || queryLength == 0 || queryLength >= 65536)
6834 		return B_BAD_VALUE;
6835 
6836 	query = (char *)malloc(queryLength + 1);
6837 	if (query == NULL)
6838 		return B_NO_MEMORY;
6839 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
6840 		free(query);
6841 		return B_BAD_ADDRESS;
6842 	}
6843 
6844 	int fd = query_open(device, query, flags, port, token, false);
6845 
6846 	free(query);
6847 	return fd;
6848 }
6849