xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 922e7ba1f3228e6f28db69b0ded8f86eb32dea17)
1 /*
2  * Copyright 2005-2011, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2010, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/resource.h>
22 #include <sys/stat.h>
23 #include <unistd.h>
24 
25 #include <fs_attr.h>
26 #include <fs_info.h>
27 #include <fs_interface.h>
28 #include <fs_volume.h>
29 #include <OS.h>
30 #include <StorageDefs.h>
31 
32 #include <AutoDeleter.h>
33 #include <block_cache.h>
34 #include <boot/kernel_args.h>
35 #include <debug_heap.h>
36 #include <disk_device_manager/KDiskDevice.h>
37 #include <disk_device_manager/KDiskDeviceManager.h>
38 #include <disk_device_manager/KDiskDeviceUtils.h>
39 #include <disk_device_manager/KDiskSystem.h>
40 #include <fd.h>
41 #include <file_cache.h>
42 #include <fs/node_monitor.h>
43 #include <khash.h>
44 #include <KPath.h>
45 #include <lock.h>
46 #include <low_resource_manager.h>
47 #include <syscalls.h>
48 #include <syscall_restart.h>
49 #include <tracing.h>
50 #include <util/atomic.h>
51 #include <util/AutoLock.h>
52 #include <util/DoublyLinkedList.h>
53 #include <vfs.h>
54 #include <vm/vm.h>
55 #include <vm/VMCache.h>
56 
57 #include "EntryCache.h"
58 #include "fifo.h"
59 #include "IORequest.h"
60 #include "unused_vnodes.h"
61 #include "vfs_tracing.h"
62 #include "Vnode.h"
63 #include "../cache/vnode_store.h"
64 
65 
66 //#define TRACE_VFS
67 #ifdef TRACE_VFS
68 #	define TRACE(x) dprintf x
69 #	define FUNCTION(x) dprintf x
70 #else
71 #	define TRACE(x) ;
72 #	define FUNCTION(x) ;
73 #endif
74 
75 #define ADD_DEBUGGER_COMMANDS
76 
77 
78 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
79 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
80 
81 #if KDEBUG
82 #	define FS_CALL(vnode, op, params...) \
83 		( HAS_FS_CALL(vnode, op) ? \
84 			vnode->ops->op(vnode->mount->volume, vnode, params) \
85 			: (panic("FS_CALL op " #op " is NULL"), 0))
86 #	define FS_CALL_NO_PARAMS(vnode, op) \
87 		( HAS_FS_CALL(vnode, op) ? \
88 			vnode->ops->op(vnode->mount->volume, vnode) \
89 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
90 #	define FS_MOUNT_CALL(mount, op, params...) \
91 		( HAS_FS_MOUNT_CALL(mount, op) ? \
92 			mount->volume->ops->op(mount->volume, params) \
93 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
94 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
95 		( HAS_FS_MOUNT_CALL(mount, op) ? \
96 			mount->volume->ops->op(mount->volume) \
97 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
98 #else
99 #	define FS_CALL(vnode, op, params...) \
100 			vnode->ops->op(vnode->mount->volume, vnode, params)
101 #	define FS_CALL_NO_PARAMS(vnode, op) \
102 			vnode->ops->op(vnode->mount->volume, vnode)
103 #	define FS_MOUNT_CALL(mount, op, params...) \
104 			mount->volume->ops->op(mount->volume, params)
105 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
106 			mount->volume->ops->op(mount->volume)
107 #endif
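
// Example (illustrative sketch, not part of the original code): the FS_CALL
// macros dispatch to the hooks a file system registers in its fs_vnode_ops
// table. Since hooks are optional, callers guard the call with HAS_FS_CALL
// first, e.g. to stat a node:
//
//	struct stat st;
//	if (!HAS_FS_CALL(vnode, read_stat))
//		return B_NOT_SUPPORTED;
//	status_t status = FS_CALL(vnode, read_stat, &st);
//
// With KDEBUG, calling a missing hook panics instead of dereferencing NULL.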
108 
109 
110 const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd()); this does not depend
	// on PATH_MAX.
113 
114 
115 struct vnode_hash_key {
116 	dev_t	device;
117 	ino_t	vnode;
118 };
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and covers_vnode fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is as soon
126 	as the mount is mounted and it is made sure it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount) (read) access
128 	to those fields is always safe, even without additional locking. Morever
129 	while mounted the mount holds a reference to the covers_vnode, and thus
130 	making the access path vnode->mount->covers_vnode->mount->... safe if a
131 	reference to vnode is held (note that for the root mount covers_vnode
132 	is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		recursive_lock_init(&rlock, "mount rlock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		recursive_lock_destroy(&rlock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	recursive_lock	rlock;	// guards the vnodes list
165 		// TODO: Make this a mutex! It is never used recursively.
166 	struct vnode*	root_vnode;
167 	struct vnode*	covers_vnode;
168 	KPartition*		partition;
169 	VnodeList		vnodes;
170 	EntryCache		entry_cache;
171 	bool			unmounting;
172 	bool			owns_file_device;
173 };
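
// Illustrative note: as the comment above describes, holding a reference to
// any vnode of a mount keeps the mount (and its reference to covers_vnode)
// alive, so a step towards the root is safe without further locking, e.g.:
//
//	struct vnode* mountPoint = vnode->mount->covers_vnode;
//		// NULL only for the root mount ("/")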
174 
175 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
176 	list_link		link;
177 	team_id			team;
178 	pid_t			session;
179 	off_t			start;
180 	off_t			end;
181 	bool			shared;
182 };
183 
184 typedef DoublyLinkedList<advisory_lock> LockList;
185 
186 struct advisory_locking {
187 	sem_id			lock;
188 	sem_id			wait_sem;
189 	LockList		locks;
190 
191 	advisory_locking()
192 		:
193 		lock(-1),
194 		wait_sem(-1)
195 	{
196 	}
197 
198 	~advisory_locking()
199 	{
200 		if (lock >= 0)
201 			delete_sem(lock);
202 		if (wait_sem >= 0)
203 			delete_sem(wait_sem);
204 	}
205 };
206 
207 /*!	\brief Guards sMountsTable.
208 
	The holder is allowed read/write access to sMountsTable.
210 	Manipulation of the fs_mount structures themselves
211 	(and their destruction) requires different locks though.
212 */
213 static mutex sMountMutex = MUTEX_INITIALIZER("vfs_mount_lock");
214 
215 /*!	\brief Guards mount/unmount operations.
216 
	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, locking it ensures that no FS is mounted/unmounted. In
219 	particular this means that
220 	- sMountsTable will not be modified,
221 	- the fields immutable after initialization of the fs_mount structures in
222 	  sMountsTable will not be modified,
223 	- vnode::covered_by of any vnode in sVnodeTable will not be modified.
224 
225 	The thread trying to lock the lock must not hold sVnodeLock or
226 	sMountMutex.
227 */
228 static recursive_lock sMountOpLock;
229 
230 /*!	\brief Guards sVnodeTable.
231 
232 	The holder is allowed read/write access to sVnodeTable and to
233 	any unbusy vnode in that table, save to the immutable fields (device, id,
234 	private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type, may also
	be written while holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by requires write-locking sVnodeLock.
239 
240 	The thread trying to acquire the lock must not hold sMountMutex.
241 	You must not have this lock held when calling create_sem(), as this
242 	might call vfs_free_unused_vnodes() and thus cause a deadlock.
243 */
244 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
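
// The write pattern this implies (and which the code below uses, e.g. in
// dec_vnode_ref_count()): to modify a vnode's flags it suffices to read-lock
// sVnodeLock *and* lock the vnode itself:
//
//	ReadLocker locker(sVnodeLock);
//	AutoLocker<Vnode> nodeLocker(vnode);
//	vnode->SetBusy(true);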
245 
246 /*!	\brief Guards io_context::root.
247 
248 	Must be held when setting or getting the io_context::root field.
249 	The only operation allowed while holding this lock besides getting or
250 	setting the field is inc_vnode_ref_count() on io_context::root.
251 */
252 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
253 
254 
255 #define VNODE_HASH_TABLE_SIZE 1024
256 static hash_table* sVnodeTable;
257 static struct vnode* sRoot;
258 
259 #define MOUNTS_HASH_TABLE_SIZE 16
260 static hash_table* sMountsTable;
261 static dev_t sNextMountID = 1;
262 
263 #define MAX_TEMP_IO_VECS 8
264 
265 mode_t __gUmask = 022;
266 
267 /* function declarations */
268 
269 static void free_unused_vnodes();
270 
271 // file descriptor operation prototypes
272 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
273 	void* buffer, size_t* _bytes);
274 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
275 	const void* buffer, size_t* _bytes);
276 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
277 	int seekType);
278 static void file_free_fd(struct file_descriptor* descriptor);
279 static status_t file_close(struct file_descriptor* descriptor);
280 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
281 	struct selectsync* sync);
282 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
283 	struct selectsync* sync);
284 static status_t dir_read(struct io_context* context,
285 	struct file_descriptor* descriptor, struct dirent* buffer,
286 	size_t bufferSize, uint32* _count);
287 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
288 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
289 static status_t dir_rewind(struct file_descriptor* descriptor);
290 static void dir_free_fd(struct file_descriptor* descriptor);
291 static status_t dir_close(struct file_descriptor* descriptor);
292 static status_t attr_dir_read(struct io_context* context,
293 	struct file_descriptor* descriptor, struct dirent* buffer,
294 	size_t bufferSize, uint32* _count);
295 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
296 static void attr_dir_free_fd(struct file_descriptor* descriptor);
297 static status_t attr_dir_close(struct file_descriptor* descriptor);
298 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
299 	void* buffer, size_t* _bytes);
300 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
301 	const void* buffer, size_t* _bytes);
302 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
303 	int seekType);
304 static void attr_free_fd(struct file_descriptor* descriptor);
305 static status_t attr_close(struct file_descriptor* descriptor);
306 static status_t attr_read_stat(struct file_descriptor* descriptor,
307 	struct stat* statData);
308 static status_t attr_write_stat(struct file_descriptor* descriptor,
309 	const struct stat* stat, int statMask);
310 static status_t index_dir_read(struct io_context* context,
311 	struct file_descriptor* descriptor, struct dirent* buffer,
312 	size_t bufferSize, uint32* _count);
313 static status_t index_dir_rewind(struct file_descriptor* descriptor);
314 static void index_dir_free_fd(struct file_descriptor* descriptor);
315 static status_t index_dir_close(struct file_descriptor* descriptor);
316 static status_t query_read(struct io_context* context,
317 	struct file_descriptor* descriptor, struct dirent* buffer,
318 	size_t bufferSize, uint32* _count);
319 static status_t query_rewind(struct file_descriptor* descriptor);
320 static void query_free_fd(struct file_descriptor* descriptor);
321 static status_t query_close(struct file_descriptor* descriptor);
322 
323 static status_t common_ioctl(struct file_descriptor* descriptor, uint32 op,
324 	void* buffer, size_t length);
325 static status_t common_read_stat(struct file_descriptor* descriptor,
326 	struct stat* statData);
327 static status_t common_write_stat(struct file_descriptor* descriptor,
328 	const struct stat* statData, int statMask);
329 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
330 	struct stat* stat, bool kernel);
331 
332 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
333 	bool traverseLeafLink, int count, bool kernel,
334 	struct vnode** _vnode, ino_t* _parentID);
335 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
336 	size_t bufferSize, bool kernel);
337 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
338 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
339 static void inc_vnode_ref_count(struct vnode* vnode);
340 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
341 	bool reenter);
342 static inline void put_vnode(struct vnode* vnode);
343 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
344 	bool kernel);
345 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
346 
347 
348 static struct fd_ops sFileOps = {
349 	file_read,
350 	file_write,
351 	file_seek,
352 	common_ioctl,
353 	NULL,		// set_flags
354 	file_select,
355 	file_deselect,
356 	NULL,		// read_dir()
357 	NULL,		// rewind_dir()
358 	common_read_stat,
359 	common_write_stat,
360 	file_close,
361 	file_free_fd
362 };
363 
364 static struct fd_ops sDirectoryOps = {
365 	NULL,		// read()
366 	NULL,		// write()
367 	NULL,		// seek()
368 	common_ioctl,
369 	NULL,		// set_flags
370 	NULL,		// select()
371 	NULL,		// deselect()
372 	dir_read,
373 	dir_rewind,
374 	common_read_stat,
375 	common_write_stat,
376 	dir_close,
377 	dir_free_fd
378 };
379 
380 static struct fd_ops sAttributeDirectoryOps = {
381 	NULL,		// read()
382 	NULL,		// write()
383 	NULL,		// seek()
384 	common_ioctl,
385 	NULL,		// set_flags
386 	NULL,		// select()
387 	NULL,		// deselect()
388 	attr_dir_read,
389 	attr_dir_rewind,
390 	common_read_stat,
391 	common_write_stat,
392 	attr_dir_close,
393 	attr_dir_free_fd
394 };
395 
396 static struct fd_ops sAttributeOps = {
397 	attr_read,
398 	attr_write,
399 	attr_seek,
400 	common_ioctl,
401 	NULL,		// set_flags
402 	NULL,		// select()
403 	NULL,		// deselect()
404 	NULL,		// read_dir()
405 	NULL,		// rewind_dir()
406 	attr_read_stat,
407 	attr_write_stat,
408 	attr_close,
409 	attr_free_fd
410 };
411 
412 static struct fd_ops sIndexDirectoryOps = {
413 	NULL,		// read()
414 	NULL,		// write()
415 	NULL,		// seek()
416 	NULL,		// ioctl()
417 	NULL,		// set_flags
418 	NULL,		// select()
419 	NULL,		// deselect()
420 	index_dir_read,
421 	index_dir_rewind,
422 	NULL,		// read_stat()
423 	NULL,		// write_stat()
424 	index_dir_close,
425 	index_dir_free_fd
426 };
427 
428 #if 0
429 static struct fd_ops sIndexOps = {
430 	NULL,		// read()
431 	NULL,		// write()
432 	NULL,		// seek()
433 	NULL,		// ioctl()
434 	NULL,		// set_flags
435 	NULL,		// select()
436 	NULL,		// deselect()
437 	NULL,		// dir_read()
438 	NULL,		// dir_rewind()
439 	index_read_stat,	// read_stat()
440 	NULL,		// write_stat()
441 	NULL,		// dir_close()
442 	NULL		// free_fd()
443 };
444 #endif
445 
446 static struct fd_ops sQueryOps = {
447 	NULL,		// read()
448 	NULL,		// write()
449 	NULL,		// seek()
450 	NULL,		// ioctl()
451 	NULL,		// set_flags
452 	NULL,		// select()
453 	NULL,		// deselect()
454 	query_read,
455 	query_rewind,
456 	NULL,		// read_stat()
457 	NULL,		// write_stat()
458 	query_close,
459 	query_free_fd
460 };
461 
462 
463 // VNodePutter
464 class VNodePutter {
465 public:
466 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
467 
468 	~VNodePutter()
469 	{
470 		Put();
471 	}
472 
473 	void SetTo(struct vnode* vnode)
474 	{
475 		Put();
476 		fVNode = vnode;
477 	}
478 
479 	void Put()
480 	{
481 		if (fVNode) {
482 			put_vnode(fVNode);
483 			fVNode = NULL;
484 		}
485 	}
486 
487 	struct vnode* Detach()
488 	{
489 		struct vnode* vnode = fVNode;
490 		fVNode = NULL;
491 		return vnode;
492 	}
493 
494 private:
495 	struct vnode* fVNode;
496 };
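
// Typical (sketched) usage of VNodePutter: adopt a reference right after a
// successful lookup, so that every early return releases it automatically.
//
//	struct vnode* vnode;
//	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL,
//		kernel);
//	if (status != B_OK)
//		return status;
//	VNodePutter putter(vnode);
//	// ... on success, putter.Detach() keeps the reference alive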
497 
498 
499 class FDCloser {
500 public:
501 	FDCloser() : fFD(-1), fKernel(true) {}
502 
503 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
504 
505 	~FDCloser()
506 	{
507 		Close();
508 	}
509 
510 	void SetTo(int fd, bool kernel)
511 	{
512 		Close();
513 		fFD = fd;
514 		fKernel = kernel;
515 	}
516 
517 	void Close()
518 	{
519 		if (fFD >= 0) {
520 			if (fKernel)
521 				_kern_close(fFD);
522 			else
523 				_user_close(fFD);
524 			fFD = -1;
525 		}
526 	}
527 
528 	int Detach()
529 	{
530 		int fd = fFD;
531 		fFD = -1;
532 		return fd;
533 	}
534 
535 private:
536 	int		fFD;
537 	bool	fKernel;
538 };
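
// FDCloser plays the same role for file descriptors. A sketched pattern,
// assuming a freshly opened descriptor:
//
//	FDCloser fdCloser(fd, kernel);
//	// ... further setup that may fail ...
//	return fdCloser.Detach();
//		// success -- hand the descriptor to the caller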
539 
540 
541 #if VFS_PAGES_IO_TRACING
542 
543 namespace VFSPagesIOTracing {
544 
545 class PagesIOTraceEntry : public AbstractTraceEntry {
546 protected:
547 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
548 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
549 		status_t status, generic_size_t bytesTransferred)
550 		:
551 		fVnode(vnode),
552 		fMountID(vnode->mount->id),
553 		fNodeID(vnode->id),
554 		fCookie(cookie),
555 		fPos(pos),
556 		fCount(count),
557 		fFlags(flags),
558 		fBytesRequested(bytesRequested),
559 		fStatus(status),
560 		fBytesTransferred(bytesTransferred)
561 	{
562 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs, sizeof(generic_io_vec) * count,
563 			false);
564 	}
565 
566 	void AddDump(TraceOutput& out, const char* mode)
567 	{
568 		out.Print("vfs pages io %5s: vnode: %p (%ld, %lld), cookie: %p, "
569 			"pos: %lld, size: %llu, vecs: {", mode, fVnode, fMountID, fNodeID,
570 			fCookie, fPos, (uint64)fBytesRequested);
571 
572 		if (fVecs != NULL) {
573 			for (uint32 i = 0; i < fCount; i++) {
574 				if (i > 0)
575 					out.Print(", ");
576 				out.Print("(%llx, %llu)", (uint64)fVecs[i].base, (uint64)fVecs[i].length);
577 			}
578 		}
579 
580 		out.Print("}, flags: %#lx -> status: %#lx, transferred: %llu",
581 			fFlags, fStatus, (uint64)fBytesTransferred);
582 	}
583 
584 protected:
585 	struct vnode*	fVnode;
586 	dev_t			fMountID;
587 	ino_t			fNodeID;
588 	void*			fCookie;
589 	off_t			fPos;
	generic_io_vec*		fVecs;
	uint32				fCount;
	uint32				fFlags;
	generic_size_t		fBytesRequested;
	status_t			fStatus;
	generic_size_t		fBytesTransferred;
596 };
597 
598 
599 class ReadPages : public PagesIOTraceEntry {
600 public:
601 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
602 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
603 		status_t status, generic_size_t bytesTransferred)
604 		:
605 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
606 			bytesRequested, status, bytesTransferred)
607 	{
608 		Initialized();
609 	}
610 
611 	virtual void AddDump(TraceOutput& out)
612 	{
613 		PagesIOTraceEntry::AddDump(out, "read");
614 	}
615 };
616 
617 
618 class WritePages : public PagesIOTraceEntry {
619 public:
620 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
621 		const generic_io_vec* vecs, uint32 count, uint32 flags, generic_size_t bytesRequested,
622 		status_t status, generic_size_t bytesTransferred)
623 		:
624 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
625 			bytesRequested, status, bytesTransferred)
626 	{
627 		Initialized();
628 	}
629 
630 	virtual void AddDump(TraceOutput& out)
631 	{
632 		PagesIOTraceEntry::AddDump(out, "write");
633 	}
634 };
635 
636 }	// namespace VFSPagesIOTracing
637 
638 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
639 #else
640 #	define TPIO(x) ;
641 #endif	// VFS_PAGES_IO_TRACING
642 
643 
644 static int
645 mount_compare(void* _m, const void* _key)
646 {
647 	struct fs_mount* mount = (fs_mount*)_m;
648 	const dev_t* id = (dev_t*)_key;
649 
650 	if (mount->id == *id)
651 		return 0;
652 
653 	return -1;
654 }
655 
656 
657 static uint32
658 mount_hash(void* _m, const void* _key, uint32 range)
659 {
660 	struct fs_mount* mount = (fs_mount*)_m;
661 	const dev_t* id = (dev_t*)_key;
662 
663 	if (mount)
664 		return mount->id % range;
665 
666 	return (uint32)*id % range;
667 }
668 
669 
670 /*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold the sMountMutex lock when you call this function.
672 */
673 static struct fs_mount*
674 find_mount(dev_t id)
675 {
676 	ASSERT_LOCKED_MUTEX(&sMountMutex);
677 
678 	return (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
679 }
680 
681 
682 static status_t
683 get_mount(dev_t id, struct fs_mount** _mount)
684 {
685 	struct fs_mount* mount;
686 
687 	ReadLocker nodeLocker(sVnodeLock);
688 	MutexLocker mountLocker(sMountMutex);
689 
690 	mount = find_mount(id);
691 	if (mount == NULL)
692 		return B_BAD_VALUE;
693 
694 	struct vnode* rootNode = mount->root_vnode;
695 	if (rootNode == NULL || rootNode->IsBusy() || rootNode->ref_count == 0) {
696 		// might have been called during a mount/unmount operation
697 		return B_BUSY;
698 	}
699 
700 	inc_vnode_ref_count(mount->root_vnode);
701 	*_mount = mount;
702 	return B_OK;
703 }
704 
705 
706 static void
707 put_mount(struct fs_mount* mount)
708 {
709 	if (mount)
710 		put_vnode(mount->root_vnode);
711 }
712 
713 
714 /*!	Tries to open the specified file system module.
715 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
716 	Returns a pointer to file system module interface, or NULL if it
717 	could not open the module.
718 */
719 static file_system_module_info*
720 get_file_system(const char* fsName)
721 {
722 	char name[B_FILE_NAME_LENGTH];
723 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
724 		// construct module name if we didn't get one
725 		// (we currently support only one API)
726 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
727 		fsName = NULL;
728 	}
729 
730 	file_system_module_info* info;
731 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
732 		return NULL;
733 
734 	return info;
735 }
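
// For example, get_file_system("bfs") constructs and loads the module
// "file_systems/bfs/v1", while get_file_system("file_systems/bfs/v1")
// passes the given name to get_module() verbatim.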
736 
737 
738 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
739 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
740 	The name is allocated for you, and you have to free() it when you're
741 	done with it.
742 	Returns NULL if the required memory is not available.
743 */
744 static char*
745 get_file_system_name(const char* fsName)
746 {
747 	const size_t length = strlen("file_systems/");
748 
749 	if (strncmp(fsName, "file_systems/", length)) {
750 		// the name already seems to be the module's file name
751 		return strdup(fsName);
752 	}
753 
754 	fsName += length;
755 	const char* end = strchr(fsName, '/');
756 	if (end == NULL) {
757 		// this doesn't seem to be a valid name, but well...
758 		return strdup(fsName);
759 	}
760 
761 	// cut off the trailing /v1
762 
763 	char* name = (char*)malloc(end + 1 - fsName);
764 	if (name == NULL)
765 		return NULL;
766 
767 	strlcpy(name, fsName, end + 1 - fsName);
768 	return name;
769 }
770 
771 
772 /*!	Accepts a list of file system names separated by a colon, one for each
773 	layer and returns the file system name for the specified layer.
774 	The name is allocated for you, and you have to free() it when you're
775 	done with it.
776 	Returns NULL if the required memory is not available or if there is no
777 	name for the specified layer.
778 */
779 static char*
780 get_file_system_name_for_layer(const char* fsNames, int32 layer)
781 {
782 	while (layer >= 0) {
783 		const char* end = strchr(fsNames, ':');
784 		if (end == NULL) {
785 			if (layer == 0)
786 				return strdup(fsNames);
787 			return NULL;
788 		}
789 
790 		if (layer == 0) {
791 			size_t length = end - fsNames + 1;
792 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
794 			return result;
795 		}
796 
797 		fsNames = end + 1;
798 		layer--;
799 	}
800 
801 	return NULL;
802 }
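
// For example, given the (hypothetical) layered stack "ntfs:write_overlay",
// layer 0 yields "ntfs", layer 1 yields "write_overlay", and any higher
// layer yields NULL.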
803 
804 
805 static int
806 vnode_compare(void* _vnode, const void* _key)
807 {
808 	struct vnode* vnode = (struct vnode*)_vnode;
809 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
810 
811 	if (vnode->device == key->device && vnode->id == key->vnode)
812 		return 0;
813 
814 	return -1;
815 }
816 
817 
818 static uint32
819 vnode_hash(void* _vnode, const void* _key, uint32 range)
820 {
821 	struct vnode* vnode = (struct vnode*)_vnode;
822 	const struct vnode_hash_key* key = (vnode_hash_key*)_key;
823 
824 #define VHASH(mountid, vnodeid) \
825 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
826 
827 	if (vnode != NULL)
828 		return VHASH(vnode->device, vnode->id) % range;
829 
830 	return VHASH(key->device, key->vnode) % range;
831 
832 #undef VHASH
833 }
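
// Worked example: VHASH folds the 64 bit node ID by adding its two 32 bit
// halves, then XORs in the mount ID. For device 3 and node ID
// 0x0000000100000002 this gives (0x1 + 0x2) ^ 0x3 == 0, which "% range"
// then maps into the table.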
834 
835 
836 static void
837 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
838 {
839 	RecursiveLocker _(mount->rlock);
840 	mount->vnodes.Add(vnode);
841 }
842 
843 
844 static void
845 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
846 {
847 	RecursiveLocker _(mount->rlock);
848 	mount->vnodes.Remove(vnode);
849 }
850 
851 
852 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
853 
854 	The caller must hold the sVnodeLock (read lock at least).
855 
856 	\param mountID the mount ID.
857 	\param vnodeID the node ID.
858 
859 	\return The vnode structure, if it was found in the hash table, \c NULL
860 			otherwise.
861 */
862 static struct vnode*
863 lookup_vnode(dev_t mountID, ino_t vnodeID)
864 {
865 	struct vnode_hash_key key;
866 
867 	key.device = mountID;
868 	key.vnode = vnodeID;
869 
870 	return (vnode*)hash_lookup(sVnodeTable, &key);
871 }
872 
873 
874 /*!	Creates a new vnode with the given mount and node ID.
875 	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function write
	locks \c sVnodeLock and keeps it locked for the caller when returning. On
	error the lock is not held on return.
879 
880 	\param mountID The mount ID.
881 	\param vnodeID The vnode ID.
882 	\param _vnode Will be set to the new vnode on success.
883 	\param _nodeCreated Will be set to \c true when the returned vnode has
884 		been newly created, \c false when it already existed. Will not be
885 		changed on error.
886 	\return \c B_OK, when the vnode was successfully created and inserted or
887 		a node with the given ID was found, \c B_NO_MEMORY or
888 		\c B_ENTRY_NOT_FOUND on error.
889 */
890 static status_t
891 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
892 	bool& _nodeCreated)
893 {
894 	FUNCTION(("create_new_vnode_and_lock()\n"));
895 
896 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
897 	if (vnode == NULL)
898 		return B_NO_MEMORY;
899 
900 	// initialize basic values
901 	memset(vnode, 0, sizeof(struct vnode));
902 	vnode->device = mountID;
903 	vnode->id = vnodeID;
904 	vnode->ref_count = 1;
905 	vnode->SetBusy(true);
906 
	// look up the node -- it might have been added by someone else in the
908 	// meantime
909 	rw_lock_write_lock(&sVnodeLock);
910 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
911 	if (existingVnode != NULL) {
912 		free(vnode);
913 		_vnode = existingVnode;
914 		_nodeCreated = false;
915 		return B_OK;
916 	}
917 
918 	// get the mount structure
919 	mutex_lock(&sMountMutex);
920 	vnode->mount = find_mount(mountID);
921 	if (!vnode->mount || vnode->mount->unmounting) {
922 		mutex_unlock(&sMountMutex);
923 		rw_lock_write_unlock(&sVnodeLock);
924 		free(vnode);
925 		return B_ENTRY_NOT_FOUND;
926 	}
927 
928 	// add the vnode to the mount's node list and the hash table
929 	hash_insert(sVnodeTable, vnode);
930 	add_vnode_to_mount_list(vnode, vnode->mount);
931 
932 	mutex_unlock(&sMountMutex);
933 
934 	_vnode = vnode;
935 	_nodeCreated = true;
936 
937 	// keep the vnode lock locked
938 	return B_OK;
939 }
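
// Sketch of the expected caller pattern (see get_vnode() below for the real
// use) -- on success, both outcomes leave sVnodeLock write-locked:
//
//	struct vnode* vnode;
//	bool nodeCreated;
//	status_t status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
//		nodeCreated);
//	if (status != B_OK)
//		return status;
//	// ... initialize the new node, or handle the existing one ...
//	rw_lock_write_unlock(&sVnodeLock);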
940 
941 
942 /*!	Frees the vnode and all resources it has acquired, and removes
943 	it from the vnode hash as well as from its mount structure.
944 	Will also make sure that any cache modifications are written back.
945 */
946 static void
947 free_vnode(struct vnode* vnode, bool reenter)
948 {
949 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
950 		vnode);
951 
952 	// write back any changes in this vnode's cache -- but only
953 	// if the vnode won't be deleted, in which case the changes
954 	// will be discarded
955 
956 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
957 		FS_CALL_NO_PARAMS(vnode, fsync);
958 
959 	// Note: If this vnode has a cache attached, there will still be two
960 	// references to that cache at this point. The last one belongs to the vnode
961 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
962 	// cache. Each but the last reference to a cache also includes a reference
963 	// to the vnode. The file cache, however, released its reference (cf.
964 	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next to last
966 	// cache reference to be released, which will also release a (no longer
967 	// existing) vnode reference. To avoid problems, we set the vnode's ref
968 	// count, so that it will neither become negative nor 0.
969 	vnode->ref_count = 2;
970 
971 	if (!vnode->IsUnpublished()) {
972 		if (vnode->IsRemoved())
973 			FS_CALL(vnode, remove_vnode, reenter);
974 		else
975 			FS_CALL(vnode, put_vnode, reenter);
976 	}
977 
978 	// If the vnode has a VMCache attached, make sure that it won't try to get
979 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
980 	// long as the vnode is busy and in the hash, that won't happen, but as
981 	// soon as we've removed it from the hash, it could reload the vnode -- with
982 	// a new cache attached!
983 	if (vnode->cache != NULL)
984 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
985 
986 	// The file system has removed the resources of the vnode now, so we can
987 	// make it available again (by removing the busy vnode from the hash).
988 	rw_lock_write_lock(&sVnodeLock);
989 	hash_remove(sVnodeTable, vnode);
990 	rw_lock_write_unlock(&sVnodeLock);
991 
992 	// if we have a VMCache attached, remove it
993 	if (vnode->cache)
994 		vnode->cache->ReleaseRef();
995 
996 	vnode->cache = NULL;
997 
998 	remove_vnode_from_mount_list(vnode, vnode->mount);
999 
1000 	free(vnode);
1001 }
1002 
1003 
1004 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1005 	if the counter dropped to 0.
1006 
1007 	The caller must, of course, own a reference to the vnode to call this
1008 	function.
1009 	The caller must not hold the sVnodeLock or the sMountMutex.
1010 
1011 	\param vnode the vnode.
1012 	\param alwaysFree don't move this vnode into the unused list, but really
1013 		   delete it if possible.
1014 	\param reenter \c true, if this function is called (indirectly) from within
1015 		   a file system. This will be passed to file system hooks only.
1016 	\return \c B_OK, if everything went fine, an error code otherwise.
1017 */
1018 static status_t
1019 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1020 {
1021 	ReadLocker locker(sVnodeLock);
1022 	AutoLocker<Vnode> nodeLocker(vnode);
1023 
1024 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1025 
1026 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1027 
1028 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1029 		vnode->ref_count));
1030 
1031 	if (oldRefCount != 1)
1032 		return B_OK;
1033 
1034 	if (vnode->IsBusy())
1035 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1036 
1037 	bool freeNode = false;
1038 	bool freeUnusedNodes = false;
1039 
1040 	// Just insert the vnode into an unused list if we don't need
1041 	// to delete it
1042 	if (vnode->IsRemoved() || alwaysFree) {
1043 		vnode_to_be_freed(vnode);
1044 		vnode->SetBusy(true);
1045 		freeNode = true;
1046 	} else
1047 		freeUnusedNodes = vnode_unused(vnode);
1048 
1049 	nodeLocker.Unlock();
1050 	locker.Unlock();
1051 
1052 	if (freeNode)
1053 		free_vnode(vnode, reenter);
1054 	else if (freeUnusedNodes)
1055 		free_unused_vnodes();
1056 
1057 	return B_OK;
1058 }
1059 
1060 
1061 /*!	\brief Increments the reference counter of the given vnode.
1062 
1063 	The caller must make sure that the node isn't deleted while this function
1064 	is called. This can be done either:
1065 	- by ensuring that a reference to the node exists and remains in existence,
1066 	  or
1067 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1068 	  or by holding sVnodeLock write locked.
1069 
1070 	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
1072 	node is busy in the first place and 2. vnode_used() must be called for the
1073 	node.
1074 
1075 	\param vnode the vnode.
1076 */
1077 static void
1078 inc_vnode_ref_count(struct vnode* vnode)
1079 {
1080 	atomic_add(&vnode->ref_count, 1);
1081 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %ld\n", vnode,
1082 		vnode->ref_count));
1083 }
1084 
1085 
1086 static bool
1087 is_special_node_type(int type)
1088 {
1089 	// at the moment only FIFOs are supported
1090 	return S_ISFIFO(type);
1091 }
1092 
1093 
1094 static status_t
1095 create_special_sub_node(struct vnode* vnode, uint32 flags)
1096 {
1097 	if (S_ISFIFO(vnode->Type()))
1098 		return create_fifo_vnode(vnode->mount->volume, vnode);
1099 
1100 	return B_BAD_VALUE;
1101 }
1102 
1103 
1104 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1105 
1106 	If the node is not yet in memory, it will be loaded.
1107 
1108 	The caller must not hold the sVnodeLock or the sMountMutex.
1109 
1110 	\param mountID the mount ID.
1111 	\param vnodeID the node ID.
1112 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1113 		   retrieved vnode structure shall be written.
	\param canWait \c true, if the function may wait for a busy vnode to
		   become unbusy instead of failing immediately with \c B_BUSY.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
1117 */
1118 static status_t
1119 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1120 	int reenter)
1121 {
1122 	FUNCTION(("get_vnode: mountid %ld vnid 0x%Lx %p\n", mountID, vnodeID,
1123 		_vnode));
1124 
1125 	rw_lock_read_lock(&sVnodeLock);
1126 
1127 	int32 tries = 2000;
1128 		// try for 10 secs
1129 restart:
1130 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1131 	AutoLocker<Vnode> nodeLocker(vnode);
1132 
1133 	if (vnode && vnode->IsBusy()) {
1134 		nodeLocker.Unlock();
1135 		rw_lock_read_unlock(&sVnodeLock);
1136 		if (!canWait || --tries < 0) {
1137 			// vnode doesn't seem to become unbusy
1138 			dprintf("vnode %ld:%Ld is not becoming unbusy!\n", mountID,
1139 				vnodeID);
1140 			return B_BUSY;
1141 		}
1142 		snooze(5000); // 5 ms
1143 		rw_lock_read_lock(&sVnodeLock);
1144 		goto restart;
1145 	}
1146 
1147 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1148 
1149 	status_t status;
1150 
1151 	if (vnode) {
1152 		if (vnode->ref_count == 0) {
1153 			// this vnode has been unused before
1154 			vnode_used(vnode);
1155 		}
1156 		inc_vnode_ref_count(vnode);
1157 
1158 		nodeLocker.Unlock();
1159 		rw_lock_read_unlock(&sVnodeLock);
1160 	} else {
1161 		// we need to create a new vnode and read it in
1162 		rw_lock_read_unlock(&sVnodeLock);
1163 			// unlock -- create_new_vnode_and_lock() write-locks on success
1164 		bool nodeCreated;
1165 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1166 			nodeCreated);
1167 		if (status != B_OK)
1168 			return status;
1169 
1170 		if (!nodeCreated) {
1171 			rw_lock_read_lock(&sVnodeLock);
1172 			rw_lock_write_unlock(&sVnodeLock);
1173 			goto restart;
1174 		}
1175 
1176 		rw_lock_write_unlock(&sVnodeLock);
1177 
1178 		int type;
1179 		uint32 flags;
1180 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1181 			&flags, reenter);
1182 		if (status == B_OK && vnode->private_node == NULL)
1183 			status = B_BAD_VALUE;
1184 
1185 		bool gotNode = status == B_OK;
1186 		bool publishSpecialSubNode = false;
1187 		if (gotNode) {
1188 			vnode->SetType(type);
1189 			publishSpecialSubNode = is_special_node_type(type)
1190 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1191 		}
1192 
1193 		if (gotNode && publishSpecialSubNode)
1194 			status = create_special_sub_node(vnode, flags);
1195 
1196 		if (status != B_OK) {
1197 			if (gotNode)
1198 				FS_CALL(vnode, put_vnode, reenter);
1199 
1200 			rw_lock_write_lock(&sVnodeLock);
1201 			hash_remove(sVnodeTable, vnode);
1202 			remove_vnode_from_mount_list(vnode, vnode->mount);
1203 			rw_lock_write_unlock(&sVnodeLock);
1204 
1205 			free(vnode);
1206 			return status;
1207 		}
1208 
1209 		rw_lock_read_lock(&sVnodeLock);
1210 		vnode->Lock();
1211 
1212 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1213 		vnode->SetBusy(false);
1214 
1215 		vnode->Unlock();
1216 		rw_lock_read_unlock(&sVnodeLock);
1217 	}
1218 
1219 	TRACE(("get_vnode: returning %p\n", vnode));
1220 
1221 	*_vnode = vnode;
1222 	return B_OK;
1223 }
1224 
1225 
1226 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1227 	if the counter dropped to 0.
1228 
1229 	The caller must, of course, own a reference to the vnode to call this
1230 	function.
1231 	The caller must not hold the sVnodeLock or the sMountMutex.
1232 
1233 	\param vnode the vnode.
1234 */
1235 static inline void
1236 put_vnode(struct vnode* vnode)
1237 {
1238 	dec_vnode_ref_count(vnode, false, false);
1239 }
1240 
1241 
1242 static void
1243 free_unused_vnodes(int32 level)
1244 {
1245 	unused_vnodes_check_started();
1246 
1247 	if (level == B_NO_LOW_RESOURCE) {
1248 		unused_vnodes_check_done();
1249 		return;
1250 	}
1251 
1252 	flush_hot_vnodes();
1253 
1254 	// determine how many nodes to free
1255 	uint32 count = 1;
1256 	{
1257 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1258 
1259 		switch (level) {
1260 			case B_LOW_RESOURCE_NOTE:
1261 				count = sUnusedVnodes / 100;
1262 				break;
1263 			case B_LOW_RESOURCE_WARNING:
1264 				count = sUnusedVnodes / 10;
1265 				break;
1266 			case B_LOW_RESOURCE_CRITICAL:
1267 				count = sUnusedVnodes;
1268 				break;
1269 		}
1270 
1271 		if (count > sUnusedVnodes)
1272 			count = sUnusedVnodes;
1273 	}
1274 
1275 	// Write back the modified pages of some unused vnodes and free them.
1276 
1277 	for (uint32 i = 0; i < count; i++) {
1278 		ReadLocker vnodesReadLocker(sVnodeLock);
1279 
1280 		// get the first node
1281 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1282 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1283 			&sUnusedVnodeList);
1284 		unusedVnodesLocker.Unlock();
1285 
1286 		if (vnode == NULL)
1287 			break;
1288 
1289 		// lock the node
1290 		AutoLocker<Vnode> nodeLocker(vnode);
1291 
		// Check whether the node is still unused -- since we only append to
		// the tail of the unused queue, the vnode should still be at its head.
1294 		// Alternatively we could check its ref count for 0 and its busy flag,
1295 		// but if the node is no longer at the head of the queue, it means it
1296 		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode, and we'd rather not free it.
1298 		unusedVnodesLocker.Lock();
1299 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1300 			continue;
1301 		unusedVnodesLocker.Unlock();
1302 
1303 		ASSERT(!vnode->IsBusy());
1304 
1305 		// grab a reference
1306 		inc_vnode_ref_count(vnode);
1307 		vnode_used(vnode);
1308 
1309 		// write back changes and free the node
1310 		nodeLocker.Unlock();
1311 		vnodesReadLocker.Unlock();
1312 
1313 		if (vnode->cache != NULL)
1314 			vnode->cache->WriteModified();
1315 
1316 		dec_vnode_ref_count(vnode, true, false);
1317 			// this should free the vnode when it's still unused
1318 	}
1319 
1320 	unused_vnodes_check_done();
1321 }
1322 
1323 
1324 static void
1325 free_unused_vnodes()
1326 {
1327 	free_unused_vnodes(
1328 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1329 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1330 }
1331 
1332 
1333 static void
1334 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1335 {
1336 	TRACE(("vnode_low_resource_handler(level = %ld)\n", level));
1337 
1338 	free_unused_vnodes(level);
1339 }
1340 
1341 
1342 static inline void
1343 put_advisory_locking(struct advisory_locking* locking)
1344 {
1345 	release_sem(locking->lock);
1346 }
1347 
1348 
1349 /*!	Returns the advisory_locking object of the \a vnode in case it
1350 	has one, and locks it.
1351 	You have to call put_advisory_locking() when you're done with
1352 	it.
1353 	Note, you must not have the vnode mutex locked when calling
1354 	this function.
1355 */
1356 static struct advisory_locking*
1357 get_advisory_locking(struct vnode* vnode)
1358 {
1359 	rw_lock_read_lock(&sVnodeLock);
1360 	vnode->Lock();
1361 
1362 	struct advisory_locking* locking = vnode->advisory_locking;
1363 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1364 
1365 	vnode->Unlock();
1366 	rw_lock_read_unlock(&sVnodeLock);
1367 
1368 	if (lock >= 0)
1369 		lock = acquire_sem(lock);
1370 	if (lock < 0) {
		// This means the locking has been deleted in the meantime
1372 		// or had never existed in the first place - otherwise, we
1373 		// would get the lock at some point.
1374 		return NULL;
1375 	}
1376 
1377 	return locking;
1378 }
1379 
1380 
1381 /*!	Creates a locked advisory_locking object, and attaches it to the
1382 	given \a vnode.
	Returns B_OK in case of success - even if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
1386 */
1387 static status_t
1388 create_advisory_locking(struct vnode* vnode)
1389 {
1390 	if (vnode == NULL)
1391 		return B_FILE_ERROR;
1392 
1393 	ObjectDeleter<advisory_locking> lockingDeleter;
1394 	struct advisory_locking* locking = NULL;
1395 
1396 	while (get_advisory_locking(vnode) == NULL) {
1397 		// no locking object set on the vnode yet, create one
1398 		if (locking == NULL) {
1399 			locking = new(std::nothrow) advisory_locking;
1400 			if (locking == NULL)
1401 				return B_NO_MEMORY;
1402 			lockingDeleter.SetTo(locking);
1403 
1404 			locking->wait_sem = create_sem(0, "advisory lock");
1405 			if (locking->wait_sem < 0)
1406 				return locking->wait_sem;
1407 
1408 			locking->lock = create_sem(0, "advisory locking");
1409 			if (locking->lock < 0)
1410 				return locking->lock;
1411 		}
1412 
1413 		// set our newly created locking object
1414 		ReadLocker _(sVnodeLock);
1415 		AutoLocker<Vnode> nodeLocker(vnode);
1416 		if (vnode->advisory_locking == NULL) {
1417 			vnode->advisory_locking = locking;
1418 			lockingDeleter.Detach();
1419 			return B_OK;
1420 		}
1421 	}
1422 
1423 	// The vnode already had a locking object. That's just as well.
1424 
1425 	return B_OK;
1426 }
1427 
1428 
1429 /*!	Retrieves the first lock that has been set by the current team.
1430 */
1431 static status_t
1432 get_advisory_lock(struct vnode* vnode, struct flock* flock)
1433 {
1434 	struct advisory_locking* locking = get_advisory_locking(vnode);
1435 	if (locking == NULL)
1436 		return B_BAD_VALUE;
1437 
1438 	// TODO: this should probably get the flock by its file descriptor!
1439 	team_id team = team_get_current_team_id();
1440 	status_t status = B_BAD_VALUE;
1441 
1442 	LockList::Iterator iterator = locking->locks.GetIterator();
1443 	while (iterator.HasNext()) {
1444 		struct advisory_lock* lock = iterator.Next();
1445 
1446 		if (lock->team == team) {
1447 			flock->l_start = lock->start;
1448 			flock->l_len = lock->end - lock->start + 1;
1449 			status = B_OK;
1450 			break;
1451 		}
1452 	}
1453 
1454 	put_advisory_locking(locking);
1455 	return status;
1456 }
1457 
1458 
1459 /*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
1460 	with the advisory_lock \a lock.
1461 */
1462 static bool
1463 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1464 {
1465 	if (flock == NULL)
1466 		return true;
1467 
1468 	return lock->start <= flock->l_start - 1 + flock->l_len
1469 		&& lock->end >= flock->l_start;
1470 }
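
// Worked example: a normalized flock with l_start == 100 and l_len == 50
// covers bytes [100, 149]; an advisory_lock with start == 140 and
// end == 200 intersects it, since 140 <= 149 and 200 >= 100.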
1471 
1472 
1473 /*!	Removes the specified lock, or all locks of the calling team
1474 	if \a flock is NULL.
1475 */
1476 static status_t
1477 release_advisory_lock(struct vnode* vnode, struct flock* flock)
1478 {
1479 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1480 
1481 	struct advisory_locking* locking = get_advisory_locking(vnode);
1482 	if (locking == NULL)
1483 		return B_OK;
1484 
1485 	// TODO: use the thread ID instead??
1486 	team_id team = team_get_current_team_id();
1487 	pid_t session = thread_get_current_thread()->team->session_id;
1488 
1489 	// find matching lock entries
1490 
1491 	LockList::Iterator iterator = locking->locks.GetIterator();
1492 	while (iterator.HasNext()) {
1493 		struct advisory_lock* lock = iterator.Next();
1494 		bool removeLock = false;
1495 
1496 		if (lock->session == session)
1497 			removeLock = true;
1498 		else if (lock->team == team && advisory_lock_intersects(lock, flock)) {
1499 			bool endsBeyond = false;
1500 			bool startsBefore = false;
1501 			if (flock != NULL) {
1502 				startsBefore = lock->start < flock->l_start;
1503 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1504 			}
1505 
1506 			if (!startsBefore && !endsBeyond) {
1507 				// lock is completely contained in flock
1508 				removeLock = true;
1509 			} else if (startsBefore && !endsBeyond) {
1510 				// cut the end of the lock
1511 				lock->end = flock->l_start - 1;
1512 			} else if (!startsBefore && endsBeyond) {
1513 				// cut the start of the lock
1514 				lock->start = flock->l_start + flock->l_len;
1515 			} else {
1516 				// divide the lock into two locks
				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// allocated with malloc(), since locks in this list
					// are released with free()
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
				secondLock->shared = lock->shared;

				// cut the end of the first lock only now, after the second
				// lock has taken over the original end
				lock->end = flock->l_start - 1;

				locking->locks.Add(secondLock);
1535 			}
1536 		}
1537 
1538 		if (removeLock) {
1539 			// this lock is no longer used
1540 			iterator.Remove();
1541 			free(lock);
1542 		}
1543 	}
1544 
1545 	bool removeLocking = locking->locks.IsEmpty();
1546 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1547 
1548 	put_advisory_locking(locking);
1549 
1550 	if (removeLocking) {
1551 		// We can remove the whole advisory locking structure; it's no
1552 		// longer used
1553 		locking = get_advisory_locking(vnode);
1554 		if (locking != NULL) {
1555 			ReadLocker locker(sVnodeLock);
1556 			AutoLocker<Vnode> nodeLocker(vnode);
1557 
1558 			// the locking could have been changed in the mean time
1559 			if (locking->locks.IsEmpty()) {
1560 				vnode->advisory_locking = NULL;
1561 				nodeLocker.Unlock();
1562 				locker.Unlock();
1563 
1564 				// we've detached the locking from the vnode, so we can
1565 				// safely delete it
1566 				delete locking;
1567 			} else {
1568 				// the locking is in use again
1569 				nodeLocker.Unlock();
1570 				locker.Unlock();
1571 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1572 			}
1573 		}
1574 	}
1575 
1576 	return B_OK;
1577 }
1578 
1579 
1580 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1581 	will wait for the lock to become available, if there are any collisions
1582 	(it will return B_PERMISSION_DENIED in this case if \a wait is \c false).
1583 
1584 	If \a session is -1, POSIX semantics are used for this lock. Otherwise,
1585 	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, but that
	seems to be in line with what the BSDs are doing).
1588 */
1589 static status_t
1590 acquire_advisory_lock(struct vnode* vnode, pid_t session, struct flock* flock,
1591 	bool wait)
1592 {
1593 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1594 		vnode, flock, wait ? "yes" : "no"));
1595 
1596 	bool shared = flock->l_type == F_RDLCK;
1597 	status_t status = B_OK;
1598 
1599 	// TODO: do deadlock detection!
1600 
1601 	struct advisory_locking* locking;
1602 
1603 	while (true) {
1604 		// if this vnode has an advisory_locking structure attached,
1605 		// lock that one and search for any colliding file lock
1606 		status = create_advisory_locking(vnode);
1607 		if (status != B_OK)
1608 			return status;
1609 
1610 		locking = vnode->advisory_locking;
1611 		team_id team = team_get_current_team_id();
1612 		sem_id waitForLock = -1;
1613 
1614 		// test for collisions
1615 		LockList::Iterator iterator = locking->locks.GetIterator();
1616 		while (iterator.HasNext()) {
1617 			struct advisory_lock* lock = iterator.Next();
1618 
1619 			// TODO: locks from the same team might be joinable!
1620 			if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1621 				// locks do overlap
1622 				if (!shared || !lock->shared) {
1623 					// we need to wait
1624 					waitForLock = locking->wait_sem;
1625 					break;
1626 				}
1627 			}
1628 		}
1629 
1630 		if (waitForLock < 0)
1631 			break;
1632 
1633 		// We need to wait. Do that or fail now, if we've been asked not to.
1634 
1635 		if (!wait) {
1636 			put_advisory_locking(locking);
1637 			return session != -1 ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1638 		}
1639 
1640 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1641 			B_CAN_INTERRUPT, 0);
1642 		if (status != B_OK && status != B_BAD_SEM_ID)
1643 			return status;
1644 
1645 		// We have been notified, but we need to re-lock the locking object. So
1646 		// go another round...
1647 	}
1648 
1649 	// install new lock
1650 
1651 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1652 		sizeof(struct advisory_lock));
1653 	if (lock == NULL) {
1654 		put_advisory_locking(locking);
1655 		return B_NO_MEMORY;
1656 	}
1657 
1658 	lock->team = team_get_current_team_id();
1659 	lock->session = session;
1660 	// values must already be normalized when getting here
1661 	lock->start = flock->l_start;
1662 	lock->end = flock->l_start - 1 + flock->l_len;
1663 	lock->shared = shared;
1664 
1665 	locking->locks.Add(lock);
1666 	put_advisory_locking(locking);
1667 
1668 	return status;
1669 }
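
// A typical caller -- sketch only, the actual fcntl() path lives further
// down in this file -- normalizes the flock first and passes session == -1
// to get POSIX semantics:
//
//	status_t status = normalize_flock(descriptor, &flock);
//	if (status == B_OK) {
//		status = acquire_advisory_lock(vnode, -1, &flock,
//			op == F_SETLKW);
//	}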
1670 
1671 
1672 /*!	Normalizes the \a flock structure to make it easier to compare the
1673 	structure with others. The l_start and l_len fields are set to absolute
1674 	values according to the l_whence field.
1675 */
1676 static status_t
1677 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1678 {
1679 	switch (flock->l_whence) {
1680 		case SEEK_SET:
1681 			break;
1682 		case SEEK_CUR:
1683 			flock->l_start += descriptor->pos;
1684 			break;
1685 		case SEEK_END:
1686 		{
1687 			struct vnode* vnode = descriptor->u.vnode;
1688 			struct stat stat;
1689 			status_t status;
1690 
1691 			if (!HAS_FS_CALL(vnode, read_stat))
1692 				return B_NOT_SUPPORTED;
1693 
1694 			status = FS_CALL(vnode, read_stat, &stat);
1695 			if (status != B_OK)
1696 				return status;
1697 
1698 			flock->l_start += stat.st_size;
1699 			break;
1700 		}
1701 		default:
1702 			return B_BAD_VALUE;
1703 	}
1704 
1705 	if (flock->l_start < 0)
1706 		flock->l_start = 0;
1707 	if (flock->l_len == 0)
1708 		flock->l_len = OFF_MAX;
1709 
1710 	// don't let the offset and length overflow
1711 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1712 		flock->l_len = OFF_MAX - flock->l_start;
1713 
1714 	if (flock->l_len < 0) {
1715 		// a negative length reverses the region
1716 		flock->l_start += flock->l_len;
1717 		flock->l_len = -flock->l_len;
1718 	}
1719 
1720 	return B_OK;
1721 }
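
// Worked example: with l_whence == SEEK_CUR, descriptor->pos == 1000,
// l_start == 20 and l_len == -10, the start first becomes 1020; the
// negative length then reverses the region to l_start == 1010,
// l_len == 10, i.e. the lock covers bytes [1010, 1019].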
1722 
1723 
1724 static void
1725 replace_vnode_if_disconnected(struct fs_mount* mount,
1726 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1727 	struct vnode* fallBack, bool lockRootLock)
1728 {
1729 	if (lockRootLock)
1730 		mutex_lock(&sIOContextRootLock);
1731 
1732 	struct vnode* obsoleteVnode = NULL;
1733 
1734 	if (vnode != NULL && vnode->mount == mount
1735 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1736 		obsoleteVnode = vnode;
1737 
1738 		if (vnode == mount->root_vnode) {
1739 			// redirect the vnode to the covered vnode
1740 			vnode = mount->covers_vnode;
1741 		} else
1742 			vnode = fallBack;
1743 
1744 		if (vnode != NULL)
1745 			inc_vnode_ref_count(vnode);
1746 	}
1747 
1748 	if (lockRootLock)
1749 		mutex_unlock(&sIOContextRootLock);
1750 
1751 	if (obsoleteVnode != NULL)
1752 		put_vnode(obsoleteVnode);
1753 }
1754 
1755 
1756 /*!	Disconnects all file descriptors that are associated with the
1757 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1758 	\a mount object.
1759 
1760 	Note, after you've called this function, there might still be ongoing
1761 	accesses - they won't be interrupted if they already happened before.
1762 	However, any subsequent access will fail.
1763 
1764 	This is not a cheap function and should be used with care and rarely.
1765 	TODO: there is currently no means to stop a blocking read/write!
1766 */
1767 static void
1768 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1769 	struct vnode* vnodeToDisconnect)
1770 {
1771 	// iterate over all teams and peek into their file descriptors
1772 	TeamListIterator teamIterator;
1773 	while (Team* team = teamIterator.Next()) {
1774 		BReference<Team> teamReference(team, true);
1775 
1776 		// lock the I/O context
1777 		io_context* context = team->io_context;
1778 		MutexLocker contextLocker(context->io_mutex);
1779 
1780 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1781 			sRoot, true);
1782 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1783 			sRoot, false);
1784 
1785 		for (uint32 i = 0; i < context->table_size; i++) {
1786 			if (struct file_descriptor* descriptor = context->fds[i]) {
1787 				inc_fd_ref_count(descriptor);
1788 
1789 				// if this descriptor points at this mount, we
1790 				// need to disconnect it to be able to unmount
1791 				struct vnode* vnode = fd_vnode(descriptor);
1792 				if (vnodeToDisconnect != NULL) {
1793 					if (vnode == vnodeToDisconnect)
1794 						disconnect_fd(descriptor);
1795 				} else if ((vnode != NULL && vnode->mount == mount)
1796 					|| (vnode == NULL && descriptor->u.mount == mount))
1797 					disconnect_fd(descriptor);
1798 
1799 				put_fd(descriptor);
1800 			}
1801 		}
1802 	}
1803 }
1804 
1805 
1806 /*!	\brief Gets the root node of the current IO context.
1807 	If \a kernel is \c true, the kernel IO context will be used.
1808 	The caller obtains a reference to the returned node.
1809 */
1810 struct vnode*
1811 get_root_vnode(bool kernel)
1812 {
1813 	if (!kernel) {
		// Get the root of the current io context
1815 		struct io_context* context = get_current_io_context(kernel);
1816 
1817 		mutex_lock(&sIOContextRootLock);
1818 
1819 		struct vnode* root = context->root;
1820 		if (root != NULL)
1821 			inc_vnode_ref_count(root);
1822 
1823 		mutex_unlock(&sIOContextRootLock);
1824 
1825 		if (root != NULL)
1826 			return root;
1827 
1828 		// That should never happen.
1829 		dprintf("get_root_vnode(): IO context for team %ld doesn't have a "
1830 			"root\n", team_get_current_team_id());
1831 	}
1832 
1833 	inc_vnode_ref_count(sRoot);
1834 	return sRoot;
1835 }
1836 
1837 
1838 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1839 		   by.
1840 
	Given an arbitrary vnode, the function checks whether the node is covered
	by the root of a volume. If it is, the function obtains a reference to the
1843 	volume root node and returns it.
1844 
1845 	\param vnode The vnode in question.
	\return The volume root vnode the given vnode is covered by, if it is
1847 			indeed a mount point, or \c NULL otherwise.
1848 */
1849 static struct vnode*
1850 resolve_mount_point_to_volume_root(struct vnode* vnode)
1851 {
1852 	if (!vnode)
1853 		return NULL;
1854 
1855 	struct vnode* volumeRoot = NULL;
1856 
1857 	rw_lock_read_lock(&sVnodeLock);
1858 
1859 	if (vnode->covered_by) {
1860 		volumeRoot = vnode->covered_by;
1861 		inc_vnode_ref_count(volumeRoot);
1862 	}
1863 
1864 	rw_lock_read_unlock(&sVnodeLock);
1865 
1866 	return volumeRoot;
1867 }
1868 
1869 
1870 /*!	\brief Resolves a mount point vnode to the volume root vnode it is covered
1871 		   by.
1872 
1873 	Given an arbitrary vnode (identified by mount and node ID), the function
	checks whether the node is covered by the root of a volume. If it is, the
	function returns the mount and node ID of the volume root node. Otherwise
1876 	it simply returns the supplied mount and node ID.
1877 
1878 	In case of error (e.g. the supplied node could not be found) the variables
1879 	for storing the resolved mount and node ID remain untouched and an error
1880 	code is returned.
1881 
1882 	\param mountID The mount ID of the vnode in question.
1883 	\param nodeID The node ID of the vnode in question.
1884 	\param resolvedMountID Pointer to storage for the resolved mount ID.
1885 	\param resolvedNodeID Pointer to storage for the resolved node ID.
1886 	\return
1887 	- \c B_OK, if everything went fine,
1888 	- another error code, if something went wrong.
1889 */
1890 status_t
1891 resolve_mount_point_to_volume_root(dev_t mountID, ino_t nodeID,
1892 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
1893 {
1894 	// get the node
1895 	struct vnode* node;
1896 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
1897 	if (error != B_OK)
1898 		return error;
1899 
1900 	// resolve the node
1901 	struct vnode* resolvedNode = resolve_mount_point_to_volume_root(node);
1902 	if (resolvedNode) {
1903 		put_vnode(node);
1904 		node = resolvedNode;
1905 	}
1906 
1907 	// set the return values
1908 	*resolvedMountID = node->device;
1909 	*resolvedNodeID = node->id;
1910 
1911 	put_vnode(node);
1912 
1913 	return B_OK;
1914 }
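

/*	An illustrative sketch (comment only, not compiled) of how the ID-based
	resolver above is meant to be called; the input values are hypothetical:

		dev_t resolvedDevice;
		ino_t resolvedNode;
		if (resolve_mount_point_to_volume_root(device, inode, &resolvedDevice,
				&resolvedNode) == B_OK) {
			// If (device, inode) is a mount point, the resolved pair now
			// identifies the root of the volume mounted there; otherwise it
			// simply equals the input pair.
		}
*/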
1915 
1916 
1917 /*!	\brief Resolves a volume root vnode to the underlying mount point vnode.
1918 
1919 	Given an arbitrary vnode, the function checks, whether the node is the
1920 	root of a volume. If it is (and if it is not "/"), the function obtains
1921 	a reference to the underlying mount point node and returns it.
1922 
1923 	\param vnode The vnode in question (caller must have a reference).
1924 	\return The mount point vnode the vnode covers, if it is indeed a volume
1925 			root and not "/", or \c NULL otherwise.
1926 */
1927 static struct vnode*
1928 resolve_volume_root_to_mount_point(struct vnode* vnode)
1929 {
1930 	if (!vnode)
1931 		return NULL;
1932 
1933 	struct vnode* mountPoint = NULL;
1934 
1935 	struct fs_mount* mount = vnode->mount;
1936 	if (vnode == mount->root_vnode && mount->covers_vnode) {
1937 		mountPoint = mount->covers_vnode;
1938 		inc_vnode_ref_count(mountPoint);
1939 	}
1940 
1941 	return mountPoint;
1942 }
1943 
1944 
1945 /*!	\brief Gets the directory path and leaf name for a given path.
1946 
	The supplied \a path is transformed in place to refer to the directory
	part of the entry identified by the original path, and the leaf name of
	the original entry is written into the buffer \a filename.
1950 	Neither the returned path nor the leaf name can be expected to be
1951 	canonical.
1952 
1953 	\param path The path to be analyzed. Must be able to store at least one
1954 		   additional character.
1955 	\param filename The buffer into which the leaf name will be written.
1956 		   Must be of size B_FILE_NAME_LENGTH at least.
1957 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
1958 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
1959 		   if the given path name is empty.
1960 */
1961 static status_t
1962 get_dir_path_and_leaf(char* path, char* filename)
1963 {
1964 	if (*path == '\0')
1965 		return B_ENTRY_NOT_FOUND;
1966 
1967 	char* last = strrchr(path, '/');
1968 		// '/' are not allowed in file names!
1969 
1970 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
1971 
1972 	if (last == NULL) {
1973 		// this path is single segment with no '/' in it
1974 		// ex. "foo"
1975 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1976 			return B_NAME_TOO_LONG;
1977 
1978 		strcpy(path, ".");
1979 	} else {
1980 		last++;
1981 		if (last[0] == '\0') {
1982 			// special case: the path ends in one or more '/' - remove them
1983 			while (*--last == '/' && last != path);
1984 			last[1] = '\0';
1985 
1986 			if (last == path && last[0] == '/') {
1987 				// This path points to the root of the file system
1988 				strcpy(filename, ".");
1989 				return B_OK;
1990 			}
1991 			for (; last != path && *(last - 1) != '/'; last--);
1992 				// rewind to the start of the leaf before the '/'
1993 		}
1994 
1995 		// normal leaf: replace the leaf portion of the path with a '.'
1996 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
1997 			return B_NAME_TOO_LONG;
1998 
1999 		last[0] = '.';
2000 		last[1] = '\0';
2001 	}
2002 	return B_OK;
2003 }
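

/*	A worked example (comment only, not compiled) of the in-place
	transformation performed by get_dir_path_and_leaf(), assuming a writable
	buffer:

		char path[B_PATH_NAME_LENGTH] = "/boot/home/file.txt";
		char leaf[B_FILE_NAME_LENGTH];
		if (get_dir_path_and_leaf(path, leaf) == B_OK) {
			// path is now "/boot/home/.", leaf is "file.txt".
			// For "foo", path becomes "." and leaf "foo"; for "/", path
			// stays "/" and leaf is ".".
		}
*/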
2004 
2005 
2006 static status_t
2007 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2008 	bool traverse, bool kernel, struct vnode** _vnode)
2009 {
2010 	char clonedName[B_FILE_NAME_LENGTH + 1];
2011 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2012 		return B_NAME_TOO_LONG;
2013 
2014 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2015 	struct vnode* directory;
2016 
2017 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2018 	if (status < 0)
2019 		return status;
2020 
2021 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2022 		_vnode, NULL);
2023 }
2024 
2025 
2026 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2027 	and returns the respective vnode.
2028 	On success a reference to the vnode is acquired for the caller.
2029 */
2030 static status_t
2031 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2032 {
2033 	ino_t id;
2034 
2035 	if (dir->mount->entry_cache.Lookup(dir->id, name, id))
2036 		return get_vnode(dir->device, id, _vnode, true, false);
2037 
2038 	status_t status = FS_CALL(dir, lookup, name, &id);
2039 	if (status != B_OK)
2040 		return status;
2041 
	// The lookup() hook calls get_vnode() or publish_vnode(), so we already
	// have a reference and just need to look the node up.
2044 	rw_lock_read_lock(&sVnodeLock);
2045 	*_vnode = lookup_vnode(dir->device, id);
2046 	rw_lock_read_unlock(&sVnodeLock);
2047 
2048 	if (*_vnode == NULL) {
2049 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%lx vnid "
2050 			"0x%Lx)\n", dir->device, id);
2051 		return B_ENTRY_NOT_FOUND;
2052 	}
2053 
2054 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2055 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2056 //		(*_vnode)->mount->id, (*_vnode)->id);
2057 
2058 	return B_OK;
2059 }
2060 
2061 
2062 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2063 	\a path must not be NULL.
2064 	If it returns successfully, \a path contains the name of the last path
	component. This function clobbers the buffer pointed to by \a path only
	if it contains more than one component.
	Note that this function always decrements the ref_count of the starting
	\a vnode, whether it succeeds or not!
2069 */
2070 static status_t
2071 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2072 	int count, struct io_context* ioContext, struct vnode** _vnode,
2073 	ino_t* _parentID)
2074 {
2075 	status_t status = B_OK;
2076 	ino_t lastParentID = vnode->id;
2077 
2078 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2079 
2080 	if (path == NULL) {
2081 		put_vnode(vnode);
2082 		return B_BAD_VALUE;
2083 	}
2084 
2085 	if (*path == '\0') {
2086 		put_vnode(vnode);
2087 		return B_ENTRY_NOT_FOUND;
2088 	}
2089 
2090 	while (true) {
2091 		struct vnode* nextVnode;
2092 		char* nextPath;
2093 
2094 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2095 			path));
2096 
2097 		// done?
2098 		if (path[0] == '\0')
2099 			break;
2100 
2101 		// walk to find the next path component ("path" will point to a single
2102 		// path component), and filter out multiple slashes
2103 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2104 				nextPath++);
2105 
2106 		if (*nextPath == '/') {
2107 			*nextPath = '\0';
2108 			do
2109 				nextPath++;
2110 			while (*nextPath == '/');
2111 		}
2112 
2113 		// See if the '..' is at the root of a mount and move to the covered
2114 		// vnode so we pass the '..' path to the underlying filesystem.
2115 		// Also prevent breaking the root of the IO context.
2116 		if (strcmp("..", path) == 0) {
2117 			if (vnode == ioContext->root) {
2118 				// Attempted prison break! Keep it contained.
2119 				path = nextPath;
2120 				continue;
2121 			} else if (vnode->mount->root_vnode == vnode
2122 				&& vnode->mount->covers_vnode) {
2123 				nextVnode = vnode->mount->covers_vnode;
2124 				inc_vnode_ref_count(nextVnode);
2125 				put_vnode(vnode);
2126 				vnode = nextVnode;
2127 			}
2128 		}
2129 
2130 		// check if vnode is really a directory
2131 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2132 			status = B_NOT_A_DIRECTORY;
2133 
2134 		// Check if we have the right to search the current directory vnode.
2135 		// If a file system doesn't have the access() function, we assume that
2136 		// searching a directory is always allowed
2137 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2138 			status = FS_CALL(vnode, access, X_OK);
2139 
2140 		// Tell the filesystem to get the vnode of this path component (if we
2141 		// got the permission from the call above)
2142 		if (status == B_OK)
2143 			status = lookup_dir_entry(vnode, path, &nextVnode);
2144 
2145 		if (status != B_OK) {
2146 			put_vnode(vnode);
2147 			return status;
2148 		}
2149 
2150 		// If the new node is a symbolic link, resolve it (if we've been told
2151 		// to do it)
2152 		if (S_ISLNK(nextVnode->Type())
2153 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2154 			size_t bufferSize;
2155 			char* buffer;
2156 
2157 			TRACE(("traverse link\n"));
2158 
2159 			// it's not exactly nice style using goto in this way, but hey,
2160 			// it works :-/
2161 			if (count + 1 > B_MAX_SYMLINKS) {
2162 				status = B_LINK_LIMIT;
2163 				goto resolve_link_error;
2164 			}
2165 
2166 			buffer = (char*)malloc(bufferSize = B_PATH_NAME_LENGTH);
2167 			if (buffer == NULL) {
2168 				status = B_NO_MEMORY;
2169 				goto resolve_link_error;
2170 			}
2171 
2172 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2173 				bufferSize--;
2174 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2175 				// null-terminate
2176 				if (status >= 0)
2177 					buffer[bufferSize] = '\0';
2178 			} else
2179 				status = B_BAD_VALUE;
2180 
2181 			if (status != B_OK) {
2182 				free(buffer);
2183 
2184 		resolve_link_error:
2185 				put_vnode(vnode);
2186 				put_vnode(nextVnode);
2187 
2188 				return status;
2189 			}
2190 			put_vnode(nextVnode);
2191 
2192 			// Check if we start from the root directory or the current
2193 			// directory ("vnode" still points to that one).
2194 			// Cut off all leading slashes if it's the root directory
2195 			path = buffer;
2196 			bool absoluteSymlink = false;
2197 			if (path[0] == '/') {
2198 				// we don't need the old directory anymore
2199 				put_vnode(vnode);
2200 
2201 				while (*++path == '/')
2202 					;
2203 
2204 				mutex_lock(&sIOContextRootLock);
2205 				vnode = ioContext->root;
2206 				inc_vnode_ref_count(vnode);
2207 				mutex_unlock(&sIOContextRootLock);
2208 
2209 				absoluteSymlink = true;
2210 			}
2211 
2212 			inc_vnode_ref_count(vnode);
2213 				// balance the next recursion - we will decrement the
2214 				// ref_count of the vnode, no matter if we succeeded or not
2215 
2216 			if (absoluteSymlink && *path == '\0') {
2217 				// symlink was just "/"
2218 				nextVnode = vnode;
2219 			} else {
2220 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2221 					ioContext, &nextVnode, &lastParentID);
2222 			}
2223 
2224 			free(buffer);
2225 
2226 			if (status != B_OK) {
2227 				put_vnode(vnode);
2228 				return status;
2229 			}
2230 		} else
2231 			lastParentID = vnode->id;
2232 
2233 		// decrease the ref count on the old dir we just looked up into
2234 		put_vnode(vnode);
2235 
2236 		path = nextPath;
2237 		vnode = nextVnode;
2238 
2239 		// see if we hit a mount point
2240 		struct vnode* mountPoint = resolve_mount_point_to_volume_root(vnode);
2241 		if (mountPoint) {
2242 			put_vnode(vnode);
2243 			vnode = mountPoint;
2244 		}
2245 	}
2246 
2247 	*_vnode = vnode;
2248 	if (_parentID)
2249 		*_parentID = lastParentID;
2250 
2251 	return B_OK;
2252 }
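

/*	A minimal sketch (comment only, not compiled) of the reference counting
	contract of vnode_path_to_vnode(): the starting vnode is always consumed,
	so a caller that wants to keep its own reference acquires an extra one
	first, exactly as vnode_and_path_to_dir_vnode() below does:

		inc_vnode_ref_count(dir);
			// vnode_path_to_vnode() will release this reference
		struct vnode* result;
		status_t status = vnode_path_to_vnode(dir, mutablePath, true, 0,
			kernel, &result, NULL);
		// The extra reference on "dir" has been consumed; the caller's own
		// reference is untouched. On success, put_vnode(result) when done.
*/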
2253 
2254 
2255 static status_t
2256 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2257 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2258 {
2259 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2260 		get_current_io_context(kernel), _vnode, _parentID);
2261 }
2262 
2263 
2264 static status_t
2265 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2266 	ino_t* _parentID, bool kernel)
2267 {
2268 	struct vnode* start = NULL;
2269 
2270 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2271 
2272 	if (!path)
2273 		return B_BAD_VALUE;
2274 
2275 	if (*path == '\0')
2276 		return B_ENTRY_NOT_FOUND;
2277 
2278 	// figure out if we need to start at root or at cwd
2279 	if (*path == '/') {
2280 		if (sRoot == NULL) {
2281 			// we're a bit early, aren't we?
2282 			return B_ERROR;
2283 		}
2284 
2285 		while (*++path == '/')
2286 			;
2287 		start = get_root_vnode(kernel);
2288 
2289 		if (*path == '\0') {
2290 			*_vnode = start;
2291 			return B_OK;
2292 		}
2293 
2294 	} else {
2295 		struct io_context* context = get_current_io_context(kernel);
2296 
2297 		mutex_lock(&context->io_mutex);
2298 		start = context->cwd;
2299 		if (start != NULL)
2300 			inc_vnode_ref_count(start);
2301 		mutex_unlock(&context->io_mutex);
2302 
2303 		if (start == NULL)
2304 			return B_ERROR;
2305 	}
2306 
2307 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2308 		_parentID);
2309 }
2310 
2311 
/*! Returns the vnode for the next-to-last segment of the path, and returns
	the last path component in \a filename.
2314 	The path buffer must be able to store at least one additional character.
2315 */
2316 static status_t
2317 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2318 	bool kernel)
2319 {
2320 	status_t status = get_dir_path_and_leaf(path, filename);
2321 	if (status != B_OK)
2322 		return status;
2323 
2324 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2325 }
2326 
2327 
2328 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2329 		   to by a FD + path pair.
2330 
2331 	\a path must be given in either case. \a fd might be omitted, in which
2332 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a fd. If \a path is absolute, \a fd
	is ignored.
2336 
2337 	The caller has the responsibility to call put_vnode() on the returned
2338 	directory vnode.
2339 
2340 	\param fd The FD. May be < 0.
2341 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2342 	       is modified by this function. It must have at least room for a
2343 	       string one character longer than the path it contains.
2344 	\param _vnode A pointer to a variable the directory vnode shall be written
2345 		   into.
2346 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2347 		   the leaf name of the specified entry will be written.
2348 	\param kernel \c true, if invoked from inside the kernel, \c false if
2349 		   invoked from userland.
2350 	\return \c B_OK, if everything went fine, another error code otherwise.
2351 */
2352 static status_t
2353 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2354 	char* filename, bool kernel)
2355 {
2356 	if (!path)
2357 		return B_BAD_VALUE;
2358 	if (*path == '\0')
2359 		return B_ENTRY_NOT_FOUND;
2360 	if (fd < 0)
2361 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2362 
2363 	status_t status = get_dir_path_and_leaf(path, filename);
2364 	if (status != B_OK)
2365 		return status;
2366 
2367 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2368 }
2369 
2370 
2371 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2372 		   to by a vnode + path pair.
2373 
2374 	\a path must be given in either case. \a vnode might be omitted, in which
2375 	case \a path is either an absolute path or one relative to the current
	directory. If both are supplied and \a path is relative, it is reckoned
	off of the directory referred to by \a vnode. If \a path is absolute,
	\a vnode is ignored.
2379 
2380 	The caller has the responsibility to call put_vnode() on the returned
2381 	directory vnode.
2382 
2383 	\param vnode The vnode. May be \c NULL.
2384 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2385 	       is modified by this function. It must have at least room for a
2386 	       string one character longer than the path it contains.
2387 	\param _vnode A pointer to a variable the directory vnode shall be written
2388 		   into.
2389 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2390 		   the leaf name of the specified entry will be written.
2391 	\param kernel \c true, if invoked from inside the kernel, \c false if
2392 		   invoked from userland.
2393 	\return \c B_OK, if everything went fine, another error code otherwise.
2394 */
2395 static status_t
2396 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2397 	struct vnode** _vnode, char* filename, bool kernel)
2398 {
2399 	if (!path)
2400 		return B_BAD_VALUE;
2401 	if (*path == '\0')
2402 		return B_ENTRY_NOT_FOUND;
2403 	if (vnode == NULL || path[0] == '/')
2404 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2405 
2406 	status_t status = get_dir_path_and_leaf(path, filename);
2407 	if (status != B_OK)
2408 		return status;
2409 
2410 	inc_vnode_ref_count(vnode);
2411 		// vnode_path_to_vnode() always decrements the ref count
2412 
2413 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2414 }
2415 
2416 
2417 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2418 */
2419 static status_t
2420 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2421 	size_t bufferSize, struct io_context* ioContext)
2422 {
2423 	if (bufferSize < sizeof(struct dirent))
2424 		return B_BAD_VALUE;
2425 
2426 	// See if vnode is the root of a mount and move to the covered
2427 	// vnode so we get the underlying file system
2428 	VNodePutter vnodePutter;
2429 	if (vnode->mount->root_vnode == vnode
2430 		&& vnode->mount->covers_vnode != NULL) {
2431 		vnode = vnode->mount->covers_vnode;
2432 		inc_vnode_ref_count(vnode);
2433 		vnodePutter.SetTo(vnode);
2434 	}
2435 
2436 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2437 		// The FS supports getting the name of a vnode.
2438 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2439 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2440 			return B_OK;
2441 	}
2442 
	// The FS doesn't support getting the name of a vnode. So we search the
	// parent directory for the vnode, if the caller gave us one.
2445 
2446 	if (parent == NULL)
2447 		return B_NOT_SUPPORTED;
2448 
2449 	void* cookie;
2450 
2451 	status_t status = FS_CALL(parent, open_dir, &cookie);
2452 	if (status >= B_OK) {
2453 		while (true) {
2454 			uint32 num = 1;
2455 			status = dir_read(ioContext, parent, cookie, buffer, bufferSize,
2456 				&num);
2457 			if (status != B_OK)
2458 				break;
2459 			if (num == 0) {
2460 				status = B_ENTRY_NOT_FOUND;
2461 				break;
2462 			}
2463 
2464 			if (vnode->id == buffer->d_ino) {
2465 				// found correct entry!
2466 				break;
2467 			}
2468 		}
2469 
		// the cookie belongs to the parent directory we opened above
		FS_CALL(parent, close_dir, cookie);
		FS_CALL(parent, free_dir_cookie, cookie);
2472 	}
2473 	return status;
2474 }
2475 
2476 
2477 static status_t
2478 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2479 	size_t nameSize, bool kernel)
2480 {
2481 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2482 	struct dirent* dirent = (struct dirent*)buffer;
2483 
2484 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2485 		get_current_io_context(kernel));
2486 	if (status != B_OK)
2487 		return status;
2488 
2489 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2490 		return B_BUFFER_OVERFLOW;
2491 
2492 	return B_OK;
2493 }
2494 
2495 
2496 /*!	Gets the full path to a given directory vnode.
2497 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2498 	file system doesn't support this call, it will fall back to iterating
2499 	through the parent directory to get the name of the child.
2500 
2501 	To protect against circular loops, it supports a maximum tree depth
2502 	of 256 levels.
2503 
	Note that the path may no longer be correct by the time this function
	returns! It doesn't use any locking to guarantee that the returned path
	is correct, as paths aren't stable anyway: the path to a file can change
	at any time.

	It might be a good idea, though, to check in the calling function whether
	the returned path exists (it's not done here for efficiency reasons).
2510 */
2511 static status_t
2512 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2513 	bool kernel)
2514 {
2515 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2516 
2517 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2518 		return B_BAD_VALUE;
2519 
2520 	if (!S_ISDIR(vnode->Type()))
2521 		return B_NOT_A_DIRECTORY;
2522 
2523 	char* path = buffer;
2524 	int32 insert = bufferSize;
2525 	int32 maxLevel = 256;
2526 	int32 length;
2527 	status_t status;
2528 	struct io_context* ioContext = get_current_io_context(kernel);
2529 
2530 	// we don't use get_vnode() here because this call is more
2531 	// efficient and does all we need from get_vnode()
2532 	inc_vnode_ref_count(vnode);
2533 
2534 	if (vnode != ioContext->root) {
		// we haven't hit the IO context root yet
2536 		// resolve a volume root to its mount point
2537 		struct vnode* mountPoint = resolve_volume_root_to_mount_point(vnode);
2538 		if (mountPoint) {
2539 			put_vnode(vnode);
2540 			vnode = mountPoint;
2541 		}
2542 	}
2543 
2544 	path[--insert] = '\0';
2545 		// the path is filled right to left
2546 
2547 	while (true) {
2548 		// the name buffer is also used for fs_read_dir()
2549 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2550 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2551 		struct vnode* parentVnode;
2552 		ino_t parentID;
2553 
2554 		// lookup the parent vnode
2555 		if (vnode == ioContext->root) {
2556 			// we hit the IO context root
2557 			parentVnode = vnode;
2558 			inc_vnode_ref_count(vnode);
2559 		} else {
2560 			status = lookup_dir_entry(vnode, "..", &parentVnode);
2561 			if (status != B_OK)
2562 				goto out;
2563 		}
2564 
2565 		// get the node's name
2566 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2567 			sizeof(nameBuffer), ioContext);
2568 
2569 		if (vnode != ioContext->root) {
			// we haven't hit the IO context root yet
2571 			// resolve a volume root to its mount point
2572 			struct vnode* mountPoint
2573 				= resolve_volume_root_to_mount_point(parentVnode);
2574 			if (mountPoint) {
2575 				put_vnode(parentVnode);
2576 				parentVnode = mountPoint;
2577 				parentID = parentVnode->id;
2578 			}
2579 		}
2580 
2581 		bool hitRoot = (parentVnode == vnode);
2582 
2583 		// release the current vnode, we only need its parent from now on
2584 		put_vnode(vnode);
2585 		vnode = parentVnode;
2586 
2587 		if (status != B_OK)
2588 			goto out;
2589 
2590 		if (hitRoot) {
2591 			// we have reached "/", which means we have constructed the full
2592 			// path
2593 			break;
2594 		}
2595 
2596 		// TODO: add an explicit check for loops in about 10 levels to do
2597 		// real loop detection
2598 
		// don't go deeper than 'maxLevel' to prevent circular loops
2600 		if (maxLevel-- < 0) {
2601 			status = B_LINK_LIMIT;
2602 			goto out;
2603 		}
2604 
2605 		// add the name in front of the current path
2606 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2607 		length = strlen(name);
2608 		insert -= length;
2609 		if (insert <= 0) {
2610 			status = B_RESULT_NOT_REPRESENTABLE;
2611 			goto out;
2612 		}
2613 		memcpy(path + insert, name, length);
2614 		path[--insert] = '/';
2615 	}
2616 
2617 	// the root dir will result in an empty path: fix it
2618 	if (path[insert] == '\0')
2619 		path[--insert] = '/';
2620 
2621 	TRACE(("  path is: %s\n", path + insert));
2622 
2623 	// move the path to the start of the buffer
2624 	length = bufferSize - insert;
2625 	memmove(buffer, path + insert, length);
2626 
2627 out:
2628 	put_vnode(vnode);
2629 	return status;
2630 }
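

/*	An illustrative call (comment only, not compiled); "dirVnode" is a
	hypothetical directory vnode the caller holds a reference to:

		char buffer[B_PATH_NAME_LENGTH];
		status_t status = dir_vnode_to_path(dirVnode, buffer, sizeof(buffer),
			true);
		// On success "buffer" holds an absolute path such as "/boot/home";
		// as noted above, the path may already be stale when it is used.
*/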
2631 
2632 
2633 /*!	Checks the length of every path component, and adds a '.'
2634 	if the path ends in a slash.
2635 	The given path buffer must be able to store at least one
2636 	additional character.
2637 */
2638 static status_t
2639 check_path(char* to)
2640 {
2641 	int32 length = 0;
2642 
2643 	// check length of every path component
2644 
2645 	while (*to) {
2646 		char* begin;
2647 		if (*to == '/')
2648 			to++, length++;
2649 
2650 		begin = to;
2651 		while (*to != '/' && *to)
2652 			to++, length++;
2653 
2654 		if (to - begin > B_FILE_NAME_LENGTH)
2655 			return B_NAME_TOO_LONG;
2656 	}
2657 
2658 	if (length == 0)
2659 		return B_ENTRY_NOT_FOUND;
2660 
2661 	// complete path if there is a slash at the end
2662 
2663 	if (*(to - 1) == '/') {
2664 		if (length > B_PATH_NAME_LENGTH - 2)
2665 			return B_NAME_TOO_LONG;
2666 
2667 		to[0] = '.';
2668 		to[1] = '\0';
2669 	}
2670 
2671 	return B_OK;
2672 }
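

/*	A worked example (comment only, not compiled) of check_path(), assuming
	a writable buffer:

		char path[B_PATH_NAME_LENGTH] = "home/config/";
		if (check_path(path) == B_OK) {
			// path is now "home/config/.": the trailing slash was completed
			// with a '.'. A component longer than B_FILE_NAME_LENGTH would
			// have yielded B_NAME_TOO_LONG instead.
		}
*/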
2673 
2674 
2675 static struct file_descriptor*
2676 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2677 {
2678 	struct file_descriptor* descriptor
2679 		= get_fd(get_current_io_context(kernel), fd);
2680 	if (descriptor == NULL)
2681 		return NULL;
2682 
2683 	struct vnode* vnode = fd_vnode(descriptor);
2684 	if (vnode == NULL) {
2685 		put_fd(descriptor);
2686 		return NULL;
2687 	}
2688 
	// TODO: when we can close a file descriptor at any point, investigate
2690 	//	if this is still valid to do (accessing the vnode without ref_count
2691 	//	or locking)
2692 	*_vnode = vnode;
2693 	return descriptor;
2694 }
2695 
2696 
2697 static struct vnode*
2698 get_vnode_from_fd(int fd, bool kernel)
2699 {
2700 	struct file_descriptor* descriptor;
2701 	struct vnode* vnode;
2702 
2703 	descriptor = get_fd(get_current_io_context(kernel), fd);
2704 	if (descriptor == NULL)
2705 		return NULL;
2706 
2707 	vnode = fd_vnode(descriptor);
2708 	if (vnode != NULL)
2709 		inc_vnode_ref_count(vnode);
2710 
2711 	put_fd(descriptor);
2712 	return vnode;
2713 }
2714 
2715 
2716 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2717 	only the path will be considered. In this case, the \a path must not be
2718 	NULL.
2719 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2720 	and should be NULL for files.
2721 */
2722 static status_t
2723 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2724 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2725 {
2726 	if (fd < 0 && !path)
2727 		return B_BAD_VALUE;
2728 
2729 	if (path != NULL && *path == '\0')
2730 		return B_ENTRY_NOT_FOUND;
2731 
2732 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2733 		// no FD or absolute path
2734 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2735 	}
2736 
2737 	// FD only, or FD + relative path
2738 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2739 	if (!vnode)
2740 		return B_FILE_ERROR;
2741 
2742 	if (path != NULL) {
2743 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2744 			_vnode, _parentID);
2745 	}
2746 
2747 	// there is no relative path to take into account
2748 
2749 	*_vnode = vnode;
2750 	if (_parentID)
2751 		*_parentID = -1;
2752 
2753 	return B_OK;
2754 }
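

/*	The accepted FD + path combinations, sketched (comment only, not
	compiled); "dirFD" is a hypothetical open directory descriptor and
	"buffer" a writable path buffer:

		struct vnode* vnode;
		// FD only: yields the descriptor's own vnode
		fd_and_path_to_vnode(dirFD, NULL, true, &vnode, NULL, true);
		// FD + relative path ("sub/file"): resolved relative to dirFD
		fd_and_path_to_vnode(dirFD, buffer, true, &vnode, NULL, true);
		// absolute path ("/boot/home"): the FD is ignored and may be -1
		fd_and_path_to_vnode(-1, buffer, true, &vnode, NULL, true);
*/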
2755 
2756 
2757 static int
2758 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2759 	void* cookie, int openMode, bool kernel)
2760 {
2761 	struct file_descriptor* descriptor;
2762 	int fd;
2763 
2764 	// If the vnode is locked, we don't allow creating a new file/directory
2765 	// file_descriptor for it
2766 	if (vnode && vnode->mandatory_locked_by != NULL
2767 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2768 		return B_BUSY;
2769 
2770 	descriptor = alloc_fd();
2771 	if (!descriptor)
2772 		return B_NO_MEMORY;
2773 
2774 	if (vnode)
2775 		descriptor->u.vnode = vnode;
2776 	else
2777 		descriptor->u.mount = mount;
2778 	descriptor->cookie = cookie;
2779 
2780 	switch (type) {
2781 		// vnode types
2782 		case FDTYPE_FILE:
2783 			descriptor->ops = &sFileOps;
2784 			break;
2785 		case FDTYPE_DIR:
2786 			descriptor->ops = &sDirectoryOps;
2787 			break;
2788 		case FDTYPE_ATTR:
2789 			descriptor->ops = &sAttributeOps;
2790 			break;
2791 		case FDTYPE_ATTR_DIR:
2792 			descriptor->ops = &sAttributeDirectoryOps;
2793 			break;
2794 
2795 		// mount types
2796 		case FDTYPE_INDEX_DIR:
2797 			descriptor->ops = &sIndexDirectoryOps;
2798 			break;
2799 		case FDTYPE_QUERY:
2800 			descriptor->ops = &sQueryOps;
2801 			break;
2802 
2803 		default:
2804 			panic("get_new_fd() called with unknown type %d\n", type);
2805 			break;
2806 	}
2807 	descriptor->type = type;
2808 	descriptor->open_mode = openMode;
2809 
2810 	io_context* context = get_current_io_context(kernel);
2811 	fd = new_fd(context, descriptor);
2812 	if (fd < 0) {
2813 		free(descriptor);
2814 		return B_NO_MORE_FDS;
2815 	}
2816 
2817 	mutex_lock(&context->io_mutex);
2818 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2819 	mutex_unlock(&context->io_mutex);
2820 
2821 	return fd;
2822 }
2823 
2824 
2825 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2826 	vfs_normalize_path(). See there for more documentation.
2827 */
2828 static status_t
2829 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2830 {
2831 	VNodePutter dirPutter;
2832 	struct vnode* dir = NULL;
2833 	status_t error;
2834 
2835 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2836 		// get dir vnode + leaf name
2837 		struct vnode* nextDir;
2838 		char leaf[B_FILE_NAME_LENGTH];
2839 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2840 		if (error != B_OK)
2841 			return error;
2842 
2843 		dir = nextDir;
2844 		strcpy(path, leaf);
2845 		dirPutter.SetTo(dir);
2846 
2847 		// get file vnode, if we shall resolve links
2848 		bool fileExists = false;
2849 		struct vnode* fileVnode;
2850 		VNodePutter fileVnodePutter;
2851 		if (traverseLink) {
2852 			inc_vnode_ref_count(dir);
2853 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2854 					NULL) == B_OK) {
2855 				fileVnodePutter.SetTo(fileVnode);
2856 				fileExists = true;
2857 			}
2858 		}
2859 
2860 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2861 			// we're done -- construct the path
2862 			bool hasLeaf = true;
2863 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2864 				// special cases "." and ".." -- get the dir, forget the leaf
2865 				inc_vnode_ref_count(dir);
2866 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2867 					&nextDir, NULL);
2868 				if (error != B_OK)
2869 					return error;
2870 				dir = nextDir;
2871 				dirPutter.SetTo(dir);
2872 				hasLeaf = false;
2873 			}
2874 
2875 			// get the directory path
2876 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2877 			if (error != B_OK)
2878 				return error;
2879 
2880 			// append the leaf name
2881 			if (hasLeaf) {
2882 				// insert a directory separator if this is not the file system
2883 				// root
2884 				if ((strcmp(path, "/") != 0
2885 					&& strlcat(path, "/", pathSize) >= pathSize)
2886 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2887 					return B_NAME_TOO_LONG;
2888 				}
2889 			}
2890 
2891 			return B_OK;
2892 		}
2893 
2894 		// read link
2895 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2896 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2897 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2898 			if (error != B_OK)
2899 				return error;
2900 			path[bufferSize] = '\0';
2901 		} else
2902 			return B_BAD_VALUE;
2903 	}
2904 
2905 	return B_LINK_LIMIT;
2906 }
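

/*	A worked example (comment only, not compiled) of normalize_path(),
	assuming "/boot/home/Desktop" exists:

		char path[B_PATH_NAME_LENGTH] = "/boot/./home//Desktop";
		if (normalize_path(path, sizeof(path), true, true) == B_OK) {
			// path is now "/boot/home/Desktop"; "." components and
			// redundant slashes are gone, and with traverseLink == true a
			// leaf symlink would have been followed as well (up to
			// B_MAX_SYMLINKS times).
		}
*/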
2907 
2908 
2909 #ifdef ADD_DEBUGGER_COMMANDS
2910 
2911 
2912 static void
2913 _dump_advisory_locking(advisory_locking* locking)
2914 {
2915 	if (locking == NULL)
2916 		return;
2917 
	kprintf("   lock:        %ld\n", locking->lock);
	kprintf("   wait_sem:    %ld\n", locking->wait_sem);
2920 
2921 	int32 index = 0;
2922 	LockList::Iterator iterator = locking->locks.GetIterator();
2923 	while (iterator.HasNext()) {
2924 		struct advisory_lock* lock = iterator.Next();
2925 
2926 		kprintf("   [%2ld] team:   %ld\n", index++, lock->team);
2927 		kprintf("        start:  %Ld\n", lock->start);
2928 		kprintf("        end:    %Ld\n", lock->end);
2929 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2930 	}
2931 }
2932 
2933 
2934 static void
2935 _dump_mount(struct fs_mount* mount)
2936 {
2937 	kprintf("MOUNT: %p\n", mount);
2938 	kprintf(" id:            %ld\n", mount->id);
2939 	kprintf(" device_name:   %s\n", mount->device_name);
2940 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
2941 	kprintf(" covers_vnode:  %p\n", mount->covers_vnode);
2942 	kprintf(" partition:     %p\n", mount->partition);
2943 	kprintf(" lock:          %p\n", &mount->rlock);
2944 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
2945 		mount->owns_file_device ? " owns_file_device" : "");
2946 
2947 	fs_volume* volume = mount->volume;
2948 	while (volume != NULL) {
2949 		kprintf(" volume %p:\n", volume);
2950 		kprintf("  layer:            %ld\n", volume->layer);
2951 		kprintf("  private_volume:   %p\n", volume->private_volume);
2952 		kprintf("  ops:              %p\n", volume->ops);
2953 		kprintf("  file_system:      %p\n", volume->file_system);
2954 		kprintf("  file_system_name: %s\n", volume->file_system_name);
2955 		volume = volume->super_volume;
2956 	}
2957 
2958 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
2959 	set_debug_variable("_root", (addr_t)mount->root_vnode);
2960 	set_debug_variable("_covers", (addr_t)mount->covers_vnode);
2961 	set_debug_variable("_partition", (addr_t)mount->partition);
2962 }
2963 
2964 
2965 static bool
2966 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
2967 	const char* name)
2968 {
2969 	bool insertSlash = buffer[bufferSize] != '\0';
2970 	size_t nameLength = strlen(name);
2971 
2972 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
2973 		return false;
2974 
2975 	if (insertSlash)
2976 		buffer[--bufferSize] = '/';
2977 
2978 	bufferSize -= nameLength;
2979 	memcpy(buffer + bufferSize, name, nameLength);
2980 
2981 	return true;
2982 }
2983 
2984 
2985 static bool
2986 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
2987 	ino_t nodeID)
2988 {
2989 	if (bufferSize == 0)
2990 		return false;
2991 
2992 	bool insertSlash = buffer[bufferSize] != '\0';
2993 	if (insertSlash)
2994 		buffer[--bufferSize] = '/';
2995 
2996 	size_t size = snprintf(buffer, bufferSize,
2997 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
2998 	if (size > bufferSize) {
2999 		if (insertSlash)
3000 			bufferSize++;
3001 		return false;
3002 	}
3003 
3004 	if (size < bufferSize)
3005 		memmove(buffer + bufferSize - size, buffer, size);
3006 
3007 	bufferSize -= size;
3008 	return true;
3009 }
3010 
3011 
3012 static char*
3013 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3014 	bool& _truncated)
3015 {
3016 	// null-terminate the path
3017 	buffer[--bufferSize] = '\0';
3018 
3019 	while (true) {
3020 		while (vnode->mount->root_vnode == vnode
3021 				&& vnode->mount->covers_vnode != NULL) {
3022 			vnode = vnode->mount->covers_vnode;
3023 		}
3024 
3025 		if (vnode == sRoot) {
3026 			_truncated = bufferSize == 0;
3027 			if (!_truncated)
3028 				buffer[--bufferSize] = '/';
3029 			return buffer + bufferSize;
3030 		}
3031 
3032 		// resolve the name
3033 		ino_t dirID;
3034 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3035 			vnode->id, dirID);
3036 		if (name == NULL) {
3037 			// Failed to resolve the name -- prepend "<dev,node>/".
3038 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3039 				vnode->mount->id, vnode->id);
3040 			return buffer + bufferSize;
3041 		}
3042 
3043 		// prepend the name
3044 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3045 			_truncated = true;
3046 			return buffer + bufferSize;
3047 		}
3048 
3049 		// resolve the directory node
3050 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3051 		if (nextVnode == NULL) {
3052 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3053 				vnode->mount->id, dirID);
3054 			return buffer + bufferSize;
3055 		}
3056 
3057 		vnode = nextVnode;
3058 	}
3059 }
3060 
3061 
3062 static void
3063 _dump_vnode(struct vnode* vnode, bool printPath)
3064 {
3065 	kprintf("VNODE: %p\n", vnode);
3066 	kprintf(" device:        %ld\n", vnode->device);
3067 	kprintf(" id:            %Ld\n", vnode->id);
3068 	kprintf(" ref_count:     %ld\n", vnode->ref_count);
3069 	kprintf(" private_node:  %p\n", vnode->private_node);
3070 	kprintf(" mount:         %p\n", vnode->mount);
3071 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3072 	kprintf(" cache:         %p\n", vnode->cache);
3073 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3074 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3075 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3076 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3077 
3078 	_dump_advisory_locking(vnode->advisory_locking);
3079 
3080 	if (printPath) {
3081 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3082 		if (buffer != NULL) {
3083 			bool truncated;
3084 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3085 				B_PATH_NAME_LENGTH, truncated);
3086 			if (path != NULL) {
3087 				kprintf(" path:          ");
3088 				if (truncated)
3089 					kputs("<truncated>/");
3090 				kputs(path);
3091 				kputs("\n");
3092 			} else
3093 				kprintf("Failed to resolve vnode path.\n");
3094 
3095 			debug_free(buffer);
3096 		} else
3097 			kprintf("Failed to allocate memory for constructing the path.\n");
3098 	}
3099 
3100 	set_debug_variable("_node", (addr_t)vnode->private_node);
3101 	set_debug_variable("_mount", (addr_t)vnode->mount);
3102 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3103 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3104 }
3105 
3106 
3107 static int
3108 dump_mount(int argc, char** argv)
3109 {
3110 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3111 		kprintf("usage: %s [id|address]\n", argv[0]);
3112 		return 0;
3113 	}
3114 
3115 	uint32 id = parse_expression(argv[1]);
3116 	struct fs_mount* mount = NULL;
3117 
3118 	mount = (fs_mount*)hash_lookup(sMountsTable, (void*)&id);
3119 	if (mount == NULL) {
3120 		if (IS_USER_ADDRESS(id)) {
3121 			kprintf("fs_mount not found\n");
3122 			return 0;
3123 		}
3124 		mount = (fs_mount*)id;
3125 	}
3126 
3127 	_dump_mount(mount);
3128 	return 0;
3129 }
3130 
3131 
3132 static int
3133 dump_mounts(int argc, char** argv)
3134 {
3135 	if (argc != 1) {
3136 		kprintf("usage: %s\n", argv[0]);
3137 		return 0;
3138 	}
3139 
3140 	kprintf("address     id root       covers     cookie     fs_name\n");
3141 
3142 	struct hash_iterator iterator;
3143 	struct fs_mount* mount;
3144 
3145 	hash_open(sMountsTable, &iterator);
3146 	while ((mount = (struct fs_mount*)hash_next(sMountsTable, &iterator))
3147 			!= NULL) {
3148 		kprintf("%p%4ld %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3149 			mount->covers_vnode, mount->volume->private_volume,
3150 			mount->volume->file_system_name);
3151 
3152 		fs_volume* volume = mount->volume;
3153 		while (volume->super_volume != NULL) {
3154 			volume = volume->super_volume;
3155 			kprintf("                                     %p %s\n",
3156 				volume->private_volume, volume->file_system_name);
3157 		}
3158 	}
3159 
3160 	hash_close(sMountsTable, &iterator, false);
3161 	return 0;
3162 }
3163 
3164 
3165 static int
3166 dump_vnode(int argc, char** argv)
3167 {
3168 	bool printPath = false;
3169 	int argi = 1;
3170 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3171 		printPath = true;
3172 		argi++;
3173 	}
3174 
3175 	if (argi >= argc || argi + 2 < argc) {
3176 		print_debugger_command_usage(argv[0]);
3177 		return 0;
3178 	}
3179 
3180 	struct vnode* vnode = NULL;
3181 
3182 	if (argi + 1 == argc) {
3183 		vnode = (struct vnode*)parse_expression(argv[argi]);
3184 		if (IS_USER_ADDRESS(vnode)) {
3185 			kprintf("invalid vnode address\n");
3186 			return 0;
3187 		}
3188 		_dump_vnode(vnode, printPath);
3189 		return 0;
3190 	}
3191 
3192 	struct hash_iterator iterator;
3193 	dev_t device = parse_expression(argv[argi]);
3194 	ino_t id = parse_expression(argv[argi + 1]);
3195 
3196 	hash_open(sVnodeTable, &iterator);
3197 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3198 		if (vnode->id != id || vnode->device != device)
3199 			continue;
3200 
3201 		_dump_vnode(vnode, printPath);
3202 	}
3203 
3204 	hash_close(sVnodeTable, &iterator, false);
3205 	return 0;
3206 }
3207 
3208 
3209 static int
3210 dump_vnodes(int argc, char** argv)
3211 {
3212 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3213 		kprintf("usage: %s [device]\n", argv[0]);
3214 		return 0;
3215 	}
3216 
3217 	// restrict dumped nodes to a certain device if requested
3218 	dev_t device = parse_expression(argv[1]);
3219 
3220 	struct hash_iterator iterator;
3221 	struct vnode* vnode;
3222 
3223 	kprintf("address    dev     inode  ref cache      fs-node    locking    "
3224 		"flags\n");
3225 
3226 	hash_open(sVnodeTable, &iterator);
3227 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3228 		if (vnode->device != device)
3229 			continue;
3230 
3231 		kprintf("%p%4ld%10Ld%5ld %p %p %p %s%s%s\n", vnode, vnode->device,
3232 			vnode->id, vnode->ref_count, vnode->cache, vnode->private_node,
3233 			vnode->advisory_locking, vnode->IsRemoved() ? "r" : "-",
3234 			vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3235 	}
3236 
3237 	hash_close(sVnodeTable, &iterator, false);
3238 	return 0;
3239 }
3240 
3241 
3242 static int
3243 dump_vnode_caches(int argc, char** argv)
3244 {
3245 	struct hash_iterator iterator;
3246 	struct vnode* vnode;
3247 
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3249 		kprintf("usage: %s [device]\n", argv[0]);
3250 		return 0;
3251 	}
3252 
3253 	// restrict dumped nodes to a certain device if requested
3254 	dev_t device = -1;
3255 	if (argc > 1)
3256 		device = parse_expression(argv[1]);
3257 
3258 	kprintf("address    dev     inode cache          size   pages\n");
3259 
3260 	hash_open(sVnodeTable, &iterator);
3261 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3262 		if (vnode->cache == NULL)
3263 			continue;
3264 		if (device != -1 && vnode->device != device)
3265 			continue;
3266 
3267 		kprintf("%p%4ld%10Ld %p %8Ld%8ld\n", vnode, vnode->device, vnode->id,
3268 			vnode->cache, (vnode->cache->virtual_end + B_PAGE_SIZE - 1)
3269 				/ B_PAGE_SIZE, vnode->cache->page_count);
3270 	}
3271 
3272 	hash_close(sVnodeTable, &iterator, false);
3273 	return 0;
3274 }
3275 
3276 
3277 int
3278 dump_io_context(int argc, char** argv)
3279 {
	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3281 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3282 		return 0;
3283 	}
3284 
3285 	struct io_context* context = NULL;
3286 
3287 	if (argc > 1) {
3288 		uint32 num = parse_expression(argv[1]);
3289 		if (IS_KERNEL_ADDRESS(num))
3290 			context = (struct io_context*)num;
3291 		else {
3292 			Team* team = team_get_team_struct_locked(num);
3293 			if (team == NULL) {
3294 				kprintf("could not find team with ID %ld\n", num);
3295 				return 0;
3296 			}
3297 			context = (struct io_context*)team->io_context;
3298 		}
3299 	} else
3300 		context = get_current_io_context(true);
3301 
3302 	kprintf("I/O CONTEXT: %p\n", context);
3303 	kprintf(" root vnode:\t%p\n", context->root);
3304 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3305 	kprintf(" used fds:\t%lu\n", context->num_used_fds);
3306 	kprintf(" max fds:\t%lu\n", context->table_size);
3307 
3308 	if (context->num_used_fds)
3309 		kprintf("   no.  type         ops  ref  open  mode         pos"
3310 			"      cookie\n");
3311 
3312 	for (uint32 i = 0; i < context->table_size; i++) {
3313 		struct file_descriptor* fd = context->fds[i];
3314 		if (fd == NULL)
3315 			continue;
3316 
3317 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3318 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3319 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3320 			fd->pos, fd->cookie,
3321 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3322 				? "mount" : "vnode",
3323 			fd->u.vnode);
3324 	}
3325 
3326 	kprintf(" used monitors:\t%lu\n", context->num_monitors);
3327 	kprintf(" max monitors:\t%lu\n", context->max_monitors);
3328 
3329 	set_debug_variable("_cwd", (addr_t)context->cwd);
3330 
3331 	return 0;
3332 }
3333 
3334 
3335 int
3336 dump_vnode_usage(int argc, char** argv)
3337 {
3338 	if (argc != 1) {
3339 		kprintf("usage: %s\n", argv[0]);
3340 		return 0;
3341 	}
3342 
3343 	kprintf("Unused vnodes: %ld (max unused %ld)\n", sUnusedVnodes,
3344 		kMaxUnusedVnodes);
3345 
3346 	struct hash_iterator iterator;
3347 	hash_open(sVnodeTable, &iterator);
3348 
3349 	uint32 count = 0;
3350 	struct vnode* vnode;
3351 	while ((vnode = (struct vnode*)hash_next(sVnodeTable, &iterator)) != NULL) {
3352 		count++;
3353 	}
3354 
3355 	hash_close(sVnodeTable, &iterator, false);
3356 
3357 	kprintf("%lu vnodes total (%ld in use).\n", count, count - sUnusedVnodes);
3358 	return 0;
3359 }
3360 
#endif	// ADD_DEBUGGER_COMMANDS


/*!	Zeroes the physical pages described by an iovec array.
3364 	Returns in \a _bytes the number of bytes successfully cleared.
3365 */
3366 static status_t
3367 zero_pages(const iovec* vecs, size_t vecCount, size_t* _bytes)
3368 {
3369 	size_t bytes = *_bytes;
3370 	size_t index = 0;
3371 
3372 	while (bytes > 0) {
3373 		size_t length = min_c(vecs[index].iov_len, bytes);
3374 
3375 		status_t status = vm_memset_physical((addr_t)vecs[index].iov_base, 0,
3376 			length);
3377 		if (status != B_OK) {
3378 			*_bytes -= bytes;
3379 			return status;
3380 		}
3381 
		bytes -= length;
		index++;
	}
3384 
3385 	return B_OK;
3386 }
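

/*	An illustrative call (comment only, not compiled); the addresses are
	hypothetical physical addresses, not virtual ones:

		iovec vecs[2];
		vecs[0].iov_base = (void*)physicalAddressA;
		vecs[0].iov_len = B_PAGE_SIZE;
		vecs[1].iov_base = (void*)physicalAddressB;
		vecs[1].iov_len = 2 * B_PAGE_SIZE;

		size_t bytes = 3 * B_PAGE_SIZE;
		status_t status = zero_pages(vecs, 2, &bytes);
		// on success all three pages are zeroed; on failure "bytes" holds
		// the number of bytes that were actually cleared
*/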
3387 
3388 
3389 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3390 	and calls the file system hooks to read/write the request to disk.
3391 */
3392 static status_t
3393 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3394 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3395 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3396 	bool doWrite)
3397 {
3398 	if (fileVecCount == 0) {
3399 		// There are no file vecs at this offset, so we're obviously trying
3400 		// to access the file outside of its bounds
3401 		return B_BAD_VALUE;
3402 	}
3403 
3404 	size_t numBytes = *_numBytes;
3405 	uint32 fileVecIndex;
3406 	size_t vecOffset = *_vecOffset;
3407 	uint32 vecIndex = *_vecIndex;
3408 	status_t status;
3409 	size_t size;
3410 
3411 	if (!doWrite && vecOffset == 0) {
3412 		// now directly read the data from the device
3413 		// the first file_io_vec can be read directly
3414 
3415 		if (fileVecs[0].length < numBytes)
3416 			size = fileVecs[0].length;
3417 		else
3418 			size = numBytes;
3419 
3420 		if (fileVecs[0].offset >= 0) {
3421 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3422 				&vecs[vecIndex], vecCount - vecIndex, &size);
3423 		} else {
3424 			// sparse read
3425 			status = zero_pages(&vecs[vecIndex], vecCount - vecIndex, &size);
3426 		}
3427 		if (status != B_OK)
3428 			return status;
3429 
3430 		// TODO: this is a work-around for buggy device drivers!
3431 		//	When our own drivers honour the length, we can:
3432 		//	a) also use this direct I/O for writes (otherwise, it would
3433 		//	   overwrite precious data)
3434 		//	b) panic if the term below is true (at least for writes)
3435 		if (size > fileVecs[0].length) {
3436 			//dprintf("warning: device driver %p doesn't respect total length "
3437 			//	"in read_pages() call!\n", ref->device);
3438 			size = fileVecs[0].length;
3439 		}
3440 
3441 		ASSERT(size <= fileVecs[0].length);
3442 
3443 		// If the file portion was contiguous, we're already done now
3444 		if (size == numBytes)
3445 			return B_OK;
3446 
3447 		// if we reached the end of the file, we can return as well
3448 		if (size != fileVecs[0].length) {
3449 			*_numBytes = size;
3450 			return B_OK;
3451 		}
3452 
3453 		fileVecIndex = 1;
3454 
3455 		// first, find out where we have to continue in our iovecs
3456 		for (; vecIndex < vecCount; vecIndex++) {
3457 			if (size < vecs[vecIndex].iov_len)
3458 				break;
3459 
3460 			size -= vecs[vecIndex].iov_len;
3461 		}
3462 
3463 		vecOffset = size;
3464 	} else {
3465 		fileVecIndex = 0;
3466 		size = 0;
3467 	}
3468 
3469 	// Too bad, let's process the rest of the file_io_vecs
3470 
3471 	size_t totalSize = size;
3472 	size_t bytesLeft = numBytes - size;
3473 
3474 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3475 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3476 		off_t fileOffset = fileVec.offset;
3477 		off_t fileLeft = min_c(fileVec.length, bytesLeft);
3478 
3479 		TRACE(("FILE VEC [%lu] length %Ld\n", fileVecIndex, fileLeft));
3480 
3481 		// process the complete fileVec
3482 		while (fileLeft > 0) {
3483 			iovec tempVecs[MAX_TEMP_IO_VECS];
3484 			uint32 tempCount = 0;
3485 
3486 			// size tracks how much of what is left of the current fileVec
3487 			// (fileLeft) has been assigned to tempVecs
3488 			size = 0;
3489 
3490 			// assign what is left of the current fileVec to the tempVecs
3491 			for (size = 0; size < fileLeft && vecIndex < vecCount
3492 					&& tempCount < MAX_TEMP_IO_VECS;) {
3493 				// try to satisfy one iovec per iteration (or as much as
3494 				// possible)
3495 
3496 				// bytes left of the current iovec
3497 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3498 				if (vecLeft == 0) {
3499 					vecOffset = 0;
3500 					vecIndex++;
3501 					continue;
3502 				}
3503 
3504 				TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
3505 					vecIndex, vecOffset, size));
3506 
3507 				// actually available bytes
3508 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3509 
3510 				tempVecs[tempCount].iov_base
3511 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3512 				tempVecs[tempCount].iov_len = tempVecSize;
3513 				tempCount++;
3514 
3515 				size += tempVecSize;
3516 				vecOffset += tempVecSize;
3517 			}
3518 
3519 			size_t bytes = size;
3520 
3521 			if (fileOffset == -1) {
3522 				if (doWrite) {
3523 					panic("sparse write attempt: vnode %p", vnode);
3524 					status = B_IO_ERROR;
3525 				} else {
3526 					// sparse read
3527 					status = zero_pages(tempVecs, tempCount, &bytes);
3528 				}
3529 			} else if (doWrite) {
3530 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3531 					tempVecs, tempCount, &bytes);
3532 			} else {
3533 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3534 					tempVecs, tempCount, &bytes);
3535 			}
3536 			if (status != B_OK)
3537 				return status;
3538 
3539 			totalSize += bytes;
3540 			bytesLeft -= size;
3541 			if (fileOffset >= 0)
3542 				fileOffset += size;
3543 			fileLeft -= size;
3544 			//dprintf("-> file left = %Lu\n", fileLeft);
3545 
3546 			if (size != bytes || vecIndex >= vecCount) {
3547 				// there are no more bytes or iovecs, let's bail out
3548 				*_numBytes = totalSize;
3549 				return B_OK;
3550 			}
3551 		}
3552 	}
3553 
3554 	*_vecIndex = vecIndex;
3555 	*_vecOffset = vecOffset;
3556 	*_numBytes = totalSize;
3557 	return B_OK;
3558 }
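

/*	A sketch (comment only, not compiled) of the file_io_vec input this
	function consumes; the values are hypothetical. A negative offset marks
	a sparse extent, which is read as zeros and must never be written:

		file_io_vec fileVecs[2];
		fileVecs[0].offset = 4096;	// on-disk position of the first extent
		fileVecs[0].length = 8192;
		fileVecs[1].offset = -1;	// sparse extent, no disk blocks
		fileVecs[1].length = 4096;
		// A 12 KiB request covering both extents reads 8 KiB from disk via
		// read_pages() and zero-fills the remaining 4 KiB via zero_pages().
*/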
3559 
3560 
3561 //	#pragma mark - public API for file systems
3562 
3563 
3564 extern "C" status_t
3565 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3566 	fs_vnode_ops* ops)
3567 {
3568 	FUNCTION(("new_vnode(volume = %p (%ld), vnodeID = %Ld, node = %p)\n",
3569 		volume, volume->id, vnodeID, privateNode));
3570 
3571 	if (privateNode == NULL)
3572 		return B_BAD_VALUE;
3573 
3574 	// create the node
3575 	bool nodeCreated;
3576 	struct vnode* vnode;
3577 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3578 		nodeCreated);
3579 	if (status != B_OK)
3580 		return status;
3581 
3582 	WriteLocker nodeLocker(sVnodeLock, true);
3583 		// create_new_vnode_and_lock() has locked for us
3584 
3585 	// file system integrity check:
3586 	// test if the vnode already exists and bail out if this is the case!
3587 	if (!nodeCreated) {
3588 		panic("vnode %ld:%Ld already exists (node = %p, vnode->node = %p)!",
3589 			volume->id, vnodeID, privateNode, vnode->private_node);
3590 		return B_ERROR;
3591 	}
3592 
3593 	vnode->private_node = privateNode;
3594 	vnode->ops = ops;
3595 	vnode->SetUnpublished(true);
3596 
3597 	TRACE(("returns: %s\n", strerror(status)));
3598 
3599 	return status;
3600 }
3601 
3602 
3603 extern "C" status_t
3604 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3605 	fs_vnode_ops* ops, int type, uint32 flags)
3606 {
3607 	FUNCTION(("publish_vnode()\n"));
3608 
3609 	WriteLocker locker(sVnodeLock);
3610 
3611 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3612 
3613 	bool nodeCreated = false;
3614 	if (vnode == NULL) {
3615 		if (privateNode == NULL)
3616 			return B_BAD_VALUE;
3617 
3618 		// create the node
3619 		locker.Unlock();
3620 			// create_new_vnode_and_lock() will re-lock for us on success
3621 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3622 			nodeCreated);
3623 		if (status != B_OK)
3624 			return status;
3625 
3626 		locker.SetTo(sVnodeLock, true);
3627 	}
3628 
3629 	if (nodeCreated) {
3630 		vnode->private_node = privateNode;
3631 		vnode->ops = ops;
3632 		vnode->SetUnpublished(true);
3633 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3634 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3635 		// already known, but not published
3636 	} else
3637 		return B_BAD_VALUE;
3638 
3639 	bool publishSpecialSubNode = false;
3640 
3641 	vnode->SetType(type);
3642 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3643 	publishSpecialSubNode = is_special_node_type(type)
3644 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3645 
3646 	status_t status = B_OK;
3647 
3648 	// create sub vnodes, if necessary
3649 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3650 		locker.Unlock();
3651 
3652 		fs_volume* subVolume = volume;
3653 		if (volume->sub_volume != NULL) {
3654 			while (status == B_OK && subVolume->sub_volume != NULL) {
3655 				subVolume = subVolume->sub_volume;
3656 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3657 					vnode);
3658 			}
3659 		}
3660 
3661 		if (status == B_OK && publishSpecialSubNode)
3662 			status = create_special_sub_node(vnode, flags);
3663 
3664 		if (status != B_OK) {
3665 			// error -- clean up the created sub vnodes
3666 			while (subVolume->super_volume != volume) {
3667 				subVolume = subVolume->super_volume;
3668 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3669 			}
3670 		}
3671 
3672 		if (status == B_OK) {
3673 			ReadLocker vnodesReadLocker(sVnodeLock);
3674 			AutoLocker<Vnode> nodeLocker(vnode);
3675 			vnode->SetBusy(false);
3676 			vnode->SetUnpublished(false);
3677 		} else {
3678 			locker.Lock();
3679 			hash_remove(sVnodeTable, vnode);
3680 			remove_vnode_from_mount_list(vnode, vnode->mount);
3681 			free(vnode);
3682 		}
3683 	} else {
3684 		// we still hold the write lock -- mark the node unbusy and published
3685 		vnode->SetBusy(false);
3686 		vnode->SetUnpublished(false);
3687 	}
3688 
3689 	TRACE(("returns: %s\n", strerror(status)));
3690 
3691 	return status;
3692 }
3693 
3694 
3695 extern "C" status_t
3696 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3697 {
3698 	struct vnode* vnode;
3699 
3700 	if (volume == NULL)
3701 		return B_BAD_VALUE;
3702 
3703 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3704 	if (status != B_OK)
3705 		return status;
3706 
3707 	// If this is a layered FS, we need to get the node cookie for the requested
3708 	// layer.
3709 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3710 		fs_vnode resolvedNode;
3711 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3712 			&resolvedNode);
3713 		if (status != B_OK) {
3714 			panic("get_vnode(): Failed to get super node for vnode %p, "
3715 				"volume: %p", vnode, volume);
3716 			put_vnode(vnode);
3717 			return status;
3718 		}
3719 
3720 		if (_privateNode != NULL)
3721 			*_privateNode = resolvedNode.private_node;
3722 	} else if (_privateNode != NULL)
3723 		*_privateNode = vnode->private_node;
3724 
3725 	return B_OK;
3726 }
3727 
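/* Sketch of the required reference discipline (the ID is illustrative):
   every successful get_vnode() must be balanced by a put_vnode() on the
   same volume and ID:

	void* privateNode;
	if (get_vnode(volume, someID, &privateNode) == B_OK) {
		// ... work with privateNode ...
		put_vnode(volume, someID);
	}
*/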
3728 
3729 extern "C" status_t
3730 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3731 {
3732 	struct vnode* vnode;
3733 
3734 	rw_lock_read_lock(&sVnodeLock);
3735 	vnode = lookup_vnode(volume->id, vnodeID);
3736 	rw_lock_read_unlock(&sVnodeLock);
3737 
3738 	if (vnode == NULL)
3739 		return B_BAD_VALUE;
3740 
3741 	inc_vnode_ref_count(vnode);
3742 	return B_OK;
3743 }
3744 
3745 
3746 extern "C" status_t
3747 put_vnode(fs_volume* volume, ino_t vnodeID)
3748 {
3749 	struct vnode* vnode;
3750 
3751 	rw_lock_read_lock(&sVnodeLock);
3752 	vnode = lookup_vnode(volume->id, vnodeID);
3753 	rw_lock_read_unlock(&sVnodeLock);
3754 
3755 	if (vnode == NULL)
3756 		return B_BAD_VALUE;
3757 
3758 	dec_vnode_ref_count(vnode, false, true);
3759 	return B_OK;
3760 }
3761 
3762 
3763 extern "C" status_t
3764 remove_vnode(fs_volume* volume, ino_t vnodeID)
3765 {
3766 	ReadLocker locker(sVnodeLock);
3767 
3768 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3769 	if (vnode == NULL)
3770 		return B_ENTRY_NOT_FOUND;
3771 
3772 	if (vnode->covered_by != NULL) {
3773 		// this vnode is in use
3774 		return B_BUSY;
3775 	}
3776 
3777 	vnode->Lock();
3778 
3779 	vnode->SetRemoved(true);
3780 	bool removeUnpublished = false;
3781 
3782 	if (vnode->IsUnpublished()) {
3783 		// prepare the vnode for deletion
3784 		removeUnpublished = true;
3785 		vnode->SetBusy(true);
3786 	}
3787 
3788 	vnode->Unlock();
3789 	locker.Unlock();
3790 
3791 	if (removeUnpublished) {
3792 		// If the vnode hasn't been published yet, we delete it here
3793 		atomic_add(&vnode->ref_count, -1);
3794 		free_vnode(vnode, true);
3795 	}
3796 
3797 	return B_OK;
3798 }
3799 
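/* Sketch: a file system typically calls remove_vnode() from its unlink()
   hook once the last entry referring to the node is gone. The node is then
   deleted as soon as its last reference has been put; unremove_vnode()
   below cancels a pending removal, and get_vnode_removed() queries the
   flag. */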
3800 
3801 extern "C" status_t
3802 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3803 {
3804 	struct vnode* vnode;
3805 
3806 	rw_lock_read_lock(&sVnodeLock);
3807 
3808 	vnode = lookup_vnode(volume->id, vnodeID);
3809 	if (vnode) {
3810 		AutoLocker<Vnode> nodeLocker(vnode);
3811 		vnode->SetRemoved(false);
3812 	}
3813 
3814 	rw_lock_read_unlock(&sVnodeLock);
3815 	return B_OK;
3816 }
3817 
3818 
3819 extern "C" status_t
3820 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3821 {
3822 	ReadLocker _(sVnodeLock);
3823 
3824 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3825 		if (_removed != NULL)
3826 			*_removed = vnode->IsRemoved();
3827 		return B_OK;
3828 	}
3829 
3830 	return B_BAD_VALUE;
3831 }
3832 
3833 
3834 extern "C" fs_volume*
3835 volume_for_vnode(fs_vnode* _vnode)
3836 {
3837 	if (_vnode == NULL)
3838 		return NULL;
3839 
3840 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3841 	return vnode->mount->volume;
3842 }
3843 
3844 
3845 #if 0
3846 extern "C" status_t
3847 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3848 	size_t* _numBytes)
3849 {
3850 	struct file_descriptor* descriptor;
3851 	struct vnode* vnode;
3852 
3853 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3854 	if (descriptor == NULL)
3855 		return B_FILE_ERROR;
3856 
3857 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
3858 		count, 0, _numBytes);
3859 
3860 	put_fd(descriptor);
3861 	return status;
3862 }
3863 
3864 
3865 extern "C" status_t
3866 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
3867 	size_t* _numBytes)
3868 {
3869 	struct file_descriptor* descriptor;
3870 	struct vnode* vnode;
3871 
3872 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3873 	if (descriptor == NULL)
3874 		return B_FILE_ERROR;
3875 
3876 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
3877 		count, 0, _numBytes);
3878 
3879 	put_fd(descriptor);
3880 	return status;
3881 }
3882 #endif
3883 
3884 
3885 extern "C" status_t
3886 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3887 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3888 	size_t* _bytes)
3889 {
3890 	struct file_descriptor* descriptor;
3891 	struct vnode* vnode;
3892 
3893 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3894 	if (descriptor == NULL)
3895 		return B_FILE_ERROR;
3896 
3897 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3898 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3899 		false);
3900 
3901 	put_fd(descriptor);
3902 	return status;
3903 }
3904 
3905 
3906 extern "C" status_t
3907 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
3908 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
3909 	size_t* _bytes)
3910 {
3911 	struct file_descriptor* descriptor;
3912 	struct vnode* vnode;
3913 
3914 	descriptor = get_fd_and_vnode(fd, &vnode, true);
3915 	if (descriptor == NULL)
3916 		return B_FILE_ERROR;
3917 
3918 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
3919 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
3920 		true);
3921 
3922 	put_fd(descriptor);
3923 	return status;
3924 }
3925 
3926 
3927 extern "C" status_t
3928 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
3929 {
3930 	// lookup mount -- the caller is required to make sure that the mount
3931 	// won't go away
3932 	MutexLocker locker(sMountMutex);
3933 	struct fs_mount* mount = find_mount(mountID);
3934 	if (mount == NULL)
3935 		return B_BAD_VALUE;
3936 	locker.Unlock();
3937 
3938 	return mount->entry_cache.Add(dirID, name, nodeID);
3939 }
3940 
3941 
3942 extern "C" status_t
3943 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
3944 {
3945 	// lookup mount -- the caller is required to make sure that the mount
3946 	// won't go away
3947 	MutexLocker locker(sMountMutex);
3948 	struct fs_mount* mount = find_mount(mountID);
3949 	if (mount == NULL)
3950 		return B_BAD_VALUE;
3951 	locker.Unlock();
3952 
3953 	return mount->entry_cache.Remove(dirID, name);
3954 }
3955 
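/* Illustrative use of the entry cache hooks (the IDs and name are
   hypothetical): a file system can pre-seed lookups it has already resolved
   and must invalidate them again when the entry disappears:

	entry_cache_add(volume->id, dirID, "data.txt", nodeID);
	// ... later, when the entry is unlinked:
	entry_cache_remove(volume->id, dirID, "data.txt");
*/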
3956 
3957 //	#pragma mark - private VFS API
3958 //	Functions the VFS exports for other parts of the kernel
3959 
3960 
3961 /*! Acquires another reference to the vnode that has to be released
3962 	by calling vfs_put_vnode().
3963 */
3964 void
3965 vfs_acquire_vnode(struct vnode* vnode)
3966 {
3967 	inc_vnode_ref_count(vnode);
3968 }
3969 
3970 
3971 /*! This is currently called from file_cache_create() only.
3972 	It's probably a temporary solution as long as devfs requires that
3973 	fs_read_pages()/fs_write_pages() are called with the standard
3974 	open cookie and not with a device cookie.
3975 	If that's done differently, remove this call; it has no other
3976 	purpose.
3977 */
3978 extern "C" status_t
3979 vfs_get_cookie_from_fd(int fd, void** _cookie)
3980 {
3981 	struct file_descriptor* descriptor;
3982 
3983 	descriptor = get_fd(get_current_io_context(true), fd);
3984 	if (descriptor == NULL)
3985 		return B_FILE_ERROR;
3986 
3987 	*_cookie = descriptor->cookie;
3988 	return B_OK;
3989 }
3990 
3991 
3992 extern "C" status_t
3993 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
3994 {
3995 	*vnode = get_vnode_from_fd(fd, kernel);
3996 
3997 	if (*vnode == NULL)
3998 		return B_FILE_ERROR;
3999 
4000 	return B_OK;
4001 }
4002 
4003 
4004 extern "C" status_t
4005 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4006 {
4007 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4008 		path, kernel));
4009 
4010 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4011 	if (pathBuffer.InitCheck() != B_OK)
4012 		return B_NO_MEMORY;
4013 
4014 	char* buffer = pathBuffer.LockBuffer();
4015 	strlcpy(buffer, path, pathBuffer.BufferSize());
4016 
4017 	struct vnode* vnode;
4018 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4019 	if (status != B_OK)
4020 		return status;
4021 
4022 	*_vnode = vnode;
4023 	return B_OK;
4024 }
4025 
4026 
4027 extern "C" status_t
4028 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4029 {
4030 	struct vnode* vnode;
4031 
4032 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4033 	if (status != B_OK)
4034 		return status;
4035 
4036 	*_vnode = vnode;
4037 	return B_OK;
4038 }
4039 
4040 
4041 extern "C" status_t
4042 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4043 	const char* name, struct vnode** _vnode)
4044 {
4045 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4046 }
4047 
4048 
4049 extern "C" void
4050 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4051 {
4052 	*_mountID = vnode->device;
4053 	*_vnodeID = vnode->id;
4054 }
4055 
4056 
4057 /*!
4058 	Helper function abstracting the process of "converting" a given
4059 	vnode-pointer to a fs_vnode-pointer.
4060 	Currently only used in bindfs.
4061 */
4062 extern "C" fs_vnode*
4063 vfs_fsnode_for_vnode(struct vnode* vnode)
4064 {
4065 	return vnode;
4066 }
4067 
4068 
4069 /*!
4070 	Calls fs_open() on the given vnode and returns a new
4071 	file descriptor for it
4072 */
4073 int
4074 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4075 {
4076 	return open_vnode(vnode, openMode, kernel);
4077 }
4078 
4079 
4080 /*!	Looks up a vnode with the given mount and vnode ID.
4081 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4082 	to the node.
4083 	It's currently only used by file_cache_create().
4084 */
4085 extern "C" status_t
4086 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4087 {
4088 	rw_lock_read_lock(&sVnodeLock);
4089 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4090 	rw_lock_read_unlock(&sVnodeLock);
4091 
4092 	if (vnode == NULL)
4093 		return B_ERROR;
4094 
4095 	*_vnode = vnode;
4096 	return B_OK;
4097 }
4098 
4099 
4100 extern "C" status_t
4101 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4102 	bool traverseLeafLink, bool kernel, void** _node)
4103 {
4104 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4105 		volume, path, kernel));
4106 
4107 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
4108 	if (pathBuffer.InitCheck() != B_OK)
4109 		return B_NO_MEMORY;
4110 
4111 	fs_mount* mount;
4112 	status_t status = get_mount(volume->id, &mount);
4113 	if (status != B_OK)
4114 		return status;
4115 
4116 	char* buffer = pathBuffer.LockBuffer();
4117 	strlcpy(buffer, path, pathBuffer.BufferSize());
4118 
4119 	struct vnode* vnode = mount->root_vnode;
4120 
4121 	if (buffer[0] == '/')
4122 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4123 	else {
4124 		inc_vnode_ref_count(vnode);
4125 			// vnode_path_to_vnode() releases a reference to the starting vnode
4126 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4127 			kernel, &vnode, NULL);
4128 	}
4129 
4130 	put_mount(mount);
4131 
4132 	if (status != B_OK)
4133 		return status;
4134 
4135 	if (vnode->device != volume->id) {
4136 		// wrong mount ID - must not gain access on foreign file system nodes
4137 		put_vnode(vnode);
4138 		return B_BAD_VALUE;
4139 	}
4140 
4141 	// Use get_vnode() to resolve the cookie for the right layer.
4142 	status = get_vnode(volume, vnode->id, _node);
4143 	put_vnode(vnode);
4144 
4145 	return status;
4146 }
4147 
4148 
4149 status_t
4150 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4151 	struct stat* stat, bool kernel)
4152 {
4153 	status_t status;
4154 
4155 	if (path) {
4156 		// path given: get the stat of the node referred to by (fd, path)
4157 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
4158 		if (pathBuffer.InitCheck() != B_OK)
4159 			return B_NO_MEMORY;
4160 
4161 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4162 			traverseLeafLink, stat, kernel);
4163 	} else {
4164 		// no path given: get the FD and use the FD operation
4165 		struct file_descriptor* descriptor
4166 			= get_fd(get_current_io_context(kernel), fd);
4167 		if (descriptor == NULL)
4168 			return B_FILE_ERROR;
4169 
4170 		if (descriptor->ops->fd_read_stat)
4171 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4172 		else
4173 			status = B_NOT_SUPPORTED;
4174 
4175 		put_fd(descriptor);
4176 	}
4177 
4178 	return status;
4179 }
4180 
4181 
4182 /*!	Finds the full path to the file that contains the module \a moduleName,
4183 	puts it into \a pathBuffer, and returns B_OK for success.
4184 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4185 	\c B_ENTRY_NOT_FOUND if no file could be found.
4186 	\a pathBuffer is clobbered in any case and must not be relied on if this
4187 	function returns unsuccessfully.
4188 	\a basePath and \a pathBuffer must not point to the same space.
4189 */
4190 status_t
4191 vfs_get_module_path(const char* basePath, const char* moduleName,
4192 	char* pathBuffer, size_t bufferSize)
4193 {
4194 	struct vnode* dir;
4195 	struct vnode* file;
4196 	status_t status;
4197 	size_t length;
4198 	char* path;
4199 
4200 	if (bufferSize == 0
4201 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4202 		return B_BUFFER_OVERFLOW;
4203 
4204 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4205 	if (status != B_OK)
4206 		return status;
4207 
4208 	// the path buffer had been clobbered by the above call
4209 	length = strlcpy(pathBuffer, basePath, bufferSize);
4210 	if (pathBuffer[length - 1] != '/')
4211 		pathBuffer[length++] = '/';
4212 
4213 	path = pathBuffer + length;
4214 	bufferSize -= length;
4215 
4216 	while (moduleName) {
4217 		char* nextPath = strchr(moduleName, '/');
4218 		if (nextPath == NULL)
4219 			length = strlen(moduleName);
4220 		else {
4221 			length = nextPath - moduleName;
4222 			nextPath++;
4223 		}
4224 
4225 		if (length + 1 >= bufferSize) {
4226 			status = B_BUFFER_OVERFLOW;
4227 			goto err;
4228 		}
4229 
4230 		memcpy(path, moduleName, length);
4231 		path[length] = '\0';
4232 		moduleName = nextPath;
4233 
4234 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4235 		if (status != B_OK) {
4236 			// vnode_path_to_vnode() has already released the reference to dir
4237 			return status;
4238 		}
4239 
4240 		if (S_ISDIR(file->Type())) {
4241 			// go to the next directory
4242 			path[length] = '/';
4243 			path[length + 1] = '\0';
4244 			path += length + 1;
4245 			bufferSize -= length + 1;
4246 
4247 			dir = file;
4248 		} else if (S_ISREG(file->Type())) {
4249 			// it's a file so it should be what we've searched for
4250 			put_vnode(file);
4251 
4252 			return B_OK;
4253 		} else {
4254 			TRACE(("vfs_get_module_path(): something is strange here: "
4255 				"0x%08lx...\n", file->Type()));
4256 			status = B_ERROR;
4257 			dir = file;
4258 			goto err;
4259 		}
4260 	}
4261 
4262 	// if we got here, the moduleName just pointed to a directory, not to
4263 	// a real module - what should we do in this case?
4264 	status = B_ENTRY_NOT_FOUND;
4265 
4266 err:
4267 	put_vnode(dir);
4268 	return status;
4269 }
4270 
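/* Illustrative call (base path and module name are examples only):

	char path[B_PATH_NAME_LENGTH];
	status_t error = vfs_get_module_path("/boot/system/add-ons/kernel",
		"bus_managers/pci/v1", path, sizeof(path));

   On B_OK, "path" names the regular file containing the module; on failure
   the buffer contents are unspecified, as documented above.
*/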
4271 
4272 /*!	\brief Normalizes a given path.
4273 
4274 	The path must refer to an existing or non-existing entry in an existing
4275 	directory, that is chopping off the leaf component the remaining path must
4276 	refer to an existing directory.
4277 
4278 	The returned path will be canonical in that it will be absolute, will
4279 	not contain any "." or ".." components or duplicate occurrences of '/'s,
4280 	and none of the directory components will be symbolic links.
4281 
4282 	Any two paths referring to the same entry will result in the same
4283 	normalized path (well, that is pretty much the definition of `normalized',
4284 	isn't it :-).
4285 
4286 	\param path The path to be normalized.
4287 	\param buffer The buffer into which the normalized path will be written.
4288 		   May be the same one as \a path.
4289 	\param bufferSize The size of \a buffer.
4290 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4291 	\param kernel \c true, if the IO context of the kernel shall be used,
4292 		   otherwise that of the team this thread belongs to. Only relevant,
4293 		   if the path is relative (to get the CWD).
4294 	\return \c B_OK if everything went fine, another error code otherwise.
4295 */
4296 status_t
4297 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4298 	bool traverseLink, bool kernel)
4299 {
4300 	if (!path || !buffer || bufferSize < 1)
4301 		return B_BAD_VALUE;
4302 
4303 	if (path != buffer) {
4304 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4305 			return B_BUFFER_OVERFLOW;
4306 	}
4307 
4308 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4309 }
4310 
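/* Example (the buffer may alias the input, as noted above): with a CWD of
   "/boot/home", the relative path "..//home/./Desktop" normalizes to
   "/boot/home/Desktop":

	char buffer[B_PATH_NAME_LENGTH];
	strlcpy(buffer, "..//home/./Desktop", sizeof(buffer));
	status_t error = vfs_normalize_path(buffer, buffer, sizeof(buffer),
		true, false);
*/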
4311 
4312 /*!	\brief Creates a special node in the file system.
4313 
4314 	The caller gets a reference to the newly created node (which is passed
4315 	back through \a _createdVnode) and is responsible for releasing it.
4316 
4317 	\param path The path where to create the entry for the node. Can be \c NULL,
4318 		in which case the node is created without an entry in the root FS -- it
4319 		will automatically be deleted when the last reference has been released.
4320 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4321 		the target file system will just create the node with its standard
4322 		operations. Depending on the type of the node a subnode might be created
4323 		automatically, though.
4324 	\param mode The type and permissions for the node to be created.
4325 	\param flags Flags to be passed to the creating FS.
4326 	\param kernel \c true, if called in the kernel context (relevant only if
4327 		\a path is not \c NULL and not absolute).
4328 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4329 		file system creating the node, with the private data pointer and
4330 		operations for the super node. Can be \c NULL.
4331 	\param _createdVnode Pointer to pre-allocated storage where to store the
4332 		pointer to the newly created node.
4333 	\return \c B_OK, if everything went fine, another error code otherwise.
4334 */
4335 status_t
4336 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4337 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4338 	struct vnode** _createdVnode)
4339 {
4340 	struct vnode* dirNode;
4341 	char _leaf[B_FILE_NAME_LENGTH];
4342 	char* leaf = NULL;
4343 
4344 	if (path) {
4345 		// We've got a path. Get the dir vnode and the leaf name.
4346 		KPath tmpPathBuffer(B_PATH_NAME_LENGTH + 1);
4347 		if (tmpPathBuffer.InitCheck() != B_OK)
4348 			return B_NO_MEMORY;
4349 
4350 		char* tmpPath = tmpPathBuffer.LockBuffer();
4351 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4352 			return B_NAME_TOO_LONG;
4353 
4354 		// get the dir vnode and the leaf name
4355 		leaf = _leaf;
4356 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4357 		if (error != B_OK)
4358 			return error;
4359 	} else {
4360 		// No path. Create the node in the root FS.
4361 		dirNode = sRoot;
4362 		inc_vnode_ref_count(dirNode);
4363 	}
4364 
4365 	VNodePutter _(dirNode);
4366 
4367 	// check support for creating special nodes
4368 	if (!HAS_FS_CALL(dirNode, create_special_node))
4369 		return B_UNSUPPORTED;
4370 
4371 	// create the node
4372 	fs_vnode superVnode;
4373 	ino_t nodeID;
4374 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4375 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4376 	if (status != B_OK)
4377 		return status;
4378 
4379 	// lookup the node
4380 	rw_lock_read_lock(&sVnodeLock);
4381 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4382 	rw_lock_read_unlock(&sVnodeLock);
4383 
4384 	if (*_createdVnode == NULL) {
4385 		panic("vfs_create_special_node(): lookup of node failed");
4386 		return B_ERROR;
4387 	}
4388 
4389 	return B_OK;
4390 }
4391 
4392 
4393 extern "C" void
4394 vfs_put_vnode(struct vnode* vnode)
4395 {
4396 	put_vnode(vnode);
4397 }
4398 
4399 
4400 extern "C" status_t
4401 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4402 {
4403 	// Get current working directory from io context
4404 	struct io_context* context = get_current_io_context(false);
4405 	status_t status = B_OK;
4406 
4407 	mutex_lock(&context->io_mutex);
4408 
4409 	if (context->cwd != NULL) {
4410 		*_mountID = context->cwd->device;
4411 		*_vnodeID = context->cwd->id;
4412 	} else
4413 		status = B_ERROR;
4414 
4415 	mutex_unlock(&context->io_mutex);
4416 	return status;
4417 }
4418 
4419 
4420 status_t
4421 vfs_unmount(dev_t mountID, uint32 flags)
4422 {
4423 	return fs_unmount(NULL, mountID, flags, true);
4424 }
4425 
4426 
4427 extern "C" status_t
4428 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4429 {
4430 	struct vnode* vnode;
4431 
4432 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4433 	if (status != B_OK)
4434 		return status;
4435 
4436 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4437 	put_vnode(vnode);
4438 	return B_OK;
4439 }
4440 
4441 
4442 extern "C" void
4443 vfs_free_unused_vnodes(int32 level)
4444 {
4445 	vnode_low_resource_handler(NULL,
4446 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4447 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4448 		level);
4449 }
4450 
4451 
4452 extern "C" bool
4453 vfs_can_page(struct vnode* vnode, void* cookie)
4454 {
4455 	FUNCTION(("vfs_canpage: vnode 0x%p\n", vnode));
4456 
4457 	if (HAS_FS_CALL(vnode, can_page))
4458 		return FS_CALL(vnode, can_page, cookie);
4459 	return false;
4460 }
4461 
4462 
4463 extern "C" status_t
4464 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4465 	const generic_io_vec* vecs, size_t count, uint32 flags,
4466 	generic_size_t* _numBytes)
4467 {
4468 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4469 		pos));
4470 
4471 #if VFS_PAGES_IO_TRACING
4472 	generic_size_t bytesRequested = *_numBytes;
4473 #endif
4474 
4475 	IORequest request;
4476 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4477 	if (status == B_OK) {
4478 		status = vfs_vnode_io(vnode, cookie, &request);
4479 		if (status == B_OK)
4480 			status = request.Wait();
4481 		*_numBytes = request.TransferredBytes();
4482 	}
4483 
4484 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4485 		status, *_numBytes));
4486 
4487 	return status;
4488 }
4489 
4490 
4491 extern "C" status_t
4492 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4493 	const generic_io_vec* vecs, size_t count, uint32 flags,
4494 	generic_size_t* _numBytes)
4495 {
4496 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %Ld\n", vnode, vecs,
4497 		pos));
4498 
4499 #if VFS_PAGES_IO_TRACING
4500 	generic_size_t bytesRequested = *_numBytes;
4501 #endif
4502 
4503 	IORequest request;
4504 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4505 	if (status == B_OK) {
4506 		status = vfs_vnode_io(vnode, cookie, &request);
4507 		if (status == B_OK)
4508 			status = request.Wait();
4509 		*_numBytes = request.TransferredBytes();
4510 	}
4511 
4512 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4513 		status, *_numBytes));
4514 
4515 	return status;
4516 }
4517 
4518 
4519 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4520 	will be created, provided that \a allocate is \c true.
4521 	On success, an additional reference to the returned cache is acquired
4522 	on behalf of the caller.
4523 */
4524 extern "C" status_t
4525 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4526 {
4527 	if (vnode->cache != NULL) {
4528 		vnode->cache->AcquireRef();
4529 		*_cache = vnode->cache;
4530 		return B_OK;
4531 	}
4532 
4533 	rw_lock_read_lock(&sVnodeLock);
4534 	vnode->Lock();
4535 
4536 	status_t status = B_OK;
4537 
4538 	// The cache could have been created in the meantime
4539 	if (vnode->cache == NULL) {
4540 		if (allocate) {
4541 			// TODO: actually the vnode needs to be busy already here, or
4542 			//	else this won't work...
4543 			bool wasBusy = vnode->IsBusy();
4544 			vnode->SetBusy(true);
4545 
4546 			vnode->Unlock();
4547 			rw_lock_read_unlock(&sVnodeLock);
4548 
4549 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4550 
4551 			rw_lock_read_lock(&sVnodeLock);
4552 			vnode->Lock();
4553 			vnode->SetBusy(wasBusy);
4554 		} else
4555 			status = B_BAD_VALUE;
4556 	}
4557 
4558 	vnode->Unlock();
4559 	rw_lock_read_unlock(&sVnodeLock);
4560 
4561 	if (status == B_OK) {
4562 		vnode->cache->AcquireRef();
4563 		*_cache = vnode->cache;
4564 	}
4565 
4566 	return status;
4567 }
4568 
4569 
4570 status_t
4571 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4572 	file_io_vec* vecs, size_t* _count)
4573 {
4574 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %Ld, size = %lu\n",
4575 		vnode, vecs, offset, size));
4576 
4577 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4578 }
4579 
4580 
4581 status_t
4582 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4583 {
4584 	status_t status = FS_CALL(vnode, read_stat, stat);
4585 
4586 	// fill in the st_dev and st_ino fields
4587 	if (status == B_OK) {
4588 		stat->st_dev = vnode->device;
4589 		stat->st_ino = vnode->id;
4590 		stat->st_rdev = -1;
4591 	}
4592 
4593 	return status;
4594 }
4595 
4596 
4597 status_t
4598 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4599 {
4600 	struct vnode* vnode;
4601 	status_t status = get_vnode(device, inode, &vnode, true, false);
4602 	if (status != B_OK)
4603 		return status;
4604 
4605 	status = FS_CALL(vnode, read_stat, stat);
4606 
4607 	// fill in the st_dev and st_ino fields
4608 	if (status == B_OK) {
4609 		stat->st_dev = vnode->device;
4610 		stat->st_ino = vnode->id;
4611 		stat->st_rdev = -1;
4612 	}
4613 
4614 	put_vnode(vnode);
4615 	return status;
4616 }
4617 
4618 
4619 status_t
4620 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4621 {
4622 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4623 }
4624 
4625 
4626 status_t
4627 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4628 	char* path, size_t pathLength)
4629 {
4630 	struct vnode* vnode;
4631 	status_t status;
4632 
4633 	// filter invalid leaf names
4634 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4635 		return B_BAD_VALUE;
4636 
4637 	// get the vnode matching the dir's node_ref
4638 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4639 		// special cases "." and "..": we can directly get the vnode of the
4640 		// referenced directory
4641 		status = entry_ref_to_vnode(device, inode, leaf, false, true, &vnode);
4642 		leaf = NULL;
4643 	} else
4644 		status = get_vnode(device, inode, &vnode, true, false);
4645 	if (status != B_OK)
4646 		return status;
4647 
4648 	// get the directory path
4649 	status = dir_vnode_to_path(vnode, path, pathLength, true);
4650 	put_vnode(vnode);
4651 		// we don't need the vnode anymore
4652 	if (status != B_OK)
4653 		return status;
4654 
4655 	// append the leaf name
4656 	if (leaf) {
4657 		// insert a directory separator if this is not the file system root
4658 		if ((strcmp(path, "/") != 0
4659 				&& strlcat(path, "/", pathLength) >= pathLength)
4660 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4661 			return B_NAME_TOO_LONG;
4662 		}
4663 	}
4664 
4665 	return B_OK;
4666 }
4667 
4668 
4669 /*!	If the given descriptor locked its vnode, that lock will be released. */
4670 void
4671 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4672 {
4673 	struct vnode* vnode = fd_vnode(descriptor);
4674 
4675 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4676 		vnode->mandatory_locked_by = NULL;
4677 }
4678 
4679 
4680 /*!	Closes all file descriptors of the specified I/O context that
4681 	have the O_CLOEXEC flag set.
4682 */
4683 void
4684 vfs_exec_io_context(io_context* context)
4685 {
4686 	uint32 i;
4687 
4688 	for (i = 0; i < context->table_size; i++) {
4689 		mutex_lock(&context->io_mutex);
4690 
4691 		struct file_descriptor* descriptor = context->fds[i];
4692 		bool remove = false;
4693 
4694 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4695 			context->fds[i] = NULL;
4696 			context->num_used_fds--;
4697 
4698 			remove = true;
4699 		}
4700 
4701 		mutex_unlock(&context->io_mutex);
4702 
4703 		if (remove) {
4704 			close_fd(descriptor);
4705 			put_fd(descriptor);
4706 		}
4707 	}
4708 }
4709 
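/* Sketch (userland perspective): descriptors opened with O_CLOEXEC, or
   later marked via fcntl(fd, F_SETFD, FD_CLOEXEC), are the ones closed by
   vfs_exec_io_context() above; all other descriptors survive the exec. */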
4710 
4711 /*! Sets up a new io_context structure, and inherits the properties
4712 	of the parent io_context if it is given.
4713 */
4714 io_context*
4715 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4716 {
4717 	io_context* context = (io_context*)malloc(sizeof(io_context));
4718 	if (context == NULL)
4719 		return NULL;
4720 
4721 	TIOC(NewIOContext(context, parentContext));
4722 
4723 	memset(context, 0, sizeof(io_context));
4724 	context->ref_count = 1;
4725 
4726 	MutexLocker parentLocker;
4727 
4728 	size_t tableSize;
4729 	if (parentContext) {
4730 		parentLocker.SetTo(parentContext->io_mutex, false);
4731 		tableSize = parentContext->table_size;
4732 	} else
4733 		tableSize = DEFAULT_FD_TABLE_SIZE;
4734 
4735 	// allocate space for FDs and their close-on-exec flag
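	// Layout of the single allocation (keeping the three tables together):
	//   [file_descriptor* x tableSize][select_info* x tableSize]
	//   [close-on-exec bitmap: one bit per FD, rounded up to whole bytes]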
4736 	context->fds = (file_descriptor**)malloc(
4737 		sizeof(struct file_descriptor*) * tableSize
4738 		+ sizeof(struct select_sync*) * tableSize
4739 		+ (tableSize + 7) / 8);
4740 	if (context->fds == NULL) {
4741 		free(context);
4742 		return NULL;
4743 	}
4744 
4745 	context->select_infos = (select_info**)(context->fds + tableSize);
4746 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4747 
4748 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4749 		+ sizeof(struct select_sync*) * tableSize
4750 		+ (tableSize + 7) / 8);
4751 
4752 	mutex_init(&context->io_mutex, "I/O context");
4753 
4754 	// Copy all parent file descriptors
4755 
4756 	if (parentContext) {
4757 		size_t i;
4758 
4759 		mutex_lock(&sIOContextRootLock);
4760 		context->root = parentContext->root;
4761 		if (context->root)
4762 			inc_vnode_ref_count(context->root);
4763 		mutex_unlock(&sIOContextRootLock);
4764 
4765 		context->cwd = parentContext->cwd;
4766 		if (context->cwd)
4767 			inc_vnode_ref_count(context->cwd);
4768 
4769 		for (i = 0; i < tableSize; i++) {
4770 			struct file_descriptor* descriptor = parentContext->fds[i];
4771 
4772 			if (descriptor != NULL) {
4773 				bool closeOnExec = fd_close_on_exec(parentContext, i);
4774 				if (closeOnExec && purgeCloseOnExec)
4775 					continue;
4776 
4777 				TFD(InheritFD(context, i, descriptor, parentContext));
4778 
4779 				context->fds[i] = descriptor;
4780 				context->num_used_fds++;
4781 				atomic_add(&descriptor->ref_count, 1);
4782 				atomic_add(&descriptor->open_count, 1);
4783 
4784 				if (closeOnExec)
4785 					fd_set_close_on_exec(context, i, true);
4786 			}
4787 		}
4788 
4789 		parentLocker.Unlock();
4790 	} else {
4791 		context->root = sRoot;
4792 		context->cwd = sRoot;
4793 
4794 		if (context->root)
4795 			inc_vnode_ref_count(context->root);
4796 
4797 		if (context->cwd)
4798 			inc_vnode_ref_count(context->cwd);
4799 	}
4800 
4801 	context->table_size = tableSize;
4802 
4803 	list_init(&context->node_monitors);
4804 	context->max_monitors = DEFAULT_NODE_MONITORS;
4805 
4806 	return context;
4807 }
4808 
4809 
4810 static status_t
4811 vfs_free_io_context(io_context* context)
4812 {
4813 	uint32 i;
4814 
4815 	TIOC(FreeIOContext(context));
4816 
4817 	if (context->root)
4818 		put_vnode(context->root);
4819 
4820 	if (context->cwd)
4821 		put_vnode(context->cwd);
4822 
4823 	mutex_lock(&context->io_mutex);
4824 
4825 	for (i = 0; i < context->table_size; i++) {
4826 		if (struct file_descriptor* descriptor = context->fds[i]) {
4827 			close_fd(descriptor);
4828 			put_fd(descriptor);
4829 		}
4830 	}
4831 
4832 	mutex_destroy(&context->io_mutex);
4833 
4834 	remove_node_monitors(context);
4835 	free(context->fds);
4836 	free(context);
4837 
4838 	return B_OK;
4839 }
4840 
4841 
4842 void
4843 vfs_get_io_context(io_context* context)
4844 {
4845 	atomic_add(&context->ref_count, 1);
4846 }
4847 
4848 
4849 void
4850 vfs_put_io_context(io_context* context)
4851 {
4852 	if (atomic_add(&context->ref_count, -1) == 1)
4853 		vfs_free_io_context(context);
4854 }
4855 
4856 
4857 static status_t
4858 vfs_resize_fd_table(struct io_context* context, const int newSize)
4859 {
4860 	if (newSize <= 0 || newSize > MAX_FD_TABLE_SIZE)
4861 		return B_BAD_VALUE;
4862 
4863 	TIOC(ResizeIOContext(context, newSize));
4864 
4865 	MutexLocker _(context->io_mutex);
4866 
4867 	int oldSize = context->table_size;
4868 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
4869 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
4870 
4871 	// If the tables shrink, make sure none of the fds being dropped are in use.
4872 	if (newSize < oldSize) {
4873 		for (int i = oldSize; i-- > newSize;) {
4874 			if (context->fds[i])
4875 				return B_BUSY;
4876 		}
4877 	}
4878 
4879 	// store pointers to the old tables
4880 	file_descriptor** oldFDs = context->fds;
4881 	select_info** oldSelectInfos = context->select_infos;
4882 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
4883 
4884 	// allocate new tables
4885 	file_descriptor** newFDs = (file_descriptor**)malloc(
4886 		sizeof(struct file_descriptor*) * newSize
4887 		+ sizeof(struct select_sync*) * newSize
4888 		+ newCloseOnExitBitmapSize);
4889 	if (newFDs == NULL)
4890 		return B_NO_MEMORY;
4891 
4892 	context->fds = newFDs;
4893 	context->select_infos = (select_info**)(context->fds + newSize);
4894 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
4895 	context->table_size = newSize;
4896 
4897 	// copy entries from old tables
4898 	int toCopy = min_c(oldSize, newSize);
4899 
4900 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
4901 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
4902 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
4903 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
4904 
4905 	// clear additional entries, if the tables grow
4906 	if (newSize > oldSize) {
4907 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
4908 		memset(context->select_infos + oldSize, 0,
4909 			sizeof(void*) * (newSize - oldSize));
4910 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
4911 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
4912 	}
4913 
4914 	free(oldFDs);
4915 
4916 	return B_OK;
4917 }
4918 
4919 
4920 static status_t
4921 vfs_resize_monitor_table(struct io_context* context, const int newSize)
4922 {
4923 	status_t status = B_OK;
4924 
4925 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
4926 		return B_BAD_VALUE;
4927 
4928 	mutex_lock(&context->io_mutex);
4929 
4930 	if ((size_t)newSize < context->num_monitors) {
4931 		status = B_BUSY;
4932 		goto out;
4933 	}
4934 	context->max_monitors = newSize;
4935 
4936 out:
4937 	mutex_unlock(&context->io_mutex);
4938 	return status;
4939 }
4940 
4941 
4942 int
4943 vfs_getrlimit(int resource, struct rlimit* rlp)
4944 {
4945 	if (!rlp)
4946 		return B_BAD_ADDRESS;
4947 
4948 	switch (resource) {
4949 		case RLIMIT_NOFILE:
4950 		{
4951 			struct io_context* context = get_current_io_context(false);
4952 			MutexLocker _(context->io_mutex);
4953 
4954 			rlp->rlim_cur = context->table_size;
4955 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
4956 			return 0;
4957 		}
4958 
4959 		case RLIMIT_NOVMON:
4960 		{
4961 			struct io_context* context = get_current_io_context(false);
4962 			MutexLocker _(context->io_mutex);
4963 
4964 			rlp->rlim_cur = context->max_monitors;
4965 			rlp->rlim_max = MAX_NODE_MONITORS;
4966 			return 0;
4967 		}
4968 
4969 		default:
4970 			return B_BAD_VALUE;
4971 	}
4972 }
4973 
4974 
4975 int
4976 vfs_setrlimit(int resource, const struct rlimit* rlp)
4977 {
4978 	if (!rlp)
4979 		return B_BAD_ADDRESS;
4980 
4981 	switch (resource) {
4982 		case RLIMIT_NOFILE:
4983 			/* TODO: check getuid() */
4984 			if (rlp->rlim_max != RLIM_SAVED_MAX
4985 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
4986 				return B_NOT_ALLOWED;
4987 
4988 			return vfs_resize_fd_table(get_current_io_context(false),
4989 				rlp->rlim_cur);
4990 
4991 		case RLIMIT_NOVMON:
4992 			/* TODO: check getuid() */
4993 			if (rlp->rlim_max != RLIM_SAVED_MAX
4994 				&& rlp->rlim_max != MAX_NODE_MONITORS)
4995 				return B_NOT_ALLOWED;
4996 
4997 			return vfs_resize_monitor_table(get_current_io_context(false),
4998 				rlp->rlim_cur);
4999 
5000 		default:
5001 			return B_BAD_VALUE;
5002 	}
5003 }
5004 
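/* Sketch of the userland view (the limit value is arbitrary): a team that
   needs more file descriptors grows its table through the standard
   interface, which ends up in vfs_resize_fd_table() above:

	struct rlimit rl = { 1024, RLIM_SAVED_MAX };
	setrlimit(RLIMIT_NOFILE, &rl);
*/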
5005 
5006 status_t
5007 vfs_init(kernel_args* args)
5008 {
5009 	vnode::StaticInit();
5010 
5011 	struct vnode dummyVnode;
5012 	sVnodeTable = hash_init(VNODE_HASH_TABLE_SIZE,
5013 		offset_of_member(dummyVnode, next), &vnode_compare, &vnode_hash);
5014 	if (sVnodeTable == NULL)
5015 		panic("vfs_init: error creating vnode hash table\n");
5016 
5017 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummyVnode, unused_link));
5018 
5019 	struct fs_mount dummyMount;
5020 	sMountsTable = hash_init(MOUNTS_HASH_TABLE_SIZE,
5021 		offset_of_member(dummyMount, next), &mount_compare, &mount_hash);
5022 	if (sMountsTable == NULL)
5023 		panic("vfs_init: error creating mounts hash table\n");
5024 
5025 	node_monitor_init();
5026 
5027 	sRoot = NULL;
5028 
5029 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5030 
5031 	if (block_cache_init() != B_OK)
5032 		return B_ERROR;
5033 
5034 #ifdef ADD_DEBUGGER_COMMANDS
5035 	// add some debugger commands
5036 	add_debugger_command_etc("vnode", &dump_vnode,
5037 		"Print info about the specified vnode",
5038 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5039 		"Prints information about the vnode specified by address <vnode> or\n"
5040 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5041 		"constructed and printed. It might not be possible to construct a\n"
5042 		"complete path, though.\n",
5043 		0);
5044 	add_debugger_command("vnodes", &dump_vnodes,
5045 		"list all vnodes (from the specified device)");
5046 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5047 		"list all vnode caches");
5048 	add_debugger_command("mount", &dump_mount,
5049 		"info about the specified fs_mount");
5050 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5051 	add_debugger_command("io_context", &dump_io_context,
5052 		"info about the I/O context");
5053 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5054 		"info about vnode usage");
5055 #endif
5056 
5057 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5058 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5059 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5060 		0);
5061 
5062 	file_map_init();
5063 
5064 	return file_cache_init();
5065 }
5066 
5067 
5068 //	#pragma mark - fd_ops implementations
5069 
5070 
5071 /*!
5072 	Calls fs_open() on the given vnode and returns a new
5073 	file descriptor for it
5074 */
5075 static int
5076 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5077 {
5078 	void* cookie;
5079 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5080 	if (status != B_OK)
5081 		return status;
5082 
5083 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5084 	if (fd < 0) {
5085 		FS_CALL(vnode, close, cookie);
5086 		FS_CALL(vnode, free_cookie, cookie);
5087 	}
5088 	return fd;
5089 }
5090 
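/* Note: on success the returned FD owns both the vnode reference and the FS
   cookie; on failure the open is rolled back here via close/free_cookie, so
   callers merely need to release the vnode reference they passed in. */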
5091 
5092 /*!
5093 	Creates the entry \a name in \a directory, or opens the existing node if
5094 	O_EXCL isn't given, and returns a new file descriptor for it
5095 */
5096 static int
5097 create_vnode(struct vnode* directory, const char* name, int openMode,
5098 	int perms, bool kernel)
5099 {
5100 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5101 	status_t status = B_ERROR;
5102 	struct vnode* vnode;
5103 	void* cookie;
5104 	ino_t newID;
5105 
5106 	// This is somewhat tricky: If the entry already exists, the FS responsible
5107 	// for the directory might not necessarily also be the one responsible for
5108 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5109 	// we can actually never call the create() hook without O_EXCL. Instead we
5110 	// try to look the entry up first. If it already exists, we just open the
5111 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5112 	// introduces a race condition, since someone else might have created the
5113 	// entry in the meantime. We hope the respective FS returns the correct
5114 	// error code, in which case we retry (up to 3 times in total).
5115 
5116 	for (int i = 0; i < 3 && status != B_OK; i++) {
5117 		// look the node up
5118 		status = lookup_dir_entry(directory, name, &vnode);
5119 		if (status == B_OK) {
5120 			VNodePutter putter(vnode);
5121 
5122 			if ((openMode & O_EXCL) != 0)
5123 				return B_FILE_EXISTS;
5124 
5125 			// If the node is a symlink, we have to follow it, unless
5126 			// O_NOTRAVERSE is set.
5127 			if (S_ISLNK(vnode->Type()) && traverse) {
5128 				putter.Put();
5129 				char clonedName[B_FILE_NAME_LENGTH + 1];
5130 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5131 						>= B_FILE_NAME_LENGTH) {
5132 					return B_NAME_TOO_LONG;
5133 				}
5134 
5135 				inc_vnode_ref_count(directory);
5136 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5137 					kernel, &vnode, NULL);
5138 				if (status != B_OK)
5139 					return status;
5140 
5141 				putter.SetTo(vnode);
5142 			}
5143 
5144 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5145 				put_vnode(vnode);
5146 				return B_LINK_LIMIT;
5147 			}
5148 
5149 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5150 			// on success keep the vnode reference for the FD
5151 			if (fd >= 0)
5152 				putter.Detach();
5153 
5154 			return fd;
5155 		}
5156 
5157 		// it doesn't exist yet -- try to create it
5158 
5159 		if (!HAS_FS_CALL(directory, create))
5160 			return B_READ_ONLY_DEVICE;
5161 
5162 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5163 			&cookie, &newID);
5164 		if (status != B_OK
5165 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5166 			return status;
5167 		}
5168 	}
5169 
5170 	if (status != B_OK)
5171 		return status;
5172 
5173 	// the node has been created successfully
5174 
5175 	rw_lock_read_lock(&sVnodeLock);
5176 	vnode = lookup_vnode(directory->device, newID);
5177 	rw_lock_read_unlock(&sVnodeLock);
5178 
5179 	if (vnode == NULL) {
5180 		panic("vfs: fs_create() returned success but there is no vnode, "
5181 			"mount ID %ld!\n", directory->device);
5182 		return B_BAD_VALUE;
5183 	}
5184 
5185 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5186 	if (fd >= 0)
5187 		return fd;
5188 
5189 	status = fd;
5190 
5191 	// something went wrong, clean up
5192 
5193 	FS_CALL(vnode, close, cookie);
5194 	FS_CALL(vnode, free_cookie, cookie);
5195 	put_vnode(vnode);
5196 
5197 	FS_CALL(directory, unlink, name);
5198 
5199 	return status;
5200 }
5201 
5202 
5203 /*! Calls fs open_dir() on the given vnode and returns a new
5204 	file descriptor for it
5205 */
5206 static int
5207 open_dir_vnode(struct vnode* vnode, bool kernel)
5208 {
5209 	void* cookie;
5210 	int status;
5211 
5212 	status = FS_CALL(vnode, open_dir, &cookie);
5213 	if (status != B_OK)
5214 		return status;
5215 
5216 	// directory is opened, create a fd
5217 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5218 	if (status >= 0)
5219 		return status;
5220 
5221 	FS_CALL(vnode, close_dir, cookie);
5222 	FS_CALL(vnode, free_dir_cookie, cookie);
5223 
5224 	return status;
5225 }
5226 
5227 
5228 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5229 	file descriptor for it.
5230 	Used by attr_dir_open(), and attr_dir_open_fd().
5231 */
5232 static int
5233 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5234 {
5235 	void* cookie;
5236 	int status;
5237 
5238 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5239 		return B_NOT_SUPPORTED;
5240 
5241 	status = FS_CALL(vnode, open_attr_dir, &cookie);
5242 	if (status != B_OK)
5243 		return status;
5244 
5245 	// directory is opened, create a fd
5246 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5247 	if (status >= 0)
5248 		return status;
5249 
5250 	FS_CALL(vnode, close_attr_dir, cookie);
5251 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5252 
5253 	return status;
5254 }
5255 
5256 
5257 static int
5258 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5259 	int openMode, int perms, bool kernel)
5260 {
5261 	struct vnode* directory;
5262 	int status;
5263 
5264 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5265 		"kernel %d\n", name, openMode, perms, kernel));
5266 
5267 	// get directory to put the new file in
5268 	status = get_vnode(mountID, directoryID, &directory, true, false);
5269 	if (status != B_OK)
5270 		return status;
5271 
5272 	status = create_vnode(directory, name, openMode, perms, kernel);
5273 	put_vnode(directory);
5274 
5275 	return status;
5276 }
5277 
5278 
5279 static int
5280 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5281 {
5282 	char name[B_FILE_NAME_LENGTH];
5283 	struct vnode* directory;
5284 	int status;
5285 
5286 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5287 		openMode, perms, kernel));
5288 
5289 	// get directory to put the new file in
5290 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5291 	if (status < 0)
5292 		return status;
5293 
5294 	status = create_vnode(directory, name, openMode, perms, kernel);
5295 
5296 	put_vnode(directory);
5297 	return status;
5298 }
5299 
5300 
5301 static int
5302 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5303 	int openMode, bool kernel)
5304 {
5305 	if (name == NULL || *name == '\0')
5306 		return B_BAD_VALUE;
5307 
5308 	FUNCTION(("file_open_entry_ref(ref = (%ld, %Ld, %s), openMode = %d)\n",
5309 		mountID, directoryID, name, openMode));
5310 
5311 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5312 
5313 	// get the vnode matching the entry_ref
5314 	struct vnode* vnode;
5315 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5316 		kernel, &vnode);
5317 	if (status != B_OK)
5318 		return status;
5319 
5320 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5321 		put_vnode(vnode);
5322 		return B_LINK_LIMIT;
5323 	}
5324 
5325 	int newFD = open_vnode(vnode, openMode, kernel);
5326 	if (newFD >= 0) {
5327 		// The vnode reference has been transferred to the FD
5328 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5329 			directoryID, vnode->id, name);
5330 	} else
5331 		put_vnode(vnode);
5332 
5333 	return newFD;
5334 }
5335 
5336 
5337 static int
5338 file_open(int fd, char* path, int openMode, bool kernel)
5339 {
5340 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5341 
5342 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5343 		fd, path, openMode, kernel));
5344 
5345 	// get the vnode matching the vnode + path combination
5346 	struct vnode* vnode;
5347 	ino_t parentID;
5348 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5349 		&parentID, kernel);
5350 	if (status != B_OK)
5351 		return status;
5352 
5353 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5354 		put_vnode(vnode);
5355 		return B_LINK_LIMIT;
5356 	}
5357 
5358 	// open the vnode
5359 	int newFD = open_vnode(vnode, openMode, kernel);
5360 	if (newFD >= 0) {
5361 		// The vnode reference has been transferred to the FD
5362 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5363 			vnode->device, parentID, vnode->id, NULL);
5364 	} else
5365 		put_vnode(vnode);
5366 
5367 	return newFD;
5368 }
5369 
5370 
5371 static status_t
5372 file_close(struct file_descriptor* descriptor)
5373 {
5374 	struct vnode* vnode = descriptor->u.vnode;
5375 	status_t status = B_OK;
5376 
5377 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5378 
5379 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5380 		vnode->id);
5381 	if (HAS_FS_CALL(vnode, close)) {
5382 		status = FS_CALL(vnode, close, descriptor->cookie);
5383 	}
5384 
5385 	if (status == B_OK) {
5386 		// remove all outstanding locks for this team
5387 		release_advisory_lock(vnode, NULL);
5388 	}
5389 	return status;
5390 }
5391 
5392 
5393 static void
5394 file_free_fd(struct file_descriptor* descriptor)
5395 {
5396 	struct vnode* vnode = descriptor->u.vnode;
5397 
5398 	if (vnode != NULL) {
5399 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5400 		put_vnode(vnode);
5401 	}
5402 }
5403 
5404 
5405 static status_t
5406 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5407 	size_t* length)
5408 {
5409 	struct vnode* vnode = descriptor->u.vnode;
5410 	FUNCTION(("file_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
5411 		*length));
5412 
5413 	if (S_ISDIR(vnode->Type()))
5414 		return B_IS_A_DIRECTORY;
5415 
5416 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5417 }
5418 
5419 
5420 static status_t
5421 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5422 	size_t* length)
5423 {
5424 	struct vnode* vnode = descriptor->u.vnode;
5425 	FUNCTION(("file_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
5426 
5427 	if (S_ISDIR(vnode->Type()))
5428 		return B_IS_A_DIRECTORY;
5429 	if (!HAS_FS_CALL(vnode, write))
5430 		return B_READ_ONLY_DEVICE;
5431 
5432 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5433 }
5434 
5435 
5436 static off_t
5437 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5438 {
5439 	struct vnode* vnode = descriptor->u.vnode;
5440 	off_t offset;
5441 
5442 	FUNCTION(("file_seek(pos = %Ld, seekType = %d)\n", pos, seekType));
5443 
5444 	// some kinds of files are not seekable
5445 	switch (vnode->Type() & S_IFMT) {
5446 		case S_IFIFO:
5447 		case S_IFSOCK:
5448 			return ESPIPE;
5449 
5450 		// The Open Group Base Specs single out only pipes, FIFOs, and sockets
5451 		// as non-seekable, so we allow seeking all other types.
5452 		case S_IFREG:
5453 		case S_IFBLK:
5454 		case S_IFDIR:
5455 		case S_IFLNK:
5456 		case S_IFCHR:
5457 			break;
5458 	}
5459 
5460 	switch (seekType) {
5461 		case SEEK_SET:
5462 			offset = 0;
5463 			break;
5464 		case SEEK_CUR:
5465 			offset = descriptor->pos;
5466 			break;
5467 		case SEEK_END:
5468 		{
5469 			// stat() the node
5470 			if (!HAS_FS_CALL(vnode, read_stat))
5471 				return B_NOT_SUPPORTED;
5472 
5473 			struct stat stat;
5474 			status_t status = FS_CALL(vnode, read_stat, &stat);
5475 			if (status != B_OK)
5476 				return status;
5477 
5478 			offset = stat.st_size;
5479 			break;
5480 		}
5481 		default:
5482 			return B_BAD_VALUE;
5483 	}
5484 
5485 	// assumes off_t is 64 bits wide
5486 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5487 		return B_BUFFER_OVERFLOW;
5488 
5489 	pos += offset;
5490 	if (pos < 0)
5491 		return B_BAD_VALUE;
5492 
5493 	return descriptor->pos = pos;
5494 }
5495 
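/* Example for the overflow guard above: with stat.st_size == LONGLONG_MAX
   and any positive pos, offset + pos would wrap negative; the check rejects
   such a request with B_BUFFER_OVERFLOW instead of storing a corrupted
   position. */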
5496 
5497 static status_t
5498 file_select(struct file_descriptor* descriptor, uint8 event,
5499 	struct selectsync* sync)
5500 {
5501 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5502 
5503 	struct vnode* vnode = descriptor->u.vnode;
5504 
5505 	// If the FS has no select() hook, notify select() now.
5506 	if (!HAS_FS_CALL(vnode, select))
5507 		return notify_select_event(sync, event);
5508 
5509 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5510 }
5511 
5512 
5513 static status_t
5514 file_deselect(struct file_descriptor* descriptor, uint8 event,
5515 	struct selectsync* sync)
5516 {
5517 	struct vnode* vnode = descriptor->u.vnode;
5518 
5519 	if (!HAS_FS_CALL(vnode, deselect))
5520 		return B_OK;
5521 
5522 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5523 }
5524 
5525 
5526 static status_t
5527 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5528 	bool kernel)
5529 {
5530 	struct vnode* vnode;
5531 	status_t status;
5532 
5533 	if (name == NULL || *name == '\0')
5534 		return B_BAD_VALUE;
5535 
5536 	FUNCTION(("dir_create_entry_ref(dev = %ld, ino = %Ld, name = '%s', "
5537 		"perms = %d)\n", mountID, parentID, name, perms));
5538 
5539 	status = get_vnode(mountID, parentID, &vnode, true, false);
5540 	if (status != B_OK)
5541 		return status;
5542 
5543 	if (HAS_FS_CALL(vnode, create_dir))
5544 		status = FS_CALL(vnode, create_dir, name, perms);
5545 	else
5546 		status = B_READ_ONLY_DEVICE;
5547 
5548 	put_vnode(vnode);
5549 	return status;
5550 }
5551 
5552 
5553 static status_t
5554 dir_create(int fd, char* path, int perms, bool kernel)
5555 {
5556 	char filename[B_FILE_NAME_LENGTH];
5557 	struct vnode* vnode;
5558 	status_t status;
5559 
5560 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5561 		kernel));
5562 
5563 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5564 	if (status < 0)
5565 		return status;
5566 
5567 	if (HAS_FS_CALL(vnode, create_dir)) {
5568 		status = FS_CALL(vnode, create_dir, filename, perms);
5569 	} else
5570 		status = B_READ_ONLY_DEVICE;
5571 
5572 	put_vnode(vnode);
5573 	return status;
5574 }
5575 
5576 
5577 static int
5578 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5579 {
5580 	struct vnode* vnode;
5581 	int status;
5582 
5583 	FUNCTION(("dir_open_entry_ref()\n"));
5584 
5585 	if (name && *name == '\0')
5586 		return B_BAD_VALUE;
5587 
5588 	// get the vnode matching the entry_ref/node_ref
5589 	if (name) {
5590 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5591 			&vnode);
5592 	} else
5593 		status = get_vnode(mountID, parentID, &vnode, true, false);
5594 	if (status != B_OK)
5595 		return status;
5596 
5597 	int newFD = open_dir_vnode(vnode, kernel);
5598 	if (newFD >= 0) {
5599 		// The vnode reference has been transferred to the FD
5600 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
5601 			vnode->id, name);
5602 	} else
5603 		put_vnode(vnode);
5604 
5605 	return newFD;
5606 }
5607 
5608 
5609 static int
5610 dir_open(int fd, char* path, bool kernel)
5611 {
5612 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5613 		kernel));
5614 
5615 	// get the vnode matching the vnode + path combination
5616 	struct vnode* vnode = NULL;
5617 	ino_t parentID;
5618 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
5619 		kernel);
5620 	if (status != B_OK)
5621 		return status;
5622 
5623 	// open the dir
5624 	int newFD = open_dir_vnode(vnode, kernel);
5625 	if (newFD >= 0) {
5626 		// The vnode reference has been transferred to the FD
5627 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5628 			parentID, vnode->id, NULL);
5629 	} else
5630 		put_vnode(vnode);
5631 
5632 	return newFD;
5633 }
5634 
5635 
5636 static status_t
5637 dir_close(struct file_descriptor* descriptor)
5638 {
5639 	struct vnode* vnode = descriptor->u.vnode;
5640 
5641 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5642 
5643 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5644 		vnode->id);
5645 	if (HAS_FS_CALL(vnode, close_dir))
5646 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5647 
5648 	return B_OK;
5649 }
5650 
5651 
5652 static void
5653 dir_free_fd(struct file_descriptor* descriptor)
5654 {
5655 	struct vnode* vnode = descriptor->u.vnode;
5656 
5657 	if (vnode != NULL) {
5658 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
5659 		put_vnode(vnode);
5660 	}
5661 }
5662 
5663 
5664 static status_t
5665 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
5666 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5667 {
5668 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
5669 		bufferSize, _count);
5670 }
5671 
5672 
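/*!	Fixes up a dirent as returned by a file system: sets the d_pdev/d_pino
	fields to the parent directory, resolves the ".." entry at the root of a
	volume via the underlying (covered) directory (unless that would bypass
	the IO context's root), and replaces the IDs of covered vnodes (i.e.
	mount points) with those of the respective covering vnode.
*/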
5673 static status_t
5674 fix_dirent(struct vnode* parent, struct dirent* entry,
5675 	struct io_context* ioContext)
5676 {
5677 	// set d_pdev and d_pino
5678 	entry->d_pdev = parent->device;
5679 	entry->d_pino = parent->id;
5680 
5681 	// If this is the ".." entry and the directory is the root of a FS,
5682 	// we need to replace d_dev and d_ino with the actual values.
5683 	if (strcmp(entry->d_name, "..") == 0
5684 		&& parent->mount->root_vnode == parent
5685 		&& parent->mount->covers_vnode) {
5686 		inc_vnode_ref_count(parent);
5687 			// vnode_path_to_vnode() puts the node
5688 
5689 		// Make sure the IO context root is not bypassed.
5690 		if (parent == ioContext->root) {
5691 			entry->d_dev = parent->device;
5692 			entry->d_ino = parent->id;
5693 		} else {
5694 			// ".." is guaranteed not to be clobbered by this call
5695 			struct vnode* vnode;
5696 			status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
5697 				ioContext, &vnode, NULL);
5698 
5699 			if (status == B_OK) {
5700 				entry->d_dev = vnode->device;
5701 				entry->d_ino = vnode->id;
5702 			}
5703 		}
5704 	} else {
5705 		// resolve mount points
5706 		ReadLocker _(&sVnodeLock);
5707 
5708 		struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
5709 		if (vnode != NULL) {
5710 			if (vnode->covered_by != NULL) {
5711 				entry->d_dev = vnode->covered_by->device;
5712 				entry->d_ino = vnode->covered_by->id;
5713 			}
5714 		}
5715 	}
5716 
5717 	return B_OK;
5718 }
5719 
5720 
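/*!	Reads the next entries from the given directory vnode into \a buffer and
	adjusts each of them via fix_dirent(), so that parent information and
	mount point transitions are reflected correctly.
*/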
5721 static status_t
5722 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
5723 	struct dirent* buffer, size_t bufferSize, uint32* _count)
5724 {
5725 	if (!HAS_FS_CALL(vnode, read_dir))
5726 		return B_NOT_SUPPORTED;
5727 
5728 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
5729 		_count);
5730 	if (error != B_OK)
5731 		return error;
5732 
5733 	// we need to adjust the read dirents
5734 	uint32 count = *_count;
5735 	for (uint32 i = 0; i < count; i++) {
5736 		error = fix_dirent(vnode, buffer, ioContext);
5737 		if (error != B_OK)
5738 			return error;
5739 
5740 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
5741 	}
5742 
5743 	return error;
5744 }
5745 
5746 
5747 static status_t
5748 dir_rewind(struct file_descriptor* descriptor)
5749 {
5750 	struct vnode* vnode = descriptor->u.vnode;
5751 
5752 	if (HAS_FS_CALL(vnode, rewind_dir)) {
5753 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
5754 	}
5755 
5756 	return B_NOT_SUPPORTED;
5757 }
5758 
5759 
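/*!	Removes the directory specified by \a fd + \a path. Trailing slashes and
	"." components are stripped from the path first; requests to remove "."
	or ".." are rejected with B_NOT_ALLOWED.
*/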
5760 static status_t
5761 dir_remove(int fd, char* path, bool kernel)
5762 {
5763 	char name[B_FILE_NAME_LENGTH];
5764 	struct vnode* directory;
5765 	status_t status;
5766 
5767 	if (path != NULL) {
5768 		// we need to make sure our path name doesn't end in "/", ".",
5769 		// or ".."
5770 		char* lastSlash;
5771 		while ((lastSlash = strrchr(path, '/')) != NULL) {
5772 			char* leaf = lastSlash + 1;
5773 			if (!strcmp(leaf, ".."))
5774 				return B_NOT_ALLOWED;
5775 
5776 			// omit multiple slashes
5777 			while (lastSlash > path && lastSlash[-1] == '/')
5778 				lastSlash--;
5779 
5780 			if (leaf[0]
5781 				&& strcmp(leaf, ".")) {
5782 				break;
5783 			}
5784 			// "name/" -> "name", or "name/." -> "name"
5785 			lastSlash[0] = '\0';
5786 		}
5787 
5788 		if (!strcmp(path, ".") || !strcmp(path, ".."))
5789 			return B_NOT_ALLOWED;
5790 	}
5791 
5792 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
5793 	if (status != B_OK)
5794 		return status;
5795 
5796 	if (HAS_FS_CALL(directory, remove_dir))
5797 		status = FS_CALL(directory, remove_dir, name);
5798 	else
5799 		status = B_READ_ONLY_DEVICE;
5800 
5801 	put_vnode(directory);
5802 	return status;
5803 }
5804 
5805 
5806 static status_t
5807 common_ioctl(struct file_descriptor* descriptor, uint32 op, void* buffer,
5808 	size_t length)
5809 {
5810 	struct vnode* vnode = descriptor->u.vnode;
5811 
5812 	if (HAS_FS_CALL(vnode, ioctl))
5813 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
5814 
5815 	return B_DEV_INVALID_IOCTL;
5816 }
5817 
5818 
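/*!	Backend for the fcntl() syscall, shared by kernel and userland callers:
	handles descriptor flags (F_SETFD/F_GETFD), open mode flags
	(F_SETFL/F_GETFL, restricted to O_APPEND and O_NONBLOCK), descriptor
	duplication (F_DUPFD), and advisory locking (F_GETLK/F_SETLK/F_SETLKW).
*/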
5819 static status_t
5820 common_fcntl(int fd, int op, uint32 argument, bool kernel)
5821 {
5822 	struct flock flock;
5823 
5824 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
5825 		fd, op, argument, kernel ? "kernel" : "user"));
5826 
5827 	struct file_descriptor* descriptor = get_fd(get_current_io_context(kernel),
5828 		fd);
5829 	if (descriptor == NULL)
5830 		return B_FILE_ERROR;
5831 
5832 	struct vnode* vnode = fd_vnode(descriptor);
5833 
5834 	status_t status = B_OK;
5835 
5836 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
5837 		if (descriptor->type != FDTYPE_FILE)
5838 			status = B_BAD_VALUE;
5839 		else if (user_memcpy(&flock, (struct flock*)argument,
5840 				sizeof(struct flock)) != B_OK)
5841 			status = B_BAD_ADDRESS;
5842 
5843 		if (status != B_OK) {
5844 			put_fd(descriptor);
5845 			return status;
5846 		}
5847 	}
5848 
5849 	switch (op) {
5850 		case F_SETFD:
5851 		{
5852 			struct io_context* context = get_current_io_context(kernel);
5853 			// Set file descriptor flags
5854 
5855 			// FD_CLOEXEC is the only flag available at this time
5856 			mutex_lock(&context->io_mutex);
5857 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
5858 			mutex_unlock(&context->io_mutex);
5859 
5860 			status = B_OK;
5861 			break;
5862 		}
5863 
5864 		case F_GETFD:
5865 		{
5866 			struct io_context* context = get_current_io_context(kernel);
5867 
5868 			// Get file descriptor flags
5869 			mutex_lock(&context->io_mutex);
5870 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
5871 			mutex_unlock(&context->io_mutex);
5872 			break;
5873 		}
5874 
5875 		case F_SETFL:
5876 			// Set file descriptor open mode
5877 
5878 			// we only accept changes to O_APPEND and O_NONBLOCK
5879 			argument &= O_APPEND | O_NONBLOCK;
5880 			if (descriptor->ops->fd_set_flags != NULL) {
5881 				status = descriptor->ops->fd_set_flags(descriptor, argument);
5882 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
5883 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
5884 					(int)argument);
5885 			} else
5886 				status = B_NOT_SUPPORTED;
5887 
5888 			if (status == B_OK) {
5889 				// update this descriptor's open_mode field
5890 				descriptor->open_mode = (descriptor->open_mode
5891 					& ~(O_APPEND | O_NONBLOCK)) | argument;
5892 			}
5893 
5894 			break;
5895 
5896 		case F_GETFL:
5897 			// Get file descriptor open mode
5898 			status = descriptor->open_mode;
5899 			break;
5900 
5901 		case F_DUPFD:
5902 		{
5903 			struct io_context* context = get_current_io_context(kernel);
5904 
5905 			status = new_fd_etc(context, descriptor, (int)argument);
5906 			if (status >= 0) {
5907 				mutex_lock(&context->io_mutex);
5908 				fd_set_close_on_exec(context, status, false);
5909 				mutex_unlock(&context->io_mutex);
5910 
5911 				atomic_add(&descriptor->ref_count, 1);
5912 			}
5913 			break;
5914 		}
5915 
5916 		case F_GETLK:
5917 			if (vnode != NULL) {
5918 				status = get_advisory_lock(vnode, &flock);
5919 				if (status == B_OK) {
5920 					// copy back flock structure
5921 					status = user_memcpy((struct flock*)argument, &flock,
5922 						sizeof(struct flock));
5923 				}
5924 			} else
5925 				status = B_BAD_VALUE;
5926 			break;
5927 
5928 		case F_SETLK:
5929 		case F_SETLKW:
5930 			status = normalize_flock(descriptor, &flock);
5931 			if (status != B_OK)
5932 				break;
5933 
5934 			if (vnode == NULL) {
5935 				status = B_BAD_VALUE;
5936 			} else if (flock.l_type == F_UNLCK) {
5937 				status = release_advisory_lock(vnode, &flock);
5938 			} else {
5939 				// the open mode must match the lock type
5940 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
5941 						&& flock.l_type == F_WRLCK)
5942 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
5943 						&& flock.l_type == F_RDLCK))
5944 					status = B_FILE_ERROR;
5945 				else {
5946 					status = acquire_advisory_lock(vnode, -1,
5947 						&flock, op == F_SETLKW);
5948 				}
5949 			}
5950 			break;
5951 
5952 		// ToDo: add support for more ops?
5953 
5954 		default:
5955 			status = B_BAD_VALUE;
5956 	}
5957 
5958 	put_fd(descriptor);
5959 	return status;
5960 }
5961 
5962 
5963 static status_t
5964 common_sync(int fd, bool kernel)
5965 {
5966 	struct file_descriptor* descriptor;
5967 	struct vnode* vnode;
5968 	status_t status;
5969 
5970 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
5971 
5972 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5973 	if (descriptor == NULL)
5974 		return B_FILE_ERROR;
5975 
5976 	if (HAS_FS_CALL(vnode, fsync))
5977 		status = FS_CALL_NO_PARAMS(vnode, fsync);
5978 	else
5979 		status = B_NOT_SUPPORTED;
5980 
5981 	put_fd(descriptor);
5982 	return status;
5983 }
5984 
5985 
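/*!	Mandatorily locks the node the given FD refers to by atomically claiming
	the vnode's mandatory_locked_by field. Fails with B_BUSY if another
	descriptor already holds the lock.
*/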
5986 static status_t
5987 common_lock_node(int fd, bool kernel)
5988 {
5989 	struct file_descriptor* descriptor;
5990 	struct vnode* vnode;
5991 
5992 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
5993 	if (descriptor == NULL)
5994 		return B_FILE_ERROR;
5995 
5996 	status_t status = B_OK;
5997 
5998 	// We need to set the lock atomically - someone
5999 	// else might set one at the same time
6000 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6001 			(file_descriptor*)NULL) != NULL)
6002 		status = B_BUSY;
6003 
6004 	put_fd(descriptor);
6005 	return status;
6006 }
6007 
6008 
6009 static status_t
6010 common_unlock_node(int fd, bool kernel)
6011 {
6012 	struct file_descriptor* descriptor;
6013 	struct vnode* vnode;
6014 
6015 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6016 	if (descriptor == NULL)
6017 		return B_FILE_ERROR;
6018 
6019 	status_t status = B_OK;
6020 
6021 	// We need to clear the lock atomically - someone
6022 	// else might set or clear one at the same time
6023 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6024 			(file_descriptor*)NULL, descriptor) != descriptor)
6025 		status = B_BAD_VALUE;
6026 
6027 	put_fd(descriptor);
6028 	return status;
6029 }
6030 
6031 
6032 static status_t
6033 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6034 	bool kernel)
6035 {
6036 	struct vnode* vnode;
6037 	status_t status;
6038 
6039 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6040 	if (status != B_OK)
6041 		return status;
6042 
6043 	if (HAS_FS_CALL(vnode, read_symlink)) {
6044 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6045 	} else
6046 		status = B_BAD_VALUE;
6047 
6048 	put_vnode(vnode);
6049 	return status;
6050 }
6051 
6052 
6053 static status_t
6054 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6055 	bool kernel)
6056 {
6057 	// path validity checks have to be in the calling function!
6058 	char name[B_FILE_NAME_LENGTH];
6059 	struct vnode* vnode;
6060 	status_t status;
6061 
6062 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6063 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6064 
6065 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6066 	if (status != B_OK)
6067 		return status;
6068 
6069 	if (HAS_FS_CALL(vnode, create_symlink))
6070 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6071 	else {
6072 		status = HAS_FS_CALL(vnode, write)
6073 			? B_NOT_SUPPORTED : B_READ_ONLY_DEVICE;
6074 	}
6075 
6076 	put_vnode(vnode);
6077 
6078 	return status;
6079 }
6080 
6081 
6082 static status_t
6083 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6084 	bool traverseLeafLink, bool kernel)
6085 {
6086 	// path validity checks have to be in the calling function!
6087 
6088 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6089 		toPath, kernel));
6090 
6091 	char name[B_FILE_NAME_LENGTH];
6092 	struct vnode* directory;
6093 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6094 		kernel);
6095 	if (status != B_OK)
6096 		return status;
6097 
6098 	struct vnode* vnode;
6099 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6100 		kernel);
6101 	if (status != B_OK)
6102 		goto err;
6103 
6104 	if (directory->mount != vnode->mount) {
6105 		status = B_CROSS_DEVICE_LINK;
6106 		goto err1;
6107 	}
6108 
6109 	if (HAS_FS_CALL(directory, link))
6110 		status = FS_CALL(directory, link, name, vnode);
6111 	else
6112 		status = B_READ_ONLY_DEVICE;
6113 
6114 err1:
6115 	put_vnode(vnode);
6116 err:
6117 	put_vnode(directory);
6118 
6119 	return status;
6120 }
6121 
6122 
6123 static status_t
6124 common_unlink(int fd, char* path, bool kernel)
6125 {
6126 	char filename[B_FILE_NAME_LENGTH];
6127 	struct vnode* vnode;
6128 	status_t status;
6129 
6130 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6131 		kernel));
6132 
6133 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6134 	if (status < 0)
6135 		return status;
6136 
6137 	if (HAS_FS_CALL(vnode, unlink))
6138 		status = FS_CALL(vnode, unlink, filename);
6139 	else
6140 		status = B_READ_ONLY_DEVICE;
6141 
6142 	put_vnode(vnode);
6143 
6144 	return status;
6145 }
6146 
6147 
6148 static status_t
6149 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6150 {
6151 	struct vnode* vnode;
6152 	status_t status;
6153 
6154 	// TODO: honor effectiveUserGroup argument
6155 
6156 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6157 	if (status != B_OK)
6158 		return status;
6159 
6160 	if (HAS_FS_CALL(vnode, access))
6161 		status = FS_CALL(vnode, access, mode);
6162 	else
6163 		status = B_OK;
6164 
6165 	put_vnode(vnode);
6166 
6167 	return status;
6168 }
6169 
6170 
6171 static status_t
6172 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6173 {
6174 	struct vnode* fromVnode;
6175 	struct vnode* toVnode;
6176 	char fromName[B_FILE_NAME_LENGTH];
6177 	char toName[B_FILE_NAME_LENGTH];
6178 	status_t status;
6179 
6180 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6181 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6182 
6183 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6184 	if (status != B_OK)
6185 		return status;
6186 
6187 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6188 	if (status != B_OK)
6189 		goto err1;
6190 
6191 	if (fromVnode->device != toVnode->device) {
6192 		status = B_CROSS_DEVICE_LINK;
6193 		goto err2;
6194 	}
6195 
6196 	if (fromName[0] == '\0' || toName[0] == '\0'
6197 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6198 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6199 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6200 		status = B_BAD_VALUE;
6201 		goto err2;
6202 	}
6203 
6204 	if (HAS_FS_CALL(fromVnode, rename))
6205 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6206 	else
6207 		status = B_READ_ONLY_DEVICE;
6208 
6209 err2:
6210 	put_vnode(toVnode);
6211 err1:
6212 	put_vnode(fromVnode);
6213 
6214 	return status;
6215 }
6216 
6217 
6218 static status_t
6219 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6220 {
6221 	struct vnode* vnode = descriptor->u.vnode;
6222 
6223 	FUNCTION(("common_read_stat: stat %p\n", stat));
6224 
6225 	// TODO: remove this once all file systems properly set them!
6226 	stat->st_crtim.tv_nsec = 0;
6227 	stat->st_ctim.tv_nsec = 0;
6228 	stat->st_mtim.tv_nsec = 0;
6229 	stat->st_atim.tv_nsec = 0;
6230 
6231 	status_t status = FS_CALL(vnode, read_stat, stat);
6232 
6233 	// fill in the st_dev and st_ino fields
6234 	if (status == B_OK) {
6235 		stat->st_dev = vnode->device;
6236 		stat->st_ino = vnode->id;
6237 		stat->st_rdev = -1;
6238 	}
6239 
6240 	return status;
6241 }
6242 
6243 
6244 static status_t
6245 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6246 	int statMask)
6247 {
6248 	struct vnode* vnode = descriptor->u.vnode;
6249 
6250 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6251 		vnode, stat, statMask));
6252 
6253 	if (!HAS_FS_CALL(vnode, write_stat))
6254 		return B_READ_ONLY_DEVICE;
6255 
6256 	return FS_CALL(vnode, write_stat, stat, statMask);
6257 }
6258 
6259 
6260 static status_t
6261 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6262 	struct stat* stat, bool kernel)
6263 {
6264 	struct vnode* vnode;
6265 	status_t status;
6266 
6267 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6268 		stat));
6269 
6270 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6271 		kernel);
6272 	if (status < 0)
6273 		return status;
6274 
6275 	status = FS_CALL(vnode, read_stat, stat);
6276 
6277 	// fill in the st_dev and st_ino fields
6278 	if (status == B_OK) {
6279 		stat->st_dev = vnode->device;
6280 		stat->st_ino = vnode->id;
6281 		stat->st_rdev = -1;
6282 	}
6283 
6284 	put_vnode(vnode);
6285 	return status;
6286 }
6287 
6288 
6289 static status_t
6290 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6291 	const struct stat* stat, int statMask, bool kernel)
6292 {
6293 	struct vnode* vnode;
6294 	status_t status;
6295 
6296 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, "
6297 		"stat_mask %d, kernel %d\n", fd, path, stat, statMask, kernel));
6298 
6299 	status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode, NULL,
6300 		kernel);
6301 	if (status < 0)
6302 		return status;
6303 
6304 	if (HAS_FS_CALL(vnode, write_stat))
6305 		status = FS_CALL(vnode, write_stat, stat, statMask);
6306 	else
6307 		status = B_READ_ONLY_DEVICE;
6308 
6309 	put_vnode(vnode);
6310 
6311 	return status;
6312 }
6313 
6314 
6315 static int
6316 attr_dir_open(int fd, char* path, bool kernel)
6317 {
6318 	struct vnode* vnode;
6319 	int status;
6320 
6321 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6322 		kernel));
6323 
6324 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6325 	if (status != B_OK)
6326 		return status;
6327 
6328 	status = open_attr_dir_vnode(vnode, kernel);
6329 	if (status < 0)
6330 		put_vnode(vnode);
6331 
6332 	return status;
6333 }
6334 
6335 
6336 static status_t
6337 attr_dir_close(struct file_descriptor* descriptor)
6338 {
6339 	struct vnode* vnode = descriptor->u.vnode;
6340 
6341 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6342 
6343 	if (HAS_FS_CALL(vnode, close_attr_dir))
6344 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6345 
6346 	return B_OK;
6347 }
6348 
6349 
6350 static void
6351 attr_dir_free_fd(struct file_descriptor* descriptor)
6352 {
6353 	struct vnode* vnode = descriptor->u.vnode;
6354 
6355 	if (vnode != NULL) {
6356 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6357 		put_vnode(vnode);
6358 	}
6359 }
6360 
6361 
6362 static status_t
6363 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6364 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6365 {
6366 	struct vnode* vnode = descriptor->u.vnode;
6367 
6368 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6369 
6370 	if (HAS_FS_CALL(vnode, read_attr_dir))
6371 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6372 			bufferSize, _count);
6373 
6374 	return B_NOT_SUPPORTED;
6375 }
6376 
6377 
6378 static status_t
6379 attr_dir_rewind(struct file_descriptor* descriptor)
6380 {
6381 	struct vnode* vnode = descriptor->u.vnode;
6382 
6383 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6384 
6385 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6386 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6387 
6388 	return B_NOT_SUPPORTED;
6389 }
6390 
6391 
6392 static int
6393 attr_create(int fd, char* path, const char* name, uint32 type,
6394 	int openMode, bool kernel)
6395 {
6396 	if (name == NULL || *name == '\0')
6397 		return B_BAD_VALUE;
6398 
6399 	struct vnode* vnode;
6400 	status_t status = fd_and_path_to_vnode(fd, path,
6401 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6402 	if (status != B_OK)
6403 		return status;
6404 
6405 	if (!HAS_FS_CALL(vnode, create_attr)) {
6406 		status = B_READ_ONLY_DEVICE;
6407 		goto err;
6408 	}
6409 
6410 	void* cookie;
6411 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6412 	if (status != B_OK)
6413 		goto err;
6414 
6415 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6416 	if (fd >= 0)
6417 		return fd;
6418 
6419 	status = fd;
6420 
6421 	FS_CALL(vnode, close_attr, cookie);
6422 	FS_CALL(vnode, free_attr_cookie, cookie);
6423 
6424 	FS_CALL(vnode, remove_attr, name);
6425 
6426 err:
6427 	put_vnode(vnode);
6428 
6429 	return status;
6430 }
6431 
6432 
6433 static int
6434 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6435 {
6436 	if (name == NULL || *name == '\0')
6437 		return B_BAD_VALUE;
6438 
6439 	struct vnode* vnode;
6440 	status_t status = fd_and_path_to_vnode(fd, path,
6441 		(openMode & O_NOTRAVERSE) != 0, &vnode, NULL, kernel);
6442 	if (status != B_OK)
6443 		return status;
6444 
6445 	if (!HAS_FS_CALL(vnode, open_attr)) {
6446 		status = B_NOT_SUPPORTED;
6447 		goto err;
6448 	}
6449 
6450 	void* cookie;
6451 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6452 	if (status != B_OK)
6453 		goto err;
6454 
6455 	// now we only need a file descriptor for this attribute and we're done
6456 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6457 	if (fd >= 0)
6458 		return fd;
6459 
6460 	status = fd;
6461 
6462 	FS_CALL(vnode, close_attr, cookie);
6463 	FS_CALL(vnode, free_attr_cookie, cookie);
6464 
6465 err:
6466 	put_vnode(vnode);
6467 
6468 	return status;
6469 }
6470 
6471 
6472 static status_t
6473 attr_close(struct file_descriptor* descriptor)
6474 {
6475 	struct vnode* vnode = descriptor->u.vnode;
6476 
6477 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6478 
6479 	if (HAS_FS_CALL(vnode, close_attr))
6480 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6481 
6482 	return B_OK;
6483 }
6484 
6485 
6486 static void
6487 attr_free_fd(struct file_descriptor* descriptor)
6488 {
6489 	struct vnode* vnode = descriptor->u.vnode;
6490 
6491 	if (vnode != NULL) {
6492 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6493 		put_vnode(vnode);
6494 	}
6495 }
6496 
6497 
6498 static status_t
6499 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6500 	size_t* length)
6501 {
6502 	struct vnode* vnode = descriptor->u.vnode;
6503 
6504 	FUNCTION(("attr_read: buf %p, pos %Ld, len %p = %ld\n", buffer, pos, length,
6505 		*length));
6506 
6507 	if (!HAS_FS_CALL(vnode, read_attr))
6508 		return B_NOT_SUPPORTED;
6509 
6510 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6511 }
6512 
6513 
6514 static status_t
6515 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6516 	size_t* length)
6517 {
6518 	struct vnode* vnode = descriptor->u.vnode;
6519 
6520 	FUNCTION(("attr_write: buf %p, pos %Ld, len %p\n", buffer, pos, length));
6521 	if (!HAS_FS_CALL(vnode, write_attr))
6522 		return B_NOT_SUPPORTED;
6523 
6524 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6525 }
6526 
6527 
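/*!	Computes the new seek position for an attribute descriptor. For SEEK_END
	the attribute's size is obtained via the file system's read_attr_stat()
	hook.
*/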
6528 static off_t
6529 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6530 {
6531 	off_t offset;
6532 
6533 	switch (seekType) {
6534 		case SEEK_SET:
6535 			offset = 0;
6536 			break;
6537 		case SEEK_CUR:
6538 			offset = descriptor->pos;
6539 			break;
6540 		case SEEK_END:
6541 		{
6542 			struct vnode* vnode = descriptor->u.vnode;
6543 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6544 				return B_NOT_SUPPORTED;
6545 
6546 			struct stat stat;
6547 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6548 				&stat);
6549 			if (status != B_OK)
6550 				return status;
6551 
6552 			offset = stat.st_size;
6553 			break;
6554 		}
6555 		default:
6556 			return B_BAD_VALUE;
6557 	}
6558 
6559 	// assumes off_t is 64 bits wide
6560 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6561 		return B_BUFFER_OVERFLOW;
6562 
6563 	pos += offset;
6564 	if (pos < 0)
6565 		return B_BAD_VALUE;
6566 
6567 	return descriptor->pos = pos;
6568 }
6569 
6570 
6571 static status_t
6572 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6573 {
6574 	struct vnode* vnode = descriptor->u.vnode;
6575 
6576 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6577 
6578 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6579 		return B_NOT_SUPPORTED;
6580 
6581 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6582 }
6583 
6584 
6585 static status_t
6586 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6587 	int statMask)
6588 {
6589 	struct vnode* vnode = descriptor->u.vnode;
6590 
6591 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6592 
6593 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6594 		return B_READ_ONLY_DEVICE;
6595 
6596 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6597 }
6598 
6599 
6600 static status_t
6601 attr_remove(int fd, const char* name, bool kernel)
6602 {
6603 	struct file_descriptor* descriptor;
6604 	struct vnode* vnode;
6605 	status_t status;
6606 
6607 	if (name == NULL || *name == '\0')
6608 		return B_BAD_VALUE;
6609 
6610 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6611 		kernel));
6612 
6613 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6614 	if (descriptor == NULL)
6615 		return B_FILE_ERROR;
6616 
6617 	if (HAS_FS_CALL(vnode, remove_attr))
6618 		status = FS_CALL(vnode, remove_attr, name);
6619 	else
6620 		status = B_READ_ONLY_DEVICE;
6621 
6622 	put_fd(descriptor);
6623 
6624 	return status;
6625 }
6626 
6627 
6628 static status_t
6629 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6630 	bool kernel)
6631 {
6632 	struct file_descriptor* fromDescriptor;
6633 	struct file_descriptor* toDescriptor;
6634 	struct vnode* fromVnode;
6635 	struct vnode* toVnode;
6636 	status_t status;
6637 
6638 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6639 		|| *toName == '\0')
6640 		return B_BAD_VALUE;
6641 
6642 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
6643 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
6644 
6645 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
6646 	if (fromDescriptor == NULL)
6647 		return B_FILE_ERROR;
6648 
6649 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
6650 	if (toDescriptor == NULL) {
6651 		status = B_FILE_ERROR;
6652 		goto err;
6653 	}
6654 
6655 	// are the files on the same volume?
6656 	if (fromVnode->device != toVnode->device) {
6657 		status = B_CROSS_DEVICE_LINK;
6658 		goto err1;
6659 	}
6660 
6661 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
6662 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
6663 	} else
6664 		status = B_READ_ONLY_DEVICE;
6665 
6666 err1:
6667 	put_fd(toDescriptor);
6668 err:
6669 	put_fd(fromDescriptor);
6670 
6671 	return status;
6672 }
6673 
6674 
6675 static int
6676 index_dir_open(dev_t mountID, bool kernel)
6677 {
6678 	struct fs_mount* mount;
6679 	void* cookie;
6680 
6681 	FUNCTION(("index_dir_open(mountID = %ld, kernel = %d)\n", mountID, kernel));
6682 
6683 	status_t status = get_mount(mountID, &mount);
6684 	if (status != B_OK)
6685 		return status;
6686 
6687 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
6688 		status = B_NOT_SUPPORTED;
6689 		goto error;
6690 	}
6691 
6692 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
6693 	if (status != B_OK)
6694 		goto error;
6695 
6696 	// get fd for the index directory
6697 	int fd;
6698 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
6699 	if (fd >= 0)
6700 		return fd;
6701 
6702 	// something went wrong
6703 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
6704 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
6705 
6706 	status = fd;
6707 
6708 error:
6709 	put_mount(mount);
6710 	return status;
6711 }
6712 
6713 
6714 static status_t
6715 index_dir_close(struct file_descriptor* descriptor)
6716 {
6717 	struct fs_mount* mount = descriptor->u.mount;
6718 
6719 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
6720 
6721 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
6722 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
6723 
6724 	return B_OK;
6725 }
6726 
6727 
6728 static void
6729 index_dir_free_fd(struct file_descriptor* descriptor)
6730 {
6731 	struct fs_mount* mount = descriptor->u.mount;
6732 
6733 	if (mount != NULL) {
6734 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
6735 		put_mount(mount);
6736 	}
6737 }
6738 
6739 
6740 static status_t
6741 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6742 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6743 {
6744 	struct fs_mount* mount = descriptor->u.mount;
6745 
6746 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
6747 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
6748 			bufferSize, _count);
6749 	}
6750 
6751 	return B_NOT_SUPPORTED;
6752 }
6753 
6754 
6755 static status_t
6756 index_dir_rewind(struct file_descriptor* descriptor)
6757 {
6758 	struct fs_mount* mount = descriptor->u.mount;
6759 
6760 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
6761 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
6762 
6763 	return B_NOT_SUPPORTED;
6764 }
6765 
6766 
6767 static status_t
6768 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
6769 	bool kernel)
6770 {
6771 	FUNCTION(("index_create(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6772 		name, kernel));
6773 
6774 	struct fs_mount* mount;
6775 	status_t status = get_mount(mountID, &mount);
6776 	if (status != B_OK)
6777 		return status;
6778 
6779 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
6780 		status = B_READ_ONLY_DEVICE;
6781 		goto out;
6782 	}
6783 
6784 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
6785 
6786 out:
6787 	put_mount(mount);
6788 	return status;
6789 }
6790 
6791 
6792 #if 0
6793 static status_t
6794 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6795 {
6796 	struct vnode* vnode = descriptor->u.vnode;
6797 
6798 	// ToDo: currently unused!
6799 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
6800 	if (!HAS_FS_CALL(vnode, read_index_stat))
6801 		return B_NOT_SUPPORTED;
6802 
6803 	return B_NOT_SUPPORTED;
6804 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
6805 }
6806 
6807 
6808 static void
6809 index_free_fd(struct file_descriptor* descriptor)
6810 {
6811 	struct vnode* vnode = descriptor->u.vnode;
6812 
6813 	if (vnode != NULL) {
6814 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
6815 		put_vnode(vnode);
6816 	}
6817 }
6818 #endif
6819 
6820 
6821 static status_t
6822 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
6823 	bool kernel)
6824 {
6825 	FUNCTION(("index_name_read_stat(mountID = %ld, name = %s, kernel = %d)\n",
6826 		mountID, name, kernel));
6827 
6828 	struct fs_mount* mount;
6829 	status_t status = get_mount(mountID, &mount);
6830 	if (status != B_OK)
6831 		return status;
6832 
6833 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
6834 		status = B_NOT_SUPPORTED;
6835 		goto out;
6836 	}
6837 
6838 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
6839 
6840 out:
6841 	put_mount(mount);
6842 	return status;
6843 }
6844 
6845 
6846 static status_t
6847 index_remove(dev_t mountID, const char* name, bool kernel)
6848 {
6849 	FUNCTION(("index_remove(mountID = %ld, name = %s, kernel = %d)\n", mountID,
6850 		name, kernel));
6851 
6852 	struct fs_mount* mount;
6853 	status_t status = get_mount(mountID, &mount);
6854 	if (status != B_OK)
6855 		return status;
6856 
6857 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
6858 		status = B_READ_ONLY_DEVICE;
6859 		goto out;
6860 	}
6861 
6862 	status = FS_MOUNT_CALL(mount, remove_index, name);
6863 
6864 out:
6865 	put_mount(mount);
6866 	return status;
6867 }
6868 
6869 
6870 /*!	TODO: the query FS API is still pretty much the same as in R5.
6871 		It would be nice if the file systems got some more kernel
6872 		support for queries.
6873 		For example, query parsing should be moved into the kernel.
6874 */
6875 static int
6876 query_open(dev_t device, const char* query, uint32 flags, port_id port,
6877 	int32 token, bool kernel)
6878 {
6879 	struct fs_mount* mount;
6880 	void* cookie;
6881 
6882 	FUNCTION(("query_open(device = %ld, query = \"%s\", kernel = %d)\n", device,
6883 		query, kernel));
6884 
6885 	status_t status = get_mount(device, &mount);
6886 	if (status != B_OK)
6887 		return status;
6888 
6889 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
6890 		status = B_NOT_SUPPORTED;
6891 		goto error;
6892 	}
6893 
6894 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
6895 		&cookie);
6896 	if (status != B_OK)
6897 		goto error;
6898 
6899 	// get fd for the query
6900 	int fd;
6901 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
6902 	if (fd >= 0)
6903 		return fd;
6904 
6905 	status = fd;
6906 
6907 	// something went wrong
6908 	FS_MOUNT_CALL(mount, close_query, cookie);
6909 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
6910 
6911 error:
6912 	put_mount(mount);
6913 	return status;
6914 }
6915 
6916 
6917 static status_t
6918 query_close(struct file_descriptor* descriptor)
6919 {
6920 	struct fs_mount* mount = descriptor->u.mount;
6921 
6922 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
6923 
6924 	if (HAS_FS_MOUNT_CALL(mount, close_query))
6925 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
6926 
6927 	return B_OK;
6928 }
6929 
6930 
6931 static void
6932 query_free_fd(struct file_descriptor* descriptor)
6933 {
6934 	struct fs_mount* mount = descriptor->u.mount;
6935 
6936 	if (mount != NULL) {
6937 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
6938 		put_mount(mount);
6939 	}
6940 }
6941 
6942 
6943 static status_t
6944 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6945 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6946 {
6947 	struct fs_mount* mount = descriptor->u.mount;
6948 
6949 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
6950 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
6951 			bufferSize, _count);
6952 	}
6953 
6954 	return B_NOT_SUPPORTED;
6955 }
6956 
6957 
6958 static status_t
6959 query_rewind(struct file_descriptor* descriptor)
6960 {
6961 	struct fs_mount* mount = descriptor->u.mount;
6962 
6963 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
6964 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
6965 
6966 	return B_NOT_SUPPORTED;
6967 }
6968 
6969 
6970 //	#pragma mark - General File System functions
6971 
6972 
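/*!	Mounts the volume (or stack of layered volumes) specified by \a fsName
	at \a path. If \a fsName is NULL, the file system is determined from the
	partition via the disk device manager. Returns the ID of the new mount
	on success.
*/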
6973 static dev_t
6974 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
6975 	const char* args, bool kernel)
6976 {
6977 	struct ::fs_mount* mount;
6978 	status_t status = B_OK;
6979 	fs_volume* volume = NULL;
6980 	int32 layer = 0;
6981 
6982 	FUNCTION(("fs_mount: entry. path = '%s', fs_name = '%s'\n", path, fsName));
6983 
6984 	// The path is always safe, we just have to make sure that fsName is
6985 	// at least minimally valid - we can't assume anything about args, though.
6986 	// A NULL fsName is OK if a device was given and the FS is not virtual.
6987 	// We'll get it from the DDM later.
6988 	if (fsName == NULL) {
6989 		if (device == NULL || (flags & B_MOUNT_VIRTUAL_DEVICE) != 0)
6990 			return B_BAD_VALUE;
6991 	} else if (fsName[0] == '\0')
6992 		return B_BAD_VALUE;
6993 
6994 	RecursiveLocker mountOpLocker(sMountOpLock);
6995 
6996 	// Helper to delete a newly created file device on failure.
6997 	// Not exactly beautiful, but helps to keep the code below cleaner.
6998 	struct FileDeviceDeleter {
6999 		FileDeviceDeleter() : id(-1) {}
7000 		~FileDeviceDeleter()
7001 		{
7002 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7003 		}
7004 
7005 		partition_id id;
7006 	} fileDeviceDeleter;
7007 
7008 	// If the file system is not a "virtual" one, the device argument should
7009 	// point to a real file/device (if given at all).
7010 	// get the partition
7011 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7012 	KPartition* partition = NULL;
7013 	KPath normalizedDevice;
7014 	bool newlyCreatedFileDevice = false;
7015 
7016 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7017 		// normalize the device path
7018 		status = normalizedDevice.SetTo(device, true);
7019 		if (status != B_OK)
7020 			return status;
7021 
7022 		// get a corresponding partition from the DDM
7023 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7024 		if (partition == NULL) {
7025 			// Partition not found: This either means the user supplied
7026 			// an invalid path, or the path refers to an image file. We try
7027 			// to let the DDM create a file device for the path.
7028 			partition_id deviceID = ddm->CreateFileDevice(
7029 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7030 			if (deviceID >= 0) {
7031 				partition = ddm->RegisterPartition(deviceID);
7032 				if (newlyCreatedFileDevice)
7033 					fileDeviceDeleter.id = deviceID;
7034 			}
7035 		}
7036 
7037 		if (!partition) {
7038 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7039 				normalizedDevice.Path()));
7040 			return B_ENTRY_NOT_FOUND;
7041 		}
7042 
7043 		device = normalizedDevice.Path();
7044 			// correct path to file device
7045 	}
7046 	PartitionRegistrar partitionRegistrar(partition, true);
7047 
7048 	// Write lock the partition's device. For the time being, we keep the lock
7049 	// until we're done mounting -- not nice, but it ensures that no-one is
7050 	// interfering.
7051 	// TODO: Just mark the partition busy while mounting!
7052 	KDiskDevice* diskDevice = NULL;
7053 	if (partition) {
7054 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7055 		if (!diskDevice) {
7056 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7057 			return B_ERROR;
7058 		}
7059 	}
7060 
7061 	DeviceWriteLocker writeLocker(diskDevice, true);
7062 		// this takes over the write lock acquired before
7063 
7064 	if (partition != NULL) {
7065 		// make sure that the partition is not busy
7066 		if (partition->IsBusy()) {
7067 			TRACE(("fs_mount(): Partition is busy.\n"));
7068 			return B_BUSY;
7069 		}
7070 
7071 		// if no FS name had been supplied, we get it from the partition
7072 		if (fsName == NULL) {
7073 			KDiskSystem* diskSystem = partition->DiskSystem();
7074 			if (!diskSystem) {
7075 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7076 					"recognize it.\n"));
7077 				return B_BAD_VALUE;
7078 			}
7079 
7080 			if (!diskSystem->IsFileSystem()) {
7081 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7082 					"partitioning system.\n"));
7083 				return B_BAD_VALUE;
7084 			}
7085 
7086 			// The disk system name will not change, and the KDiskSystem
7087 			// object will not go away while the disk device is locked (and
7088 			// the partition has a reference to it), so this is safe.
7089 			fsName = diskSystem->Name();
7090 		}
7091 	}
7092 
7093 	mount = new(std::nothrow) (struct ::fs_mount);
7094 	if (mount == NULL)
7095 		return B_NO_MEMORY;
7096 
7097 	mount->device_name = strdup(device);
7098 		// "device" can be NULL
7099 
7100 	status = mount->entry_cache.Init();
7101 	if (status != B_OK)
7102 		goto err1;
7103 
7104 	// initialize structure
7105 	mount->id = sNextMountID++;
7106 	mount->partition = NULL;
7107 	mount->root_vnode = NULL;
7108 	mount->covers_vnode = NULL;
7109 	mount->unmounting = false;
7110 	mount->owns_file_device = false;
7111 	mount->volume = NULL;
7112 
7113 	// build up the volume(s)
7114 	while (true) {
7115 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7116 		if (layerFSName == NULL) {
7117 			if (layer == 0) {
7118 				status = B_NO_MEMORY;
7119 				goto err1;
7120 			}
7121 
7122 			break;
7123 		}
7124 
7125 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7126 		if (volume == NULL) {
7127 			status = B_NO_MEMORY;
7128 			free(layerFSName);
7129 			goto err1;
7130 		}
7131 
7132 		volume->id = mount->id;
7133 		volume->partition = partition != NULL ? partition->ID() : -1;
7134 		volume->layer = layer++;
7135 		volume->private_volume = NULL;
7136 		volume->ops = NULL;
7137 		volume->sub_volume = NULL;
7138 		volume->super_volume = NULL;
7139 		volume->file_system = NULL;
7140 		volume->file_system_name = NULL;
7141 
7142 		volume->file_system_name = get_file_system_name(layerFSName);
7143 		if (volume->file_system_name == NULL) {
7144 			status = B_NO_MEMORY;
7145 			free(layerFSName);
7146 			free(volume);
7147 			goto err1;
7148 		}
7149 
7150 		volume->file_system = get_file_system(layerFSName);
7151 		if (volume->file_system == NULL) {
7152 			status = B_DEVICE_NOT_FOUND;
7153 			free(layerFSName);
7154 			free(volume->file_system_name);
7155 			free(volume);
7156 			goto err1;
7157 		}
7158 
7159 		if (mount->volume == NULL)
7160 			mount->volume = volume;
7161 		else {
7162 			volume->super_volume = mount->volume;
7163 			mount->volume->sub_volume = volume;
7164 			mount->volume = volume;
7165 		}
7166 	}
7167 
7168 	// insert mount struct into list before we call FS's mount() function
7169 	// so that vnodes can be created for this mount
7170 	mutex_lock(&sMountMutex);
7171 	hash_insert(sMountsTable, mount);
7172 	mutex_unlock(&sMountMutex);
7173 
7174 	ino_t rootID;
7175 
7176 	if (!sRoot) {
7177 		// we haven't mounted anything yet
7178 		if (strcmp(path, "/") != 0) {
7179 			status = B_ERROR;
7180 			goto err2;
7181 		}
7182 
7183 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7184 			args, &rootID);
7185 		if (status != 0)
7186 			goto err2;
7187 	} else {
7188 		status = path_to_vnode(path, true, &mount->covers_vnode, NULL, kernel);
7189 		if (status != B_OK)
7190 			goto err2;
7191 
7192 		// make sure covers_vnode is a directory
7193 		if (!S_ISDIR(mount->covers_vnode->Type())) {
7194 			status = B_NOT_A_DIRECTORY;
7195 			goto err3;
7196 		}
7197 
7198 		if (mount->covers_vnode->mount->root_vnode == mount->covers_vnode) {
7199 			// this is already a mount point
7200 			status = B_BUSY;
7201 			goto err3;
7202 		}
7203 
7204 		// mount it/them
7205 		fs_volume* volume = mount->volume;
7206 		while (volume) {
7207 			status = volume->file_system->mount(volume, device, flags, args,
7208 				&rootID);
7209 			if (status != B_OK) {
7210 				if (volume->sub_volume)
7211 					goto err4;
7212 				goto err3;
7213 			}
7214 
7215 			volume = volume->super_volume;
7216 		}
7217 
7218 		volume = mount->volume;
7219 		while (volume) {
7220 			if (volume->ops->all_layers_mounted != NULL)
7221 				volume->ops->all_layers_mounted(volume);
7222 			volume = volume->super_volume;
7223 		}
7224 	}
7225 
7226 	// the root node is supposed to be owned by the file system - it must
7227 	// exist at this point
7228 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7229 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7230 		panic("fs_mount: file system does not own its root node!\n");
7231 		status = B_ERROR;
7232 		goto err4;
7233 	}
7234 
7235 	// No race here, since fs_mount() is the only function changing
7236 	// covers_vnode (and holds sMountOpLock at that time).
7237 	rw_lock_write_lock(&sVnodeLock);
7238 	if (mount->covers_vnode)
7239 		mount->covers_vnode->covered_by = mount->root_vnode;
7240 	rw_lock_write_unlock(&sVnodeLock);
7241 
7242 	if (!sRoot) {
7243 		sRoot = mount->root_vnode;
7244 		mutex_lock(&sIOContextRootLock);
7245 		get_current_io_context(true)->root = sRoot;
7246 		mutex_unlock(&sIOContextRootLock);
7247 		inc_vnode_ref_count(sRoot);
7248 	}
7249 
7250 	// supply the partition (if any) with the mount cookie and mark it mounted
7251 	if (partition) {
7252 		partition->SetMountCookie(mount->volume->private_volume);
7253 		partition->SetVolumeID(mount->id);
7254 
7255 		// keep a partition reference as long as the partition is mounted
7256 		partitionRegistrar.Detach();
7257 		mount->partition = partition;
7258 		mount->owns_file_device = newlyCreatedFileDevice;
7259 		fileDeviceDeleter.id = -1;
7260 	}
7261 
7262 	notify_mount(mount->id,
7263 		mount->covers_vnode ? mount->covers_vnode->device : -1,
7264 		mount->covers_vnode ? mount->covers_vnode->id : -1);
7265 
7266 	return mount->id;
7267 
7268 err4:
7269 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7270 err3:
7271 	if (mount->covers_vnode != NULL)
7272 		put_vnode(mount->covers_vnode);
7273 err2:
7274 	mutex_lock(&sMountMutex);
7275 	hash_remove(sMountsTable, mount);
7276 	mutex_unlock(&sMountMutex);
7277 err1:
7278 	delete mount;
7279 
7280 	return status;
7281 }
7282 
7283 
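/*!	Unmounts the volume given either by \a path (which must refer to the
	root directory of the mount) or by \a mountID. Unless B_FORCE_UNMOUNT is
	specified, the request fails with B_BUSY while any of the mount's vnodes
	are still in use.
*/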
7284 static status_t
7285 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7286 {
7287 	struct fs_mount* mount;
7288 	status_t err;
7289 
7290 	FUNCTION(("fs_unmount(path '%s', dev %ld, kernel %d)\n", path, mountID,
7291 		kernel));
7292 
7293 	struct vnode* pathVnode = NULL;
7294 	if (path != NULL) {
7295 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7296 		if (err != B_OK)
7297 			return B_ENTRY_NOT_FOUND;
7298 	}
7299 
7300 	RecursiveLocker mountOpLocker(sMountOpLock);
7301 
7302 	// This lock is not strictly necessary, but is held here in the KDEBUG
7303 	// case to keep the ASSERT in find_mount() working.
7304 	KDEBUG_ONLY(mutex_lock(&sMountMutex));
7305 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7306 	KDEBUG_ONLY(mutex_unlock(&sMountMutex));
7307 	if (mount == NULL) {
7308 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7309 			pathVnode);
7310 	}
7311 
7312 	if (path != NULL) {
7313 		put_vnode(pathVnode);
7314 
7315 		if (mount->root_vnode != pathVnode) {
7316 			// not a mount point
7317 			return B_BAD_VALUE;
7318 		}
7319 	}
7320 
7321 	// if the volume is associated with a partition, lock the device of the
7322 	// partition as long as we are unmounting
7323 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7324 	KPartition* partition = mount->partition;
7325 	KDiskDevice* diskDevice = NULL;
7326 	if (partition != NULL) {
7327 		if (partition->Device() == NULL) {
7328 			dprintf("fs_unmount(): There is no device!\n");
7329 			return B_ERROR;
7330 		}
7331 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7332 		if (!diskDevice) {
7333 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7334 			return B_ERROR;
7335 		}
7336 	}
7337 	DeviceWriteLocker writeLocker(diskDevice, true);
7338 
7339 	// make sure that the partition is not busy
7340 	if (partition != NULL) {
7341 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7342 			TRACE(("fs_unmount(): Partition is busy.\n"));
7343 			return B_BUSY;
7344 		}
7345 	}
7346 
7347 	// grab the vnode master mutex to keep someone from creating
7348 	// a vnode while we're figuring out if we can continue
7349 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7350 
7351 	bool disconnectedDescriptors = false;
7352 
7353 	while (true) {
7354 		bool busy = false;
7355 
7356 		// cycle through the list of vnodes associated with this mount and
7357 		// make sure none of them is busy or still referenced
7358 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7359 		while (struct vnode* vnode = iterator.Next()) {
7360 			// The root vnode ref_count needs to be 1 here (the mount has a
7361 			// reference).
7362 			if (vnode->IsBusy()
7363 				|| ((vnode->ref_count != 0 && mount->root_vnode != vnode)
7364 					|| (vnode->ref_count != 1 && mount->root_vnode == vnode))) {
7365 				// there are still vnodes in use on this mount, so we cannot
7366 				// unmount yet
7367 				busy = true;
7368 				break;
7369 			}
7370 		}
7371 
7372 		if (!busy)
7373 			break;
7374 
7375 		if ((flags & B_FORCE_UNMOUNT) == 0)
7376 			return B_BUSY;
7377 
7378 		if (disconnectedDescriptors) {
7379 			// wait a bit until the last access is finished, and then try again
7380 			vnodesWriteLocker.Unlock();
7381 			snooze(100000);
7382 			// TODO: if there is some kind of bug that prevents the ref counts
7383 			// from getting back to zero, this will fall into an endless loop...
7384 			vnodesWriteLocker.Lock();
7385 			continue;
7386 		}
7387 
7388 		// the file system is still busy - but we're forced to unmount it,
7389 		// so let's disconnect all open file descriptors
7390 
7391 		mount->unmounting = true;
7392 			// prevent new vnodes from being created
7393 
7394 		vnodesWriteLocker.Unlock();
7395 
7396 		disconnect_mount_or_vnode_fds(mount, NULL);
7397 		disconnectedDescriptors = true;
7398 
7399 		vnodesWriteLocker.Lock();
7400 	}
7401 
7402 	// we can safely continue, mark all of the vnodes busy and this mount
7403 	// structure in unmounting state
7404 	mount->unmounting = true;
7405 
7406 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7407 	while (struct vnode* vnode = iterator.Next()) {
7408 		vnode->SetBusy(true);
7409 		vnode_to_be_freed(vnode);
7410 	}
7411 
7412 	// The ref_count of the root node is 1 at this point, see above why this is
7413 	// The ref_count of the root node is 1 at this point, see above for why
7414 	vnode_to_be_freed(mount->root_vnode);
7415 
7416 	mount->covers_vnode->covered_by = NULL;
7417 
7418 	vnodesWriteLocker.Unlock();
7419 
7420 	put_vnode(mount->covers_vnode);
7421 
7422 	// Free all vnodes associated with this mount.
7423 	// They will be removed from the mount list by free_vnode(), so
7424 	// we don't have to do that ourselves.
7425 	while (struct vnode* vnode = mount->vnodes.Head())
7426 		free_vnode(vnode, false);
7427 
7428 	// remove the mount structure from the hash table
7429 	mutex_lock(&sMountMutex);
7430 	hash_remove(sMountsTable, mount);
7431 	mutex_unlock(&sMountMutex);
7432 
7433 	mountOpLocker.Unlock();
7434 
7435 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7436 	notify_unmount(mount->id);
7437 
7438 	// dereference the partition and mark it unmounted
7439 	if (partition) {
7440 		partition->SetVolumeID(-1);
7441 		partition->SetMountCookie(NULL);
7442 
7443 		if (mount->owns_file_device)
7444 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7445 		partition->Unregister();
7446 	}
7447 
7448 	delete mount;
7449 	return B_OK;
7450 }
7451 
7452 
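/*!	Synchronizes the volume: first writes back the modified pages of all
	file caches, using a marker vnode so that sVnodeLock can be released
	while iterating, and then invokes the file system's own sync() hook.
*/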
7453 static status_t
7454 fs_sync(dev_t device)
7455 {
7456 	struct fs_mount* mount;
7457 	status_t status = get_mount(device, &mount);
7458 	if (status != B_OK)
7459 		return status;
7460 
7461 	struct vnode marker;
7462 	memset(&marker, 0, sizeof(marker));
7463 	marker.SetBusy(true);
7464 	marker.SetRemoved(true);
7465 
7466 	// First, synchronize all file caches
7467 
7468 	while (true) {
7469 		WriteLocker locker(sVnodeLock);
7470 			// Note: That's the easy way, which is probably OK for sync(),
7471 			// since it's a relatively rare call and doesn't need to allow for
7472 			// a lot of concurrency. Using a read lock would be possible, but
7473 			// also more involved, since we'd have to lock the individual nodes
7474 			// and take care of the locking order, which we might not want to
7475 			// do while holding fs_mount::rlock.
7476 
7477 		// synchronize access to vnode list
7478 		recursive_lock_lock(&mount->rlock);
7479 
7480 		struct vnode* vnode;
7481 		if (!marker.IsRemoved()) {
7482 			vnode = mount->vnodes.GetNext(&marker);
7483 			mount->vnodes.Remove(&marker);
7484 			marker.SetRemoved(true);
7485 		} else
7486 			vnode = mount->vnodes.First();
7487 
7488 		while (vnode != NULL && (vnode->cache == NULL
7489 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7490 			// TODO: we could track writes (and writable mapped vnodes)
7491 			//	and have a simple flag that we could test for here
7492 			vnode = mount->vnodes.GetNext(vnode);
7493 		}
7494 
7495 		if (vnode != NULL) {
7496 			// insert marker vnode again
7497 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
7498 			marker.SetRemoved(false);
7499 		}
7500 
7501 		recursive_lock_unlock(&mount->rlock);
7502 
7503 		if (vnode == NULL)
7504 			break;
7505 
7506 		vnode = lookup_vnode(mount->id, vnode->id);
7507 		if (vnode == NULL || vnode->IsBusy())
7508 			continue;
7509 
7510 		if (vnode->ref_count == 0) {
7511 			// this vnode has been unused before
7512 			vnode_used(vnode);
7513 		}
7514 		inc_vnode_ref_count(vnode);
7515 
7516 		locker.Unlock();
7517 
7518 		if (vnode->cache != NULL && !vnode->IsRemoved())
7519 			vnode->cache->WriteModified();
7520 
7521 		put_vnode(vnode);
7522 	}
7523 
7524 	// And then, let the file systems do their synchronizing work
7525 
7526 	if (HAS_FS_MOUNT_CALL(mount, sync))
7527 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7528 
7529 	put_mount(mount);
7530 	return status;
7531 }
7532 
7533 
7534 static status_t
7535 fs_read_info(dev_t device, struct fs_info* info)
7536 {
7537 	struct fs_mount* mount;
7538 	status_t status = get_mount(device, &mount);
7539 	if (status != B_OK)
7540 		return status;
7541 
7542 	memset(info, 0, sizeof(struct fs_info));
7543 
7544 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7545 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7546 
7547 	// fill in info the file system doesn't (have to) know about
7548 	if (status == B_OK) {
7549 		info->dev = mount->id;
7550 		info->root = mount->root_vnode->id;
7551 
7552 		fs_volume* volume = mount->volume;
7553 		while (volume->super_volume != NULL)
7554 			volume = volume->super_volume;
7555 
7556 		strlcpy(info->fsh_name, volume->file_system_name,
7557 			sizeof(info->fsh_name));
7558 		if (mount->device_name != NULL) {
7559 			strlcpy(info->device_name, mount->device_name,
7560 				sizeof(info->device_name));
7561 		}
7562 	}
7563 
7564 	// if the call is not supported by the file system, there are still
7565 	// the parts that we filled out ourselves
7566 
7567 	put_mount(mount);
7568 	return status;
7569 }
7570 
7571 
7572 static status_t
7573 fs_write_info(dev_t device, const struct fs_info* info, int mask)
7574 {
7575 	struct fs_mount* mount;
7576 	status_t status = get_mount(device, &mount);
7577 	if (status != B_OK)
7578 		return status;
7579 
7580 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
7581 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
7582 	else
7583 		status = B_READ_ONLY_DEVICE;
7584 
7585 	put_mount(mount);
7586 	return status;
7587 }
7588 
7589 
7590 static dev_t
7591 fs_next_device(int32* _cookie)
7592 {
7593 	struct fs_mount* mount = NULL;
7594 	dev_t device = *_cookie;
7595 
7596 	mutex_lock(&sMountMutex);
7597 
7598 	// Since device IDs are assigned sequentially, this algorithm
7599 	// works well enough. It makes sure that the device list
7600 	// returned is sorted, and that no device is skipped when an
7601 	// already visited device got unmounted.
7602 
7603 	while (device < sNextMountID) {
7604 		mount = find_mount(device++);
7605 		if (mount != NULL && mount->volume->private_volume != NULL)
7606 			break;
7607 	}
7608 
7609 	*_cookie = device;
7610 
7611 	if (mount != NULL)
7612 		device = mount->id;
7613 	else
7614 		device = B_BAD_VALUE;
7615 
7616 	mutex_unlock(&sMountMutex);
7617 
7618 	return device;
7619 }
7620 
7621 
7622 ssize_t
7623 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
7624 	void *buffer, size_t readBytes)
7625 {
7626 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
7627 	if (attrFD < 0)
7628 		return attrFD;
7629 
7630 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
7631 
7632 	_kern_close(attrFD);
7633 
7634 	return bytesRead;
7635 }
7636 
7637 
7638 static status_t
7639 get_cwd(char* buffer, size_t size, bool kernel)
7640 {
7641 	// Get current working directory from io context
7642 	struct io_context* context = get_current_io_context(kernel);
7643 	status_t status;
7644 
7645 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
7646 
7647 	mutex_lock(&context->io_mutex);
7648 
7649 	struct vnode* vnode = context->cwd;
7650 	if (vnode)
7651 		inc_vnode_ref_count(vnode);
7652 
7653 	mutex_unlock(&context->io_mutex);
7654 
7655 	if (vnode) {
7656 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
7657 		put_vnode(vnode);
7658 	} else
7659 		status = B_ERROR;
7660 
7661 	return status;
7662 }
7663 
7664 
7665 static status_t
7666 set_cwd(int fd, char* path, bool kernel)
7667 {
7668 	struct io_context* context;
7669 	struct vnode* vnode = NULL;
7670 	struct vnode* oldDirectory;
7671 	status_t status;
7672 
7673 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
7674 
7675 	// Get vnode for passed path, and bail if it failed
7676 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
7677 	if (status < 0)
7678 		return status;
7679 
7680 	if (!S_ISDIR(vnode->Type())) {
7681 		// nope, can't cwd to here
7682 		status = B_NOT_A_DIRECTORY;
7683 		goto err;
7684 	}
7685 
7686 	// Get current io context and lock
7687 	context = get_current_io_context(kernel);
7688 	mutex_lock(&context->io_mutex);
7689 
7690 	// save the old current working directory first
7691 	oldDirectory = context->cwd;
7692 	context->cwd = vnode;
7693 
7694 	mutex_unlock(&context->io_mutex);
7695 
7696 	if (oldDirectory)
7697 		put_vnode(oldDirectory);
7698 
7699 	return B_NO_ERROR;
7700 
7701 err:
7702 	put_vnode(vnode);
7703 	return status;
7704 }
7705 
7706 
7707 //	#pragma mark - kernel mirrored syscalls
7708 
7709 
7710 dev_t
7711 _kern_mount(const char* path, const char* device, const char* fsName,
7712 	uint32 flags, const char* args, size_t argsLength)
7713 {
7714 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7715 	if (pathBuffer.InitCheck() != B_OK)
7716 		return B_NO_MEMORY;
7717 
7718 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
7719 }
7720 
7721 
7722 status_t
7723 _kern_unmount(const char* path, uint32 flags)
7724 {
7725 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7726 	if (pathBuffer.InitCheck() != B_OK)
7727 		return B_NO_MEMORY;
7728 
7729 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
7730 }
7731 
7732 
7733 status_t
7734 _kern_read_fs_info(dev_t device, struct fs_info* info)
7735 {
7736 	if (info == NULL)
7737 		return B_BAD_VALUE;
7738 
7739 	return fs_read_info(device, info);
7740 }
7741 
7742 
7743 status_t
7744 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
7745 {
7746 	if (info == NULL)
7747 		return B_BAD_VALUE;
7748 
7749 	return fs_write_info(device, info, mask);
7750 }
7751 
7752 
7753 status_t
7754 _kern_sync(void)
7755 {
7756 	// Note: _kern_sync() is also called from _user_sync()
7757 	int32 cookie = 0;
7758 	dev_t device;
7759 	while ((device = next_dev(&cookie)) >= 0) {
7760 		status_t status = fs_sync(device);
7761 		if (status != B_OK && status != B_BAD_VALUE) {
7762 			dprintf("sync: device %ld couldn't sync: %s\n", device,
7763 				strerror(status));
7764 		}
7765 	}
7766 
7767 	return B_OK;
7768 }
7769 
7770 
7771 dev_t
7772 _kern_next_device(int32* _cookie)
7773 {
7774 	return fs_next_device(_cookie);
7775 }
7776 
7777 
7778 status_t
7779 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
7780 	size_t infoSize)
7781 {
7782 	if (infoSize != sizeof(fd_info))
7783 		return B_BAD_VALUE;
7784 
7785 	// get the team
7786 	Team* team = Team::Get(teamID);
7787 	if (team == NULL)
7788 		return B_BAD_TEAM_ID;
7789 	BReference<Team> teamReference(team, true);
7790 
7791 	// now that we have a team reference, its I/O context won't go away
7792 	io_context* context = team->io_context;
7793 	MutexLocker contextLocker(context->io_mutex);
7794 
7795 	uint32 slot = *_cookie;
7796 
7797 	struct file_descriptor* descriptor;
7798 	while (slot < context->table_size
7799 		&& (descriptor = context->fds[slot]) == NULL) {
7800 		slot++;
7801 	}
7802 
7803 	if (slot >= context->table_size)
7804 		return B_ENTRY_NOT_FOUND;
7805 
7806 	info->number = slot;
7807 	info->open_mode = descriptor->open_mode;
7808 
7809 	struct vnode* vnode = fd_vnode(descriptor);
7810 	if (vnode != NULL) {
7811 		info->device = vnode->device;
7812 		info->node = vnode->id;
7813 	} else if (descriptor->u.mount != NULL) {
7814 		info->device = descriptor->u.mount->id;
7815 		info->node = -1;
7816 	}
7817 
7818 	*_cookie = slot + 1;
7819 	return B_OK;
7820 }
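
// Sketch: enumerating every open descriptor of a (hypothetical) team. The
// cookie is simply the next FD slot to inspect, so restarting the loop with
// the returned cookie continues where the previous call left off:
/*
	uint32 cookie = 0;
	fd_info info;
	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info))
			== B_OK) {
		dprintf("fd %ld: device %ld, node %lld\n", info.number, info.device,
			info.node);
	}
*/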
7821 
7822 
7823 int
7824 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
7825 	int perms)
7826 {
7827 	if ((openMode & O_CREAT) != 0) {
7828 		return file_create_entry_ref(device, inode, name, openMode, perms,
7829 			true);
7830 	}
7831 
7832 	return file_open_entry_ref(device, inode, name, openMode, true);
7833 }
7834 
7835 
7836 /*!	\brief Opens a node specified by a FD + path pair.
7837 
7838 	At least one of \a fd and \a path must be specified.
7839 	If only \a fd is given, the function opens the node identified by this
7840 	FD. If only a path is given, this path is opened. If both are given and
7841 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7842 	of the directory (!) identified by \a fd.
7843 
7844 	\param fd The FD. May be < 0.
7845 	\param path The absolute or relative path. May be \c NULL.
7846 	\param openMode The open mode.
7847 	\return A FD referring to the newly opened node, or an error code,
7848 			if an error occurs.
7849 */
7850 int
7851 _kern_open(int fd, const char* path, int openMode, int perms)
7852 {
7853 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7854 	if (pathBuffer.InitCheck() != B_OK)
7855 		return B_NO_MEMORY;
7856 
7857 	if (openMode & O_CREAT)
7858 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
7859 
7860 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
7861 }
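
// Illustration of the FD + path pair semantics documented above, using
// hypothetical descriptors and paths:
/*
	int fd1 = _kern_open(dirFD, "file", O_RDONLY, 0);
		// relative path: resolved against the directory dirFD refers to
	int fd2 = _kern_open(dirFD, "/boot/home/file", O_RDONLY, 0);
		// absolute path: dirFD is ignored
	int fd3 = _kern_open(someFD, NULL, O_RDONLY, 0);
		// no path: opens the node identified by someFD itself
*/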
7862 
7863 
7864 /*!	\brief Opens a directory specified by entry_ref or node_ref.
7865 
7866 	The supplied name may be \c NULL, in which case the directory identified
7867 	by \a device and \a inode will be opened. Otherwise \a device and
7868 	\a inode identify the parent directory of the directory to be opened
7869 	and \a name its entry name.
7870 
7871 	\param device If \a name is specified the ID of the device the parent
7872 		   directory of the directory to be opened resides on, otherwise
7873 		   the device of the directory itself.
7874 	\param inode If \a name is specified the node ID of the parent
7875 		   directory of the directory to be opened, otherwise the node ID of
7876 		   the directory itself.
7877 	\param name The entry name of the directory to be opened. If \c NULL,
7878 		   the \a device + \a inode pair identify the node to be opened.
7879 	\return The FD of the newly opened directory or an error code, if
7880 			something went wrong.
7881 */
7882 int
7883 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
7884 {
7885 	return dir_open_entry_ref(device, inode, name, true);
7886 }
7887 
7888 
7889 /*!	\brief Opens a directory specified by a FD + path pair.
7890 
7891 	At least one of \a fd and \a path must be specified.
7892 	If only \a fd is given, the function opens the directory identified by this
7893 	FD. If only a path is given, this path is opened. If both are given and
7894 	the path is absolute, \a fd is ignored; a relative path is reckoned off
7895 	of the directory (!) identified by \a fd.
7896 
7897 	\param fd The FD. May be < 0.
7898 	\param path The absolute or relative path. May be \c NULL.
7899 	\return A FD referring to the newly opened directory, or an error code,
7900 			if an error occurs.
7901 */
7902 int
7903 _kern_open_dir(int fd, const char* path)
7904 {
7905 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7906 	if (pathBuffer.InitCheck() != B_OK)
7907 		return B_NO_MEMORY;
7908 
7909 	return dir_open(fd, pathBuffer.LockBuffer(), true);
7910 }
7911 
7912 
7913 status_t
7914 _kern_fcntl(int fd, int op, uint32 argument)
7915 {
7916 	return common_fcntl(fd, op, argument, true);
7917 }
7918 
7919 
7920 status_t
7921 _kern_fsync(int fd)
7922 {
7923 	return common_sync(fd, true);
7924 }
7925 
7926 
7927 status_t
7928 _kern_lock_node(int fd)
7929 {
7930 	return common_lock_node(fd, true);
7931 }
7932 
7933 
7934 status_t
7935 _kern_unlock_node(int fd)
7936 {
7937 	return common_unlock_node(fd, true);
7938 }
7939 
7940 
7941 status_t
7942 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
7943 	int perms)
7944 {
7945 	return dir_create_entry_ref(device, inode, name, perms, true);
7946 }
7947 
7948 
7949 /*!	\brief Creates a directory specified by a FD + path pair.
7950 
7951 	\a path must always be specified (it contains the name of the new directory
7952 	at least). If only a path is given, this path identifies the location at
7953 	which the directory shall be created. If both \a fd and \a path are given
7954 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
7955 	of the directory (!) identified by \a fd.
7956 
7957 	\param fd The FD. May be < 0.
7958 	\param path The absolute or relative path. Must not be \c NULL.
7959 	\param perms The access permissions the new directory shall have.
7960 	\return \c B_OK, if the directory has been created successfully, another
7961 			error code otherwise.
7962 */
7963 status_t
7964 _kern_create_dir(int fd, const char* path, int perms)
7965 {
7966 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7967 	if (pathBuffer.InitCheck() != B_OK)
7968 		return B_NO_MEMORY;
7969 
7970 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
7971 }
7972 
7973 
7974 status_t
7975 _kern_remove_dir(int fd, const char* path)
7976 {
7977 	if (path) {
7978 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
7979 		if (pathBuffer.InitCheck() != B_OK)
7980 			return B_NO_MEMORY;
7981 
7982 		return dir_remove(fd, pathBuffer.LockBuffer(), true);
7983 	}
7984 
7985 	return dir_remove(fd, NULL, true);
7986 }
7987 
7988 
7989 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
7990 
7991 	At least one of \a fd and \a path must be specified.
7992 	If only \a fd is given, the symlink to be read is the node
7993 	identified by this FD. If only a path is given, this path identifies the
7994 	symlink to be read. If both are given and the path is absolute, \a fd is
7995 	ignored; a relative path is reckoned off of the directory (!) identified
7996 	by \a fd.
7997 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
7998 	will still be updated to reflect the required buffer size.
7999 
8000 	\param fd The FD. May be < 0.
8001 	\param path The absolute or relative path. May be \c NULL.
8002 	\param buffer The buffer into which the contents of the symlink shall be
8003 		   written.
8004 	\param _bufferSize A pointer to the size of the supplied buffer.
8005 	\return The length of the link on success or an appropriate error code
8006 */
8007 status_t
8008 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8009 {
8010 	if (path) {
8011 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8012 		if (pathBuffer.InitCheck() != B_OK)
8013 			return B_NO_MEMORY;
8014 
8015 		return common_read_link(fd, pathBuffer.LockBuffer(),
8016 			buffer, _bufferSize, true);
8017 	}
8018 
8019 	return common_read_link(fd, NULL, buffer, _bufferSize, true);
8020 }
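
// Sketch of the B_BUFFER_OVERFLOW protocol described above: since
// _bufferSize is updated even on failure, a caller can retry with a
// sufficiently large buffer (allocation failure handling omitted):
/*
	char small[4];
	size_t bufferSize = sizeof(small);
	status_t status = _kern_read_link(-1, "/some/link", small, &bufferSize);
	if (status == B_BUFFER_OVERFLOW) {
		char* buffer = (char*)malloc(bufferSize);
		status = _kern_read_link(-1, "/some/link", buffer, &bufferSize);
		free(buffer);
	}
*/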
8021 
8022 
8023 /*!	\brief Creates a symlink specified by a FD + path pair.
8024 
8025 	\a path must always be specified (it contains the name of the new symlink
8026 	at least). If only a path is given, this path identifies the location at
8027 	which the symlink shall be created. If both \a fd and \a path are given and
8028 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8029 	of the directory (!) identified by \a fd.
8030 
8031 	\param fd The FD. May be < 0.
8032 	\param toPath The absolute or relative path. Must not be \c NULL.
8033 	\param mode The access permissions the new symlink shall have.
8034 	\return \c B_OK, if the symlink has been created successfully, another
8035 			error code otherwise.
8036 */
8037 status_t
8038 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8039 {
8040 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8041 	if (pathBuffer.InitCheck() != B_OK)
8042 		return B_NO_MEMORY;
8043 
8044 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8045 		toPath, mode, true);
8046 }
8047 
8048 
8049 status_t
8050 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8051 	bool traverseLeafLink)
8052 {
8053 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8054 	KPath toPathBuffer(toPath, false, B_PATH_NAME_LENGTH + 1);
8055 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8056 		return B_NO_MEMORY;
8057 
8058 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8059 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8060 }
8061 
8062 
8063 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8064 
8065 	\a path must always be specified (it contains at least the name of the entry
8066 	to be deleted). If only a path is given, this path identifies the entry
8067 	directly. If both \a fd and \a path are given and the path is absolute,
8068 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8069 	identified by \a fd.
8070 
8071 	\param fd The FD. May be < 0.
8072 	\param path The absolute or relative path. Must not be \c NULL.
8073 	\return \c B_OK, if the entry has been removed successfully, another
8074 			error code otherwise.
8075 */
8076 status_t
8077 _kern_unlink(int fd, const char* path)
8078 {
8079 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8080 	if (pathBuffer.InitCheck() != B_OK)
8081 		return B_NO_MEMORY;
8082 
8083 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8084 }
8085 
8086 
8087 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8088 		   by another FD + path pair.
8089 
8090 	\a oldPath and \a newPath must always be specified (they contain at least
8091 	the name of the entry). If only a path is given, this path identifies the
8092 	entry directly. If both a FD and a path are given and the path is absolute,
8093 	the FD is ignored; a relative path is reckoned off of the directory (!)
8094 	identified by the respective FD.
8095 
8096 	\param oldFD The FD of the old location. May be < 0.
8097 	\param oldPath The absolute or relative path of the old location. Must not
8098 		   be \c NULL.
8099 	\param newFD The FD of the new location. May be < 0.
8100 	\param newPath The absolute or relative path of the new location. Must not
8101 		   be \c NULL.
8102 	\return \c B_OK, if the entry has been moved successfully, another
8103 			error code otherwise.
8104 */
8105 status_t
8106 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8107 {
8108 	KPath oldPathBuffer(oldPath, false, B_PATH_NAME_LENGTH + 1);
8109 	KPath newPathBuffer(newPath, false, B_PATH_NAME_LENGTH + 1);
8110 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8111 		return B_NO_MEMORY;
8112 
8113 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8114 		newFD, newPathBuffer.LockBuffer(), true);
8115 }
8116 
8117 
8118 status_t
8119 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8120 {
8121 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8122 	if (pathBuffer.InitCheck() != B_OK)
8123 		return B_NO_MEMORY;
8124 
8125 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8126 		true);
8127 }
8128 
8129 
8130 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8131 
8132 	If only \a fd is given, the stat operation associated with the type
8133 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8134 	given, this path identifies the entry for whose node to retrieve the
8135 	stat data. If both \a fd and \a path are given and the path is absolute,
8136 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8137 	identified by \a fd and specifies the entry whose stat data shall be
8138 	retrieved.
8139 
8140 	\param fd The FD. May be < 0.
8141 	\param path The absolute or relative path. Must not be \c NULL.
8142 	\param traverseLeafLink If \a path is given, \c true specifies that the
8143 		   function shall not stick to symlinks, but traverse them.
8144 	\param stat The buffer the stat data shall be written into.
8145 	\param statSize The size of the supplied stat buffer.
8146 	\return \c B_OK, if the stat data have been read successfully, another
8147 			error code otherwise.
8148 */
8149 status_t
8150 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8151 	struct stat* stat, size_t statSize)
8152 {
8153 	struct stat completeStat;
8154 	struct stat* originalStat = NULL;
8155 	status_t status;
8156 
8157 	if (statSize > sizeof(struct stat))
8158 		return B_BAD_VALUE;
8159 
8160 	// this supports different stat extensions
8161 	if (statSize < sizeof(struct stat)) {
8162 		originalStat = stat;
8163 		stat = &completeStat;
8164 	}
8165 
8166 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8167 
8168 	if (status == B_OK && originalStat != NULL)
8169 		memcpy(originalStat, stat, statSize);
8170 
8171 	return status;
8172 }
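
// Sketch of the stat extension mechanism above: a binary compiled against
// an older, smaller struct stat (hypothetical layout) still gets correct
// data, because the kernel fills a complete struct stat internally and
// copies back only the first statSize bytes:
/*
	struct old_stat legacyStat;
	status_t status = _kern_read_stat(fd, NULL, false,
		(struct stat*)&legacyStat, sizeof(legacyStat));
*/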
8173 
8174 
8175 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8176 
8177 	If only \a fd is given, the stat operation associated with the type
8178 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8179 	given, this path identifies the entry for whose node to write the
8180 	stat data. If both \a fd and \a path are given and the path is absolute,
8181 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8182 	identified by \a fd and specifies the entry whose stat data shall be
8183 	written.
8184 
8185 	\param fd The FD. May be < 0.
8186 	\param path The absolute or relative path. Must not be \c NULL.
8187 	\param traverseLeafLink If \a path is given, \c true specifies that the
8188 		   function shall not stick to symlinks, but traverse them.
8189 	\param stat The buffer containing the stat data to be written.
8190 	\param statSize The size of the supplied stat buffer.
8191 	\param statMask A mask specifying which parts of the stat data shall be
8192 		   written.
8193 	\return \c B_OK, if the stat data have been written successfully,
8194 			another error code otherwise.
8195 */
8196 status_t
8197 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8198 	const struct stat* stat, size_t statSize, int statMask)
8199 {
8200 	struct stat completeStat;
8201 
8202 	if (statSize > sizeof(struct stat))
8203 		return B_BAD_VALUE;
8204 
8205 	// this supports different stat extensions
8206 	if (statSize < sizeof(struct stat)) {
8207 		memset((uint8*)&completeStat + statSize, 0,
8208 			sizeof(struct stat) - statSize);
8209 		memcpy(&completeStat, stat, statSize);
8210 		stat = &completeStat;
8211 	}
8212 
8213 	status_t status;
8214 
8215 	if (path) {
8216 		// path given: write the stat of the node referred to by (fd, path)
8217 		KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8218 		if (pathBuffer.InitCheck() != B_OK)
8219 			return B_NO_MEMORY;
8220 
8221 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8222 			traverseLeafLink, stat, statMask, true);
8223 	} else {
8224 		// no path given: get the FD and use the FD operation
8225 		struct file_descriptor* descriptor
8226 			= get_fd(get_current_io_context(true), fd);
8227 		if (descriptor == NULL)
8228 			return B_FILE_ERROR;
8229 
8230 		if (descriptor->ops->fd_write_stat)
8231 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8232 		else
8233 			status = B_NOT_SUPPORTED;
8234 
8235 		put_fd(descriptor);
8236 	}
8237 
8238 	return status;
8239 }
8240 
8241 
8242 int
8243 _kern_open_attr_dir(int fd, const char* path)
8244 {
8245 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8246 	if (pathBuffer.InitCheck() != B_OK)
8247 		return B_NO_MEMORY;
8248 
8249 	if (path != NULL)
8250 		pathBuffer.SetTo(path);
8251 
8252 	return attr_dir_open(fd, path ? pathBuffer.LockBuffer() : NULL, true);
8253 }
8254 
8255 
8256 int
8257 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8258 	int openMode)
8259 {
8260 	KPath pathBuffer(path, false, B_PATH_NAME_LENGTH + 1);
8261 	if (pathBuffer.InitCheck() != B_OK)
8262 		return B_NO_MEMORY;
8263 
8264 	if ((openMode & O_CREAT) != 0) {
8265 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8266 			true);
8267 	}
8268 
8269 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8270 }
8271 
8272 
8273 status_t
8274 _kern_remove_attr(int fd, const char* name)
8275 {
8276 	return attr_remove(fd, name, true);
8277 }
8278 
8279 
8280 status_t
8281 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8282 	const char* toName)
8283 {
8284 	return attr_rename(fromFile, fromName, toFile, toName, true);
8285 }
8286 
8287 
8288 int
8289 _kern_open_index_dir(dev_t device)
8290 {
8291 	return index_dir_open(device, true);
8292 }
8293 
8294 
8295 status_t
8296 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8297 {
8298 	return index_create(device, name, type, flags, true);
8299 }
8300 
8301 
8302 status_t
8303 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8304 {
8305 	return index_name_read_stat(device, name, stat, true);
8306 }
8307 
8308 
8309 status_t
8310 _kern_remove_index(dev_t device, const char* name)
8311 {
8312 	return index_remove(device, name, true);
8313 }
8314 
8315 
8316 status_t
8317 _kern_getcwd(char* buffer, size_t size)
8318 {
8319 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8320 
8321 	// Call vfs to get current working directory
8322 	return get_cwd(buffer, size, true);
8323 }
8324 
8325 
8326 status_t
8327 _kern_setcwd(int fd, const char* path)
8328 {
8329 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8330 	if (pathBuffer.InitCheck() != B_OK)
8331 		return B_NO_MEMORY;
8332 
8333 	if (path != NULL)
8334 		pathBuffer.SetTo(path);
8335 
8336 	return set_cwd(fd, path != NULL ? pathBuffer.LockBuffer() : NULL, true);
8337 }
8338 
8339 
8340 //	#pragma mark - userland syscalls
8341 
8342 
8343 dev_t
8344 _user_mount(const char* userPath, const char* userDevice,
8345 	const char* userFileSystem, uint32 flags, const char* userArgs,
8346 	size_t argsLength)
8347 {
8348 	char fileSystem[B_FILE_NAME_LENGTH];
8349 	KPath path, device;
8350 	char* args = NULL;
8351 	status_t status;
8352 
8353 	if (!IS_USER_ADDRESS(userPath)
8354 		|| !IS_USER_ADDRESS(userFileSystem)
8355 		|| !IS_USER_ADDRESS(userDevice))
8356 		return B_BAD_ADDRESS;
8357 
8358 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8359 		return B_NO_MEMORY;
8360 
8361 	if (user_strlcpy(path.LockBuffer(), userPath, B_PATH_NAME_LENGTH) < B_OK)
8362 		return B_BAD_ADDRESS;
8363 
8364 	if (userFileSystem != NULL
8365 		&& user_strlcpy(fileSystem, userFileSystem, sizeof(fileSystem)) < B_OK)
8366 		return B_BAD_ADDRESS;
8367 
8368 	if (userDevice != NULL
8369 		&& user_strlcpy(device.LockBuffer(), userDevice, B_PATH_NAME_LENGTH)
8370 			< B_OK)
8371 		return B_BAD_ADDRESS;
8372 
8373 	if (userArgs != NULL && argsLength > 0) {
8374 		// this is a safety restriction
8375 		if (argsLength >= 65536)
8376 			return B_NAME_TOO_LONG;
8377 
8378 		args = (char*)malloc(argsLength + 1);
8379 		if (args == NULL)
8380 			return B_NO_MEMORY;
8381 
8382 		if (user_strlcpy(args, userArgs, argsLength + 1) < B_OK) {
8383 			free(args);
8384 			return B_BAD_ADDRESS;
8385 		}
8386 	}
8387 	path.UnlockBuffer();
8388 	device.UnlockBuffer();
8389 
8390 	status = fs_mount(path.LockBuffer(),
8391 		userDevice != NULL ? device.Path() : NULL,
8392 		userFileSystem ? fileSystem : NULL, flags, args, false);
8393 
8394 	free(args);
8395 	return status;
8396 }
8397 
8398 
8399 status_t
8400 _user_unmount(const char* userPath, uint32 flags)
8401 {
8402 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8403 	if (pathBuffer.InitCheck() != B_OK)
8404 		return B_NO_MEMORY;
8405 
8406 	char* path = pathBuffer.LockBuffer();
8407 
8408 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8409 		return B_BAD_ADDRESS;
8410 
8411 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8412 }
8413 
8414 
8415 status_t
8416 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8417 {
8418 	struct fs_info info;
8419 	status_t status;
8420 
8421 	if (userInfo == NULL)
8422 		return B_BAD_VALUE;
8423 
8424 	if (!IS_USER_ADDRESS(userInfo))
8425 		return B_BAD_ADDRESS;
8426 
8427 	status = fs_read_info(device, &info);
8428 	if (status != B_OK)
8429 		return status;
8430 
8431 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8432 		return B_BAD_ADDRESS;
8433 
8434 	return B_OK;
8435 }
8436 
8437 
8438 status_t
8439 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8440 {
8441 	struct fs_info info;
8442 
8443 	if (userInfo == NULL)
8444 		return B_BAD_VALUE;
8445 
8446 	if (!IS_USER_ADDRESS(userInfo)
8447 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8448 		return B_BAD_ADDRESS;
8449 
8450 	return fs_write_info(device, &info, mask);
8451 }
8452 
8453 
8454 dev_t
8455 _user_next_device(int32* _userCookie)
8456 {
8457 	int32 cookie;
8458 	dev_t device;
8459 
8460 	if (!IS_USER_ADDRESS(_userCookie)
8461 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8462 		return B_BAD_ADDRESS;
8463 
8464 	device = fs_next_device(&cookie);
8465 
8466 	if (device >= B_OK) {
8467 		// update user cookie
8468 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8469 			return B_BAD_ADDRESS;
8470 	}
8471 
8472 	return device;
8473 }
8474 
8475 
8476 status_t
8477 _user_sync(void)
8478 {
8479 	return _kern_sync();
8480 }
8481 
8482 
8483 status_t
8484 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8485 	size_t infoSize)
8486 {
8487 	struct fd_info info;
8488 	uint32 cookie;
8489 
8490 	// only root can do this (or should root's group be enough?)
8491 	if (geteuid() != 0)
8492 		return B_NOT_ALLOWED;
8493 
8494 	if (infoSize != sizeof(fd_info))
8495 		return B_BAD_VALUE;
8496 
8497 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8498 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8499 		return B_BAD_ADDRESS;
8500 
8501 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8502 	if (status != B_OK)
8503 		return status;
8504 
8505 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8506 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8507 		return B_BAD_ADDRESS;
8508 
8509 	return status;
8510 }
8511 
8512 
8513 status_t
8514 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8515 	char* userPath, size_t pathLength)
8516 {
8517 	if (!IS_USER_ADDRESS(userPath))
8518 		return B_BAD_ADDRESS;
8519 
8520 	KPath path(B_PATH_NAME_LENGTH + 1);
8521 	if (path.InitCheck() != B_OK)
8522 		return B_NO_MEMORY;
8523 
8524 	// copy the leaf name onto the stack
8525 	char stackLeaf[B_FILE_NAME_LENGTH];
8526 	if (leaf) {
8527 		if (!IS_USER_ADDRESS(leaf))
8528 			return B_BAD_ADDRESS;
8529 
8530 		int length = user_strlcpy(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8531 		if (length < 0)
8532 			return length;
8533 		if (length >= B_FILE_NAME_LENGTH)
8534 			return B_NAME_TOO_LONG;
8535 
8536 		leaf = stackLeaf;
8537 	}
8538 
8539 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8540 		path.LockBuffer(), path.BufferSize());
8541 	if (status != B_OK)
8542 		return status;
8543 
8544 	path.UnlockBuffer();
8545 
8546 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8547 	if (length < 0)
8548 		return length;
8549 	if (length >= (int)pathLength)
8550 		return B_BUFFER_OVERFLOW;
8551 
8552 	return B_OK;
8553 }
8554 
8555 
8556 status_t
8557 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
8558 {
8559 	if (userPath == NULL || buffer == NULL)
8560 		return B_BAD_VALUE;
8561 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
8562 		return B_BAD_ADDRESS;
8563 
8564 	// copy path from userland
8565 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8566 	if (pathBuffer.InitCheck() != B_OK)
8567 		return B_NO_MEMORY;
8568 	char* path = pathBuffer.LockBuffer();
8569 
8570 	if (user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8571 		return B_BAD_ADDRESS;
8572 
8573 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
8574 		false);
8575 	if (error != B_OK)
8576 		return error;
8577 
8578 	// copy back to userland
8579 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
8580 	if (len < 0)
8581 		return len;
8582 	if (len >= B_PATH_NAME_LENGTH)
8583 		return B_BUFFER_OVERFLOW;
8584 
8585 	return B_OK;
8586 }
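
// Sketch with hypothetical paths: normalization folds "." and ".."
// components, and with traverseLink == true also resolves a symlink leaf:
/*
	char normalized[B_PATH_NAME_LENGTH];
	if (_user_normalize_path("/boot/home/../home/.", true, normalized)
			== B_OK) {
		// normalized now contains "/boot/home"
	}
*/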
8587 
8588 
8589 int
8590 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
8591 	int openMode, int perms)
8592 {
8593 	char name[B_FILE_NAME_LENGTH];
8594 
8595 	if (userName == NULL || device < 0 || inode < 0)
8596 		return B_BAD_VALUE;
8597 	if (!IS_USER_ADDRESS(userName)
8598 		|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8599 		return B_BAD_ADDRESS;
8600 
8601 	if ((openMode & O_CREAT) != 0) {
8602 		return file_create_entry_ref(device, inode, name, openMode, perms,
8603 			false);
8604 	}
8605 
8606 	return file_open_entry_ref(device, inode, name, openMode, false);
8607 }
8608 
8609 
8610 int
8611 _user_open(int fd, const char* userPath, int openMode, int perms)
8612 {
8613 	KPath path(B_PATH_NAME_LENGTH + 1);
8614 	if (path.InitCheck() != B_OK)
8615 		return B_NO_MEMORY;
8616 
8617 	char* buffer = path.LockBuffer();
8618 
8619 	if (!IS_USER_ADDRESS(userPath)
8620 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8621 		return B_BAD_ADDRESS;
8622 
8623 	if ((openMode & O_CREAT) != 0)
8624 		return file_create(fd, buffer, openMode, perms, false);
8625 
8626 	return file_open(fd, buffer, openMode, false);
8627 }
8628 
8629 
8630 int
8631 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
8632 {
8633 	if (userName != NULL) {
8634 		char name[B_FILE_NAME_LENGTH];
8635 
8636 		if (!IS_USER_ADDRESS(userName)
8637 			|| user_strlcpy(name, userName, sizeof(name)) < B_OK)
8638 			return B_BAD_ADDRESS;
8639 
8640 		return dir_open_entry_ref(device, inode, name, false);
8641 	}
8642 	return dir_open_entry_ref(device, inode, NULL, false);
8643 }
8644 
8645 
8646 int
8647 _user_open_dir(int fd, const char* userPath)
8648 {
8649 	if (userPath == NULL)
8650 		return dir_open(fd, NULL, false);
8651 
8652 	KPath path(B_PATH_NAME_LENGTH + 1);
8653 	if (path.InitCheck() != B_OK)
8654 		return B_NO_MEMORY;
8655 
8656 	char* buffer = path.LockBuffer();
8657 
8658 	if (!IS_USER_ADDRESS(userPath)
8659 		|| user_strlcpy(buffer, userPath, B_PATH_NAME_LENGTH) < B_OK)
8660 		return B_BAD_ADDRESS;
8661 
8662 	return dir_open(fd, buffer, false);
8663 }
8664 
8665 
8666 /*!	\brief Opens a directory's parent directory and returns the entry name
8667 		   of the former.
8668 
8669 	Aside from the fact that it returns the directory's entry name, this
8670 	method is equivalent to \code _user_open_dir(fd, "..") \endcode. It
8671 	really is equivalent if \a userName is \c NULL.
8672 
8673 	If a name buffer is supplied and the name does not fit the buffer, the
8674 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
8675 
8676 	\param fd A FD referring to a directory.
8677 	\param userName Buffer the directory's entry name shall be written into.
8678 		   May be \c NULL.
8679 	\param nameLength Size of the name buffer.
8680 	\return The file descriptor of the opened parent directory, if everything
8681 			went fine, an error code otherwise.
8682 */
8683 int
8684 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
8685 {
8686 	bool kernel = false;
8687 
8688 	if (userName && !IS_USER_ADDRESS(userName))
8689 		return B_BAD_ADDRESS;
8690 
8691 	// open the parent dir
8692 	int parentFD = dir_open(fd, (char*)"..", kernel);
8693 	if (parentFD < 0)
8694 		return parentFD;
8695 	FDCloser fdCloser(parentFD, kernel);
8696 
8697 	if (userName) {
8698 		// get the vnodes
8699 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
8700 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
8701 		VNodePutter parentVNodePutter(parentVNode);
8702 		VNodePutter dirVNodePutter(dirVNode);
8703 		if (!parentVNode || !dirVNode)
8704 			return B_FILE_ERROR;
8705 
8706 		// get the vnode name
8707 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
8708 		struct dirent* buffer = (struct dirent*)_buffer;
8709 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
8710 			sizeof(_buffer), get_current_io_context(false));
8711 		if (status != B_OK)
8712 			return status;
8713 
8714 		// copy the name to the userland buffer
8715 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
8716 		if (len < 0)
8717 			return len;
8718 		if (len >= (int)nameLength)
8719 			return B_BUFFER_OVERFLOW;
8720 	}
8721 
8722 	return fdCloser.Detach();
8723 }
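
// Userland usage sketch: open the parent of a (hypothetical) dirFD and
// retrieve the child's entry name in one go. A name that does not fit the
// buffer makes the call fail with B_BUFFER_OVERFLOW, as documented above:
/*
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
	if (parentFD >= 0) {
		// parentFD refers to "..", name holds the child's entry name
	}
*/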
8724 
8725 
8726 status_t
8727 _user_fcntl(int fd, int op, uint32 argument)
8728 {
8729 	status_t status = common_fcntl(fd, op, argument, false);
8730 	if (op == F_SETLKW)
8731 		syscall_restart_handle_post(status);
8732 
8733 	return status;
8734 }
8735 
8736 
8737 status_t
8738 _user_fsync(int fd)
8739 {
8740 	return common_sync(fd, false);
8741 }
8742 
8743 
8744 status_t
8745 _user_flock(int fd, int operation)
8746 {
8747 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
8748 
8749 	// Check if the operation is valid
8750 	switch (operation & ~LOCK_NB) {
8751 		case LOCK_UN:
8752 		case LOCK_SH:
8753 		case LOCK_EX:
8754 			break;
8755 
8756 		default:
8757 			return B_BAD_VALUE;
8758 	}
8759 
8760 	struct file_descriptor* descriptor;
8761 	struct vnode* vnode;
8762 	descriptor = get_fd_and_vnode(fd, &vnode, false);
8763 	if (descriptor == NULL)
8764 		return B_FILE_ERROR;
8765 
8766 	if (descriptor->type != FDTYPE_FILE) {
8767 		put_fd(descriptor);
8768 		return B_BAD_VALUE;
8769 	}
8770 
8771 	struct flock flock;
8772 	flock.l_start = 0;
8773 	flock.l_len = OFF_MAX;
8774 	flock.l_whence = 0;
8775 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
8776 
8777 	status_t status;
8778 	if ((operation & LOCK_UN) != 0)
8779 		status = release_advisory_lock(vnode, &flock);
8780 	else {
8781 		status = acquire_advisory_lock(vnode,
8782 			thread_get_current_thread()->team->session_id, &flock,
8783 			(operation & LOCK_NB) == 0);
8784 	}
8785 
8786 	syscall_restart_handle_post(status);
8787 
8788 	put_fd(descriptor);
8789 	return status;
8790 }
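
// The mapping implemented above, seen from userland (sketch): flock()
// always covers the whole file, so LOCK_EX translates to an F_WRLCK over
// the range [0, OFF_MAX]:
/*
	if (flock(fd, LOCK_EX | LOCK_NB) != 0) {
		// a conflicting advisory lock is already held
	}
*/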
8791 
8792 
8793 status_t
8794 _user_lock_node(int fd)
8795 {
8796 	return common_lock_node(fd, false);
8797 }
8798 
8799 
8800 status_t
8801 _user_unlock_node(int fd)
8802 {
8803 	return common_unlock_node(fd, false);
8804 }
8805 
8806 
8807 status_t
8808 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
8809 	int perms)
8810 {
8811 	char name[B_FILE_NAME_LENGTH];
8812 	status_t status;
8813 
8814 	if (!IS_USER_ADDRESS(userName))
8815 		return B_BAD_ADDRESS;
8816 
8817 	status = user_strlcpy(name, userName, sizeof(name));
8818 	if (status < 0)
8819 		return status;
8820 
8821 	return dir_create_entry_ref(device, inode, name, perms, false);
8822 }
8823 
8824 
8825 status_t
8826 _user_create_dir(int fd, const char* userPath, int perms)
8827 {
8828 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8829 	if (pathBuffer.InitCheck() != B_OK)
8830 		return B_NO_MEMORY;
8831 
8832 	char* path = pathBuffer.LockBuffer();
8833 
8834 	if (!IS_USER_ADDRESS(userPath)
8835 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8836 		return B_BAD_ADDRESS;
8837 
8838 	return dir_create(fd, path, perms, false);
8839 }
8840 
8841 
8842 status_t
8843 _user_remove_dir(int fd, const char* userPath)
8844 {
8845 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8846 	if (pathBuffer.InitCheck() != B_OK)
8847 		return B_NO_MEMORY;
8848 
8849 	char* path = pathBuffer.LockBuffer();
8850 
8851 	if (userPath != NULL) {
8852 		if (!IS_USER_ADDRESS(userPath)
8853 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8854 			return B_BAD_ADDRESS;
8855 	}
8856 
8857 	return dir_remove(fd, userPath ? path : NULL, false);
8858 }
8859 
8860 
8861 status_t
8862 _user_read_link(int fd, const char* userPath, char* userBuffer,
8863 	size_t* userBufferSize)
8864 {
8865 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1), linkBuffer;
8866 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
8867 		return B_NO_MEMORY;
8868 
8869 	size_t bufferSize;
8870 
8871 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
8872 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
8873 		return B_BAD_ADDRESS;
8874 
8875 	char* path = pathBuffer.LockBuffer();
8876 	char* buffer = linkBuffer.LockBuffer();
8877 
8878 	if (userPath) {
8879 		if (!IS_USER_ADDRESS(userPath)
8880 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8881 			return B_BAD_ADDRESS;
8882 
8883 		if (bufferSize > B_PATH_NAME_LENGTH)
8884 			bufferSize = B_PATH_NAME_LENGTH;
8885 	}
8886 
8887 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
8888 		&bufferSize, false);
8889 
8890 	// we also update the bufferSize in case of errors
8891 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
8892 	if (user_memcpy(userBufferSize, &bufferSize, sizeof(size_t)) != B_OK)
8893 		return B_BAD_ADDRESS;
8894 
8895 	if (status != B_OK)
8896 		return status;
8897 
8898 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
8899 		return B_BAD_ADDRESS;
8900 
8901 	return B_OK;
8902 }
8903 
8904 
8905 status_t
8906 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
8907 	int mode)
8908 {
8909 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8910 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8911 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8912 		return B_NO_MEMORY;
8913 
8914 	char* path = pathBuffer.LockBuffer();
8915 	char* toPath = toPathBuffer.LockBuffer();
8916 
8917 	if (!IS_USER_ADDRESS(userPath)
8918 		|| !IS_USER_ADDRESS(userToPath)
8919 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8920 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8921 		return B_BAD_ADDRESS;
8922 
8923 	return common_create_symlink(fd, path, toPath, mode, false);
8924 }
8925 
8926 
8927 status_t
8928 _user_create_link(int pathFD, const char* userPath, int toFD,
8929 	const char* userToPath, bool traverseLeafLink)
8930 {
8931 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8932 	KPath toPathBuffer(B_PATH_NAME_LENGTH + 1);
8933 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8934 		return B_NO_MEMORY;
8935 
8936 	char* path = pathBuffer.LockBuffer();
8937 	char* toPath = toPathBuffer.LockBuffer();
8938 
8939 	if (!IS_USER_ADDRESS(userPath)
8940 		|| !IS_USER_ADDRESS(userToPath)
8941 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK
8942 		|| user_strlcpy(toPath, userToPath, B_PATH_NAME_LENGTH) < B_OK)
8943 		return B_BAD_ADDRESS;
8944 
8945 	status_t status = check_path(toPath);
8946 	if (status != B_OK)
8947 		return status;
8948 
8949 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
8950 		false);
8951 }
8952 
8953 
8954 status_t
8955 _user_unlink(int fd, const char* userPath)
8956 {
8957 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8958 	if (pathBuffer.InitCheck() != B_OK)
8959 		return B_NO_MEMORY;
8960 
8961 	char* path = pathBuffer.LockBuffer();
8962 
8963 	if (!IS_USER_ADDRESS(userPath)
8964 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
8965 		return B_BAD_ADDRESS;
8966 
8967 	return common_unlink(fd, path, false);
8968 }
8969 
8970 
8971 status_t
8972 _user_rename(int oldFD, const char* userOldPath, int newFD,
8973 	const char* userNewPath)
8974 {
8975 	KPath oldPathBuffer(B_PATH_NAME_LENGTH + 1);
8976 	KPath newPathBuffer(B_PATH_NAME_LENGTH + 1);
8977 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8978 		return B_NO_MEMORY;
8979 
8980 	char* oldPath = oldPathBuffer.LockBuffer();
8981 	char* newPath = newPathBuffer.LockBuffer();
8982 
8983 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath)
8984 		|| user_strlcpy(oldPath, userOldPath, B_PATH_NAME_LENGTH) < B_OK
8985 		|| user_strlcpy(newPath, userNewPath, B_PATH_NAME_LENGTH) < B_OK)
8986 		return B_BAD_ADDRESS;
8987 
8988 	return common_rename(oldFD, oldPath, newFD, newPath, false);
8989 }
8990 
8991 
8992 status_t
8993 _user_create_fifo(int fd, const char* userPath, mode_t perms)
8994 {
8995 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
8996 	if (pathBuffer.InitCheck() != B_OK)
8997 		return B_NO_MEMORY;
8998 
8999 	char* path = pathBuffer.LockBuffer();
9000 
9001 	if (!IS_USER_ADDRESS(userPath)
9002 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK) {
9003 		return B_BAD_ADDRESS;
9004 	}
9005 
9006 	// split into directory vnode and filename path
9007 	char filename[B_FILE_NAME_LENGTH];
9008 	struct vnode* dir;
9009 	status_t status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9010 	if (status != B_OK)
9011 		return status;
9012 
9013 	VNodePutter _(dir);
9014 
9015 	// the underlying FS needs to support creating FIFOs
9016 	if (!HAS_FS_CALL(dir, create_special_node))
9017 		return B_UNSUPPORTED;
9018 
9019 	// create the entry	-- the FIFO sub node is set up automatically
9020 	fs_vnode superVnode;
9021 	ino_t nodeID;
9022 	status = FS_CALL(dir, create_special_node, filename, NULL,
9023 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9024 
9025 	// create_special_node() acquired a reference for us that we don't need.
9026 	if (status == B_OK)
9027 		put_vnode(dir->mount->volume, nodeID);
9028 
9029 	return status;
9030 }
9031 
9032 
9033 status_t
9034 _user_create_pipe(int* userFDs)
9035 {
9036 	// rootfs should support creating FIFOs, but let's be sure
9037 	if (!HAS_FS_CALL(sRoot, create_special_node))
9038 		return B_UNSUPPORTED;
9039 
9040 	// create the node	-- the FIFO sub node is set up automatically
9041 	fs_vnode superVnode;
9042 	ino_t nodeID;
9043 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9044 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9045 	if (status != B_OK)
9046 		return status;
9047 
9048 	// We've got one reference to the node and need another one.
9049 	struct vnode* vnode;
9050 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9051 	if (status != B_OK) {
9052 		// that should not happen
9053 		dprintf("_user_create_pipe(): Failed to lookup vnode (%ld, %lld)\n",
9054 			sRoot->mount->id, sRoot->id);
9055 		return status;
9056 	}
9057 
9058 	// Everything looks good so far. Open two FDs, one for reading and one
9059 	// for writing.
9060 	int fds[2];
9061 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9062 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9063 
9064 	FDCloser closer0(fds[0], false);
9065 	FDCloser closer1(fds[1], false);
9066 
9067 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9068 
9069 	// copy FDs to userland
9070 	if (status == B_OK) {
9071 		if (!IS_USER_ADDRESS(userFDs)
9072 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9073 			status = B_BAD_ADDRESS;
9074 		}
9075 	}
9076 
9077 	// keep FDs, if everything went fine
9078 	if (status == B_OK) {
9079 		closer0.Detach();
9080 		closer1.Detach();
9081 	}
9082 
9083 	return status;
9084 }
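
// Userland view of the syscall above (sketch): pipe() returns fds[0] open
// for reading and fds[1] open for writing, both referring to the same
// freshly created FIFO node on the root file system:
/*
	int fds[2];
	if (pipe(fds) == 0) {
		write(fds[1], "x", 1);
		char c;
		read(fds[0], &c, 1);
	}
*/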
9085 
9086 
9087 status_t
9088 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9089 {
9090 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9091 	if (pathBuffer.InitCheck() != B_OK)
9092 		return B_NO_MEMORY;
9093 
9094 	char* path = pathBuffer.LockBuffer();
9095 
9096 	if (!IS_USER_ADDRESS(userPath)
9097 		|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9098 		return B_BAD_ADDRESS;
9099 
9100 	return common_access(fd, path, mode, effectiveUserGroup, false);
9101 }
9102 
9103 
9104 status_t
9105 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9106 	struct stat* userStat, size_t statSize)
9107 {
9108 	struct stat stat;
9109 	status_t status;
9110 
9111 	if (statSize > sizeof(struct stat))
9112 		return B_BAD_VALUE;
9113 
9114 	if (!IS_USER_ADDRESS(userStat))
9115 		return B_BAD_ADDRESS;
9116 
9117 	if (userPath) {
9118 		// path given: get the stat of the node referred to by (fd, path)
9119 		if (!IS_USER_ADDRESS(userPath))
9120 			return B_BAD_ADDRESS;
9121 
9122 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9123 		if (pathBuffer.InitCheck() != B_OK)
9124 			return B_NO_MEMORY;
9125 
9126 		char* path = pathBuffer.LockBuffer();
9127 
9128 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9129 		if (length < B_OK)
9130 			return length;
9131 		if (length >= B_PATH_NAME_LENGTH)
9132 			return B_NAME_TOO_LONG;
9133 
9134 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9135 	} else {
9136 		// no path given: get the FD and use the FD operation
9137 		struct file_descriptor* descriptor
9138 			= get_fd(get_current_io_context(false), fd);
9139 		if (descriptor == NULL)
9140 			return B_FILE_ERROR;
9141 
9142 		if (descriptor->ops->fd_read_stat)
9143 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9144 		else
9145 			status = B_NOT_SUPPORTED;
9146 
9147 		put_fd(descriptor);
9148 	}
9149 
9150 	if (status != B_OK)
9151 		return status;
9152 
9153 	return user_memcpy(userStat, &stat, statSize);
9154 }
9155 
9156 
9157 status_t
9158 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9159 	const struct stat* userStat, size_t statSize, int statMask)
9160 {
9161 	if (statSize > sizeof(struct stat))
9162 		return B_BAD_VALUE;
9163 
9164 	struct stat stat;
9165 
9166 	if (!IS_USER_ADDRESS(userStat)
9167 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9168 		return B_BAD_ADDRESS;
9169 
9170 	// clear additional stat fields
9171 	if (statSize < sizeof(struct stat))
9172 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9173 
9174 	status_t status;
9175 
9176 	if (userPath) {
9177 		// path given: write the stat of the node referred to by (fd, path)
9178 		if (!IS_USER_ADDRESS(userPath))
9179 			return B_BAD_ADDRESS;
9180 
9181 		KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9182 		if (pathBuffer.InitCheck() != B_OK)
9183 			return B_NO_MEMORY;
9184 
9185 		char* path = pathBuffer.LockBuffer();
9186 
9187 		ssize_t length = user_strlcpy(path, userPath, B_PATH_NAME_LENGTH);
9188 		if (length < B_OK)
9189 			return length;
9190 		if (length >= B_PATH_NAME_LENGTH)
9191 			return B_NAME_TOO_LONG;
9192 
9193 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9194 			statMask, false);
9195 	} else {
9196 		// no path given: get the FD and use the FD operation
9197 		struct file_descriptor* descriptor
9198 			= get_fd(get_current_io_context(false), fd);
9199 		if (descriptor == NULL)
9200 			return B_FILE_ERROR;
9201 
9202 		if (descriptor->ops->fd_write_stat) {
9203 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9204 				statMask);
9205 		} else
9206 			status = B_NOT_SUPPORTED;
9207 
9208 		put_fd(descriptor);
9209 	}
9210 
9211 	return status;
9212 }
9213 
9214 
9215 int
9216 _user_open_attr_dir(int fd, const char* userPath)
9217 {
9218 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9219 	if (pathBuffer.InitCheck() != B_OK)
9220 		return B_NO_MEMORY;
9221 
9222 	char* path = pathBuffer.LockBuffer();
9223 
9224 	if (userPath != NULL) {
9225 		if (!IS_USER_ADDRESS(userPath)
9226 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9227 			return B_BAD_ADDRESS;
9228 	}
9229 
9230 	return attr_dir_open(fd, userPath ? path : NULL, false);
9231 }
9232 
9233 
9234 ssize_t
9235 _user_read_attr(int fd, const char* attribute, off_t pos, void* userBuffer,
9236 	size_t readBytes)
9237 {
9238 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9239 	if (attr < 0)
9240 		return attr;
9241 
9242 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9243 	_user_close(attr);
9244 
9245 	return bytes;
9246 }
9247 
9248 
9249 ssize_t
9250 _user_write_attr(int fd, const char* attribute, uint32 type, off_t pos,
9251 	const void* buffer, size_t writeBytes)
9252 {
9253 	// Try to support the BeOS-typical truncation semantics as well as the
9254 	// position argument
9255 	int attr = attr_create(fd, NULL, attribute, type,
9256 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9257 	if (attr < 0)
9258 		return attr;
9259 
9260 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9261 	_user_close(attr);
9262 
9263 	return bytes;
9264 }
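
// Sketch of the truncation semantics above, with a hypothetical attribute:
// a write at position 0 replaces the attribute contents (O_TRUNC), while a
// write at a non-zero position keeps the existing data:
/*
	fs_write_attr(fd, "some:attribute", B_STRING_TYPE, 0, "value", 6);
		// pos == 0: attribute is truncated first
	fs_write_attr(fd, "some:attribute", B_STRING_TYPE, 6, "more", 5);
		// pos != 0: existing contents are preserved
*/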
9265 
9266 
9267 status_t
9268 _user_stat_attr(int fd, const char* attribute, struct attr_info* userAttrInfo)
9269 {
9270 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9271 	if (attr < 0)
9272 		return attr;
9273 
9274 	struct file_descriptor* descriptor
9275 		= get_fd(get_current_io_context(false), attr);
9276 	if (descriptor == NULL) {
9277 		_user_close(attr);
9278 		return B_FILE_ERROR;
9279 	}
9280 
9281 	struct stat stat;
9282 	status_t status;
9283 	if (descriptor->ops->fd_read_stat)
9284 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9285 	else
9286 		status = B_NOT_SUPPORTED;
9287 
9288 	put_fd(descriptor);
9289 	_user_close(attr);
9290 
9291 	if (status == B_OK) {
9292 		attr_info info;
9293 		info.type = stat.st_type;
9294 		info.size = stat.st_size;
9295 
9296 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9297 			return B_BAD_ADDRESS;
9298 	}
9299 
9300 	return status;
9301 }
9302 
9303 
9304 int
9305 _user_open_attr(int fd, const char* userPath, const char* userName,
9306 	uint32 type, int openMode)
9307 {
9308 	char name[B_FILE_NAME_LENGTH];
9309 
9310 	if (!IS_USER_ADDRESS(userName)
9311 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9312 		return B_BAD_ADDRESS;
9313 
9314 	KPath pathBuffer(B_PATH_NAME_LENGTH + 1);
9315 	if (pathBuffer.InitCheck() != B_OK)
9316 		return B_NO_MEMORY;
9317 
9318 	char* path = pathBuffer.LockBuffer();
9319 
9320 	if (userPath != NULL) {
9321 		if (!IS_USER_ADDRESS(userPath)
9322 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9323 			return B_BAD_ADDRESS;
9324 	}
9325 
9326 	if ((openMode & O_CREAT) != 0) {
9327 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9328 			false);
9329 	}
9330 
9331 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9332 }
9333 
9334 
9335 status_t
9336 _user_remove_attr(int fd, const char* userName)
9337 {
9338 	char name[B_FILE_NAME_LENGTH];
9339 
9340 	if (!IS_USER_ADDRESS(userName)
9341 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9342 		return B_BAD_ADDRESS;
9343 
9344 	return attr_remove(fd, name, false);
9345 }
9346 
9347 
9348 status_t
9349 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9350 	const char* userToName)
9351 {
9352 	if (!IS_USER_ADDRESS(userFromName)
9353 		|| !IS_USER_ADDRESS(userToName))
9354 		return B_BAD_ADDRESS;
9355 
9356 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9357 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9358 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9359 		return B_NO_MEMORY;
9360 
9361 	char* fromName = fromNameBuffer.LockBuffer();
9362 	char* toName = toNameBuffer.LockBuffer();
9363 
9364 	if (user_strlcpy(fromName, userFromName, B_FILE_NAME_LENGTH) < B_OK
9365 		|| user_strlcpy(toName, userToName, B_FILE_NAME_LENGTH) < B_OK)
9366 		return B_BAD_ADDRESS;
9367 
9368 	return attr_rename(fromFile, fromName, toFile, toName, false);
9369 }
9370 
9371 
9372 int
9373 _user_open_index_dir(dev_t device)
9374 {
9375 	return index_dir_open(device, false);
9376 }
9377 
9378 
9379 status_t
9380 _user_create_index(dev_t device, const char* userName, uint32 type,
9381 	uint32 flags)
9382 {
9383 	char name[B_FILE_NAME_LENGTH];
9384 
9385 	if (!IS_USER_ADDRESS(userName)
9386 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9387 		return B_BAD_ADDRESS;
9388 
9389 	return index_create(device, name, type, flags, false);
9390 }
9391 
9392 
9393 status_t
9394 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9395 {
9396 	char name[B_FILE_NAME_LENGTH];
9397 	struct stat stat;
9398 	status_t status;
9399 
9400 	if (!IS_USER_ADDRESS(userName)
9401 		|| !IS_USER_ADDRESS(userStat)
9402 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9403 		return B_BAD_ADDRESS;
9404 
9405 	status = index_name_read_stat(device, name, &stat, false);
9406 	if (status == B_OK) {
9407 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9408 			return B_BAD_ADDRESS;
9409 	}
9410 
9411 	return status;
9412 }
9413 
9414 
9415 status_t
9416 _user_remove_index(dev_t device, const char* userName)
9417 {
9418 	char name[B_FILE_NAME_LENGTH];
9419 
9420 	if (!IS_USER_ADDRESS(userName)
9421 		|| user_strlcpy(name, userName, B_FILE_NAME_LENGTH) < B_OK)
9422 		return B_BAD_ADDRESS;
9423 
9424 	return index_remove(device, name, false);
9425 }
9426 
9427 
9428 status_t
9429 _user_getcwd(char* userBuffer, size_t size)
9430 {
9431 	if (size == 0)
9432 		return B_BAD_VALUE;
9433 	if (!IS_USER_ADDRESS(userBuffer))
9434 		return B_BAD_ADDRESS;
9435 
9436 	if (size > kMaxPathLength)
9437 		size = kMaxPathLength;
9438 
9439 	KPath pathBuffer(size);
9440 	if (pathBuffer.InitCheck() != B_OK)
9441 		return B_NO_MEMORY;
9442 
9443 	TRACE(("user_getcwd: buf %p, %ld\n", userBuffer, size));
9444 
9445 	char* path = pathBuffer.LockBuffer();
9446 
9447 	status_t status = get_cwd(path, size, false);
9448 	if (status != B_OK)
9449 		return status;
9450 
9451 	// Copy back the result
9452 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9453 		return B_BAD_ADDRESS;
9454 
9455 	return status;
9456 }
9457 
9458 
9459 status_t
9460 _user_setcwd(int fd, const char* userPath)
9461 {
9462 	TRACE(("user_setcwd: path = %p\n", userPath));
9463 
9464 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9465 	if (pathBuffer.InitCheck() != B_OK)
9466 		return B_NO_MEMORY;
9467 
9468 	char* path = pathBuffer.LockBuffer();
9469 
9470 	if (userPath != NULL) {
9471 		if (!IS_USER_ADDRESS(userPath)
9472 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9473 			return B_BAD_ADDRESS;
9474 	}
9475 
9476 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
9477 }
9478 
9479 
9480 status_t
9481 _user_change_root(const char* userPath)
9482 {
9483 	// only root is allowed to chroot()
9484 	if (geteuid() != 0)
9485 		return B_NOT_ALLOWED;
9486 
9487 	// alloc path buffer
9488 	KPath pathBuffer(B_PATH_NAME_LENGTH);
9489 	if (pathBuffer.InitCheck() != B_OK)
9490 		return B_NO_MEMORY;
9491 
9492 	// copy userland path to kernel
9493 	char* path = pathBuffer.LockBuffer();
9494 	if (userPath != NULL) {
9495 		if (!IS_USER_ADDRESS(userPath)
9496 			|| user_strlcpy(path, userPath, B_PATH_NAME_LENGTH) < B_OK)
9497 			return B_BAD_ADDRESS;
9498 	}
9499 
9500 	// get the vnode
9501 	struct vnode* vnode;
9502 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
9503 	if (status != B_OK)
9504 		return status;
9505 
9506 	// set the new root
9507 	struct io_context* context = get_current_io_context(false);
9508 	mutex_lock(&sIOContextRootLock);
9509 	struct vnode* oldRoot = context->root;
9510 	context->root = vnode;
9511 	mutex_unlock(&sIOContextRootLock);
9512 
9513 	put_vnode(oldRoot);
9514 
9515 	return B_OK;
9516 }
9517 
9518 
9519 int
9520 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
9521 	uint32 flags, port_id port, int32 token)
9522 {
9523 	char* query;
9524 
9525 	if (device < 0 || userQuery == NULL || queryLength == 0)
9526 		return B_BAD_VALUE;
9527 
9528 	// this is a safety restriction
9529 	if (queryLength >= 65536)
9530 		return B_NAME_TOO_LONG;
9531 
9532 	query = (char*)malloc(queryLength + 1);
9533 	if (query == NULL)
9534 		return B_NO_MEMORY;
9535 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK) {
9536 		free(query);
9537 		return B_BAD_ADDRESS;
9538 	}
9539 
9540 	int fd = query_open(device, query, flags, port, token, false);
9541 
9542 	free(query);
9543 	return fd;
9544 }
9545 
9546 
9547 #include "vfs_request_io.cpp"
9548