xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 02354704729d38c3b078c696adc1bbbd33cbcf72)
1 /*
2  * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
3  * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
4  * Distributed under the terms of the MIT License.
5  *
6  * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
7  * Distributed under the terms of the NewOS License.
8  */
9 
10 
11 /*! Virtual File System and File System Interface Layer */
12 
13 
14 #include <ctype.h>
15 #include <fcntl.h>
16 #include <limits.h>
17 #include <stddef.h>
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/file.h>
21 #include <sys/ioctl.h>
22 #include <sys/resource.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 
26 #include <fs_attr.h>
27 #include <fs_info.h>
28 #include <fs_interface.h>
29 #include <fs_volume.h>
30 #include <NodeMonitor.h>
31 #include <OS.h>
32 #include <StorageDefs.h>
33 
34 #include <AutoDeleter.h>
35 #include <block_cache.h>
36 #include <boot/kernel_args.h>
37 #include <debug_heap.h>
38 #include <disk_device_manager/KDiskDevice.h>
39 #include <disk_device_manager/KDiskDeviceManager.h>
40 #include <disk_device_manager/KDiskDeviceUtils.h>
41 #include <disk_device_manager/KDiskSystem.h>
42 #include <fd.h>
43 #include <file_cache.h>
44 #include <fs/node_monitor.h>
45 #include <KPath.h>
46 #include <lock.h>
47 #include <low_resource_manager.h>
48 #include <slab/Slab.h>
49 #include <StackOrHeapArray.h>
50 #include <syscalls.h>
51 #include <syscall_restart.h>
52 #include <tracing.h>
53 #include <util/atomic.h>
54 #include <util/AutoLock.h>
55 #include <util/ThreadAutoLock.h>
56 #include <util/DoublyLinkedList.h>
57 #include <vfs.h>
58 #include <vm/vm.h>
59 #include <vm/VMCache.h>
60 #include <wait_for_objects.h>
61 
62 #include "EntryCache.h"
63 #include "fifo.h"
64 #include "IORequest.h"
65 #include "unused_vnodes.h"
66 #include "vfs_tracing.h"
67 #include "Vnode.h"
68 #include "../cache/vnode_store.h"
69 
70 
71 //#define TRACE_VFS
72 #ifdef TRACE_VFS
73 #	define TRACE(x) dprintf x
74 #	define FUNCTION(x) dprintf x
75 #else
76 #	define TRACE(x) ;
77 #	define FUNCTION(x) ;
78 #endif
79 
80 #define ADD_DEBUGGER_COMMANDS
81 
82 
83 #define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
84 #define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)
85 
86 #if KDEBUG
87 #	define FS_CALL(vnode, op, params...) \
88 		( HAS_FS_CALL(vnode, op) ? \
89 			vnode->ops->op(vnode->mount->volume, vnode, params) \
90 			: (panic("FS_CALL op " #op " is NULL"), 0))
91 #	define FS_CALL_NO_PARAMS(vnode, op) \
92 		( HAS_FS_CALL(vnode, op) ? \
93 			vnode->ops->op(vnode->mount->volume, vnode) \
94 			: (panic("FS_CALL_NO_PARAMS op " #op " is NULL"), 0))
95 #	define FS_MOUNT_CALL(mount, op, params...) \
96 		( HAS_FS_MOUNT_CALL(mount, op) ? \
97 			mount->volume->ops->op(mount->volume, params) \
98 			: (panic("FS_MOUNT_CALL op " #op " is NULL"), 0))
99 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
100 		( HAS_FS_MOUNT_CALL(mount, op) ? \
101 			mount->volume->ops->op(mount->volume) \
102 			: (panic("FS_MOUNT_CALL_NO_PARAMS op " #op " is NULL"), 0))
103 #else
104 #	define FS_CALL(vnode, op, params...) \
105 			vnode->ops->op(vnode->mount->volume, vnode, params)
106 #	define FS_CALL_NO_PARAMS(vnode, op) \
107 			vnode->ops->op(vnode->mount->volume, vnode)
108 #	define FS_MOUNT_CALL(mount, op, params...) \
109 			mount->volume->ops->op(mount->volume, params)
110 #	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
111 			mount->volume->ops->op(mount->volume)
112 #endif
113 
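// Illustrative sketch (not part of the original source): callers guard an
// optional FS hook with HAS_FS_CALL() before dispatching through FS_CALL(),
// as normalize_flock() below does for the read_stat hook:
//
//	static status_t
//	example_read_stat(struct vnode* vnode, struct stat* stat)
//	{
//		if (!HAS_FS_CALL(vnode, read_stat))
//			return B_UNSUPPORTED;
//		return FS_CALL(vnode, read_stat, stat);
//	}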
114 
115 const static size_t kMaxPathLength = 65536;
116 	// The absolute maximum path length (for getcwd()) - this does not
117 	// depend on PATH_MAX.
118 
119 
120 typedef DoublyLinkedList<vnode> VnodeList;
121 
122 /*!	\brief Structure to manage a mounted file system
123 
124 	Note: The root_vnode and root_vnode->covers fields (what others?) are
125 	initialized in fs_mount() and not changed afterwards. That is, as soon
126 	as the mount is mounted and it is ensured that it won't be unmounted
127 	(e.g. by holding a reference to a vnode of that mount), (read) access
128 	to those fields is always safe, even without additional locking. Moreover,
129 	while mounted the mount holds a reference to the root_vnode->covers vnode,
130 	thus making the access path vnode->mount->root_vnode->covers->mount->...
131 	safe as long as a reference to vnode is held (note that for the root mount
132 	root_vnode->covers is NULL, though).
133 */
134 struct fs_mount {
135 	fs_mount()
136 		:
137 		volume(NULL),
138 		device_name(NULL)
139 	{
140 		mutex_init(&lock, "mount lock");
141 	}
142 
143 	~fs_mount()
144 	{
145 		mutex_destroy(&lock);
146 		free(device_name);
147 
148 		while (volume) {
149 			fs_volume* superVolume = volume->super_volume;
150 
151 			if (volume->file_system != NULL)
152 				put_module(volume->file_system->info.name);
153 
154 			free(volume->file_system_name);
155 			free(volume);
156 			volume = superVolume;
157 		}
158 	}
159 
160 	struct fs_mount* next;
161 	dev_t			id;
162 	fs_volume*		volume;
163 	char*			device_name;
164 	mutex			lock;	// guards the vnodes list
165 	struct vnode*	root_vnode;
166 	struct vnode*	covers_vnode;	// immutable
167 	KPartition*		partition;
168 	VnodeList		vnodes;
169 	EntryCache		entry_cache;
170 	bool			unmounting;
171 	bool			owns_file_device;
172 };
173 
174 
175 namespace {
176 
177 struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
178 	list_link		link;
179 	void*			bound_to;
180 	team_id			team;
181 	pid_t			session;
182 	off_t			start;
183 	off_t			end;
184 	bool			shared;
185 };
186 
187 typedef DoublyLinkedList<advisory_lock> LockList;
188 
189 } // namespace
190 
191 
192 struct advisory_locking {
193 	sem_id			lock;
194 	sem_id			wait_sem;
195 	LockList		locks;
196 
197 	advisory_locking()
198 		:
199 		lock(-1),
200 		wait_sem(-1)
201 	{
202 	}
203 
204 	~advisory_locking()
205 	{
206 		if (lock >= 0)
207 			delete_sem(lock);
208 		if (wait_sem >= 0)
209 			delete_sem(wait_sem);
210 	}
211 };
212 
213 /*!	\brief Guards sMountsTable.
214 
215 	The holder is allowed read/write access to sMountsTable.
216 	Manipulation of the fs_mount structures themselves
217 	(and their destruction) requires different locks though.
218 */
219 static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");
220 
221 /*!	\brief Guards mount/unmount operations.
222 
223 	fs_mount() and fs_unmount() hold the lock during their whole operation.
224 	That is, holding the lock ensures that no FS is mounted/unmounted. In
225 	particular this means that
226 	- sMountsTable will not be modified,
227 	- the fields of the fs_mount structures in sMountsTable that are immutable
228 	  after initialization will not be modified.
229 
230 	The thread trying to lock the lock must not hold sVnodeLock or
231 	sMountLock.
232 */
233 static recursive_lock sMountOpLock;
234 
235 /*!	\brief Guards sVnodeTable.
236 
237 	The holder is allowed read/write access to sVnodeTable and to
238 	any unbusy vnode in that table, save for the immutable fields (device, id,
239 	private_node, mount), to which only read-only access is allowed.
240 	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
241 	well as the busy, removed, unused flags, and the vnode's type can also be
242 	write accessed when holding a read lock to sVnodeLock *and* having the vnode
243 	locked. Write access to covered_by and covers requires write-locking
244 	sVnodeLock.
245 
246 	The thread trying to acquire the lock must not hold sMountLock.
247 	You must not hold this lock when calling create_sem(), as this might call
248 	vfs_free_unused_vnodes() and thus cause a deadlock.
249 */
250 static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");
251 
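// Illustrative sketch (an assumption, not in the original source): the
// protocol documented above for the mutable vnode fields -- read-lock
// sVnodeLock and additionally lock the vnode itself:
//
//	rw_lock_read_lock(&sVnodeLock);
//	vnode->Lock();
//	vnode->SetRemoved(true);	// mutable flag: needs both locks held
//	vnode->Unlock();
//	rw_lock_read_unlock(&sVnodeLock);
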
252 /*!	\brief Guards io_context::root.
253 
254 	Must be held when setting or getting the io_context::root field.
255 	The only operation allowed while holding this lock besides getting or
256 	setting the field is inc_vnode_ref_count() on io_context::root.
257 */
258 static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
259 
260 
261 namespace {
262 
263 struct vnode_hash_key {
264 	dev_t	device;
265 	ino_t	vnode;
266 };
267 
268 struct VnodeHash {
269 	typedef vnode_hash_key	KeyType;
270 	typedef	struct vnode	ValueType;
271 
272 #define VHASH(mountid, vnodeid) \
273 	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))
274 
275 	size_t HashKey(KeyType key) const
276 	{
277 		return VHASH(key.device, key.vnode);
278 	}
279 
280 	size_t Hash(ValueType* vnode) const
281 	{
282 		return VHASH(vnode->device, vnode->id);
283 	}
284 
285 #undef VHASH
286 
287 	bool Compare(KeyType key, ValueType* vnode) const
288 	{
289 		return vnode->device == key.device && vnode->id == key.vnode;
290 	}
291 
292 	ValueType*& GetLink(ValueType* value) const
293 	{
294 		return value->next;
295 	}
296 };
297 
298 typedef BOpenHashTable<VnodeHash> VnodeTable;
299 
300 
301 struct MountHash {
302 	typedef dev_t			KeyType;
303 	typedef	struct fs_mount	ValueType;
304 
305 	size_t HashKey(KeyType key) const
306 	{
307 		return key;
308 	}
309 
310 	size_t Hash(ValueType* mount) const
311 	{
312 		return mount->id;
313 	}
314 
315 	bool Compare(KeyType key, ValueType* mount) const
316 	{
317 		return mount->id == key;
318 	}
319 
320 	ValueType*& GetLink(ValueType* value) const
321 	{
322 		return value->next;
323 	}
324 };
325 
326 typedef BOpenHashTable<MountHash> MountTable;
327 
328 } // namespace
329 
330 
331 object_cache* sPathNameCache;
332 object_cache* sFileDescriptorCache;
333 
334 #define VNODE_HASH_TABLE_SIZE 1024
335 static VnodeTable* sVnodeTable;
336 static struct vnode* sRoot;
337 
338 #define MOUNTS_HASH_TABLE_SIZE 16
339 static MountTable* sMountsTable;
340 static dev_t sNextMountID = 1;
341 
342 #define MAX_TEMP_IO_VECS 8
343 
344 // How long to wait for busy vnodes (2000 retries * 5000 µs delay = 10 s)
345 #define BUSY_VNODE_RETRIES 2000
346 #define BUSY_VNODE_DELAY 5000
347 
348 mode_t __gUmask = 022;
349 
350 /* function declarations */
351 
352 static void free_unused_vnodes();
353 
354 // file descriptor operation prototypes
355 static status_t file_read(struct file_descriptor* descriptor, off_t pos,
356 	void* buffer, size_t* _bytes);
357 static status_t file_write(struct file_descriptor* descriptor, off_t pos,
358 	const void* buffer, size_t* _bytes);
359 static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
360 	int seekType);
361 static void file_free_fd(struct file_descriptor* descriptor);
362 static status_t file_close(struct file_descriptor* descriptor);
363 static status_t file_select(struct file_descriptor* descriptor, uint8 event,
364 	struct selectsync* sync);
365 static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
366 	struct selectsync* sync);
367 static status_t dir_read(struct io_context* context,
368 	struct file_descriptor* descriptor, struct dirent* buffer,
369 	size_t bufferSize, uint32* _count);
370 static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
371 	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
372 static status_t dir_rewind(struct file_descriptor* descriptor);
373 static void dir_free_fd(struct file_descriptor* descriptor);
374 static status_t dir_close(struct file_descriptor* descriptor);
375 static status_t attr_dir_read(struct io_context* context,
376 	struct file_descriptor* descriptor, struct dirent* buffer,
377 	size_t bufferSize, uint32* _count);
378 static status_t attr_dir_rewind(struct file_descriptor* descriptor);
379 static void attr_dir_free_fd(struct file_descriptor* descriptor);
380 static status_t attr_dir_close(struct file_descriptor* descriptor);
381 static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
382 	void* buffer, size_t* _bytes);
383 static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
384 	const void* buffer, size_t* _bytes);
385 static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
386 	int seekType);
387 static void attr_free_fd(struct file_descriptor* descriptor);
388 static status_t attr_close(struct file_descriptor* descriptor);
389 static status_t attr_read_stat(struct file_descriptor* descriptor,
390 	struct stat* statData);
391 static status_t attr_write_stat(struct file_descriptor* descriptor,
392 	const struct stat* stat, int statMask);
393 static status_t index_dir_read(struct io_context* context,
394 	struct file_descriptor* descriptor, struct dirent* buffer,
395 	size_t bufferSize, uint32* _count);
396 static status_t index_dir_rewind(struct file_descriptor* descriptor);
397 static void index_dir_free_fd(struct file_descriptor* descriptor);
398 static status_t index_dir_close(struct file_descriptor* descriptor);
399 static status_t query_read(struct io_context* context,
400 	struct file_descriptor* descriptor, struct dirent* buffer,
401 	size_t bufferSize, uint32* _count);
402 static status_t query_rewind(struct file_descriptor* descriptor);
403 static void query_free_fd(struct file_descriptor* descriptor);
404 static status_t query_close(struct file_descriptor* descriptor);
405 
406 static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
407 	void* buffer, size_t length);
408 static status_t common_read_stat(struct file_descriptor* descriptor,
409 	struct stat* statData);
410 static status_t common_write_stat(struct file_descriptor* descriptor,
411 	const struct stat* statData, int statMask);
412 static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
413 	struct stat* stat, bool kernel);
414 
415 static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
416 	bool traverseLeafLink, int count, bool kernel,
417 	struct vnode** _vnode, ino_t* _parentID);
418 static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
419 	size_t bufferSize, bool kernel);
420 static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
421 	struct vnode** _vnode, ino_t* _parentID, bool kernel);
422 static void inc_vnode_ref_count(struct vnode* vnode);
423 static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
424 	bool reenter);
425 static inline void put_vnode(struct vnode* vnode);
426 static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
427 	bool kernel);
428 static int open_vnode(struct vnode* vnode, int openMode, bool kernel);
429 
430 
431 static struct fd_ops sFileOps = {
432 	file_read,
433 	file_write,
434 	file_seek,
435 	common_ioctl,
436 	NULL,		// set_flags
437 	file_select,
438 	file_deselect,
439 	NULL,		// read_dir()
440 	NULL,		// rewind_dir()
441 	common_read_stat,
442 	common_write_stat,
443 	file_close,
444 	file_free_fd
445 };
446 
447 static struct fd_ops sDirectoryOps = {
448 	NULL,		// read()
449 	NULL,		// write()
450 	NULL,		// seek()
451 	common_ioctl,
452 	NULL,		// set_flags
453 	NULL,		// select()
454 	NULL,		// deselect()
455 	dir_read,
456 	dir_rewind,
457 	common_read_stat,
458 	common_write_stat,
459 	dir_close,
460 	dir_free_fd
461 };
462 
463 static struct fd_ops sAttributeDirectoryOps = {
464 	NULL,		// read()
465 	NULL,		// write()
466 	NULL,		// seek()
467 	common_ioctl,
468 	NULL,		// set_flags
469 	NULL,		// select()
470 	NULL,		// deselect()
471 	attr_dir_read,
472 	attr_dir_rewind,
473 	common_read_stat,
474 	common_write_stat,
475 	attr_dir_close,
476 	attr_dir_free_fd
477 };
478 
479 static struct fd_ops sAttributeOps = {
480 	attr_read,
481 	attr_write,
482 	attr_seek,
483 	common_ioctl,
484 	NULL,		// set_flags
485 	NULL,		// select()
486 	NULL,		// deselect()
487 	NULL,		// read_dir()
488 	NULL,		// rewind_dir()
489 	attr_read_stat,
490 	attr_write_stat,
491 	attr_close,
492 	attr_free_fd
493 };
494 
495 static struct fd_ops sIndexDirectoryOps = {
496 	NULL,		// read()
497 	NULL,		// write()
498 	NULL,		// seek()
499 	NULL,		// ioctl()
500 	NULL,		// set_flags
501 	NULL,		// select()
502 	NULL,		// deselect()
503 	index_dir_read,
504 	index_dir_rewind,
505 	NULL,		// read_stat()
506 	NULL,		// write_stat()
507 	index_dir_close,
508 	index_dir_free_fd
509 };
510 
511 #if 0
512 static struct fd_ops sIndexOps = {
513 	NULL,		// read()
514 	NULL,		// write()
515 	NULL,		// seek()
516 	NULL,		// ioctl()
517 	NULL,		// set_flags
518 	NULL,		// select()
519 	NULL,		// deselect()
520 	NULL,		// dir_read()
521 	NULL,		// dir_rewind()
522 	index_read_stat,	// read_stat()
523 	NULL,		// write_stat()
524 	NULL,		// dir_close()
525 	NULL		// free_fd()
526 };
527 #endif
528 
529 static struct fd_ops sQueryOps = {
530 	NULL,		// read()
531 	NULL,		// write()
532 	NULL,		// seek()
533 	NULL,		// ioctl()
534 	NULL,		// set_flags
535 	NULL,		// select()
536 	NULL,		// deselect()
537 	query_read,
538 	query_rewind,
539 	NULL,		// read_stat()
540 	NULL,		// write_stat()
541 	query_close,
542 	query_free_fd
543 };
544 
545 
546 namespace {
547 
548 class VNodePutter {
549 public:
550 	VNodePutter(struct vnode* vnode = NULL) : fVNode(vnode) {}
551 
552 	~VNodePutter()
553 	{
554 		Put();
555 	}
556 
557 	void SetTo(struct vnode* vnode)
558 	{
559 		Put();
560 		fVNode = vnode;
561 	}
562 
563 	void Put()
564 	{
565 		if (fVNode) {
566 			put_vnode(fVNode);
567 			fVNode = NULL;
568 		}
569 	}
570 
571 	struct vnode* Detach()
572 	{
573 		struct vnode* vnode = fVNode;
574 		fVNode = NULL;
575 		return vnode;
576 	}
577 
578 private:
579 	struct vnode* fVNode;
580 };
581 
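// Hypothetical usage sketch (not from the original source; mountID and
// vnodeID are made-up variables): VNodePutter is the RAII companion to
// put_vnode() -- the reference is released on scope exit unless Detach()
// transfers ownership to the caller:
//
//	struct vnode* vnode;
//	if (get_vnode(mountID, vnodeID, &vnode, true, 0) != B_OK)
//		return B_ENTRY_NOT_FOUND;
//	VNodePutter vnodePutter(vnode);
//	// ... use vnode; put_vnode() runs automatically on every exit path ...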
582 
583 class FDCloser {
584 public:
585 	FDCloser() : fFD(-1), fKernel(true) {}
586 
587 	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}
588 
589 	~FDCloser()
590 	{
591 		Close();
592 	}
593 
594 	void SetTo(int fd, bool kernel)
595 	{
596 		Close();
597 		fFD = fd;
598 		fKernel = kernel;
599 	}
600 
601 	void Close()
602 	{
603 		if (fFD >= 0) {
604 			if (fKernel)
605 				_kern_close(fFD);
606 			else
607 				_user_close(fFD);
608 			fFD = -1;
609 		}
610 	}
611 
612 	int Detach()
613 	{
614 		int fd = fFD;
615 		fFD = -1;
616 		return fd;
617 	}
618 
619 private:
620 	int		fFD;
621 	bool	fKernel;
622 };
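
// Hypothetical usage sketch (not from the original source; do_more_setup()
// is made up): FDCloser keeps a freshly opened descriptor from leaking on
// error paths; Detach() hands it to the caller on success:
//
//	FDCloser fdCloser(fd, kernel);
//	if (do_more_setup() != B_OK)
//		return B_ERROR;	// fd is closed automatically
//	return fdCloser.Detach();	// success: the caller now owns fd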
623 
624 } // namespace
625 
626 
627 #if VFS_PAGES_IO_TRACING
628 
629 namespace VFSPagesIOTracing {
630 
631 class PagesIOTraceEntry : public AbstractTraceEntry {
632 protected:
633 	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
634 		const generic_io_vec* vecs, uint32 count, uint32 flags,
635 		generic_size_t bytesRequested, status_t status,
636 		generic_size_t bytesTransferred)
637 		:
638 		fVnode(vnode),
639 		fMountID(vnode->mount->id),
640 		fNodeID(vnode->id),
641 		fCookie(cookie),
642 		fPos(pos),
643 		fCount(count),
644 		fFlags(flags),
645 		fBytesRequested(bytesRequested),
646 		fStatus(status),
647 		fBytesTransferred(bytesTransferred)
648 	{
649 		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
650 			sizeof(generic_io_vec) * count, false);
651 	}
652 
653 	void AddDump(TraceOutput& out, const char* mode)
654 	{
655 		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
656 			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
657 			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
658 			(uint64)fBytesRequested);
659 
660 		if (fVecs != NULL) {
661 			for (uint32 i = 0; i < fCount; i++) {
662 				if (i > 0)
663 					out.Print(", ");
664 				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
665 					(uint64)fVecs[i].length);
666 			}
667 		}
668 
669 		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
670 			"transferred: %" B_PRIu64, fFlags, fStatus,
671 			(uint64)fBytesTransferred);
672 	}
673 
674 protected:
675 	struct vnode*	fVnode;
676 	dev_t			fMountID;
677 	ino_t			fNodeID;
678 	void*			fCookie;
679 	off_t			fPos;
680 	generic_io_vec*	fVecs;
681 	uint32			fCount;
682 	uint32			fFlags;
683 	generic_size_t	fBytesRequested;
684 	status_t		fStatus;
685 	generic_size_t	fBytesTransferred;
686 };
687 
688 
689 class ReadPages : public PagesIOTraceEntry {
690 public:
691 	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
692 		const generic_io_vec* vecs, uint32 count, uint32 flags,
693 		generic_size_t bytesRequested, status_t status,
694 		generic_size_t bytesTransferred)
695 		:
696 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
697 			bytesRequested, status, bytesTransferred)
698 	{
699 		Initialized();
700 	}
701 
702 	virtual void AddDump(TraceOutput& out)
703 	{
704 		PagesIOTraceEntry::AddDump(out, "read");
705 	}
706 };
707 
708 
709 class WritePages : public PagesIOTraceEntry {
710 public:
711 	WritePages(struct vnode* vnode, void* cookie, off_t pos,
712 		const generic_io_vec* vecs, uint32 count, uint32 flags,
713 		generic_size_t bytesRequested, status_t status,
714 		generic_size_t bytesTransferred)
715 		:
716 		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
717 			bytesRequested, status, bytesTransferred)
718 	{
719 		Initialized();
720 	}
721 
722 	virtual void AddDump(TraceOutput& out)
723 	{
724 		PagesIOTraceEntry::AddDump(out, "write");
725 	}
726 };
727 
728 }	// namespace VFSPagesIOTracing
729 
730 #	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
731 #else
732 #	define TPIO(x) ;
733 #endif	// VFS_PAGES_IO_TRACING
734 
735 
736 /*! Finds the mounted device (the fs_mount structure) with the given ID.
737 	Note, you must hold sMountLock when you call this function.
738 */
739 static struct fs_mount*
740 find_mount(dev_t id)
741 {
742 	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);
743 
744 	return sMountsTable->Lookup(id);
745 }
746 
747 
748 static status_t
749 get_mount(dev_t id, struct fs_mount** _mount)
750 {
751 	struct fs_mount* mount;
752 
753 	ReadLocker nodeLocker(sVnodeLock);
754 	ReadLocker mountLocker(sMountLock);
755 
756 	mount = find_mount(id);
757 	if (mount == NULL)
758 		return B_BAD_VALUE;
759 
760 	struct vnode* rootNode = mount->root_vnode;
761 	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
762 		|| rootNode->ref_count == 0) {
763 		// might have been called during a mount/unmount operation
764 		return B_BUSY;
765 	}
766 
767 	inc_vnode_ref_count(rootNode);
768 	*_mount = mount;
769 	return B_OK;
770 }
771 
772 
773 static void
774 put_mount(struct fs_mount* mount)
775 {
776 	if (mount)
777 		put_vnode(mount->root_vnode);
778 }
779 
780 
781 /*!	Tries to open the specified file system module.
782 	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
783 	Returns a pointer to the file system module interface, or NULL if it
784 	could not open the module.
785 */
786 static file_system_module_info*
787 get_file_system(const char* fsName)
788 {
789 	char name[B_FILE_NAME_LENGTH];
790 	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
791 		// construct module name if we didn't get one
792 		// (we currently support only one API)
793 		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
794 		fsName = NULL;
795 	}
796 
797 	file_system_module_info* info;
798 	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
799 		return NULL;
800 
801 	return info;
802 }
803 
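// Examples (illustrative): both spellings load the same module --
// get_file_system("bfs") constructs and loads "file_systems/bfs/v1", while
// get_file_system("file_systems/bfs/v1") is passed through unchanged.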
804 
805 /*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
806 	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
807 	The name is allocated for you, and you have to free() it when you're
808 	done with it.
809 	Returns NULL if the required memory is not available.
810 */
811 static char*
812 get_file_system_name(const char* fsName)
813 {
814 	const size_t length = strlen("file_systems/");
815 
816 	if (strncmp(fsName, "file_systems/", length)) {
817 		// the name already seems to be in the short fsh_name form
818 		return strdup(fsName);
819 	}
820 
821 	fsName += length;
822 	const char* end = strchr(fsName, '/');
823 	if (end == NULL) {
824 		// this doesn't seem to be a valid name, but well...
825 		return strdup(fsName);
826 	}
827 
828 	// cut off the trailing /v1
829 
830 	char* name = (char*)malloc(end + 1 - fsName);
831 	if (name == NULL)
832 		return NULL;
833 
834 	strlcpy(name, fsName, end + 1 - fsName);
835 	return name;
836 }
837 
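// Worked examples (illustrative): get_file_system_name("bfs") returns "bfs"
// unchanged, while get_file_system_name("file_systems/bfs/v1") cuts off the
// trailing "/v1" and also returns "bfs". Either way the caller must free()
// the result.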
838 
839 /*!	Accepts a list of file system names separated by colons, one for each
840 	layer, and returns the file system name for the specified layer.
841 	The name is allocated for you, and you have to free() it when you're
842 	done with it.
843 	Returns NULL if the required memory is not available or if there is no
844 	name for the specified layer.
845 */
846 static char*
847 get_file_system_name_for_layer(const char* fsNames, int32 layer)
848 {
849 	while (layer >= 0) {
850 		const char* end = strchr(fsNames, ':');
851 		if (end == NULL) {
852 			if (layer == 0)
853 				return strdup(fsNames);
854 			return NULL;
855 		}
856 
857 		if (layer == 0) {
858 			size_t length = end - fsNames + 1;
859 			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
860 			strlcpy(result, fsNames, length);
861 			return result;
862 		}
863 
864 		fsNames = end + 1;
865 		layer--;
866 	}
867 
868 	return NULL;
869 }
870 
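// Worked example (illustrative): for fsNames = "first:second", layer 0
// yields "first", layer 1 yields "second", and layer 2 yields NULL.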
871 
872 static void
873 add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
874 {
875 	MutexLocker _(mount->lock);
876 	mount->vnodes.Add(vnode);
877 }
878 
879 
880 static void
881 remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
882 {
883 	MutexLocker _(mount->lock);
884 	mount->vnodes.Remove(vnode);
885 }
886 
887 
888 /*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.
889 
890 	The caller must hold the sVnodeLock (read lock at least).
891 
892 	\param mountID the mount ID.
893 	\param vnodeID the node ID.
894 
895 	\return The vnode structure, if it was found in the hash table, \c NULL
896 			otherwise.
897 */
898 static struct vnode*
899 lookup_vnode(dev_t mountID, ino_t vnodeID)
900 {
901 	struct vnode_hash_key key;
902 
903 	key.device = mountID;
904 	key.vnode = vnodeID;
905 
906 	return sVnodeTable->Lookup(key);
907 }
908 
909 
910 /*!	\brief Checks whether or not a busy vnode should be waited for (again).
911 
912 	This will also wait for BUSY_VNODE_DELAY before returning if one should
913 	still wait for the vnode to become unbusy.
914 
915 	\return \c true if one should retry, \c false if not.
916 */
917 static bool
918 retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
919 {
920 	if (--tries < 0) {
921 		// vnode doesn't seem to become unbusy
922 		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
923 			" is not becoming unbusy!\n", mountID, vnodeID);
924 		return false;
925 	}
926 	snooze(BUSY_VNODE_DELAY);
927 	return true;
928 }
929 
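// Usage sketch (simplified from get_vnode() below; the required sVnodeLock
// handling is elided): callers poll a busy vnode until it becomes unbusy or
// the retry budget runs out:
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnode->IsBusy()) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;	// gave up after BUSY_VNODE_RETRIES tries
//	}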
930 
931 /*!	Creates a new vnode with the given mount and node ID.
932 	If the node already exists, it is returned instead and no new node is
933 	created. In either case -- but not if an error occurs -- the function write
934 	locks \c sVnodeLock and keeps it locked for the caller when returning. On
935 	error the lock is not held on return.
936 
937 	\param mountID The mount ID.
938 	\param vnodeID The vnode ID.
939 	\param _vnode Will be set to the new vnode on success.
940 	\param _nodeCreated Will be set to \c true when the returned vnode has
941 		been newly created, \c false when it already existed. Will not be
942 		changed on error.
943 	\return \c B_OK, when the vnode was successfully created and inserted or
944 		a node with the given ID was found, \c B_NO_MEMORY or
945 		\c B_ENTRY_NOT_FOUND on error.
946 */
947 static status_t
948 create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
949 	bool& _nodeCreated)
950 {
951 	FUNCTION(("create_new_vnode_and_lock()\n"));
952 
953 	struct vnode* vnode = (struct vnode*)malloc(sizeof(struct vnode));
954 	if (vnode == NULL)
955 		return B_NO_MEMORY;
956 
957 	// initialize basic values
958 	memset(vnode, 0, sizeof(struct vnode));
959 	vnode->device = mountID;
960 	vnode->id = vnodeID;
961 	vnode->ref_count = 1;
962 	vnode->SetBusy(true);
963 
964 	// look up the node -- it might have been added by someone else in the
965 	// meantime
966 	rw_lock_write_lock(&sVnodeLock);
967 	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
968 	if (existingVnode != NULL) {
969 		free(vnode);
970 		_vnode = existingVnode;
971 		_nodeCreated = false;
972 		return B_OK;
973 	}
974 
975 	// get the mount structure
976 	rw_lock_read_lock(&sMountLock);
977 	vnode->mount = find_mount(mountID);
978 	if (!vnode->mount || vnode->mount->unmounting) {
979 		rw_lock_read_unlock(&sMountLock);
980 		rw_lock_write_unlock(&sVnodeLock);
981 		free(vnode);
982 		return B_ENTRY_NOT_FOUND;
983 	}
984 
985 	// add the vnode to the mount's node list and the hash table
986 	sVnodeTable->Insert(vnode);
987 	add_vnode_to_mount_list(vnode, vnode->mount);
988 
989 	rw_lock_read_unlock(&sMountLock);
990 
991 	_vnode = vnode;
992 	_nodeCreated = true;
993 
994 	// keep the vnode lock locked
995 	return B_OK;
996 }
997 
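// Usage sketch (an assumption, mirroring get_vnode() below): on success the
// caller holds sVnodeLock write-locked and must drop it before calling into
// the file system to read the node in:
//
//	struct vnode* vnode;
//	bool nodeCreated;
//	if (create_new_vnode_and_lock(mountID, vnodeID, vnode,
//			nodeCreated) == B_OK) {
//		rw_lock_write_unlock(&sVnodeLock);
//		if (nodeCreated) {
//			// ... have the FS fill in the node, then mark it unbusy ...
//		}
//	}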
998 
999 /*!	Frees the vnode and all resources it has acquired, and removes
1000 	it from the vnode hash as well as from its mount structure.
1001 	Will also make sure that any cache modifications are written back.
1002 */
1003 static void
1004 free_vnode(struct vnode* vnode, bool reenter)
1005 {
1006 	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
1007 		vnode);
1008 	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);
1009 
1010 	// write back any changes in this vnode's cache -- but only
1011 	// if the vnode won't be deleted, in which case the changes
1012 	// will be discarded
1013 
1014 	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
1015 		FS_CALL_NO_PARAMS(vnode, fsync);
1016 
1017 	// Note: If this vnode has a cache attached, there will still be two
1018 	// references to that cache at this point. The last one belongs to the vnode
1019 	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
1020 	// cache. Each but the last reference to a cache also includes a reference
1021 	// to the vnode. The file cache, however, released its reference (cf.
1022 	// file_cache_create()), so that this vnode's ref count has a chance to
1023 	// ever drop to 0. Deleting the file cache now will cause the next-to-last
1024 	// cache reference to be released, which will also release a (no longer
1025 	// existing) vnode reference. To avoid problems, we set the vnode's ref
1026 	// count, so that it will neither become negative nor 0.
1027 	vnode->ref_count = 2;
1028 
1029 	if (!vnode->IsUnpublished()) {
1030 		if (vnode->IsRemoved())
1031 			FS_CALL(vnode, remove_vnode, reenter);
1032 		else
1033 			FS_CALL(vnode, put_vnode, reenter);
1034 	}
1035 
1036 	// If the vnode has a VMCache attached, make sure that it won't try to get
1037 	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
1038 	// long as the vnode is busy and in the hash, that won't happen, but as
1039 	// soon as we've removed it from the hash, it could reload the vnode -- with
1040 	// a new cache attached!
1041 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
1042 		((VMVnodeCache*)vnode->cache)->VnodeDeleted();
1043 
1044 	// The file system has removed the resources of the vnode now, so we can
1045 	// make it available again (by removing the busy vnode from the hash).
1046 	rw_lock_write_lock(&sVnodeLock);
1047 	sVnodeTable->Remove(vnode);
1048 	rw_lock_write_unlock(&sVnodeLock);
1049 
1050 	// if we have a VMCache attached, remove it
1051 	if (vnode->cache)
1052 		vnode->cache->ReleaseRef();
1053 
1054 	vnode->cache = NULL;
1055 
1056 	remove_vnode_from_mount_list(vnode, vnode->mount);
1057 
1058 	free(vnode);
1059 }
1060 
1061 
1062 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1063 	if the counter dropped to 0.
1064 
1065 	The caller must, of course, own a reference to the vnode to call this
1066 	function.
1067 	The caller must not hold the sVnodeLock or the sMountLock.
1068 
1069 	\param vnode the vnode.
1070 	\param alwaysFree don't move this vnode into the unused list, but really
1071 		   delete it if possible.
1072 	\param reenter \c true, if this function is called (indirectly) from within
1073 		   a file system. This will be passed to file system hooks only.
1074 	\return \c B_OK, if everything went fine, an error code otherwise.
1075 */
1076 static status_t
1077 dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
1078 {
1079 	ReadLocker locker(sVnodeLock);
1080 	AutoLocker<Vnode> nodeLocker(vnode);
1081 
1082 	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);
1083 
1084 	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);
1085 
1086 	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1087 		vnode->ref_count));
1088 
1089 	if (oldRefCount != 1)
1090 		return B_OK;
1091 
1092 	if (vnode->IsBusy())
1093 		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);
1094 
1095 	bool freeNode = false;
1096 	bool freeUnusedNodes = false;
1097 
1098 	// Just insert the vnode into an unused list if we don't need
1099 	// to delete it
1100 	if (vnode->IsRemoved() || alwaysFree) {
1101 		vnode_to_be_freed(vnode);
1102 		vnode->SetBusy(true);
1103 		freeNode = true;
1104 	} else
1105 		freeUnusedNodes = vnode_unused(vnode);
1106 
1107 	nodeLocker.Unlock();
1108 	locker.Unlock();
1109 
1110 	if (freeNode)
1111 		free_vnode(vnode, reenter);
1112 	else if (freeUnusedNodes)
1113 		free_unused_vnodes();
1114 
1115 	return B_OK;
1116 }
1117 
1118 
1119 /*!	\brief Increments the reference counter of the given vnode.
1120 
1121 	The caller must make sure that the node isn't deleted while this function
1122 	is called. This can be done either:
1123 	- by ensuring that a reference to the node exists and remains in existence,
1124 	  or
1125 	- by holding the vnode's lock (which also requires read locking sVnodeLock)
1126 	  or by holding sVnodeLock write locked.
1127 
1128 	In the second case the caller is responsible for dealing with the ref count
1129 	0 -> 1 transition. That is, 1. this function must not be invoked when the
1130 	node is busy in the first place and 2. vnode_used() must be called for the
1131 	node.
1132 
1133 	\param vnode the vnode.
1134 */
1135 static void
1136 inc_vnode_ref_count(struct vnode* vnode)
1137 {
1138 	atomic_add(&vnode->ref_count, 1);
1139 	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
1140 		vnode->ref_count));
1141 }
1142 
1143 
1144 static bool
1145 is_special_node_type(int type)
1146 {
1147 	// at the moment only FIFOs are supported
1148 	return S_ISFIFO(type);
1149 }
1150 
1151 
1152 static status_t
1153 create_special_sub_node(struct vnode* vnode, uint32 flags)
1154 {
1155 	if (S_ISFIFO(vnode->Type()))
1156 		return create_fifo_vnode(vnode->mount->volume, vnode);
1157 
1158 	return B_BAD_VALUE;
1159 }
1160 
1161 
1162 /*!	\brief Retrieves a vnode for a given mount ID, node ID pair.
1163 
1164 	If the node is not yet in memory, it will be loaded.
1165 
1166 	The caller must not hold the sVnodeLock or the sMountLock.
1167 
1168 	\param mountID the mount ID.
1169 	\param vnodeID the node ID.
1170 	\param _vnode Pointer to a vnode* variable into which the pointer to the
1171 		   retrieved vnode structure shall be written.
1172 	\param reenter \c true, if this function is called (indirectly) from within
1173 		   a file system.
1174 	\return \c B_OK, if everything went fine, an error code otherwise.
1175 */
1176 static status_t
1177 get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
1178 	int reenter)
1179 {
1180 	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
1181 		mountID, vnodeID, _vnode));
1182 
1183 	rw_lock_read_lock(&sVnodeLock);
1184 
1185 	int32 tries = BUSY_VNODE_RETRIES;
1186 restart:
1187 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
1188 	AutoLocker<Vnode> nodeLocker(vnode);
1189 
1190 	if (vnode && vnode->IsBusy()) {
1191 		nodeLocker.Unlock();
1192 		rw_lock_read_unlock(&sVnodeLock);
1193 		if (!canWait) {
1194 			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
1195 				mountID, vnodeID);
1196 			return B_BUSY;
1197 		}
1198 		if (!retry_busy_vnode(tries, mountID, vnodeID))
1199 			return B_BUSY;
1200 
1201 		rw_lock_read_lock(&sVnodeLock);
1202 		goto restart;
1203 	}
1204 
1205 	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));
1206 
1207 	status_t status;
1208 
1209 	if (vnode) {
1210 		if (vnode->ref_count == 0) {
1211 			// this vnode has been unused before
1212 			vnode_used(vnode);
1213 		}
1214 		inc_vnode_ref_count(vnode);
1215 
1216 		nodeLocker.Unlock();
1217 		rw_lock_read_unlock(&sVnodeLock);
1218 	} else {
1219 		// we need to create a new vnode and read it in
1220 		rw_lock_read_unlock(&sVnodeLock);
1221 			// unlock -- create_new_vnode_and_lock() write-locks on success
1222 		bool nodeCreated;
1223 		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
1224 			nodeCreated);
1225 		if (status != B_OK)
1226 			return status;
1227 
1228 		if (!nodeCreated) {
1229 			rw_lock_read_lock(&sVnodeLock);
1230 			rw_lock_write_unlock(&sVnodeLock);
1231 			goto restart;
1232 		}
1233 
1234 		rw_lock_write_unlock(&sVnodeLock);
1235 
1236 		int type;
1237 		uint32 flags;
1238 		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
1239 			&flags, reenter);
1240 		if (status == B_OK && vnode->private_node == NULL)
1241 			status = B_BAD_VALUE;
1242 
1243 		bool gotNode = status == B_OK;
1244 		bool publishSpecialSubNode = false;
1245 		if (gotNode) {
1246 			vnode->SetType(type);
1247 			publishSpecialSubNode = is_special_node_type(type)
1248 				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
1249 		}
1250 
1251 		if (gotNode && publishSpecialSubNode)
1252 			status = create_special_sub_node(vnode, flags);
1253 
1254 		if (status != B_OK) {
1255 			if (gotNode)
1256 				FS_CALL(vnode, put_vnode, reenter);
1257 
1258 			rw_lock_write_lock(&sVnodeLock);
1259 			sVnodeTable->Remove(vnode);
1260 			remove_vnode_from_mount_list(vnode, vnode->mount);
1261 			rw_lock_write_unlock(&sVnodeLock);
1262 
1263 			free(vnode);
1264 			return status;
1265 		}
1266 
1267 		rw_lock_read_lock(&sVnodeLock);
1268 		vnode->Lock();
1269 
1270 		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
1271 		vnode->SetBusy(false);
1272 
1273 		vnode->Unlock();
1274 		rw_lock_read_unlock(&sVnodeLock);
1275 	}
1276 
1277 	TRACE(("get_vnode: returning %p\n", vnode));
1278 
1279 	*_vnode = vnode;
1280 	return B_OK;
1281 }
1282 
1283 
1284 /*!	\brief Decrements the reference counter of the given vnode and deletes it,
1285 	if the counter dropped to 0.
1286 
1287 	The caller must, of course, own a reference to the vnode to call this
1288 	function.
1289 	The caller must not hold the sVnodeLock or the sMountLock.
1290 
1291 	\param vnode the vnode.
1292 */
1293 static inline void
1294 put_vnode(struct vnode* vnode)
1295 {
1296 	dec_vnode_ref_count(vnode, false, false);
1297 }
1298 
1299 
1300 static void
1301 free_unused_vnodes(int32 level)
1302 {
1303 	unused_vnodes_check_started();
1304 
1305 	if (level == B_NO_LOW_RESOURCE) {
1306 		unused_vnodes_check_done();
1307 		return;
1308 	}
1309 
1310 	flush_hot_vnodes();
1311 
1312 	// determine how many nodes to free
1313 	uint32 count = 1;
1314 	{
1315 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1316 
1317 		switch (level) {
1318 			case B_LOW_RESOURCE_NOTE:
1319 				count = sUnusedVnodes / 100;
1320 				break;
1321 			case B_LOW_RESOURCE_WARNING:
1322 				count = sUnusedVnodes / 10;
1323 				break;
1324 			case B_LOW_RESOURCE_CRITICAL:
1325 				count = sUnusedVnodes;
1326 				break;
1327 		}
1328 
1329 		if (count > sUnusedVnodes)
1330 			count = sUnusedVnodes;
1331 	}
1332 
1333 	// Write back the modified pages of some unused vnodes and free them.
1334 
1335 	for (uint32 i = 0; i < count; i++) {
1336 		ReadLocker vnodesReadLocker(sVnodeLock);
1337 
1338 		// get the first node
1339 		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
1340 		struct vnode* vnode = (struct vnode*)list_get_first_item(
1341 			&sUnusedVnodeList);
1342 		unusedVnodesLocker.Unlock();
1343 
1344 		if (vnode == NULL)
1345 			break;
1346 
1347 		// lock the node
1348 		AutoLocker<Vnode> nodeLocker(vnode);
1349 
1350 		// Check whether the node is still unused -- since we only append to the
1351 		// tail of the unused queue, the vnode should still be at its head.
1352 		// Alternatively we could check its ref count for 0 and its busy flag,
1353 		// but if the node is no longer at the head of the queue, it means it
1354 		// has been touched in the meantime, i.e. it is no longer the least
1355 		// recently used unused vnode, so we'd rather not free it.
1356 		unusedVnodesLocker.Lock();
1357 		if (vnode != list_get_first_item(&sUnusedVnodeList))
1358 			continue;
1359 		unusedVnodesLocker.Unlock();
1360 
1361 		ASSERT(!vnode->IsBusy());
1362 
1363 		// grab a reference
1364 		inc_vnode_ref_count(vnode);
1365 		vnode_used(vnode);
1366 
1367 		// write back changes and free the node
1368 		nodeLocker.Unlock();
1369 		vnodesReadLocker.Unlock();
1370 
1371 		if (vnode->cache != NULL)
1372 			vnode->cache->WriteModified();
1373 
1374 		dec_vnode_ref_count(vnode, true, false);
1375 			// this should free the vnode when it's still unused
1376 	}
1377 
1378 	unused_vnodes_check_done();
1379 }
1380 
1381 
1382 /*!	Gets the vnode the given vnode is covering.
1383 
1384 	The caller must have \c sVnodeLock read-locked at least.
1385 
1386 	The function returns a reference to the retrieved vnode (if any), which
1387 	the caller is responsible for putting.
1388 
1389 	\param vnode The vnode whose covered node shall be returned.
1390 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1391 		vnode.
1392 */
1393 static inline Vnode*
1394 get_covered_vnode_locked(Vnode* vnode)
1395 {
1396 	if (Vnode* coveredNode = vnode->covers) {
1397 		while (coveredNode->covers != NULL)
1398 			coveredNode = coveredNode->covers;
1399 
1400 		inc_vnode_ref_count(coveredNode);
1401 		return coveredNode;
1402 	}
1403 
1404 	return NULL;
1405 }
1406 
1407 
1408 /*!	Gets the vnode the given vnode is covering.
1409 
1410 	The caller must not hold \c sVnodeLock. Note that this implies a race
1411 	condition, since the situation can change at any time.
1412 
1413 	The function returns a reference to the retrieved vnode (if any), which
1414 	the caller is responsible for putting.
1415 
1416 	\param vnode The vnode whose covered node shall be returned.
1417 	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
1418 		vnode.
1419 */
1420 static inline Vnode*
1421 get_covered_vnode(Vnode* vnode)
1422 {
1423 	if (!vnode->IsCovering())
1424 		return NULL;
1425 
1426 	ReadLocker vnodeReadLocker(sVnodeLock);
1427 	return get_covered_vnode_locked(vnode);
1428 }
1429 
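// Illustration (an assumption about the typical setup): when a volume is
// mounted at /mnt, the /mnt directory vnode's covered_by points at the
// volume's root vnode, and that root vnode's covers points back at the /mnt
// vnode; with stacked mounts the loops above follow the chain to its end.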
1430 
1431 /*!	Gets the vnode the given vnode is covered by.
1432 
1433 	The caller must have \c sVnodeLock read-locked at least.
1434 
1435 	The function returns a reference to the retrieved vnode (if any), which
1436 	the caller is responsible for putting.
1437 
1438 	\param vnode The vnode whose covering node shall be returned.
1439 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1440 		any vnode.
1441 */
1442 static Vnode*
1443 get_covering_vnode_locked(Vnode* vnode)
1444 {
1445 	if (Vnode* coveringNode = vnode->covered_by) {
1446 		while (coveringNode->covered_by != NULL)
1447 			coveringNode = coveringNode->covered_by;
1448 
1449 		inc_vnode_ref_count(coveringNode);
1450 		return coveringNode;
1451 	}
1452 
1453 	return NULL;
1454 }
1455 
1456 
1457 /*!	Gets the vnode the given vnode is covered by.
1458 
1459 	The caller must not hold \c sVnodeLock. Note that this implies a race
1460 	condition, since the situation can change at any time.
1461 
1462 	The function returns a reference to the retrieved vnode (if any), which
1463 	the caller is responsible for putting.
1464 
1465 	\param vnode The vnode whose covering node shall be returned.
1466 	\return The covering vnode, or \c NULL if the given vnode isn't covered by
1467 		any vnode.
1468 */
1469 static inline Vnode*
1470 get_covering_vnode(Vnode* vnode)
1471 {
1472 	if (!vnode->IsCovered())
1473 		return NULL;
1474 
1475 	ReadLocker vnodeReadLocker(sVnodeLock);
1476 	return get_covering_vnode_locked(vnode);
1477 }
1478 
1479 
1480 static void
1481 free_unused_vnodes()
1482 {
1483 	free_unused_vnodes(
1484 		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
1485 			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
1486 }
1487 
1488 
1489 static void
1490 vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
1491 {
1492 	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));
1493 
1494 	free_unused_vnodes(level);
1495 }
1496 
1497 
1498 static inline void
1499 put_advisory_locking(struct advisory_locking* locking)
1500 {
1501 	release_sem(locking->lock);
1502 }
1503 
1504 
1505 /*!	Returns the advisory_locking object of the \a vnode if it has one,
1506 	and locks it.
1507 	You have to call put_advisory_locking() when you're done with
1508 	it.
1509 	Note, you must not have the vnode mutex locked when calling
1510 	this function.
1511 */
1512 static struct advisory_locking*
1513 get_advisory_locking(struct vnode* vnode)
1514 {
1515 	rw_lock_read_lock(&sVnodeLock);
1516 	vnode->Lock();
1517 
1518 	struct advisory_locking* locking = vnode->advisory_locking;
1519 	sem_id lock = locking != NULL ? locking->lock : B_ERROR;
1520 
1521 	vnode->Unlock();
1522 	rw_lock_read_unlock(&sVnodeLock);
1523 
1524 	if (lock >= 0)
1525 		lock = acquire_sem(lock);
1526 	if (lock < 0) {
1527 		// This means the locking has been deleted in the meantime
1528 		// or had never existed in the first place - otherwise, we
1529 		// would get the lock at some point.
1530 		return NULL;
1531 	}
1532 
1533 	return locking;
1534 }
1535 
1536 
1537 /*!	Creates a locked advisory_locking object, and attaches it to the
1538 	given \a vnode.
1539 	Returns B_OK in case of success - even if the vnode got such an
1540 	object from someone else in the meantime, you'll still get that
1541 	one locked then.
1542 */
1543 static status_t
1544 create_advisory_locking(struct vnode* vnode)
1545 {
1546 	if (vnode == NULL)
1547 		return B_FILE_ERROR;
1548 
1549 	ObjectDeleter<advisory_locking> lockingDeleter;
1550 	struct advisory_locking* locking = NULL;
1551 
1552 	while (get_advisory_locking(vnode) == NULL) {
1553 		// no locking object set on the vnode yet, create one
1554 		if (locking == NULL) {
1555 			locking = new(std::nothrow) advisory_locking;
1556 			if (locking == NULL)
1557 				return B_NO_MEMORY;
1558 			lockingDeleter.SetTo(locking);
1559 
1560 			locking->wait_sem = create_sem(0, "advisory lock");
1561 			if (locking->wait_sem < 0)
1562 				return locking->wait_sem;
1563 
1564 			locking->lock = create_sem(0, "advisory locking");
1565 			if (locking->lock < 0)
1566 				return locking->lock;
1567 		}
1568 
1569 		// set our newly created locking object
1570 		ReadLocker _(sVnodeLock);
1571 		AutoLocker<Vnode> nodeLocker(vnode);
1572 		if (vnode->advisory_locking == NULL) {
1573 			vnode->advisory_locking = locking;
1574 			lockingDeleter.Detach();
1575 			return B_OK;
1576 		}
1577 	}
1578 
1579 	// The vnode already had a locking object. That's just as well.
1580 
1581 	return B_OK;
1582 }
1583 
1584 
1585 /*! Returns \c true when either \a flock is \c NULL or \a flock intersects
1586 	with the advisory_lock \a lock.
1587 */
1588 static bool
1589 advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
1590 {
1591 	if (flock == NULL)
1592 		return true;
1593 
1594 	return lock->start <= flock->l_start - 1 + flock->l_len
1595 		&& lock->end >= flock->l_start;
1596 }
1597 
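// Worked example (illustrative): a normalized flock covers the byte range
// [l_start, l_start - 1 + l_len]. With l_start = 100 and l_len = 50 (bytes
// 100..149) and an advisory_lock spanning start = 140, end = 200, the test
// above evaluates 140 <= 149 && 200 >= 100, so the ranges intersect.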
1598 
1599 /*!	Tests whether acquiring a lock would block.
1600 */
1601 static status_t
1602 test_advisory_lock(struct vnode* vnode, struct flock* flock)
1603 {
1604 	flock->l_type = F_UNLCK;
1605 
1606 	struct advisory_locking* locking = get_advisory_locking(vnode);
1607 	if (locking == NULL)
1608 		return B_OK;
1609 
1610 	team_id team = team_get_current_team_id();
1611 
1612 	LockList::Iterator iterator = locking->locks.GetIterator();
1613 	while (iterator.HasNext()) {
1614 		struct advisory_lock* lock = iterator.Next();
1615 
1616 		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
1617 			// locks do overlap
1618 			if (flock->l_type != F_RDLCK || !lock->shared) {
1619 				// collision
1620 				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
1621 				flock->l_whence = SEEK_SET;
1622 				flock->l_start = lock->start;
1623 				flock->l_len = lock->end - lock->start + 1;
1624 				flock->l_pid = lock->team;
1625 				break;
1626 			}
1627 		}
1628 	}
1629 
1630 	put_advisory_locking(locking);
1631 	return B_OK;
1632 }
1633 
1634 
1635 /*!	Removes the specified lock, or all locks of the calling team
1636 	if \a flock is NULL.
1637 */
1638 static status_t
1639 release_advisory_lock(struct vnode* vnode, struct io_context* context,
1640 	struct file_descriptor* descriptor, struct flock* flock)
1641 {
1642 	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));
1643 
1644 	struct advisory_locking* locking = get_advisory_locking(vnode);
1645 	if (locking == NULL)
1646 		return B_OK;
1647 
1648 	// find matching lock entries
1649 
1650 	LockList::Iterator iterator = locking->locks.GetIterator();
1651 	while (iterator.HasNext()) {
1652 		struct advisory_lock* lock = iterator.Next();
1653 		bool removeLock = false;
1654 
1655 		if (descriptor != NULL && lock->bound_to == descriptor) {
1656 			// Remove flock() locks
1657 			removeLock = true;
1658 		} else if (lock->bound_to == context
1659 				&& advisory_lock_intersects(lock, flock)) {
1660 			// Remove POSIX locks
1661 			bool endsBeyond = false;
1662 			bool startsBefore = false;
1663 			if (flock != NULL) {
1664 				startsBefore = lock->start < flock->l_start;
1665 				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
1666 			}
1667 
1668 			if (!startsBefore && !endsBeyond) {
1669 				// lock is completely contained in flock
1670 				removeLock = true;
1671 			} else if (startsBefore && !endsBeyond) {
1672 				// cut the end of the lock
1673 				lock->end = flock->l_start - 1;
1674 			} else if (!startsBefore && endsBeyond) {
1675 				// cut the start of the lock
1676 				lock->start = flock->l_start + flock->l_len;
1677 			} else {
1678 				// divide the lock into two locks
1679 				struct advisory_lock* secondLock = (struct advisory_lock*)
					malloc(sizeof(struct advisory_lock));
					// malloc() to match the free() below and the allocation
					// in acquire_advisory_lock()
1680 				if (secondLock == NULL) {
1681 					// TODO: we should probably revert the locks we already
1682 					// changed... (ie. allocate upfront)
1683 					put_advisory_locking(locking);
1684 					return B_NO_MEMORY;
1685 				}
1686 
1687 				secondLock->bound_to = context;
1688 				secondLock->team = lock->team;
1689 				secondLock->session = lock->session;
1690 				// values must already be normalized when getting here
1691 				secondLock->start = flock->l_start + flock->l_len;
1692 				secondLock->end = lock->end;
1693 				secondLock->shared = lock->shared;
1694 
1695 				lock->end = flock->l_start - 1;
					// Shorten the original lock only after secondLock has
					// taken over its old end, so the second half keeps the
					// correct range.
1696 
1697 				locking->locks.Add(secondLock);
1698 			}
1699 		}
1700 
1701 		if (removeLock) {
1702 			// this lock is no longer used
1703 			iterator.Remove();
1704 			free(lock);
1705 		}
1706 	}
1707 
1708 	bool removeLocking = locking->locks.IsEmpty();
1709 	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);
1710 
1711 	put_advisory_locking(locking);
1712 
1713 	if (removeLocking) {
1714 		// We can remove the whole advisory locking structure; it's no
1715 		// longer used
1716 		locking = get_advisory_locking(vnode);
1717 		if (locking != NULL) {
1718 			ReadLocker locker(sVnodeLock);
1719 			AutoLocker<Vnode> nodeLocker(vnode);
1720 
1721 			// the locking could have been changed in the meantime
1722 			if (locking->locks.IsEmpty()) {
1723 				vnode->advisory_locking = NULL;
1724 				nodeLocker.Unlock();
1725 				locker.Unlock();
1726 
1727 				// we've detached the locking from the vnode, so we can
1728 				// safely delete it
1729 				delete locking;
1730 			} else {
1731 				// the locking is in use again
1732 				nodeLocker.Unlock();
1733 				locker.Unlock();
1734 				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
1735 			}
1736 		}
1737 	}
1738 
1739 	return B_OK;
1740 }
1741 
1742 
1743 /*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
1744 	will wait for the lock to become available, if there are any collisions
1745 	(if \a wait is \c false, B_PERMISSION_DENIED or B_WOULD_BLOCK is returned).
1746 
1747 	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
1748 	BSD flock() semantics are used, that is, all children can unlock the file
1749 	in question (we even allow parents to remove the lock, but that
1750 	seems to be in line with what the BSDs are doing).
1751 */
1752 static status_t
1753 acquire_advisory_lock(struct vnode* vnode, io_context* context,
1754 	struct file_descriptor* descriptor, struct flock* flock, bool wait)
1755 {
1756 	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
1757 		vnode, flock, wait ? "yes" : "no"));
1758 
1759 	bool shared = flock->l_type == F_RDLCK;
1760 	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
1761 	status_t status = B_OK;
1762 
1763 	// TODO: do deadlock detection!
1764 
1765 	struct advisory_locking* locking;
1766 
1767 	while (true) {
1768 		// if this vnode has an advisory_locking structure attached,
1769 		// lock that one and search for any colliding file lock
1770 		status = create_advisory_locking(vnode);
1771 		if (status != B_OK)
1772 			return status;
1773 
1774 		locking = vnode->advisory_locking;
1775 		team_id team = team_get_current_team_id();
1776 		sem_id waitForLock = -1;
1777 
1778 		// test for collisions
1779 		LockList::Iterator iterator = locking->locks.GetIterator();
1780 		while (iterator.HasNext()) {
1781 			struct advisory_lock* lock = iterator.Next();
1782 
1783 			// TODO: locks from the same team might be joinable!
1784 			if ((lock->team != team || lock->bound_to != boundTo)
1785 					&& advisory_lock_intersects(lock, flock)) {
1786 				// locks do overlap
1787 				if (!shared || !lock->shared) {
1788 					// we need to wait
1789 					waitForLock = locking->wait_sem;
1790 					break;
1791 				}
1792 			}
1793 		}
1794 
1795 		if (waitForLock < 0)
1796 			break;
1797 
1798 		// We need to wait. Do that or fail now, if we've been asked not to.
1799 
1800 		if (!wait) {
1801 			put_advisory_locking(locking);
1802 			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
1803 		}
1804 
1805 		status = switch_sem_etc(locking->lock, waitForLock, 1,
1806 			B_CAN_INTERRUPT, 0);
1807 		if (status != B_OK && status != B_BAD_SEM_ID)
1808 			return status;
1809 
1810 		// We have been notified, but we need to re-lock the locking object. So
1811 		// go another round...
1812 	}
1813 
1814 	// install new lock
1815 
1816 	struct advisory_lock* lock = (struct advisory_lock*)malloc(
1817 		sizeof(struct advisory_lock));
1818 	if (lock == NULL) {
1819 		put_advisory_locking(locking);
1820 		return B_NO_MEMORY;
1821 	}
1822 
1823 	lock->bound_to = boundTo;
1824 	lock->team = team_get_current_team_id();
1825 	lock->session = thread_get_current_thread()->team->session_id;
1826 	// values must already be normalized when getting here
1827 	lock->start = flock->l_start;
1828 	lock->end = flock->l_start - 1 + flock->l_len;
1829 	lock->shared = shared;
1830 
1831 	locking->locks.Add(lock);
1832 	put_advisory_locking(locking);
1833 
1834 	return status;
1835 }
1836 
1837 
1838 /*!	Normalizes the \a flock structure to make it easier to compare the
1839 	structure with others. The l_start and l_len fields are set to absolute
1840 	values according to the l_whence field.
1841 */
1842 static status_t
1843 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1844 {
1845 	switch (flock->l_whence) {
1846 		case SEEK_SET:
1847 			break;
1848 		case SEEK_CUR:
1849 			flock->l_start += descriptor->pos;
1850 			break;
1851 		case SEEK_END:
1852 		{
1853 			struct vnode* vnode = descriptor->u.vnode;
1854 			struct stat stat;
1855 			status_t status;
1856 
1857 			if (!HAS_FS_CALL(vnode, read_stat))
1858 				return B_UNSUPPORTED;
1859 
1860 			status = FS_CALL(vnode, read_stat, &stat);
1861 			if (status != B_OK)
1862 				return status;
1863 
1864 			flock->l_start += stat.st_size;
1865 			break;
1866 		}
1867 		default:
1868 			return B_BAD_VALUE;
1869 	}
1870 
1871 	if (flock->l_start < 0)
1872 		flock->l_start = 0;
1873 	if (flock->l_len == 0)
1874 		flock->l_len = OFF_MAX;
1875 
1876 	// don't let the offset and length overflow
1877 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1878 		flock->l_len = OFF_MAX - flock->l_start;
1879 
1880 	if (flock->l_len < 0) {
1881 		// a negative length reverses the region
1882 		flock->l_start += flock->l_len;
1883 		flock->l_len = -flock->l_len;
1884 	}
1885 
1886 	return B_OK;
1887 }
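
/*	Worked example (added illustration, not part of the original sources):
	with a descriptor position of 100, an flock of
	{ l_whence = SEEK_CUR, l_start = -10, l_len = 20 } is normalized to
	{ l_start = 90, l_len = 20 }, i.e. the byte range [90, 110). An l_len
	of 0 is expanded to OFF_MAX ("lock to end of file"), and a negative
	l_len reverses the region, so { l_start = 100, l_len = -20 } becomes
	{ l_start = 80, l_len = 20 }.
*/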
1888 
1889 
1890 static void
1891 replace_vnode_if_disconnected(struct fs_mount* mount,
1892 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1893 	struct vnode* fallBack, bool lockRootLock)
1894 {
1895 	struct vnode* givenVnode = vnode;
1896 	bool vnodeReplaced = false;
1897 
1898 	ReadLocker vnodeReadLocker(sVnodeLock);
1899 
1900 	if (lockRootLock)
1901 		mutex_lock(&sIOContextRootLock);
1902 
1903 	while (vnode != NULL && vnode->mount == mount
1904 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1905 		if (vnode->covers != NULL) {
1906 			// redirect the vnode to the covered vnode
1907 			vnode = vnode->covers;
1908 		} else
1909 			vnode = fallBack;
1910 
1911 		vnodeReplaced = true;
1912 	}
1913 
1914 	// If we've replaced the node, grab a reference for the new one.
1915 	if (vnodeReplaced && vnode != NULL)
1916 		inc_vnode_ref_count(vnode);
1917 
1918 	if (lockRootLock)
1919 		mutex_unlock(&sIOContextRootLock);
1920 
1921 	vnodeReadLocker.Unlock();
1922 
1923 	if (vnodeReplaced)
1924 		put_vnode(givenVnode);
1925 }
1926 
1927 
1928 /*!	Disconnects all file descriptors that are associated with the
1929 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1930 	\a mount object.
1931 
1932 	Note that after you've called this function, there might still be
1933 	ongoing accesses - those already in progress won't be interrupted.
1934 	However, any subsequent access will fail.
1935 
1936 	This is not a cheap function and should be used with care and rarely.
1937 	TODO: there is currently no means to stop a blocking read/write!
1938 */
1939 static void
1940 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1941 	struct vnode* vnodeToDisconnect)
1942 {
1943 	// iterate over all teams and peek into their file descriptors
1944 	TeamListIterator teamIterator;
1945 	while (Team* team = teamIterator.Next()) {
1946 		BReference<Team> teamReference(team, true);
1947 		TeamLocker teamLocker(team);
1948 
1949 		// lock the I/O context
1950 		io_context* context = team->io_context;
1951 		if (context == NULL)
1952 			continue;
1953 		MutexLocker contextLocker(context->io_mutex);
1954 
1955 		teamLocker.Unlock();
1956 
1957 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1958 			sRoot, true);
1959 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1960 			sRoot, false);
1961 
1962 		for (uint32 i = 0; i < context->table_size; i++) {
1963 			struct file_descriptor* descriptor = context->fds[i];
1964 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1965 				continue;
1966 
1967 			inc_fd_ref_count(descriptor);
1968 
1969 			// if this descriptor points at this mount, we
1970 			// need to disconnect it to be able to unmount
1971 			struct vnode* vnode = fd_vnode(descriptor);
1972 			if (vnodeToDisconnect != NULL) {
1973 				if (vnode == vnodeToDisconnect)
1974 					disconnect_fd(descriptor);
1975 			} else if ((vnode != NULL && vnode->mount == mount)
1976 				|| (vnode == NULL && descriptor->u.mount == mount))
1977 				disconnect_fd(descriptor);
1978 
1979 			put_fd(descriptor);
1980 		}
1981 	}
1982 }
1983 
1984 
1985 /*!	\brief Gets the root node of the current IO context.
1986 	If \a kernel is \c true, the kernel IO context will be used.
1987 	The caller obtains a reference to the returned node.
1988 */
1989 struct vnode*
1990 get_root_vnode(bool kernel)
1991 {
1992 	if (!kernel) {
1993 		// Get the root vnode from the current team's I/O context
1994 		struct io_context* context = get_current_io_context(kernel);
1995 
1996 		mutex_lock(&sIOContextRootLock);
1997 
1998 		struct vnode* root = context->root;
1999 		if (root != NULL)
2000 			inc_vnode_ref_count(root);
2001 
2002 		mutex_unlock(&sIOContextRootLock);
2003 
2004 		if (root != NULL)
2005 			return root;
2006 
2007 		// That should never happen.
2008 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
2009 			"have a root\n", team_get_current_team_id());
2010 	}
2011 
2012 	inc_vnode_ref_count(sRoot);
2013 	return sRoot;
2014 }
2015 
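
/*	Usage sketch (illustrative only): the returned vnode carries a
	reference that the caller must release again:

		struct vnode* root = get_root_vnode(false);
		// ... resolve paths against "root" ...
		put_vnode(root);
*/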
2016 
2017 /*!	\brief Gets the directory path and leaf name for a given path.
2018 
2019 	The supplied \a path is transformed to refer to the directory part of
2020 	the entry identified by the original path, and into the buffer \a filename
2021 	the leaf name of the original entry is written.
2022 	Neither the returned path nor the leaf name can be expected to be
2023 	canonical.
2024 
2025 	\param path The path to be analyzed. Must be able to store at least one
2026 		   additional character.
2027 	\param filename The buffer into which the leaf name will be written.
2028 		   Must be of size B_FILE_NAME_LENGTH at least.
2029 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2030 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2031 		   if the given path name is empty.
2032 */
2033 static status_t
2034 get_dir_path_and_leaf(char* path, char* filename)
2035 {
2036 	if (*path == '\0')
2037 		return B_ENTRY_NOT_FOUND;
2038 
2039 	char* last = strrchr(path, '/');
2040 		// '/' are not allowed in file names!
2041 
2042 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2043 
2044 	if (last == NULL) {
2045 		// this path is a single segment with no '/' in it,
2046 		// e.g. "foo"
2047 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2048 			return B_NAME_TOO_LONG;
2049 
2050 		strcpy(path, ".");
2051 	} else {
2052 		last++;
2053 		if (last[0] == '\0') {
2054 			// special case: the path ends in one or more '/' - remove them
2055 			while (*--last == '/' && last != path);
2056 			last[1] = '\0';
2057 
2058 			if (last == path && last[0] == '/') {
2059 				// This path points to the root of the file system
2060 				strcpy(filename, ".");
2061 				return B_OK;
2062 			}
2063 			for (; last != path && *(last - 1) != '/'; last--);
2064 				// rewind to the start of the leaf before the '/'
2065 		}
2066 
2067 		// normal leaf: replace the leaf portion of the path with a '.'
2068 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2069 			return B_NAME_TOO_LONG;
2070 
2071 		last[0] = '.';
2072 		last[1] = '\0';
2073 	}
2074 	return B_OK;
2075 }
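
/*	Examples (added illustration): get_dir_path_and_leaf() transforms
	"/a/b/c" into the path "/a/b/." with filename "c", "foo" into "."
	with filename "foo", and "/a/b/" into "/a/." with filename "b".
	A lone "/" is left as is, with filename ".".
*/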
2076 
2077 
2078 static status_t
2079 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2080 	bool traverse, bool kernel, struct vnode** _vnode)
2081 {
2082 	char clonedName[B_FILE_NAME_LENGTH + 1];
2083 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2084 		return B_NAME_TOO_LONG;
2085 
2086 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2087 	struct vnode* directory;
2088 
2089 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2090 	if (status < 0)
2091 		return status;
2092 
2093 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2094 		_vnode, NULL);
2095 }
2096 
2097 
2098 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2099 	and returns the respective vnode.
2100 	On success a reference to the vnode is acquired for the caller.
2101 */
2102 static status_t
2103 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2104 {
2105 	ino_t id;
2106 	bool missing;
2107 
2108 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2109 		return missing ? B_ENTRY_NOT_FOUND
2110 			: get_vnode(dir->device, id, _vnode, true, false);
2111 	}
2112 
2113 	status_t status = FS_CALL(dir, lookup, name, &id);
2114 	if (status != B_OK)
2115 		return status;
2116 
2117 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2118 	// have a reference and just need to look the node up.
2119 	rw_lock_read_lock(&sVnodeLock);
2120 	*_vnode = lookup_vnode(dir->device, id);
2121 	rw_lock_read_unlock(&sVnodeLock);
2122 
2123 	if (*_vnode == NULL) {
2124 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2125 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2126 		return B_ENTRY_NOT_FOUND;
2127 	}
2128 
2129 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2130 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2131 //		(*_vnode)->mount->id, (*_vnode)->id);
2132 
2133 	return B_OK;
2134 }
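
/*	Usage sketch (illustrative, not from the original sources): the
	returned vnode comes with a reference held for the caller:

		struct vnode* child;
		if (lookup_dir_entry(dir, "data", &child) == B_OK) {
			// ... use "child" ...
			put_vnode(child);
		}
*/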
2135 
2136 
2137 /*!	Returns the vnode for the relative path starting at the specified \a vnode.
2138 	\a path must not be NULL.
2139 	If it returns successfully, \a path contains the name of the last path
2140 	component. This function clobbers the buffer pointed to by \a path only
2141 	if it does contain more than one component.
2142 	Note that this reduces the ref_count of the starting \a vnode, whether
2143 	or not it is successful!
2144 */
2145 static status_t
2146 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2147 	int count, struct io_context* ioContext, struct vnode** _vnode,
2148 	ino_t* _parentID)
2149 {
2150 	status_t status = B_OK;
2151 	ino_t lastParentID = vnode->id;
2152 
2153 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2154 
2155 	if (path == NULL) {
2156 		put_vnode(vnode);
2157 		return B_BAD_VALUE;
2158 	}
2159 
2160 	if (*path == '\0') {
2161 		put_vnode(vnode);
2162 		return B_ENTRY_NOT_FOUND;
2163 	}
2164 
2165 	while (true) {
2166 		struct vnode* nextVnode;
2167 		char* nextPath;
2168 
2169 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2170 			path));
2171 
2172 		// done?
2173 		if (path[0] == '\0')
2174 			break;
2175 
2176 		// walk to find the next path component ("path" will point to a single
2177 		// path component), and filter out multiple slashes
2178 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2179 				nextPath++);
2180 
2181 		if (*nextPath == '/') {
2182 			*nextPath = '\0';
2183 			do
2184 				nextPath++;
2185 			while (*nextPath == '/');
2186 		}
2187 
2188 		// If the '..' is at a covering vnode, move to the covered
2189 		// vnode, so we pass the '..' path to the underlying file system.
2190 		// Also prevent breaking the root of the IO context.
2191 		if (strcmp("..", path) == 0) {
2192 			if (vnode == ioContext->root) {
2193 				// Attempted prison break! Keep it contained.
2194 				path = nextPath;
2195 				continue;
2196 			}
2197 
2198 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2199 				nextVnode = coveredVnode;
2200 				put_vnode(vnode);
2201 				vnode = nextVnode;
2202 			}
2203 		}
2204 
2205 		// check if vnode is really a directory
2206 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2207 			status = B_NOT_A_DIRECTORY;
2208 
2209 		// Check if we have the right to search the current directory vnode.
2210 		// If a file system doesn't have the access() function, we assume that
2211 		// searching a directory is always allowed
2212 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2213 			status = FS_CALL(vnode, access, X_OK);
2214 
2215 		// Tell the filesystem to get the vnode of this path component (if we
2216 		// got the permission from the call above)
2217 		if (status == B_OK)
2218 			status = lookup_dir_entry(vnode, path, &nextVnode);
2219 
2220 		if (status != B_OK) {
2221 			put_vnode(vnode);
2222 			return status;
2223 		}
2224 
2225 		// If the new node is a symbolic link, resolve it (if we've been told
2226 		// to do it)
2227 		if (S_ISLNK(nextVnode->Type())
2228 			&& (traverseLeafLink || nextPath[0] != '\0')) {
2229 			size_t bufferSize;
2230 			char* buffer;
2231 
2232 			TRACE(("traverse link\n"));
2233 
2234 			// it's not exactly nice style using goto in this way, but hey,
2235 			// it works :-/
2236 			if (count + 1 > B_MAX_SYMLINKS) {
2237 				status = B_LINK_LIMIT;
2238 				goto resolve_link_error;
2239 			}
2240 
2241 			bufferSize = B_PATH_NAME_LENGTH;
2242 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2243 			if (buffer == NULL) {
2244 				status = B_NO_MEMORY;
2245 				goto resolve_link_error;
2246 			}
2247 
2248 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2249 				bufferSize--;
2250 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2251 				// null-terminate
2252 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2253 					buffer[bufferSize] = '\0';
2254 			} else
2255 				status = B_BAD_VALUE;
2256 
2257 			if (status != B_OK) {
2258 				free(buffer);
2259 
2260 		resolve_link_error:
2261 				put_vnode(vnode);
2262 				put_vnode(nextVnode);
2263 
2264 				return status;
2265 			}
2266 			put_vnode(nextVnode);
2267 
2268 			// Check if we start from the root directory or the current
2269 			// directory ("vnode" still points to that one).
2270 			// Cut off all leading slashes if it's the root directory
2271 			path = buffer;
2272 			bool absoluteSymlink = false;
2273 			if (path[0] == '/') {
2274 				// we don't need the old directory anymore
2275 				put_vnode(vnode);
2276 
2277 				while (*++path == '/')
2278 					;
2279 
2280 				mutex_lock(&sIOContextRootLock);
2281 				vnode = ioContext->root;
2282 				inc_vnode_ref_count(vnode);
2283 				mutex_unlock(&sIOContextRootLock);
2284 
2285 				absoluteSymlink = true;
2286 			}
2287 
2288 			inc_vnode_ref_count(vnode);
2289 				// balance the next recursion - we will decrement the
2290 				// ref_count of the vnode, no matter if we succeeded or not
2291 
2292 			if (absoluteSymlink && *path == '\0') {
2293 				// symlink was just "/"
2294 				nextVnode = vnode;
2295 			} else {
2296 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2297 					ioContext, &nextVnode, &lastParentID);
2298 			}
2299 
2300 			object_cache_free(sPathNameCache, buffer, 0);
2301 
2302 			if (status != B_OK) {
2303 				put_vnode(vnode);
2304 				return status;
2305 			}
2306 		} else
2307 			lastParentID = vnode->id;
2308 
2309 		// decrease the ref count on the old dir we just looked up into
2310 		put_vnode(vnode);
2311 
2312 		path = nextPath;
2313 		vnode = nextVnode;
2314 
2315 		// see if we hit a covered node
2316 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2317 			put_vnode(vnode);
2318 			vnode = coveringNode;
2319 		}
2320 	}
2321 
2322 	*_vnode = vnode;
2323 	if (_parentID)
2324 		*_parentID = lastParentID;
2325 
2326 	return B_OK;
2327 }
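
/*	Usage note (added illustration): since vnode_path_to_vnode() consumes
	a reference to the starting vnode even on failure, callers that want
	to keep their own reference take an extra one first:

		inc_vnode_ref_count(dir);
			// compensates for the reference the call below consumes
		struct vnode* entry;
		status_t status = vnode_path_to_vnode(dir, path, true, 0, kernel,
			&entry, NULL);
*/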
2328 
2329 
2330 static status_t
2331 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2332 	int count, bool kernel, struct vnode** _vnode, ino_t* _parentID)
2333 {
2334 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2335 		get_current_io_context(kernel), _vnode, _parentID);
2336 }
2337 
2338 
2339 static status_t
2340 path_to_vnode(char* path, bool traverseLink, struct vnode** _vnode,
2341 	ino_t* _parentID, bool kernel)
2342 {
2343 	struct vnode* start = NULL;
2344 
2345 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2346 
2347 	if (!path)
2348 		return B_BAD_VALUE;
2349 
2350 	if (*path == '\0')
2351 		return B_ENTRY_NOT_FOUND;
2352 
2353 	// figure out if we need to start at root or at cwd
2354 	if (*path == '/') {
2355 		if (sRoot == NULL) {
2356 			// we're a bit early, aren't we?
2357 			return B_ERROR;
2358 		}
2359 
2360 		while (*++path == '/')
2361 			;
2362 		start = get_root_vnode(kernel);
2363 
2364 		if (*path == '\0') {
2365 			*_vnode = start;
2366 			return B_OK;
2367 		}
2368 
2369 	} else {
2370 		struct io_context* context = get_current_io_context(kernel);
2371 
2372 		mutex_lock(&context->io_mutex);
2373 		start = context->cwd;
2374 		if (start != NULL)
2375 			inc_vnode_ref_count(start);
2376 		mutex_unlock(&context->io_mutex);
2377 
2378 		if (start == NULL)
2379 			return B_ERROR;
2380 	}
2381 
2382 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2383 		_parentID);
2384 }
2385 
2386 
2387 /*! Returns the vnode in the next to last segment of the path, and returns
2388 	the last portion in filename.
2389 	The path buffer must be able to store at least one additional character.
2390 */
2391 static status_t
2392 path_to_dir_vnode(char* path, struct vnode** _vnode, char* filename,
2393 	bool kernel)
2394 {
2395 	status_t status = get_dir_path_and_leaf(path, filename);
2396 	if (status != B_OK)
2397 		return status;
2398 
2399 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2400 }
2401 
2402 
2403 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2404 		   to by a FD + path pair.
2405 
2406 	\a path must be given in either case. \a fd might be omitted, in which
2407 	case \a path is either an absolute path or one relative to the current
2408 	directory. If both are supplied and \a path is relative, it is reckoned off
2409 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2410 	ignored.
2411 
2412 	The caller has the responsibility to call put_vnode() on the returned
2413 	directory vnode.
2414 
2415 	\param fd The FD. May be < 0.
2416 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2417 	       is modified by this function. It must have at least room for a
2418 	       string one character longer than the path it contains.
2419 	\param _vnode A pointer to a variable the directory vnode shall be written
2420 		   into.
2421 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2422 		   the leaf name of the specified entry will be written.
2423 	\param kernel \c true, if invoked from inside the kernel, \c false if
2424 		   invoked from userland.
2425 	\return \c B_OK, if everything went fine, another error code otherwise.
2426 */
2427 static status_t
2428 fd_and_path_to_dir_vnode(int fd, char* path, struct vnode** _vnode,
2429 	char* filename, bool kernel)
2430 {
2431 	if (!path)
2432 		return B_BAD_VALUE;
2433 	if (*path == '\0')
2434 		return B_ENTRY_NOT_FOUND;
2435 	if (fd < 0)
2436 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2437 
2438 	status_t status = get_dir_path_and_leaf(path, filename);
2439 	if (status != B_OK)
2440 		return status;
2441 
2442 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2443 }
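
/*	Example (added illustration): for an FD referring to "/boot/home" and
	the path "mail/drafts", this returns the vnode of "/boot/home/mail"
	in \a _vnode and "drafts" in \a filename. With an absolute path the
	FD is ignored, analogous to the POSIX *at() functions.
*/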
2444 
2445 
2446 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2447 		   to by a vnode + path pair.
2448 
2449 	\a path must be given in either case. \a vnode might be omitted, in which
2450 	case \a path is either an absolute path or one relative to the current
2451 	directory. If both are supplied and \a path is relative, it is reckoned off
2452 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2453 	ignored.
2454 
2455 	The caller has the responsibility to call put_vnode() on the returned
2456 	directory vnode.
2457 
2458 	\param vnode The vnode. May be \c NULL.
2459 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2460 	       is modified by this function. It must have at least room for a
2461 	       string one character longer than the path it contains.
2462 	\param _vnode A pointer to a variable the directory vnode shall be written
2463 		   into.
2464 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2465 		   the leaf name of the specified entry will be written.
2466 	\param kernel \c true, if invoked from inside the kernel, \c false if
2467 		   invoked from userland.
2468 	\return \c B_OK, if everything went fine, another error code otherwise.
2469 */
2470 static status_t
2471 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2472 	struct vnode** _vnode, char* filename, bool kernel)
2473 {
2474 	if (!path)
2475 		return B_BAD_VALUE;
2476 	if (*path == '\0')
2477 		return B_ENTRY_NOT_FOUND;
2478 	if (vnode == NULL || path[0] == '/')
2479 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2480 
2481 	status_t status = get_dir_path_and_leaf(path, filename);
2482 	if (status != B_OK)
2483 		return status;
2484 
2485 	inc_vnode_ref_count(vnode);
2486 		// vnode_path_to_vnode() always decrements the ref count
2487 
2488 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2489 }
2490 
2491 
2492 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2493 */
2494 static status_t
2495 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2496 	size_t bufferSize, struct io_context* ioContext)
2497 {
2498 	if (bufferSize < sizeof(struct dirent))
2499 		return B_BAD_VALUE;
2500 
2501 	// See if the vnode is covering another vnode and move to the covered
2502 	// vnode so we get the underlying file system
2503 	VNodePutter vnodePutter;
2504 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2505 		vnode = coveredVnode;
2506 		vnodePutter.SetTo(vnode);
2507 	}
2508 
2509 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2510 		// The FS supports getting the name of a vnode.
2511 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2512 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2513 			return B_OK;
2514 	}
2515 
2516 	// The FS doesn't support getting the name of a vnode. So we search the
2517 	// parent directory for the vnode, if the caller let us.
2518 
2519 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2520 		return B_UNSUPPORTED;
2521 
2522 	void* cookie;
2523 
2524 	status_t status = FS_CALL(parent, open_dir, &cookie);
2525 	if (status >= B_OK) {
2526 		while (true) {
2527 			uint32 num = 1;
2528 			// We use the FS hook directly instead of dir_read(), since we don't
2529 			// want the entries to be fixed. We have already resolved vnode to
2530 			// the covered node.
2531 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2532 				&num);
2533 			if (status != B_OK)
2534 				break;
2535 			if (num == 0) {
2536 				status = B_ENTRY_NOT_FOUND;
2537 				break;
2538 			}
2539 
2540 			if (vnode->id == buffer->d_ino) {
2541 				// found correct entry!
2542 				break;
2543 			}
2544 		}
2545 
2546 		FS_CALL(parent, close_dir, cookie);
2547 		FS_CALL(parent, free_dir_cookie, cookie);
2548 	}
2549 	return status;
2550 }
2551 
2552 
2553 static status_t
2554 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2555 	size_t nameSize, bool kernel)
2556 {
2557 	char buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2558 	struct dirent* dirent = (struct dirent*)buffer;
2559 
2560 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2561 		get_current_io_context(kernel));
2562 	if (status != B_OK)
2563 		return status;
2564 
2565 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2566 		return B_BUFFER_OVERFLOW;
2567 
2568 	return B_OK;
2569 }
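
/*	Sketch (illustrative only): retrieving a node's name into a plain
	char buffer via the wrapper above:

		char name[B_FILE_NAME_LENGTH];
		if (get_vnode_name(vnode, parent, name, sizeof(name), true)
				== B_OK)
			dprintf("node is named \"%s\"\n", name);
*/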
2570 
2571 
2572 /*!	Gets the full path to a given directory vnode.
2573 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2574 	file system doesn't support this call, it will fall back to iterating
2575 	through the parent directory to get the name of the child.
2576 
2577 	To protect against circular loops, it supports a maximum tree depth
2578 	of 256 levels.
2579 
2580 	Note that the path may no longer be correct by the time this function
2581 	returns! It doesn't use any locking to guarantee that the returned
2582 	path is still valid - paths aren't stable anyway: the path to a file
2583 	can change at any time.
2584 
2585 	It might be a good idea, though, for the caller to check whether the returned path exists (it's not done here for efficiency reasons).
2586 */
2587 static status_t
2588 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2589 	bool kernel)
2590 {
2591 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2592 
2593 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2594 		return B_BAD_VALUE;
2595 
2596 	if (!S_ISDIR(vnode->Type()))
2597 		return B_NOT_A_DIRECTORY;
2598 
2599 	char* path = buffer;
2600 	int32 insert = bufferSize;
2601 	int32 maxLevel = 256;
2602 	int32 length;
2603 	status_t status = B_OK;
2604 	struct io_context* ioContext = get_current_io_context(kernel);
2605 
2606 	// we don't use get_vnode() here because this call is more
2607 	// efficient and does all we need from get_vnode()
2608 	inc_vnode_ref_count(vnode);
2609 
2610 	path[--insert] = '\0';
2611 		// the path is filled right to left
2612 
2613 	while (true) {
2614 		// If the node is the context's root, bail out. Otherwise resolve mount
2615 		// points.
2616 		if (vnode == ioContext->root)
2617 			break;
2618 
2619 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2620 			put_vnode(vnode);
2621 			vnode = coveredVnode;
2622 		}
2623 
2624 		// lookup the parent vnode
2625 		struct vnode* parentVnode;
2626 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2627 		if (status != B_OK)
2628 			goto out;
2629 
2630 		if (parentVnode == vnode) {
2631 			// The caller apparently got their hands on a node outside of their
2632 			// context's root. Now we've hit the global root.
2633 			put_vnode(parentVnode);
2634 			break;
2635 		}
2636 
2637 		// get the node's name
2638 		char nameBuffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
2639 			// also used for fs_read_dir()
2640 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2641 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2642 			sizeof(nameBuffer), ioContext);
2643 
2644 		// release the current vnode, we only need its parent from now on
2645 		put_vnode(vnode);
2646 		vnode = parentVnode;
2647 
2648 		if (status != B_OK)
2649 			goto out;
2650 
2651 		// TODO: add an explicit check for loops in about 10 levels to do
2652 		// real loop detection
2653 
2654 		// don't go deeper than 'maxLevel' to prevent circular loops
2655 		if (maxLevel-- < 0) {
2656 			status = B_LINK_LIMIT;
2657 			goto out;
2658 		}
2659 
2660 		// add the name in front of the current path
2661 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2662 		length = strlen(name);
2663 		insert -= length;
2664 		if (insert <= 0) {
2665 			status = B_RESULT_NOT_REPRESENTABLE;
2666 			goto out;
2667 		}
2668 		memcpy(path + insert, name, length);
2669 		path[--insert] = '/';
2670 	}
2671 
2672 	// the root dir will result in an empty path: fix it
2673 	if (path[insert] == '\0')
2674 		path[--insert] = '/';
2675 
2676 	TRACE(("  path is: %s\n", path + insert));
2677 
2678 	// move the path to the start of the buffer
2679 	length = bufferSize - insert;
2680 	memmove(buffer, path + insert, length);
2681 
2682 out:
2683 	put_vnode(vnode);
2684 	return status;
2685 }
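
/*	Illustration (added commentary): the buffer is filled right to left.
	For a vnode at "/boot/home" the loop first writes "home", prepends
	'/', then "boot" and another '/', so that the buffer ends in
	"/boot/home" before the final memmove() shifts the result to the
	front of the buffer.
*/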
2686 
2687 
2688 /*!	Checks the length of every path component, and adds a '.'
2689 	if the path ends in a slash.
2690 	The given path buffer must be able to store at least one
2691 	additional character.
2692 */
2693 static status_t
2694 check_path(char* to)
2695 {
2696 	int32 length = 0;
2697 
2698 	// check length of every path component
2699 
2700 	while (*to) {
2701 		char* begin;
2702 		if (*to == '/')
2703 			to++, length++;
2704 
2705 		begin = to;
2706 		while (*to != '/' && *to)
2707 			to++, length++;
2708 
2709 		if (to - begin > B_FILE_NAME_LENGTH)
2710 			return B_NAME_TOO_LONG;
2711 	}
2712 
2713 	if (length == 0)
2714 		return B_ENTRY_NOT_FOUND;
2715 
2716 	// complete path if there is a slash at the end
2717 
2718 	if (*(to - 1) == '/') {
2719 		if (length > B_PATH_NAME_LENGTH - 2)
2720 			return B_NAME_TOO_LONG;
2721 
2722 		to[0] = '.';
2723 		to[1] = '\0';
2724 	}
2725 
2726 	return B_OK;
2727 }
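
/*	Examples (added illustration): check_path() leaves "a/b/c" untouched,
	turns "a/b/" into "a/b/.", rejects an empty path with
	B_ENTRY_NOT_FOUND, and returns B_NAME_TOO_LONG if any single
	component is longer than B_FILE_NAME_LENGTH characters.
*/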
2728 
2729 
2730 static struct file_descriptor*
2731 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2732 {
2733 	struct file_descriptor* descriptor
2734 		= get_fd(get_current_io_context(kernel), fd);
2735 	if (descriptor == NULL)
2736 		return NULL;
2737 
2738 	struct vnode* vnode = fd_vnode(descriptor);
2739 	if (vnode == NULL) {
2740 		put_fd(descriptor);
2741 		return NULL;
2742 	}
2743 
2744 	// ToDo: when we can close a file descriptor at any point, investigate
2745 	//	if this is still valid to do (accessing the vnode without ref_count
2746 	//	or locking)
2747 	*_vnode = vnode;
2748 	return descriptor;
2749 }
2750 
2751 
2752 static struct vnode*
2753 get_vnode_from_fd(int fd, bool kernel)
2754 {
2755 	struct file_descriptor* descriptor;
2756 	struct vnode* vnode;
2757 
2758 	descriptor = get_fd(get_current_io_context(kernel), fd);
2759 	if (descriptor == NULL)
2760 		return NULL;
2761 
2762 	vnode = fd_vnode(descriptor);
2763 	if (vnode != NULL)
2764 		inc_vnode_ref_count(vnode);
2765 
2766 	put_fd(descriptor);
2767 	return vnode;
2768 }
2769 
2770 
2771 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2772 	only the path will be considered. In this case, the \a path must not be
2773 	NULL.
2774 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2775 	and should be NULL for files.
2776 */
2777 static status_t
2778 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2779 	struct vnode** _vnode, ino_t* _parentID, bool kernel)
2780 {
2781 	if (fd < 0 && !path)
2782 		return B_BAD_VALUE;
2783 
2784 	if (path != NULL && *path == '\0')
2785 		return B_ENTRY_NOT_FOUND;
2786 
2787 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2788 		// no FD or absolute path
2789 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2790 	}
2791 
2792 	// FD only, or FD + relative path
2793 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2794 	if (vnode == NULL)
2795 		return B_FILE_ERROR;
2796 
2797 	if (path != NULL) {
2798 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2799 			_vnode, _parentID);
2800 	}
2801 
2802 	// there is no relative path to take into account
2803 
2804 	*_vnode = vnode;
2805 	if (_parentID)
2806 		*_parentID = -1;
2807 
2808 	return B_OK;
2809 }
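
/*	Summary of the resolution rules above (added for clarity):
	- fd < 0, path == NULL:    B_BAD_VALUE
	- fd < 0, path != NULL:    plain path_to_vnode() lookup
	- fd >= 0, path == NULL:   the FD's own vnode is returned
	- fd >= 0, relative path:  resolved relative to the FD's vnode
	- fd >= 0, absolute path:  the FD is ignored
*/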
2810 
2811 
2812 static int
2813 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2814 	void* cookie, int openMode, bool kernel)
2815 {
2816 	struct file_descriptor* descriptor;
2817 	int fd;
2818 
2819 	// If the vnode is locked, we don't allow creating a new file/directory
2820 	// file_descriptor for it
2821 	if (vnode && vnode->mandatory_locked_by != NULL
2822 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2823 		return B_BUSY;
2824 
2825 	descriptor = alloc_fd();
2826 	if (!descriptor)
2827 		return B_NO_MEMORY;
2828 
2829 	if (vnode)
2830 		descriptor->u.vnode = vnode;
2831 	else
2832 		descriptor->u.mount = mount;
2833 	descriptor->cookie = cookie;
2834 
2835 	switch (type) {
2836 		// vnode types
2837 		case FDTYPE_FILE:
2838 			descriptor->ops = &sFileOps;
2839 			break;
2840 		case FDTYPE_DIR:
2841 			descriptor->ops = &sDirectoryOps;
2842 			break;
2843 		case FDTYPE_ATTR:
2844 			descriptor->ops = &sAttributeOps;
2845 			break;
2846 		case FDTYPE_ATTR_DIR:
2847 			descriptor->ops = &sAttributeDirectoryOps;
2848 			break;
2849 
2850 		// mount types
2851 		case FDTYPE_INDEX_DIR:
2852 			descriptor->ops = &sIndexDirectoryOps;
2853 			break;
2854 		case FDTYPE_QUERY:
2855 			descriptor->ops = &sQueryOps;
2856 			break;
2857 
2858 		default:
2859 			panic("get_new_fd() called with unknown type %d\n", type);
2860 			break;
2861 	}
2862 	descriptor->type = type;
2863 	descriptor->open_mode = openMode;
2864 
2865 	io_context* context = get_current_io_context(kernel);
2866 	fd = new_fd(context, descriptor);
2867 	if (fd < 0) {
2868 		descriptor->ops = NULL;
2869 		put_fd(descriptor);
2870 		return B_NO_MORE_FDS;
2871 	}
2872 
2873 	mutex_lock(&context->io_mutex);
2874 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2875 	mutex_unlock(&context->io_mutex);
2876 
2877 	return fd;
2878 }
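
/*	Usage sketch (illustrative only): after a file system's open hook has
	produced a cookie, the descriptor is published like this:

		int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
			kernel);

	On failure a negative error code (e.g. B_NO_MORE_FDS) is returned
	and the caller has to clean up the cookie itself.
*/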
2879 
2880 
2881 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2882 	vfs_normalize_path(). See there for more documentation.
2883 */
2884 static status_t
2885 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2886 {
2887 	VNodePutter dirPutter;
2888 	struct vnode* dir = NULL;
2889 	status_t error;
2890 
2891 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2892 		// get dir vnode + leaf name
2893 		struct vnode* nextDir;
2894 		char leaf[B_FILE_NAME_LENGTH];
2895 		error = vnode_and_path_to_dir_vnode(dir, path, &nextDir, leaf, kernel);
2896 		if (error != B_OK)
2897 			return error;
2898 
2899 		dir = nextDir;
2900 		strcpy(path, leaf);
2901 		dirPutter.SetTo(dir);
2902 
2903 		// get file vnode, if we shall resolve links
2904 		bool fileExists = false;
2905 		struct vnode* fileVnode;
2906 		VNodePutter fileVnodePutter;
2907 		if (traverseLink) {
2908 			inc_vnode_ref_count(dir);
2909 			if (vnode_path_to_vnode(dir, path, false, 0, kernel, &fileVnode,
2910 					NULL) == B_OK) {
2911 				fileVnodePutter.SetTo(fileVnode);
2912 				fileExists = true;
2913 			}
2914 		}
2915 
2916 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2917 			// we're done -- construct the path
2918 			bool hasLeaf = true;
2919 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2920 				// special cases "." and ".." -- get the dir, forget the leaf
2921 				inc_vnode_ref_count(dir);
2922 				error = vnode_path_to_vnode(dir, leaf, false, 0, kernel,
2923 					&nextDir, NULL);
2924 				if (error != B_OK)
2925 					return error;
2926 				dir = nextDir;
2927 				dirPutter.SetTo(dir);
2928 				hasLeaf = false;
2929 			}
2930 
2931 			// get the directory path
2932 			error = dir_vnode_to_path(dir, path, B_PATH_NAME_LENGTH, kernel);
2933 			if (error != B_OK)
2934 				return error;
2935 
2936 			// append the leaf name
2937 			if (hasLeaf) {
2938 				// insert a directory separator if this is not the file system
2939 				// root
2940 				if ((strcmp(path, "/") != 0
2941 					&& strlcat(path, "/", pathSize) >= pathSize)
2942 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2943 					return B_NAME_TOO_LONG;
2944 				}
2945 			}
2946 
2947 			return B_OK;
2948 		}
2949 
2950 		// read link
2951 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2952 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2953 			error = FS_CALL(fileVnode, read_symlink, path, &bufferSize);
2954 			if (error != B_OK)
2955 				return error;
2956 			if (bufferSize < B_PATH_NAME_LENGTH)
2957 				path[bufferSize] = '\0';
2958 		} else
2959 			return B_BAD_VALUE;
2960 	}
2961 
2962 	return B_LINK_LIMIT;
2963 }
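
/*	Example (added illustration): with \a traverseLink being \c true and
	"/tmp/link" being a symlink to "/boot/home", normalizing "/tmp/link"
	rewrites the buffer to "/boot/home". Symlink chains are followed for
	at most B_MAX_SYMLINKS rounds before giving up with B_LINK_LIMIT.
*/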
2964 
2965 
2966 static status_t
2967 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2968 	struct io_context* ioContext)
2969 {
2970 	// Make sure the IO context root is not bypassed.
2971 	if (parent == ioContext->root) {
2972 		*_device = parent->device;
2973 		*_node = parent->id;
2974 		return B_OK;
2975 	}
2976 
2977 	inc_vnode_ref_count(parent);
2978 		// vnode_path_to_vnode() puts the node
2979 
2980 	// ".." is guaranteed not to be clobbered by this call
2981 	struct vnode* vnode;
2982 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2983 		ioContext, &vnode, NULL);
2984 	if (status == B_OK) {
2985 		*_device = vnode->device;
2986 		*_node = vnode->id;
2987 		put_vnode(vnode);
2988 	}
2989 
2990 	return status;
2991 }
2992 
2993 
2994 #ifdef ADD_DEBUGGER_COMMANDS
2995 
2996 
2997 static void
2998 _dump_advisory_locking(advisory_locking* locking)
2999 {
3000 	if (locking == NULL)
3001 		return;
3002 
3003 	kprintf("   lock:        %" B_PRId32, locking->lock);
3004 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
3005 
3006 	int32 index = 0;
3007 	LockList::Iterator iterator = locking->locks.GetIterator();
3008 	while (iterator.HasNext()) {
3009 		struct advisory_lock* lock = iterator.Next();
3010 
3011 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
3012 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3013 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3014 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3015 	}
3016 }
3017 
3018 
3019 static void
3020 _dump_mount(struct fs_mount* mount)
3021 {
3022 	kprintf("MOUNT: %p\n", mount);
3023 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3024 	kprintf(" device_name:   %s\n", mount->device_name);
3025 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3026 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3027 	kprintf(" partition:     %p\n", mount->partition);
3028 	kprintf(" lock:          %p\n", &mount->lock);
3029 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3030 		mount->owns_file_device ? " owns_file_device" : "");
3031 
3032 	fs_volume* volume = mount->volume;
3033 	while (volume != NULL) {
3034 		kprintf(" volume %p:\n", volume);
3035 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3036 		kprintf("  private_volume:   %p\n", volume->private_volume);
3037 		kprintf("  ops:              %p\n", volume->ops);
3038 		kprintf("  file_system:      %p\n", volume->file_system);
3039 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3040 		volume = volume->super_volume;
3041 	}
3042 
3043 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3044 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3045 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3046 	set_debug_variable("_partition", (addr_t)mount->partition);
3047 }
3048 
3049 
3050 static bool
3051 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3052 	const char* name)
3053 {
3054 	bool insertSlash = buffer[bufferSize] != '\0';
3055 	size_t nameLength = strlen(name);
3056 
3057 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3058 		return false;
3059 
3060 	if (insertSlash)
3061 		buffer[--bufferSize] = '/';
3062 
3063 	bufferSize -= nameLength;
3064 	memcpy(buffer + bufferSize, name, nameLength);
3065 
3066 	return true;
3067 }
3068 
3069 
3070 static bool
3071 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3072 	ino_t nodeID)
3073 {
3074 	if (bufferSize == 0)
3075 		return false;
3076 
3077 	bool insertSlash = buffer[bufferSize] != '\0';
3078 	if (insertSlash)
3079 		buffer[--bufferSize] = '/';
3080 
3081 	size_t size = snprintf(buffer, bufferSize,
3082 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3083 	if (size > bufferSize) {
3084 		if (insertSlash)
3085 			bufferSize++;
3086 		return false;
3087 	}
3088 
3089 	if (size < bufferSize)
3090 		memmove(buffer + bufferSize - size, buffer, size);
3091 
3092 	bufferSize -= size;
3093 	return true;
3094 }
3095 
3096 
3097 static char*
3098 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3099 	bool& _truncated)
3100 {
3101 	// null-terminate the path
3102 	buffer[--bufferSize] = '\0';
3103 
3104 	while (true) {
3105 		while (vnode->covers != NULL)
3106 			vnode = vnode->covers;
3107 
3108 		if (vnode == sRoot) {
3109 			_truncated = bufferSize == 0;
3110 			if (!_truncated)
3111 				buffer[--bufferSize] = '/';
3112 			return buffer + bufferSize;
3113 		}
3114 
3115 		// resolve the name
3116 		ino_t dirID;
3117 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3118 			vnode->id, dirID);
3119 		if (name == NULL) {
3120 			// Failed to resolve the name -- prepend "<dev,node>/".
3121 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3122 				vnode->mount->id, vnode->id);
3123 			return buffer + bufferSize;
3124 		}
3125 
3126 		// prepend the name
3127 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3128 			_truncated = true;
3129 			return buffer + bufferSize;
3130 		}
3131 
3132 		// resolve the directory node
3133 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3134 		if (nextVnode == NULL) {
3135 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3136 				vnode->mount->id, dirID);
3137 			return buffer + bufferSize;
3138 		}
3139 
3140 		vnode = nextVnode;
3141 	}
3142 }
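
/*	Illustration (added commentary): names are resolved via the entry
	cache only, since this runs in the kernel debugger. Components whose
	names are no longer cached show up as "<dev,node>", so a result like
	"/boot/home/<3,1234>/file" means the name of node 1234 on volume 3
	could not be resolved.
*/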
3143 
3144 
3145 static void
3146 _dump_vnode(struct vnode* vnode, bool printPath)
3147 {
3148 	kprintf("VNODE: %p\n", vnode);
3149 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3150 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3151 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3152 	kprintf(" private_node:  %p\n", vnode->private_node);
3153 	kprintf(" mount:         %p\n", vnode->mount);
3154 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3155 	kprintf(" covers:        %p\n", vnode->covers);
3156 	kprintf(" cache:         %p\n", vnode->cache);
3157 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3158 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3159 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3160 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3161 
3162 	_dump_advisory_locking(vnode->advisory_locking);
3163 
3164 	if (printPath) {
3165 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3166 		if (buffer != NULL) {
3167 			bool truncated;
3168 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3169 				B_PATH_NAME_LENGTH, truncated);
3170 			if (path != NULL) {
3171 				kprintf(" path:          ");
3172 				if (truncated)
3173 					kputs("<truncated>/");
3174 				kputs(path);
3175 				kputs("\n");
3176 			} else
3177 				kprintf("Failed to resolve vnode path.\n");
3178 
3179 			debug_free(buffer);
3180 		} else
3181 			kprintf("Failed to allocate memory for constructing the path.\n");
3182 	}
3183 
3184 	set_debug_variable("_node", (addr_t)vnode->private_node);
3185 	set_debug_variable("_mount", (addr_t)vnode->mount);
3186 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3187 	set_debug_variable("_covers", (addr_t)vnode->covers);
3188 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3189 }
3190 
3191 
3192 static int
3193 dump_mount(int argc, char** argv)
3194 {
3195 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3196 		kprintf("usage: %s [id|address]\n", argv[0]);
3197 		return 0;
3198 	}
3199 
3200 	ulong val = parse_expression(argv[1]);
3201 	uint32 id = val;
3202 
3203 	struct fs_mount* mount = sMountsTable->Lookup(id);
3204 	if (mount == NULL) {
3205 		if (IS_USER_ADDRESS(id)) {
3206 			kprintf("fs_mount not found\n");
3207 			return 0;
3208 		}
3209 		mount = (fs_mount*)val;
3210 	}
3211 
3212 	_dump_mount(mount);
3213 	return 0;
3214 }
3215 
3216 
3217 static int
3218 dump_mounts(int argc, char** argv)
3219 {
3220 	if (argc != 1) {
3221 		kprintf("usage: %s\n", argv[0]);
3222 		return 0;
3223 	}
3224 
3225 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3226 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3227 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3228 
3229 	struct fs_mount* mount;
3230 
3231 	MountTable::Iterator iterator(sMountsTable);
3232 	while (iterator.HasNext()) {
3233 		mount = iterator.Next();
3234 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3235 			mount->root_vnode->covers, mount->volume->private_volume,
3236 			mount->volume->file_system_name);
3237 
3238 		fs_volume* volume = mount->volume;
3239 		while (volume->super_volume != NULL) {
3240 			volume = volume->super_volume;
3241 			kprintf("                                     %p %s\n",
3242 				volume->private_volume, volume->file_system_name);
3243 		}
3244 	}
3245 
3246 	return 0;
3247 }
3248 
3249 
3250 static int
3251 dump_vnode(int argc, char** argv)
3252 {
3253 	bool printPath = false;
3254 	int argi = 1;
3255 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3256 		printPath = true;
3257 		argi++;
3258 	}
3259 
3260 	if (argi >= argc || argi + 2 < argc) {
3261 		print_debugger_command_usage(argv[0]);
3262 		return 0;
3263 	}
3264 
3265 	struct vnode* vnode = NULL;
3266 
3267 	if (argi + 1 == argc) {
3268 		vnode = (struct vnode*)parse_expression(argv[argi]);
3269 		if (IS_USER_ADDRESS(vnode)) {
3270 			kprintf("invalid vnode address\n");
3271 			return 0;
3272 		}
3273 		_dump_vnode(vnode, printPath);
3274 		return 0;
3275 	}
3276 
3277 	dev_t device = parse_expression(argv[argi]);
3278 	ino_t id = parse_expression(argv[argi + 1]);
3279 
3280 	VnodeTable::Iterator iterator(sVnodeTable);
3281 	while (iterator.HasNext()) {
3282 		vnode = iterator.Next();
3283 		if (vnode->id != id || vnode->device != device)
3284 			continue;
3285 
3286 		_dump_vnode(vnode, printPath);
3287 	}
3288 
3289 	return 0;
3290 }
3291 
3292 
3293 static int
3294 dump_vnodes(int argc, char** argv)
3295 {
3296 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3297 		kprintf("usage: %s [device]\n", argv[0]);
3298 		return 0;
3299 	}
3300 
3301 	// restrict dumped nodes to a certain device if requested
3302 	dev_t device = parse_expression(argv[1]);
3303 
3304 	struct vnode* vnode;
3305 
3306 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3307 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3308 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3309 
3310 	VnodeTable::Iterator iterator(sVnodeTable);
3311 	while (iterator.HasNext()) {
3312 		vnode = iterator.Next();
3313 		if (vnode->device != device)
3314 			continue;
3315 
3316 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3317 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3318 			vnode->private_node, vnode->advisory_locking,
3319 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3320 			vnode->IsUnpublished() ? "u" : "-");
3321 	}
3322 
3323 	return 0;
3324 }
3325 
3326 
3327 static int
3328 dump_vnode_caches(int argc, char** argv)
3329 {
3330 	struct vnode* vnode;
3331 
3332 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3333 		kprintf("usage: %s [device]\n", argv[0]);
3334 		return 0;
3335 	}
3336 
3337 	// restrict dumped nodes to a certain device if requested
3338 	dev_t device = -1;
3339 	if (argc > 1)
3340 		device = parse_expression(argv[1]);
3341 
3342 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3343 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3344 
3345 	VnodeTable::Iterator iterator(sVnodeTable);
3346 	while (iterator.HasNext()) {
3347 		vnode = iterator.Next();
3348 		if (vnode->cache == NULL)
3349 			continue;
3350 		if (device != -1 && vnode->device != device)
3351 			continue;
3352 
3353 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3354 			vnode, vnode->device, vnode->id, vnode->cache,
3355 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3356 			vnode->cache->page_count);
3357 	}
3358 
3359 	return 0;
3360 }
3361 
3362 
3363 int
3364 dump_io_context(int argc, char** argv)
3365 {
3366 	if (argc > 2 || (argc == 2 && !strcmp(argv[1], "--help"))) {
3367 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3368 		return 0;
3369 	}
3370 
3371 	struct io_context* context = NULL;
3372 
3373 	if (argc > 1) {
3374 		ulong num = parse_expression(argv[1]);
3375 		if (IS_KERNEL_ADDRESS(num))
3376 			context = (struct io_context*)num;
3377 		else {
3378 			Team* team = team_get_team_struct_locked(num);
3379 			if (team == NULL) {
3380 				kprintf("could not find team with ID %lu\n", num);
3381 				return 0;
3382 			}
3383 			context = (struct io_context*)team->io_context;
3384 		}
3385 	} else
3386 		context = get_current_io_context(true);
3387 
3388 	kprintf("I/O CONTEXT: %p\n", context);
3389 	kprintf(" root vnode:\t%p\n", context->root);
3390 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3391 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3392 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3393 
3394 	if (context->num_used_fds) {
3395 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3396 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3397 	}
3398 
3399 	for (uint32 i = 0; i < context->table_size; i++) {
3400 		struct file_descriptor* fd = context->fds[i];
3401 		if (fd == NULL)
3402 			continue;
3403 
3404 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3405 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3406 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3407 			fd->pos, fd->cookie,
3408 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3409 				? "mount" : "vnode",
3410 			fd->u.vnode);
3411 	}
3412 
3413 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3414 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3415 
3416 	set_debug_variable("_cwd", (addr_t)context->cwd);
3417 
3418 	return 0;
3419 }
3420 
3421 
3422 int
3423 dump_vnode_usage(int argc, char** argv)
3424 {
3425 	if (argc != 1) {
3426 		kprintf("usage: %s\n", argv[0]);
3427 		return 0;
3428 	}
3429 
3430 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3431 		sUnusedVnodes, kMaxUnusedVnodes);
3432 
3433 	uint32 count = sVnodeTable->CountElements();
3434 
3435 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3436 		count - sUnusedVnodes);
3437 	return 0;
3438 }
3439 
3440 #endif	// ADD_DEBUGGER_COMMANDS
3441 
3442 
3443 /*!	Clears memory specified by an iovec array.
3444 */
3445 static void
3446 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3447 {
3448 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3449 		size_t length = std::min(vecs[i].iov_len, bytes);
3450 		memset(vecs[i].iov_base, 0, length);
3451 		bytes -= length;
3452 	}
3453 }
3454 
3455 
3456 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3457 	and calls the file system hooks to read/write the request to disk.
3458 */
3459 static status_t
3460 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3461 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3462 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3463 	bool doWrite)
3464 {
3465 	if (fileVecCount == 0) {
3466 		// There are no file vecs at this offset, so we're obviously trying
3467 		// to access the file outside of its bounds
3468 		return B_BAD_VALUE;
3469 	}
3470 
3471 	size_t numBytes = *_numBytes;
3472 	uint32 fileVecIndex;
3473 	size_t vecOffset = *_vecOffset;
3474 	uint32 vecIndex = *_vecIndex;
3475 	status_t status;
3476 	size_t size;
3477 
3478 	if (!doWrite && vecOffset == 0) {
3479 		// now directly read the data from the device
3480 		// the first file_io_vec can be read directly
3481 
3482 		if (fileVecs[0].length < (off_t)numBytes)
3483 			size = fileVecs[0].length;
3484 		else
3485 			size = numBytes;
3486 
3487 		if (fileVecs[0].offset >= 0) {
3488 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3489 				&vecs[vecIndex], vecCount - vecIndex, &size);
3490 		} else {
3491 			// sparse read
3492 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3493 			status = B_OK;
3494 		}
3495 		if (status != B_OK)
3496 			return status;
3497 
3498 		// TODO: this is a work-around for buggy device drivers!
3499 		//	When our own drivers honour the length, we can:
3500 		//	a) also use this direct I/O for writes (otherwise, it would
3501 		//	   overwrite precious data)
3502 		//	b) panic if the term below is true (at least for writes)
3503 		if ((off_t)size > fileVecs[0].length) {
3504 			//dprintf("warning: device driver %p doesn't respect total length "
3505 			//	"in read_pages() call!\n", ref->device);
3506 			size = fileVecs[0].length;
3507 		}
3508 
3509 		ASSERT((off_t)size <= fileVecs[0].length);
3510 
3511 		// If the file portion was contiguous, we're already done now
3512 		if (size == numBytes)
3513 			return B_OK;
3514 
3515 		// if we reached the end of the file, we can return as well
3516 		if ((off_t)size != fileVecs[0].length) {
3517 			*_numBytes = size;
3518 			return B_OK;
3519 		}
3520 
3521 		fileVecIndex = 1;
3522 
3523 		// first, find out where we have to continue in our iovecs
3524 		for (; vecIndex < vecCount; vecIndex++) {
3525 			if (size < vecs[vecIndex].iov_len)
3526 				break;
3527 
3528 			size -= vecs[vecIndex].iov_len;
3529 		}
3530 
3531 		vecOffset = size;
3532 	} else {
3533 		fileVecIndex = 0;
3534 		size = 0;
3535 	}
3536 
3537 	// Too bad, let's process the rest of the file_io_vecs
3538 
3539 	size_t totalSize = size;
3540 	size_t bytesLeft = numBytes - size;
3541 
3542 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3543 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3544 		off_t fileOffset = fileVec.offset;
3545 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3546 
3547 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3548 			fileLeft));
3549 
3550 		// process the complete fileVec
3551 		while (fileLeft > 0) {
3552 			iovec tempVecs[MAX_TEMP_IO_VECS];
3553 			uint32 tempCount = 0;
3554 
3555 			// size tracks how much of what is left of the current fileVec
3556 			// (fileLeft) has been assigned to tempVecs
3557 			size = 0;
3558 
3559 			// assign what is left of the current fileVec to the tempVecs
3560 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3561 					&& tempCount < MAX_TEMP_IO_VECS;) {
3562 				// try to satisfy one iovec per iteration (or as much as
3563 				// possible)
3564 
3565 				// bytes left of the current iovec
3566 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3567 				if (vecLeft == 0) {
3568 					vecOffset = 0;
3569 					vecIndex++;
3570 					continue;
3571 				}
3572 
3573 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3574 					vecIndex, vecOffset, size));
3575 
3576 				// actually available bytes
3577 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3578 
3579 				tempVecs[tempCount].iov_base
3580 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3581 				tempVecs[tempCount].iov_len = tempVecSize;
3582 				tempCount++;
3583 
3584 				size += tempVecSize;
3585 				vecOffset += tempVecSize;
3586 			}
3587 
3588 			size_t bytes = size;
3589 
3590 			if (fileOffset == -1) {
3591 				if (doWrite) {
3592 					panic("sparse write attempt: vnode %p", vnode);
3593 					status = B_IO_ERROR;
3594 				} else {
3595 					// sparse read
3596 					zero_iovecs(tempVecs, tempCount, bytes);
3597 					status = B_OK;
3598 				}
3599 			} else if (doWrite) {
3600 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3601 					tempVecs, tempCount, &bytes);
3602 			} else {
3603 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3604 					tempVecs, tempCount, &bytes);
3605 			}
3606 			if (status != B_OK)
3607 				return status;
3608 
3609 			totalSize += bytes;
3610 			bytesLeft -= size;
3611 			if (fileOffset >= 0)
3612 				fileOffset += size;
3613 			fileLeft -= size;
3614 			//dprintf("-> file left = %Lu\n", fileLeft);
3615 
3616 			if (size != bytes || vecIndex >= vecCount) {
3617 				// there are no more bytes or iovecs, let's bail out
3618 				*_numBytes = totalSize;
3619 				return B_OK;
3620 			}
3621 		}
3622 	}
3623 
3624 	*_vecIndex = vecIndex;
3625 	*_vecOffset = vecOffset;
3626 	*_numBytes = totalSize;
3627 	return B_OK;
3628 }
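
/*	Worked example (illustrative): a read of 8192 bytes backed by the
	file vecs { offset 1000, length 4096 } and { offset -1, length 4096 }
	reads the first 4096 bytes from device offset 1000; since an offset
	of -1 denotes a sparse extent, the remaining 4096 bytes of the iovecs
	are zeroed via zero_iovecs() instead of touching the device.
*/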
3629 
3630 
3631 static bool
3632 is_user_in_group(gid_t gid)
3633 {
3634 	if (gid == getegid())
3635 		return true;
3636 
3637 	gid_t groups[NGROUPS_MAX];
3638 	int groupCount = getgroups(NGROUPS_MAX, groups);
3639 	for (int i = 0; i < groupCount; i++) {
3640 		if (gid == groups[i])
3641 			return true;
3642 	}
3643 
3644 	return false;
3645 }
3646 
3647 
3648 static status_t
3649 free_io_context(io_context* context)
3650 {
3651 	uint32 i;
3652 
3653 	TIOC(FreeIOContext(context));
3654 
3655 	if (context->root)
3656 		put_vnode(context->root);
3657 
3658 	if (context->cwd)
3659 		put_vnode(context->cwd);
3660 
3661 	mutex_lock(&context->io_mutex);
3662 
3663 	for (i = 0; i < context->table_size; i++) {
3664 		if (struct file_descriptor* descriptor = context->fds[i]) {
3665 			close_fd(context, descriptor);
3666 			put_fd(descriptor);
3667 		}
3668 	}
3669 
3670 	mutex_destroy(&context->io_mutex);
3671 
3672 	remove_node_monitors(context);
3673 	free(context->fds);
3674 	free(context);
3675 
3676 	return B_OK;
3677 }
3678 
3679 
3680 static status_t
3681 resize_monitor_table(struct io_context* context, const int newSize)
3682 {
3683 	status_t status = B_OK;
3684 
3685 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3686 		return B_BAD_VALUE;
3687 
3688 	mutex_lock(&context->io_mutex);
3689 
3690 	if ((size_t)newSize < context->num_monitors) {
3691 		status = B_BUSY;
3692 		goto out;
3693 	}
3694 	context->max_monitors = newSize;
3695 
3696 out:
3697 	mutex_unlock(&context->io_mutex);
3698 	return status;
3699 }
3700 
3701 
3702 //	#pragma mark - public API for file systems
3703 
3704 
3705 extern "C" status_t
3706 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3707 	fs_vnode_ops* ops)
3708 {
3709 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3710 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3711 
3712 	if (privateNode == NULL)
3713 		return B_BAD_VALUE;
3714 
3715 	int32 tries = BUSY_VNODE_RETRIES;
3716 restart:
3717 	// create the node
3718 	bool nodeCreated;
3719 	struct vnode* vnode;
3720 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3721 		nodeCreated);
3722 	if (status != B_OK)
3723 		return status;
3724 
3725 	WriteLocker nodeLocker(sVnodeLock, true);
3726 		// create_new_vnode_and_lock() has locked for us
3727 
3728 	if (!nodeCreated && vnode->IsBusy()) {
3729 		nodeLocker.Unlock();
3730 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3731 			return B_BUSY;
3732 		goto restart;
3733 	}
3734 
3735 	// file system integrity check:
3736 	// test if the vnode already exists and bail out if this is the case!
3737 	if (!nodeCreated) {
3738 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3739 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3740 			vnode->private_node);
3741 		return B_ERROR;
3742 	}
3743 
3744 	vnode->private_node = privateNode;
3745 	vnode->ops = ops;
3746 	vnode->SetUnpublished(true);
3747 
3748 	TRACE(("returns: %s\n", strerror(status)));
3749 
3750 	return status;
3751 }
3752 
3753 
3754 extern "C" status_t
3755 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3756 	fs_vnode_ops* ops, int type, uint32 flags)
3757 {
3758 	FUNCTION(("publish_vnode()\n"));
3759 
3760 	int32 tries = BUSY_VNODE_RETRIES;
3761 restart:
3762 	WriteLocker locker(sVnodeLock);
3763 
3764 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3765 
3766 	bool nodeCreated = false;
3767 	if (vnode == NULL) {
3768 		if (privateNode == NULL)
3769 			return B_BAD_VALUE;
3770 
3771 		// create the node
3772 		locker.Unlock();
3773 			// create_new_vnode_and_lock() will re-lock for us on success
3774 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3775 			nodeCreated);
3776 		if (status != B_OK)
3777 			return status;
3778 
3779 		locker.SetTo(sVnodeLock, true);
3780 	}
3781 
3782 	if (nodeCreated) {
3783 		vnode->private_node = privateNode;
3784 		vnode->ops = ops;
3785 		vnode->SetUnpublished(true);
3786 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3787 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3788 		// already known, but not published
3789 	} else if (vnode->IsBusy()) {
3790 		locker.Unlock();
3791 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3792 			return B_BUSY;
3793 		goto restart;
3794 	} else
3795 		return B_BAD_VALUE;
3796 
3797 	bool publishSpecialSubNode = false;
3798 
3799 	vnode->SetType(type);
3800 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3801 	publishSpecialSubNode = is_special_node_type(type)
3802 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3803 
3804 	status_t status = B_OK;
3805 
3806 	// create sub vnodes, if necessary
3807 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3808 		locker.Unlock();
3809 
3810 		fs_volume* subVolume = volume;
3811 		if (volume->sub_volume != NULL) {
3812 			while (status == B_OK && subVolume->sub_volume != NULL) {
3813 				subVolume = subVolume->sub_volume;
3814 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3815 					vnode);
3816 			}
3817 		}
3818 
3819 		if (status == B_OK && publishSpecialSubNode)
3820 			status = create_special_sub_node(vnode, flags);
3821 
3822 		if (status != B_OK) {
3823 			// error -- clean up the created sub vnodes
3824 			while (subVolume->super_volume != volume) {
3825 				subVolume = subVolume->super_volume;
3826 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3827 			}
3828 		}
3829 
3830 		if (status == B_OK) {
3831 			ReadLocker vnodesReadLocker(sVnodeLock);
3832 			AutoLocker<Vnode> nodeLocker(vnode);
3833 			vnode->SetBusy(false);
3834 			vnode->SetUnpublished(false);
3835 		} else {
3836 			locker.Lock();
3837 			sVnodeTable->Remove(vnode);
3838 			remove_vnode_from_mount_list(vnode, vnode->mount);
3839 			free(vnode);
3840 		}
3841 	} else {
3842 		// we still hold the write lock -- mark the node unbusy and published
3843 		vnode->SetBusy(false);
3844 		vnode->SetUnpublished(false);
3845 	}
3846 
3847 	TRACE(("returns: %s\n", strerror(status)));
3848 
3849 	return status;
3850 }
3851 
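
// Illustrative sketch (not part of this file): file systems typically use
// new_vnode() and publish_vnode() as a pair -- the first registers the node
// with the VFS but leaves it busy and unpublished, the second makes it
// visible. "MyNode", "gMyNodeOps", and my_fs_create_node() are hypothetical
// FS-side names.
#if 0
static status_t
my_fs_create_node(fs_volume* volume, MyNode* node)
{
	status_t status = new_vnode(volume, node->id, node, &gMyNodeOps);
	if (status != B_OK)
		return status;

	// ... insert the entry into the on-disk directory here ...

	// hand the (still busy) vnode over to the VFS; afterwards get_vnode()
	// can resolve it
	return publish_vnode(volume, node->id, node, &gMyNodeOps, S_IFREG, 0);
}
#endif
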
3852 
3853 extern "C" status_t
3854 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3855 {
3856 	struct vnode* vnode;
3857 
3858 	if (volume == NULL)
3859 		return B_BAD_VALUE;
3860 
3861 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3862 	if (status != B_OK)
3863 		return status;
3864 
3865 	// If this is a layered FS, we need to get the node cookie for the requested
3866 	// layer.
3867 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3868 		fs_vnode resolvedNode;
3869 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3870 			&resolvedNode);
3871 		if (status != B_OK) {
3872 			panic("get_vnode(): Failed to get super node for vnode %p, "
3873 				"volume: %p", vnode, volume);
3874 			put_vnode(vnode);
3875 			return status;
3876 		}
3877 
3878 		if (_privateNode != NULL)
3879 			*_privateNode = resolvedNode.private_node;
3880 	} else if (_privateNode != NULL)
3881 		*_privateNode = vnode->private_node;
3882 
3883 	return B_OK;
3884 }
3885 
3886 
3887 extern "C" status_t
3888 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3889 {
3890 	struct vnode* vnode;
3891 
3892 	rw_lock_read_lock(&sVnodeLock);
3893 	vnode = lookup_vnode(volume->id, vnodeID);
3894 	rw_lock_read_unlock(&sVnodeLock);
3895 
3896 	if (vnode == NULL)
3897 		return B_BAD_VALUE;
3898 
3899 	inc_vnode_ref_count(vnode);
3900 	return B_OK;
3901 }
3902 
3903 
3904 extern "C" status_t
3905 put_vnode(fs_volume* volume, ino_t vnodeID)
3906 {
3907 	struct vnode* vnode;
3908 
3909 	rw_lock_read_lock(&sVnodeLock);
3910 	vnode = lookup_vnode(volume->id, vnodeID);
3911 	rw_lock_read_unlock(&sVnodeLock);
3912 
3913 	if (vnode == NULL)
3914 		return B_BAD_VALUE;
3915 
3916 	dec_vnode_ref_count(vnode, false, true);
3917 	return B_OK;
3918 }
3919 
3920 
3921 extern "C" status_t
3922 remove_vnode(fs_volume* volume, ino_t vnodeID)
3923 {
3924 	ReadLocker locker(sVnodeLock);
3925 
3926 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3927 	if (vnode == NULL)
3928 		return B_ENTRY_NOT_FOUND;
3929 
3930 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3931 		// this vnode is in use
3932 		return B_BUSY;
3933 	}
3934 
3935 	vnode->Lock();
3936 
3937 	vnode->SetRemoved(true);
3938 	bool removeUnpublished = false;
3939 
3940 	if (vnode->IsUnpublished()) {
3941 		// prepare the vnode for deletion
3942 		removeUnpublished = true;
3943 		vnode->SetBusy(true);
3944 	}
3945 
3946 	vnode->Unlock();
3947 	locker.Unlock();
3948 
3949 	if (removeUnpublished) {
3950 		// If the vnode hasn't been published yet, we delete it here
3951 		atomic_add(&vnode->ref_count, -1);
3952 		free_vnode(vnode, true);
3953 	}
3954 
3955 	return B_OK;
3956 }
3957 
3958 
3959 extern "C" status_t
3960 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3961 {
3962 	struct vnode* vnode;
3963 
3964 	rw_lock_read_lock(&sVnodeLock);
3965 
3966 	vnode = lookup_vnode(volume->id, vnodeID);
3967 	if (vnode) {
3968 		AutoLocker<Vnode> nodeLocker(vnode);
3969 		vnode->SetRemoved(false);
3970 	}
3971 
3972 	rw_lock_read_unlock(&sVnodeLock);
3973 	return B_OK;
3974 }
3975 
3976 
3977 extern "C" status_t
3978 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3979 {
3980 	ReadLocker _(sVnodeLock);
3981 
3982 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3983 		if (_removed != NULL)
3984 			*_removed = vnode->IsRemoved();
3985 		return B_OK;
3986 	}
3987 
3988 	return B_BAD_VALUE;
3989 }
3990 
3991 
3992 extern "C" status_t
3993 mark_vnode_busy(fs_volume* volume, ino_t vnodeID, bool busy)
3994 {
3995 	ReadLocker locker(sVnodeLock);
3996 
3997 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3998 	if (vnode == NULL)
3999 		return B_ENTRY_NOT_FOUND;
4000 
4001 	// are we trying to mark an already busy node busy again?
4002 	if (busy && vnode->IsBusy())
4003 		return B_BUSY;
4004 
4005 	vnode->Lock();
4006 	vnode->SetBusy(busy);
4007 	vnode->Unlock();
4008 
4009 	return B_OK;
4010 }
4011 
4012 
4013 extern "C" status_t
4014 change_vnode_id(fs_volume* volume, ino_t vnodeID, ino_t newID)
4015 {
4016 	WriteLocker locker(sVnodeLock);
4017 
4018 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
4019 	if (vnode == NULL)
4020 		return B_ENTRY_NOT_FOUND;
4021 
4022 	sVnodeTable->Remove(vnode);
4023 	vnode->id = newID;
4024 	sVnodeTable->Insert(vnode);
4025 
4026 	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
4027 		((VMVnodeCache*)vnode->cache)->SetVnodeID(newID);
4028 
4029 	return B_OK;
4030 }
4031 
4032 
4033 extern "C" fs_volume*
4034 volume_for_vnode(fs_vnode* _vnode)
4035 {
4036 	if (_vnode == NULL)
4037 		return NULL;
4038 
4039 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
4040 	return vnode->mount->volume;
4041 }
4042 
4043 
4044 extern "C" status_t
4045 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
4046 	uid_t nodeUserID)
4047 {
4048 	// get node permissions
4049 	int userPermissions = (mode & S_IRWXU) >> 6;
4050 	int groupPermissions = (mode & S_IRWXG) >> 3;
4051 	int otherPermissions = mode & S_IRWXO;
4052 
4053 	// get the node permissions for this uid/gid
4054 	int permissions = 0;
4055 	uid_t uid = geteuid();
4056 
4057 	if (uid == 0) {
4058 		// user is root
4059 		// root has always read/write permission, but at least one of the
4060 		// X bits must be set for execute permission
4061 		permissions = userPermissions | groupPermissions | otherPermissions
4062 			| S_IROTH | S_IWOTH;
4063 		if (S_ISDIR(mode))
4064 			permissions |= S_IXOTH;
4065 	} else if (uid == nodeUserID) {
4066 		// user is node owner
4067 		permissions = userPermissions;
4068 	} else if (is_user_in_group(nodeGroupID)) {
4069 		// user is in owning group
4070 		permissions = groupPermissions;
4071 	} else {
4072 		// user is one of the others
4073 		permissions = otherPermissions;
4074 	}
4075 
4076 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4077 }
4078 
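
// Worked example (illustrative): for a node with mode 0640 owned by
// (uid 1000, gid 100), a caller with euid 1000 is matched against the owner
// bits (rw-), so R_OK and W_OK succeed while X_OK fails. A hypothetical
// caller could check like this (fragment, error handling elided):
#if 0
	struct stat st;
	if (vfs_stat_vnode(vnode, &st) == B_OK) {
		status_t canRead = check_access_permissions(R_OK, st.st_mode,
			st.st_gid, st.st_uid);
	}
#endif
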
4079 
4080 #if 0
4081 extern "C" status_t
4082 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4083 	size_t* _numBytes)
4084 {
4085 	struct file_descriptor* descriptor;
4086 	struct vnode* vnode;
4087 
4088 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4089 	if (descriptor == NULL)
4090 		return B_FILE_ERROR;
4091 
4092 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4093 		count, 0, _numBytes);
4094 
4095 	put_fd(descriptor);
4096 	return status;
4097 }
4098 
4099 
4100 extern "C" status_t
4101 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4102 	size_t* _numBytes)
4103 {
4104 	struct file_descriptor* descriptor;
4105 	struct vnode* vnode;
4106 
4107 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4108 	if (descriptor == NULL)
4109 		return B_FILE_ERROR;
4110 
4111 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4112 		count, 0, _numBytes);
4113 
4114 	put_fd(descriptor);
4115 	return status;
4116 }
4117 #endif
4118 
4119 
4120 extern "C" status_t
4121 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4122 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4123 	size_t* _bytes)
4124 {
4125 	struct file_descriptor* descriptor;
4126 	struct vnode* vnode;
4127 
4128 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4129 	if (descriptor == NULL)
4130 		return B_FILE_ERROR;
4131 
4132 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4133 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4134 		false);
4135 
4136 	put_fd(descriptor);
4137 	return status;
4138 }
4139 
4140 
4141 extern "C" status_t
4142 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4143 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4144 	size_t* _bytes)
4145 {
4146 	struct file_descriptor* descriptor;
4147 	struct vnode* vnode;
4148 
4149 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4150 	if (descriptor == NULL)
4151 		return B_FILE_ERROR;
4152 
4153 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4154 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4155 		true);
4156 
4157 	put_fd(descriptor);
4158 	return status;
4159 }
4160 
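
// Illustrative note: these two functions transfer data between the memory
// buffers in "vecs" and the file extents in "fileVecs"; *_vecIndex and
// *_vecOffset report how far into "vecs" the transfer got, so a caller can
// resume with further file extents. A hedged sketch (fragment; "moreExtents"
// and "bytesPerRound" are hypothetical):
#if 0
	uint32 vecIndex = 0;
	size_t vecOffset = 0;
	while (moreExtents) {
		size_t bytes = bytesPerRound;
		status_t status = read_file_io_vec_pages(fd, fileVecs, fileVecCount,
			vecs, vecCount, &vecIndex, &vecOffset, &bytes);
		if (status != B_OK)
			break;
		// refill fileVecs with the file's next extents here
	}
#endif
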
4161 
4162 extern "C" status_t
4163 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4164 {
4165 	// lookup mount -- the caller is required to make sure that the mount
4166 	// won't go away
4167 	ReadLocker locker(sMountLock);
4168 	struct fs_mount* mount = find_mount(mountID);
4169 	if (mount == NULL)
4170 		return B_BAD_VALUE;
4171 	locker.Unlock();
4172 
4173 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4174 }
4175 
4176 
4177 extern "C" status_t
4178 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4179 {
4180 	// lookup mount -- the caller is required to make sure that the mount
4181 	// won't go away
4182 	ReadLocker locker(sMountLock);
4183 	struct fs_mount* mount = find_mount(mountID);
4184 	if (mount == NULL)
4185 		return B_BAD_VALUE;
4186 	locker.Unlock();
4187 
4188 	return mount->entry_cache.Add(dirID, name, -1, true);
4189 }
4190 
4191 
4192 extern "C" status_t
4193 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4194 {
4195 	// lookup mount -- the caller is required to make sure that the mount
4196 	// won't go away
4197 	ReadLocker locker(sMountLock);
4198 	struct fs_mount* mount = find_mount(mountID);
4199 	if (mount == NULL)
4200 		return B_BAD_VALUE;
4201 	locker.Unlock();
4202 
4203 	return mount->entry_cache.Remove(dirID, name);
4204 }
4205 
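
// Illustrative sketch of keeping the entry cache coherent from a file
// system's directory operations (fragment; "volume", "dirID", "name", and
// "nodeID" are hypothetical):
#if 0
	// after a successful lookup of "name" in directory "dirID"
	entry_cache_add(volume->id, dirID, name, nodeID);

	// after a lookup failed with B_ENTRY_NOT_FOUND (negative entry)
	entry_cache_add_missing(volume->id, dirID, name);

	// after the entry has been unlinked or renamed away
	entry_cache_remove(volume->id, dirID, name);
#endif
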
4206 
4207 //	#pragma mark - private VFS API
4208 //	Functions the VFS exports for other parts of the kernel
4209 
4210 
4211 /*! Acquires another reference to the vnode that has to be released
4212 	by calling vfs_put_vnode().
4213 */
4214 void
4215 vfs_acquire_vnode(struct vnode* vnode)
4216 {
4217 	inc_vnode_ref_count(vnode);
4218 }
4219 
4220 
4221 /*! This is currently called from file_cache_create() only.
4222 	It's probably a temporary solution as long as devfs requires that
4223 	fs_read_pages()/fs_write_pages() are called with the standard
4224 	open cookie and not with a device cookie.
4225 	If that's done differently, remove this call; it has no other
4226 	purpose.
4227 */
4228 extern "C" status_t
4229 vfs_get_cookie_from_fd(int fd, void** _cookie)
4230 {
4231 	struct file_descriptor* descriptor;
4232 
4233 	descriptor = get_fd(get_current_io_context(true), fd);
4234 	if (descriptor == NULL)
4235 		return B_FILE_ERROR;
4236 
4237 	*_cookie = descriptor->cookie;
4238 	return B_OK;
4239 }
4240 
4241 
4242 extern "C" status_t
4243 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4244 {
4245 	*vnode = get_vnode_from_fd(fd, kernel);
4246 
4247 	if (*vnode == NULL)
4248 		return B_FILE_ERROR;
4249 
4250 	return B_NO_ERROR;
4251 }
4252 
4253 
4254 extern "C" status_t
4255 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4256 {
4257 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4258 		path, kernel));
4259 
4260 	KPath pathBuffer;
4261 	if (pathBuffer.InitCheck() != B_OK)
4262 		return B_NO_MEMORY;
4263 
4264 	char* buffer = pathBuffer.LockBuffer();
4265 	strlcpy(buffer, path, pathBuffer.BufferSize());
4266 
4267 	struct vnode* vnode;
4268 	status_t status = path_to_vnode(buffer, true, &vnode, NULL, kernel);
4269 	if (status != B_OK)
4270 		return status;
4271 
4272 	*_vnode = vnode;
4273 	return B_OK;
4274 }
4275 
4276 
4277 extern "C" status_t
4278 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4279 {
4280 	struct vnode* vnode = NULL;
4281 
4282 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4283 	if (status != B_OK)
4284 		return status;
4285 
4286 	*_vnode = vnode;
4287 	return B_OK;
4288 }
4289 
4290 
4291 extern "C" status_t
4292 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4293 	const char* name, struct vnode** _vnode)
4294 {
4295 	return entry_ref_to_vnode(mountID, directoryID, name, false, true, _vnode);
4296 }
4297 
4298 
4299 extern "C" void
4300 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4301 {
4302 	*_mountID = vnode->device;
4303 	*_vnodeID = vnode->id;
4304 }
4305 
4306 
4307 /*!
4308 	Helper function abstracting the process of "converting" a given
4309 	vnode-pointer to a fs_vnode-pointer.
4310 	Currently only used in bindfs.
4311 */
4312 extern "C" fs_vnode*
4313 vfs_fsnode_for_vnode(struct vnode* vnode)
4314 {
4315 	return vnode;
4316 }
4317 
4318 
4319 /*!
4320 	Calls fs_open() on the given vnode and returns a new
4321 	file descriptor for it
4322 */
4323 int
4324 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4325 {
4326 	return open_vnode(vnode, openMode, kernel);
4327 }
4328 
4329 
4330 /*!	Looks up a vnode with the given mount and vnode ID.
4331 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4332 	to the node.
4333 	It's currently only used by file_cache_create().
4334 */
4335 extern "C" status_t
4336 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4337 {
4338 	rw_lock_read_lock(&sVnodeLock);
4339 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4340 	rw_lock_read_unlock(&sVnodeLock);
4341 
4342 	if (vnode == NULL)
4343 		return B_ERROR;
4344 
4345 	*_vnode = vnode;
4346 	return B_OK;
4347 }
4348 
4349 
4350 extern "C" status_t
4351 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4352 	bool traverseLeafLink, bool kernel, void** _node)
4353 {
4354 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4355 		volume, path, kernel));
4356 
4357 	KPath pathBuffer;
4358 	if (pathBuffer.InitCheck() != B_OK)
4359 		return B_NO_MEMORY;
4360 
4361 	fs_mount* mount;
4362 	status_t status = get_mount(volume->id, &mount);
4363 	if (status != B_OK)
4364 		return status;
4365 
4366 	char* buffer = pathBuffer.LockBuffer();
4367 	strlcpy(buffer, path, pathBuffer.BufferSize());
4368 
4369 	struct vnode* vnode = mount->root_vnode;
4370 
4371 	if (buffer[0] == '/')
4372 		status = path_to_vnode(buffer, traverseLeafLink, &vnode, NULL, kernel);
4373 	else {
4374 		inc_vnode_ref_count(vnode);
4375 			// vnode_path_to_vnode() releases a reference to the starting vnode
4376 		status = vnode_path_to_vnode(vnode, buffer, traverseLeafLink, 0,
4377 			kernel, &vnode, NULL);
4378 	}
4379 
4380 	put_mount(mount);
4381 
4382 	if (status != B_OK)
4383 		return status;
4384 
4385 	if (vnode->device != volume->id) {
4386 		// wrong mount ID - must not gain access on foreign file system nodes
4387 		put_vnode(vnode);
4388 		return B_BAD_VALUE;
4389 	}
4390 
4391 	// Use get_vnode() to resolve the cookie for the right layer.
4392 	status = get_vnode(volume, vnode->id, _node);
4393 	put_vnode(vnode);
4394 
4395 	return status;
4396 }
4397 
4398 
4399 status_t
4400 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4401 	struct stat* stat, bool kernel)
4402 {
4403 	status_t status;
4404 
4405 	if (path != NULL) {
4406 		// path given: get the stat of the node referred to by (fd, path)
4407 		KPath pathBuffer(path);
4408 		if (pathBuffer.InitCheck() != B_OK)
4409 			return B_NO_MEMORY;
4410 
4411 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4412 			traverseLeafLink, stat, kernel);
4413 	} else {
4414 		// no path given: get the FD and use the FD operation
4415 		struct file_descriptor* descriptor
4416 			= get_fd(get_current_io_context(kernel), fd);
4417 		if (descriptor == NULL)
4418 			return B_FILE_ERROR;
4419 
4420 		if (descriptor->ops->fd_read_stat)
4421 			status = descriptor->ops->fd_read_stat(descriptor, stat);
4422 		else
4423 			status = B_UNSUPPORTED;
4424 
4425 		put_fd(descriptor);
4426 	}
4427 
4428 	return status;
4429 }
4430 
4431 
4432 /*!	Finds the full path to the file that contains the module \a moduleName,
4433 	puts it into \a pathBuffer, and returns B_OK for success.
4434 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, and
4435 	\c B_ENTRY_NOT_FOUND if no file could be found.
4436 	\a pathBuffer is clobbered in any case and must not be relied on if this
4437 	function returns unsuccessfully.
4438 	\a basePath and \a pathBuffer must not point to the same space.
4439 */
4440 status_t
4441 vfs_get_module_path(const char* basePath, const char* moduleName,
4442 	char* pathBuffer, size_t bufferSize)
4443 {
4444 	struct vnode* dir;
4445 	struct vnode* file;
4446 	status_t status;
4447 	size_t length;
4448 	char* path;
4449 
4450 	if (bufferSize == 0
4451 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4452 		return B_BUFFER_OVERFLOW;
4453 
4454 	status = path_to_vnode(pathBuffer, true, &dir, NULL, true);
4455 	if (status != B_OK)
4456 		return status;
4457 
4458 	// the path buffer had been clobbered by the above call
4459 	length = strlcpy(pathBuffer, basePath, bufferSize);
4460 	if (pathBuffer[length - 1] != '/')
4461 		pathBuffer[length++] = '/';
4462 
4463 	path = pathBuffer + length;
4464 	bufferSize -= length;
4465 
4466 	while (moduleName) {
4467 		char* nextPath = strchr(moduleName, '/');
4468 		if (nextPath == NULL)
4469 			length = strlen(moduleName);
4470 		else {
4471 			length = nextPath - moduleName;
4472 			nextPath++;
4473 		}
4474 
4475 		if (length + 1 >= bufferSize) {
4476 			status = B_BUFFER_OVERFLOW;
4477 			goto err;
4478 		}
4479 
4480 		memcpy(path, moduleName, length);
4481 		path[length] = '\0';
4482 		moduleName = nextPath;
4483 
4484 		status = vnode_path_to_vnode(dir, path, true, 0, true, &file, NULL);
4485 		if (status != B_OK) {
4486 			// vnode_path_to_vnode() has already released the reference to dir
4487 			return status;
4488 		}
4489 
4490 		if (S_ISDIR(file->Type())) {
4491 			// go to the next directory
4492 			path[length] = '/';
4493 			path[length + 1] = '\0';
4494 			path += length + 1;
4495 			bufferSize -= length + 1;
4496 
4497 			dir = file;
4498 		} else if (S_ISREG(file->Type())) {
4499 			// it's a file so it should be what we've searched for
4500 			put_vnode(file);
4501 
4502 			return B_OK;
4503 		} else {
4504 			TRACE(("vfs_get_module_path(): something is strange here: "
4505 				"0x%08" B_PRIx32 "...\n", file->Type()));
4506 			status = B_ERROR;
4507 			dir = file;
4508 			goto err;
4509 		}
4510 	}
4511 
4512 	// if we got here, the moduleName just pointed to a directory, not to
4513 	// a real module - what should we do in this case?
4514 	status = B_ENTRY_NOT_FOUND;
4515 
4516 err:
4517 	put_vnode(dir);
4518 	return status;
4519 }
4520 
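
// Worked example (illustrative, with hypothetical names): for
// basePath = "/boot/system/add-ons/kernel" and
// moduleName = "busses/scsi/my_bus/v1", the loop descends "busses" and
// "scsi" as directories; if "my_bus" is a regular file, the function
// returns B_OK with pathBuffer containing
// "/boot/system/add-ons/kernel/busses/scsi/my_bus" -- the file expected
// to export the module "v1".
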
4521 
4522 /*!	\brief Normalizes a given path.
4523 
4524 	The path must refer to an existing or non-existing entry in an existing
4525 	directory, that is chopping off the leaf component the remaining path must
4526 	refer to an existing directory.
4527 
4528 	The returned path will be canonical in that it will be absolute, will not
4529 	contain any "." or ".." components or duplicate occurrences of '/'s,
4530 	and none of the directory components will be symbolic links.
4531 
4532 	Any two paths referring to the same entry will result in the same
4533 	normalized path (well, that is pretty much the definition of `normalized',
4534 	isn't it :-).
4535 
4536 	\param path The path to be normalized.
4537 	\param buffer The buffer into which the normalized path will be written.
4538 		   May be the same one as \a path.
4539 	\param bufferSize The size of \a buffer.
4540 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4541 	\param kernel \c true, if the IO context of the kernel shall be used,
4542 		   otherwise that of the team this thread belongs to. Only relevant,
4543 		   if the path is relative (to get the CWD).
4544 	\return \c B_OK if everything went fine, another error code otherwise.
4545 */
4546 status_t
4547 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4548 	bool traverseLink, bool kernel)
4549 {
4550 	if (!path || !buffer || bufferSize < 1)
4551 		return B_BAD_VALUE;
4552 
4553 	if (path != buffer) {
4554 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4555 			return B_BUFFER_OVERFLOW;
4556 	}
4557 
4558 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4559 }
4560 
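
// Usage sketch (hedged, fragment): normalizing a path in place.
#if 0
	char path[B_PATH_NAME_LENGTH];
	strlcpy(path, "/boot/home/../home/./Desktop", sizeof(path));
	if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK) {
		// assuming no symlinks are involved, path now reads
		// "/boot/home/Desktop"
	}
#endif
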
4561 
4562 /*!	\brief Gets the parent of the passed in node.
4563 
4564 	Gets the parent of the passed in node, and correctly resolves covered
4565 	nodes.
4566 */
4567 extern "C" status_t
4568 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4569 {
4570 	return resolve_covered_parent(parent, device, node,
4571 		get_current_io_context(true));
4572 }
4573 
4574 
4575 /*!	\brief Creates a special node in the file system.
4576 
4577 	The caller gets a reference to the newly created node (which is passed
4578 	back through \a _createdVnode) and is responsible for releasing it.
4579 
4580 	\param path The path where to create the entry for the node. Can be \c NULL,
4581 		in which case the node is created without an entry in the root FS -- it
4582 		will automatically be deleted when the last reference has been released.
4583 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4584 		the target file system will just create the node with its standard
4585 		operations. Depending on the type of the node a subnode might be created
4586 		automatically, though.
4587 	\param mode The type and permissions for the node to be created.
4588 	\param flags Flags to be passed to the creating FS.
4589 	\param kernel \c true, if called in the kernel context (relevant only if
4590 		\a path is not \c NULL and not absolute).
4591 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4592 		file system creating the node, with the private data pointer and
4593 		operations for the super node. Can be \c NULL.
4594 	\param _createdVnode Pointer to pre-allocated storage where to store the
4595 		pointer to the newly created node.
4596 	\return \c B_OK, if everything went fine, another error code otherwise.
4597 */
4598 status_t
4599 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4600 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4601 	struct vnode** _createdVnode)
4602 {
4603 	struct vnode* dirNode;
4604 	char _leaf[B_FILE_NAME_LENGTH];
4605 	char* leaf = NULL;
4606 
4607 	if (path) {
4608 		// We've got a path. Get the dir vnode and the leaf name.
4609 		KPath tmpPathBuffer;
4610 		if (tmpPathBuffer.InitCheck() != B_OK)
4611 			return B_NO_MEMORY;
4612 
4613 		char* tmpPath = tmpPathBuffer.LockBuffer();
4614 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4615 			return B_NAME_TOO_LONG;
4616 
4617 		// get the dir vnode and the leaf name
4618 		leaf = _leaf;
4619 		status_t error = path_to_dir_vnode(tmpPath, &dirNode, leaf, kernel);
4620 		if (error != B_OK)
4621 			return error;
4622 	} else {
4623 		// No path. Create the node in the root FS.
4624 		dirNode = sRoot;
4625 		inc_vnode_ref_count(dirNode);
4626 	}
4627 
4628 	VNodePutter _(dirNode);
4629 
4630 	// check support for creating special nodes
4631 	if (!HAS_FS_CALL(dirNode, create_special_node))
4632 		return B_UNSUPPORTED;
4633 
4634 	// create the node
4635 	fs_vnode superVnode;
4636 	ino_t nodeID;
4637 	status_t status = FS_CALL(dirNode, create_special_node, leaf, subVnode,
4638 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4639 	if (status != B_OK)
4640 		return status;
4641 
4642 	// lookup the node
4643 	rw_lock_read_lock(&sVnodeLock);
4644 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4645 	rw_lock_read_unlock(&sVnodeLock);
4646 
4647 	if (*_createdVnode == NULL) {
4648 		panic("vfs_create_special_node(): lookup of node failed");
4649 		return B_ERROR;
4650 	}
4651 
4652 	return B_OK;
4653 }
4654 
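
// Illustrative sketch (fragment): creating a named FIFO. The path and
// permissions are hypothetical; error handling is elided.
#if 0
	struct vnode* createdVnode;
	status_t status = vfs_create_special_node("/var/my_fifo", NULL,
		S_IFIFO | 0666, 0, true, NULL, &createdVnode);
	if (status == B_OK)
		put_vnode(createdVnode);	// release the reference we were given
#endif
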
4655 
4656 extern "C" void
4657 vfs_put_vnode(struct vnode* vnode)
4658 {
4659 	put_vnode(vnode);
4660 }
4661 
4662 
4663 extern "C" status_t
4664 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4665 {
4666 	// Get current working directory from io context
4667 	struct io_context* context = get_current_io_context(false);
4668 	status_t status = B_OK;
4669 
4670 	mutex_lock(&context->io_mutex);
4671 
4672 	if (context->cwd != NULL) {
4673 		*_mountID = context->cwd->device;
4674 		*_vnodeID = context->cwd->id;
4675 	} else
4676 		status = B_ERROR;
4677 
4678 	mutex_unlock(&context->io_mutex);
4679 	return status;
4680 }
4681 
4682 
4683 status_t
4684 vfs_unmount(dev_t mountID, uint32 flags)
4685 {
4686 	return fs_unmount(NULL, mountID, flags, true);
4687 }
4688 
4689 
4690 extern "C" status_t
4691 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4692 {
4693 	struct vnode* vnode;
4694 
4695 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4696 	if (status != B_OK)
4697 		return status;
4698 
4699 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4700 	put_vnode(vnode);
4701 	return B_OK;
4702 }
4703 
4704 
4705 extern "C" void
4706 vfs_free_unused_vnodes(int32 level)
4707 {
4708 	vnode_low_resource_handler(NULL,
4709 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4710 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4711 		level);
4712 }
4713 
4714 
4715 extern "C" bool
4716 vfs_can_page(struct vnode* vnode, void* cookie)
4717 {
4718 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4719 
4720 	if (HAS_FS_CALL(vnode, can_page))
4721 		return FS_CALL(vnode, can_page, cookie);
4722 	return false;
4723 }
4724 
4725 
4726 extern "C" status_t
4727 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4728 	const generic_io_vec* vecs, size_t count, uint32 flags,
4729 	generic_size_t* _numBytes)
4730 {
4731 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4732 		vecs, pos));
4733 
4734 #if VFS_PAGES_IO_TRACING
4735 	generic_size_t bytesRequested = *_numBytes;
4736 #endif
4737 
4738 	IORequest request;
4739 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4740 	if (status == B_OK) {
4741 		status = vfs_vnode_io(vnode, cookie, &request);
4742 		if (status == B_OK)
4743 			status = request.Wait();
4744 		*_numBytes = request.TransferredBytes();
4745 	}
4746 
4747 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4748 		status, *_numBytes));
4749 
4750 	return status;
4751 }
4752 
4753 
4754 extern "C" status_t
4755 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4756 	const generic_io_vec* vecs, size_t count, uint32 flags,
4757 	generic_size_t* _numBytes)
4758 {
4759 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4760 		vecs, pos));
4761 
4762 #if VFS_PAGES_IO_TRACING
4763 	generic_size_t bytesRequested = *_numBytes;
4764 #endif
4765 
4766 	IORequest request;
4767 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4768 	if (status == B_OK) {
4769 		status = vfs_vnode_io(vnode, cookie, &request);
4770 		if (status == B_OK)
4771 			status = request.Wait();
4772 		*_numBytes = request.TransferredBytes();
4773 	}
4774 
4775 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4776 		status, *_numBytes));
4777 
4778 	return status;
4779 }
4780 
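
// Hedged sketch (fragment): reading from a vnode through the generic I/O
// vector interface. "buffer", "size", "pos", and "cookie" are hypothetical.
#if 0
	generic_io_vec vec;
	vec.base = (generic_addr_t)buffer;
	vec.length = size;

	generic_size_t bytes = vec.length;
	status_t status = vfs_read_pages(vnode, cookie, pos, &vec, 1, 0, &bytes);
	// on success, "bytes" holds the number of bytes actually transferred
#endif
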
4781 
4782 /*!	Gets the vnode's VMCache object. If it didn't have one, it will be
4783 	created if \a allocate is \c true.
4784 	In case it's successful, it will also grab a reference to the cache
4785 	it returns.
4786 */
4787 extern "C" status_t
4788 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4789 {
4790 	if (vnode->cache != NULL) {
4791 		vnode->cache->AcquireRef();
4792 		*_cache = vnode->cache;
4793 		return B_OK;
4794 	}
4795 
4796 	rw_lock_read_lock(&sVnodeLock);
4797 	vnode->Lock();
4798 
4799 	status_t status = B_OK;
4800 
4801 	// The cache could have been created in the meantime
4802 	if (vnode->cache == NULL) {
4803 		if (allocate) {
4804 			// TODO: actually the vnode needs to be busy already here, or
4805 			//	else this won't work...
4806 			bool wasBusy = vnode->IsBusy();
4807 			vnode->SetBusy(true);
4808 
4809 			vnode->Unlock();
4810 			rw_lock_read_unlock(&sVnodeLock);
4811 
4812 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4813 
4814 			rw_lock_read_lock(&sVnodeLock);
4815 			vnode->Lock();
4816 			vnode->SetBusy(wasBusy);
4817 		} else
4818 			status = B_BAD_VALUE;
4819 	}
4820 
4821 	vnode->Unlock();
4822 	rw_lock_read_unlock(&sVnodeLock);
4823 
4824 	if (status == B_OK) {
4825 		vnode->cache->AcquireRef();
4826 		*_cache = vnode->cache;
4827 	}
4828 
4829 	return status;
4830 }
4831 
4832 
4833 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4834 	their own.
4835 	In case it's successful, it will also acquire a reference to the
4836 	given cache.
4837 */
4838 extern "C" status_t
4839 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4840 {
4841 	rw_lock_read_lock(&sVnodeLock);
4842 	vnode->Lock();
4843 
4844 	status_t status = B_OK;
4845 	if (vnode->cache != NULL) {
4846 		status = B_NOT_ALLOWED;
4847 	} else {
4848 		vnode->cache = _cache;
4849 		_cache->AcquireRef();
4850 	}
4851 
4852 	vnode->Unlock();
4853 	rw_lock_read_unlock(&sVnodeLock);
4854 	return status;
4855 }
4856 
4857 
4858 status_t
4859 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4860 	file_io_vec* vecs, size_t* _count)
4861 {
4862 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4863 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4864 
4865 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4866 }
4867 
4868 
4869 status_t
4870 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4871 {
4872 	status_t status = FS_CALL(vnode, read_stat, stat);
4873 
4874 	// fill in the st_dev and st_ino fields
4875 	if (status == B_OK) {
4876 		stat->st_dev = vnode->device;
4877 		stat->st_ino = vnode->id;
4878 		// the rdev field must stay unset for non-special files
4879 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4880 			stat->st_rdev = -1;
4881 	}
4882 
4883 	return status;
4884 }
4885 
4886 
4887 status_t
4888 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4889 {
4890 	struct vnode* vnode;
4891 	status_t status = get_vnode(device, inode, &vnode, true, false);
4892 	if (status != B_OK)
4893 		return status;
4894 
4895 	status = vfs_stat_vnode(vnode, stat);
4896 
4897 	put_vnode(vnode);
4898 	return status;
4899 }
4900 
4901 
4902 status_t
4903 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4904 {
4905 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4906 }
4907 
4908 
4909 status_t
4910 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4911 	bool kernel, char* path, size_t pathLength)
4912 {
4913 	struct vnode* vnode;
4914 	status_t status;
4915 
4916 	// filter invalid leaf names
4917 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4918 		return B_BAD_VALUE;
4919 
4920 	// get the vnode matching the dir's node_ref
4921 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4922 		// special cases "." and "..": we can directly get the vnode of the
4923 		// referenced directory
4924 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, &vnode);
4925 		leaf = NULL;
4926 	} else
4927 		status = get_vnode(device, inode, &vnode, true, false);
4928 	if (status != B_OK)
4929 		return status;
4930 
4931 	// get the directory path
4932 	status = dir_vnode_to_path(vnode, path, pathLength, kernel);
4933 	put_vnode(vnode);
4934 		// we don't need the vnode anymore
4935 	if (status != B_OK)
4936 		return status;
4937 
4938 	// append the leaf name
4939 	if (leaf) {
4940 		// insert a directory separator if this is not the file system root
4941 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4942 				>= pathLength)
4943 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4944 			return B_NAME_TOO_LONG;
4945 		}
4946 	}
4947 
4948 	return B_OK;
4949 }
4950 
4951 
4952 /*!	If the given descriptor locked its vnode, that lock will be released. */
4953 void
4954 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4955 {
4956 	struct vnode* vnode = fd_vnode(descriptor);
4957 
4958 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4959 		vnode->mandatory_locked_by = NULL;
4960 }
4961 
4962 
4963 /*!	Releases any POSIX locks on the file descriptor. */
4964 status_t
4965 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4966 {
4967 	struct vnode* vnode = descriptor->u.vnode;
4968 	if (vnode == NULL)
4969 		return B_OK;
4970 
4971 	if (HAS_FS_CALL(vnode, release_lock))
4972 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4973 
4974 	return release_advisory_lock(vnode, context, NULL, NULL);
4975 }
4976 
4977 
4978 /*!	Closes all file descriptors of the specified I/O context that
4979 	have the O_CLOEXEC flag set.
4980 */
4981 void
4982 vfs_exec_io_context(io_context* context)
4983 {
4984 	uint32 i;
4985 
4986 	for (i = 0; i < context->table_size; i++) {
4987 		mutex_lock(&context->io_mutex);
4988 
4989 		struct file_descriptor* descriptor = context->fds[i];
4990 		bool remove = false;
4991 
4992 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4993 			context->fds[i] = NULL;
4994 			context->num_used_fds--;
4995 
4996 			remove = true;
4997 		}
4998 
4999 		mutex_unlock(&context->io_mutex);
5000 
5001 		if (remove) {
5002 			close_fd(context, descriptor);
5003 			put_fd(descriptor);
5004 		}
5005 	}
5006 }
5007 
5008 
5009 /*! Sets up a new io_context structure, and inherits the properties
5010 	of the parent io_context if it is given.
5011 */
5012 io_context*
5013 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
5014 {
5015 	io_context* context = (io_context*)malloc(sizeof(io_context));
5016 	if (context == NULL)
5017 		return NULL;
5018 
5019 	TIOC(NewIOContext(context, parentContext));
5020 
5021 	memset(context, 0, sizeof(io_context));
5022 	context->ref_count = 1;
5023 
5024 	MutexLocker parentLocker;
5025 
5026 	size_t tableSize;
5027 	if (parentContext != NULL) {
5028 		parentLocker.SetTo(parentContext->io_mutex, false);
5029 		tableSize = parentContext->table_size;
5030 	} else
5031 		tableSize = DEFAULT_FD_TABLE_SIZE;
5032 
5033 	// allocate space for FDs and their close-on-exec flag
5034 	context->fds = (file_descriptor**)malloc(
5035 		sizeof(struct file_descriptor*) * tableSize
5036 		+ sizeof(struct select_sync*) * tableSize
5037 		+ (tableSize + 7) / 8);
5038 	if (context->fds == NULL) {
5039 		free(context);
5040 		return NULL;
5041 	}
5042 
5043 	context->select_infos = (select_info**)(context->fds + tableSize);
5044 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
5045 
5046 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
5047 		+ sizeof(struct select_sync*) * tableSize
5048 		+ (tableSize + 7) / 8);
5049 
5050 	mutex_init(&context->io_mutex, "I/O context");
5051 
5052 	// Copy all parent file descriptors
5053 
5054 	if (parentContext != NULL) {
5055 		size_t i;
5056 
5057 		mutex_lock(&sIOContextRootLock);
5058 		context->root = parentContext->root;
5059 		if (context->root)
5060 			inc_vnode_ref_count(context->root);
5061 		mutex_unlock(&sIOContextRootLock);
5062 
5063 		context->cwd = parentContext->cwd;
5064 		if (context->cwd)
5065 			inc_vnode_ref_count(context->cwd);
5066 
5067 		if (parentContext->inherit_fds) {
5068 			for (i = 0; i < tableSize; i++) {
5069 				struct file_descriptor* descriptor = parentContext->fds[i];
5070 
5071 				if (descriptor != NULL
5072 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
5073 					bool closeOnExec = fd_close_on_exec(parentContext, i);
5074 					if (closeOnExec && purgeCloseOnExec)
5075 						continue;
5076 
5077 					TFD(InheritFD(context, i, descriptor, parentContext));
5078 
5079 					context->fds[i] = descriptor;
5080 					context->num_used_fds++;
5081 					atomic_add(&descriptor->ref_count, 1);
5082 					atomic_add(&descriptor->open_count, 1);
5083 
5084 					if (closeOnExec)
5085 						fd_set_close_on_exec(context, i, true);
5086 				}
5087 			}
5088 		}
5089 
5090 		parentLocker.Unlock();
5091 	} else {
5092 		context->root = sRoot;
5093 		context->cwd = sRoot;
5094 
5095 		if (context->root)
5096 			inc_vnode_ref_count(context->root);
5097 
5098 		if (context->cwd)
5099 			inc_vnode_ref_count(context->cwd);
5100 	}
5101 
5102 	context->table_size = tableSize;
5103 	context->inherit_fds = parentContext != NULL;
5104 
5105 	list_init(&context->node_monitors);
5106 	context->max_monitors = DEFAULT_NODE_MONITORS;
5107 
5108 	return context;
5109 }
5110 
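
// Hedged sketch: conceptually, team creation inherits everything, while
// exec*() drops close-on-exec descriptors (the actual team code is more
// involved; "parentContext" is hypothetical here).
#if 0
	// fork-like: inherit all FDs, including those marked O_CLOEXEC
	io_context* forkContext = vfs_new_io_context(parentContext, false);

	// exec-like: inherit, but purge descriptors marked O_CLOEXEC
	io_context* execContext = vfs_new_io_context(parentContext, true);
#endif
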
5111 
5112 void
5113 vfs_get_io_context(io_context* context)
5114 {
5115 	atomic_add(&context->ref_count, 1);
5116 }
5117 
5118 
5119 void
5120 vfs_put_io_context(io_context* context)
5121 {
5122 	if (atomic_add(&context->ref_count, -1) == 1)
5123 		free_io_context(context);
5124 }
5125 
5126 
5127 status_t
5128 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5129 {
5130 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5131 		return B_BAD_VALUE;
5132 
5133 	TIOC(ResizeIOContext(context, newSize));
5134 
5135 	MutexLocker _(context->io_mutex);
5136 
5137 	uint32 oldSize = context->table_size;
5138 	int oldCloseOnExitBitmapSize = (oldSize + 7) / 8;
5139 	int newCloseOnExitBitmapSize = (newSize + 7) / 8;
5140 
5141 	// If the tables shrink, make sure none of the fds being dropped are in use.
5142 	if (newSize < oldSize) {
5143 		for (uint32 i = oldSize; i-- > newSize;) {
5144 			if (context->fds[i])
5145 				return B_BUSY;
5146 		}
5147 	}
5148 
5149 	// store pointers to the old tables
5150 	file_descriptor** oldFDs = context->fds;
5151 	select_info** oldSelectInfos = context->select_infos;
5152 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5153 
5154 	// allocate new tables
5155 	file_descriptor** newFDs = (file_descriptor**)malloc(
5156 		sizeof(struct file_descriptor*) * newSize
5157 		+ sizeof(struct select_sync*) * newSize
5158 		+ newCloseOnExitBitmapSize);
5159 	if (newFDs == NULL)
5160 		return B_NO_MEMORY;
5161 
5162 	context->fds = newFDs;
5163 	context->select_infos = (select_info**)(context->fds + newSize);
5164 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5165 	context->table_size = newSize;
5166 
5167 	// copy entries from old tables
5168 	uint32 toCopy = min_c(oldSize, newSize);
5169 
5170 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5171 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5172 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5173 		min_c(oldCloseOnExitBitmapSize, newCloseOnExitBitmapSize));
5174 
5175 	// clear additional entries, if the tables grow
5176 	if (newSize > oldSize) {
5177 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5178 		memset(context->select_infos + oldSize, 0,
5179 			sizeof(void*) * (newSize - oldSize));
5180 		memset(context->fds_close_on_exec + oldCloseOnExitBitmapSize, 0,
5181 			newCloseOnExitBitmapSize - oldCloseOnExitBitmapSize);
5182 	}
5183 
5184 	free(oldFDs);
5185 
5186 	return B_OK;
5187 }
5188 
5189 
5190 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5191 
5192 	Given an arbitrary vnode (identified by mount and node ID), the function
5193 	checks whether the vnode is covered by another vnode. If it is, the
5194 	function returns the mount and node ID of the covering vnode. Otherwise
5195 	it simply returns the supplied mount and node ID.
5196 
5197 	In case of error (e.g. the supplied node could not be found) the variables
5198 	for storing the resolved mount and node ID remain untouched and an error
5199 	code is returned.
5200 
5201 	\param mountID The mount ID of the vnode in question.
5202 	\param nodeID The node ID of the vnode in question.
5203 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5204 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5205 	\return
5206 	- \c B_OK, if everything went fine,
5207 	- another error code, if something went wrong.
5208 */
5209 status_t
5210 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5211 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5212 {
5213 	// get the node
5214 	struct vnode* node;
5215 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5216 	if (error != B_OK)
5217 		return error;
5218 
5219 	// resolve the node
5220 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5221 		put_vnode(node);
5222 		node = coveringNode;
5223 	}
5224 
5225 	// set the return values
5226 	*resolvedMountID = node->device;
5227 	*resolvedNodeID = node->id;
5228 
5229 	put_vnode(node);
5230 
5231 	return B_OK;
5232 }
5233 
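
// Illustrative example: if a volume is mounted at "/mnt", the "/mnt"
// directory vnode of the parent file system is covered by the root vnode
// of the mounted volume. For that directory this function thus returns the
// mounted volume's root instead of the covered node itself.
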
5234 
5235 status_t
5236 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5237 	ino_t* _mountPointNodeID)
5238 {
5239 	ReadLocker nodeLocker(sVnodeLock);
5240 	ReadLocker mountLocker(sMountLock);
5241 
5242 	struct fs_mount* mount = find_mount(mountID);
5243 	if (mount == NULL)
5244 		return B_BAD_VALUE;
5245 
5246 	Vnode* mountPoint = mount->covers_vnode;
5247 
5248 	*_mountPointMountID = mountPoint->device;
5249 	*_mountPointNodeID = mountPoint->id;
5250 
5251 	return B_OK;
5252 }
5253 
5254 
5255 status_t
5256 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5257 	ino_t coveredNodeID)
5258 {
5259 	// get the vnodes
5260 	Vnode* vnode;
5261 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5262 	if (error != B_OK)
5263 		return B_BAD_VALUE;
5264 	VNodePutter vnodePutter(vnode);
5265 
5266 	Vnode* coveredVnode;
5267 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5268 		false);
5269 	if (error != B_OK)
5270 		return B_BAD_VALUE;
5271 	VNodePutter coveredVnodePutter(coveredVnode);
5272 
5273 	// establish the covered/covering links
5274 	WriteLocker locker(sVnodeLock);
5275 
5276 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5277 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5278 		return B_BUSY;
5279 	}
5280 
5281 	vnode->covers = coveredVnode;
5282 	vnode->SetCovering(true);
5283 
5284 	coveredVnode->covered_by = vnode;
5285 	coveredVnode->SetCovered(true);
5286 
5287 	// the vnodes now reference each other
5288 	inc_vnode_ref_count(vnode);
5289 	inc_vnode_ref_count(coveredVnode);
5290 
5291 	return B_OK;
5292 }
5293 
5294 
5295 int
5296 vfs_getrlimit(int resource, struct rlimit* rlp)
5297 {
5298 	if (!rlp)
5299 		return B_BAD_ADDRESS;
5300 
5301 	switch (resource) {
5302 		case RLIMIT_NOFILE:
5303 		{
5304 			struct io_context* context = get_current_io_context(false);
5305 			MutexLocker _(context->io_mutex);
5306 
5307 			rlp->rlim_cur = context->table_size;
5308 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5309 			return 0;
5310 		}
5311 
5312 		case RLIMIT_NOVMON:
5313 		{
5314 			struct io_context* context = get_current_io_context(false);
5315 			MutexLocker _(context->io_mutex);
5316 
5317 			rlp->rlim_cur = context->max_monitors;
5318 			rlp->rlim_max = MAX_NODE_MONITORS;
5319 			return 0;
5320 		}
5321 
5322 		default:
5323 			return B_BAD_VALUE;
5324 	}
5325 }
5326 
5327 
5328 int
5329 vfs_setrlimit(int resource, const struct rlimit* rlp)
5330 {
5331 	if (!rlp)
5332 		return B_BAD_ADDRESS;
5333 
5334 	switch (resource) {
5335 		case RLIMIT_NOFILE:
5336 			/* TODO: check getuid() */
5337 			if (rlp->rlim_max != RLIM_SAVED_MAX
5338 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5339 				return B_NOT_ALLOWED;
5340 
5341 			return vfs_resize_fd_table(get_current_io_context(false),
5342 				rlp->rlim_cur);
5343 
5344 		case RLIMIT_NOVMON:
5345 			/* TODO: check getuid() */
5346 			if (rlp->rlim_max != RLIM_SAVED_MAX
5347 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5348 				return B_NOT_ALLOWED;
5349 
5350 			return resize_monitor_table(get_current_io_context(false),
5351 				rlp->rlim_cur);
5352 
5353 		default:
5354 			return B_BAD_VALUE;
5355 	}
5356 }
5357 
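
// Hedged usage sketch (fragment): these two functions back the POSIX
// getrlimit()/setrlimit() calls, so raising the FD limit ends up in
// vfs_resize_fd_table(). Note that vfs_setrlimit() only succeeds if
// rlim_max is left at the value reported by vfs_getrlimit().
#if 0
	struct rlimit rl;
	if (vfs_getrlimit(RLIMIT_NOFILE, &rl) == 0) {
		rl.rlim_cur = 1024;	// request a larger FD table
		vfs_setrlimit(RLIMIT_NOFILE, &rl);
	}
#endif
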
5358 
5359 status_t
5360 vfs_init(kernel_args* args)
5361 {
5362 	vnode::StaticInit();
5363 
5364 	sVnodeTable = new(std::nothrow) VnodeTable();
5365 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5366 		panic("vfs_init: error creating vnode hash table\n");
5367 
5368 	struct vnode dummy_vnode;
5369 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5370 
5371 	struct fs_mount dummyMount;
5372 	sMountsTable = new(std::nothrow) MountTable();
5373 	if (sMountsTable == NULL
5374 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5375 		panic("vfs_init: error creating mounts hash table\n");
5376 
5377 	sPathNameCache = create_object_cache("vfs path names",
5378 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5379 	if (sPathNameCache == NULL)
5380 		panic("vfs_init: error creating path name object_cache\n");
5381 
5382 	sFileDescriptorCache = create_object_cache("vfs fds",
5383 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5384 	if (sFileDescriptorCache == NULL)
5385 		panic("vfs_init: error creating file descriptor object_cache\n");
5386 
5387 	node_monitor_init();
5388 
5389 	sRoot = NULL;
5390 
5391 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5392 
5393 	if (block_cache_init() != B_OK)
5394 		return B_ERROR;
5395 
5396 #ifdef ADD_DEBUGGER_COMMANDS
5397 	// add some debugger commands
5398 	add_debugger_command_etc("vnode", &dump_vnode,
5399 		"Print info about the specified vnode",
5400 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5401 		"Prints information about the vnode specified by address <vnode> or\n"
5402 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5403 		"constructed and printed. It might not be possible to construct a\n"
5404 		"complete path, though.\n",
5405 		0);
5406 	add_debugger_command("vnodes", &dump_vnodes,
5407 		"list all vnodes (from the specified device)");
5408 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5409 		"list all vnode caches");
5410 	add_debugger_command("mount", &dump_mount,
5411 		"info about the specified fs_mount");
5412 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5413 	add_debugger_command("io_context", &dump_io_context,
5414 		"info about the I/O context");
5415 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5416 		"info about vnode usage");
5417 #endif
5418 
5419 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5420 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5421 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5422 		0);
5423 
5424 	fifo_init();
5425 	file_map_init();
5426 
5427 	return file_cache_init();
5428 }
5429 
5430 
5431 //	#pragma mark - fd_ops implementations
5432 
5433 
5434 /*!
5435 	Calls fs_open() on the given vnode and returns a new
5436 	file descriptor for it
5437 */
5438 static int
5439 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5440 {
5441 	void* cookie;
5442 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5443 	if (status != B_OK)
5444 		return status;
5445 
5446 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5447 	if (fd < 0) {
5448 		FS_CALL(vnode, close, cookie);
5449 		FS_CALL(vnode, free_cookie, cookie);
5450 	}
5451 	return fd;
5452 }
5453 
5454 
5455 /*!
5456 	Creates the entry \a name in \a directory, or opens the existing node
5457 	(unless O_EXCL is given), and returns a new file descriptor for it
5458 */
5459 static int
5460 create_vnode(struct vnode* directory, const char* name, int openMode,
5461 	int perms, bool kernel)
5462 {
5463 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5464 	status_t status = B_ERROR;
5465 	struct vnode* vnode;
5466 	void* cookie;
5467 	ino_t newID;
5468 
5469 	// This is somewhat tricky: If the entry already exists, the FS responsible
5470 	// for the directory might not necessarily also be the one responsible for
5471 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5472 	// we can actually never call the create() hook without O_EXCL. Instead we
5473 	// try to look the entry up first. If it already exists, we just open the
5474 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5475 	// introduces a race condition, since someone else might have created the
5476 	// entry in the meantime. We hope the respective FS returns the correct
5477 	// error code in that case, and retry (up to 3 times).
5478 
5479 	for (int i = 0; i < 3 && status != B_OK; i++) {
5480 		// look the node up
5481 		status = lookup_dir_entry(directory, name, &vnode);
5482 		if (status == B_OK) {
5483 			VNodePutter putter(vnode);
5484 
5485 			if ((openMode & O_EXCL) != 0)
5486 				return B_FILE_EXISTS;
5487 
5488 			// If the node is a symlink, we have to follow it, unless
5489 			// O_NOTRAVERSE is set.
5490 			if (S_ISLNK(vnode->Type()) && traverse) {
5491 				putter.Put();
5492 				char clonedName[B_FILE_NAME_LENGTH + 1];
5493 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5494 						>= B_FILE_NAME_LENGTH) {
5495 					return B_NAME_TOO_LONG;
5496 				}
5497 
5498 				inc_vnode_ref_count(directory);
5499 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5500 					kernel, &vnode, NULL);
5501 				if (status != B_OK)
5502 					return status;
5503 
5504 				putter.SetTo(vnode);
5505 			}
5506 
5507 			if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5508 				return B_LINK_LIMIT;
5509 
5510 			int fd = open_vnode(vnode, openMode & ~O_CREAT, kernel);
5511 			// on success keep the vnode reference for the FD
5512 			if (fd >= 0)
5513 				putter.Detach();
5514 
5515 			return fd;
5516 		}
5517 
5518 		// it doesn't exist yet -- try to create it
5519 
5520 		if (!HAS_FS_CALL(directory, create))
5521 			return B_READ_ONLY_DEVICE;
5522 
5523 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5524 			&cookie, &newID);
5525 		if (status != B_OK
5526 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5527 			return status;
5528 		}
5529 	}
5530 
5531 	if (status != B_OK)
5532 		return status;
5533 
5534 	// the node has been created successfully
5535 
5536 	rw_lock_read_lock(&sVnodeLock);
5537 	vnode = lookup_vnode(directory->device, newID);
5538 	rw_lock_read_unlock(&sVnodeLock);
5539 
5540 	if (vnode == NULL) {
5541 		panic("vfs: fs_create() returned success but there is no vnode, "
5542 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5543 		return B_BAD_VALUE;
5544 	}
5545 
5546 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5547 	if (fd >= 0)
5548 		return fd;
5549 
5550 	status = fd;
5551 
5552 	// something went wrong, clean up
5553 
5554 	FS_CALL(vnode, close, cookie);
5555 	FS_CALL(vnode, free_cookie, cookie);
5556 	put_vnode(vnode);
5557 
5558 	FS_CALL(directory, unlink, name);
5559 
5560 	return status;
5561 }
5562 
5563 
5564 /*! Calls fs open_dir() on the given vnode and returns a new
5565 	file descriptor for it
5566 */
5567 static int
5568 open_dir_vnode(struct vnode* vnode, bool kernel)
5569 {
5570 	void* cookie;
5571 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5572 	if (status != B_OK)
5573 		return status;
5574 
5575 	// directory is opened, create a fd
5576 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5577 	if (status >= 0)
5578 		return status;
5579 
5580 	FS_CALL(vnode, close_dir, cookie);
5581 	FS_CALL(vnode, free_dir_cookie, cookie);
5582 
5583 	return status;
5584 }
5585 
5586 
5587 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5588 	file descriptor for it.
5589 	Used by attr_dir_open(), and attr_dir_open_fd().
5590 */
5591 static int
5592 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5593 {
5594 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5595 		return B_UNSUPPORTED;
5596 
5597 	void* cookie;
5598 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5599 	if (status != B_OK)
5600 		return status;
5601 
5602 	// directory is opened, create a fd
5603 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5604 		kernel);
5605 	if (status >= 0)
5606 		return status;
5607 
5608 	FS_CALL(vnode, close_attr_dir, cookie);
5609 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5610 
5611 	return status;
5612 }
5613 
5614 
5615 static int
5616 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5617 	int openMode, int perms, bool kernel)
5618 {
5619 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5620 		"kernel %d\n", name, openMode, perms, kernel));
5621 
5622 	// get directory to put the new file in
5623 	struct vnode* directory;
5624 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5625 	if (status != B_OK)
5626 		return status;
5627 
5628 	status = create_vnode(directory, name, openMode, perms, kernel);
5629 	put_vnode(directory);
5630 
5631 	return status;
5632 }
5633 
5634 
5635 static int
5636 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5637 {
5638 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5639 		openMode, perms, kernel));
5640 
5641 	// get directory to put the new file in
5642 	char name[B_FILE_NAME_LENGTH];
5643 	struct vnode* directory;
5644 	status_t status = fd_and_path_to_dir_vnode(fd, path, &directory, name,
5645 		kernel);
5646 	if (status < 0)
5647 		return status;
5648 
5649 	status = create_vnode(directory, name, openMode, perms, kernel);
5650 
5651 	put_vnode(directory);
5652 	return status;
5653 }
5654 
5655 
5656 static int
5657 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5658 	int openMode, bool kernel)
5659 {
5660 	if (name == NULL || *name == '\0')
5661 		return B_BAD_VALUE;
5662 
5663 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5664 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5665 
5666 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5667 
5668 	// get the vnode matching the entry_ref
5669 	struct vnode* vnode;
5670 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5671 		kernel, &vnode);
5672 	if (status != B_OK)
5673 		return status;
5674 
5675 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5676 		put_vnode(vnode);
5677 		return B_LINK_LIMIT;
5678 	}
5679 
5680 	int newFD = open_vnode(vnode, openMode, kernel);
5681 	if (newFD >= 0) {
5682 		// The vnode reference has been transferred to the FD
5683 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache, mountID,
5684 			directoryID, vnode->id, name);
5685 	} else
5686 		put_vnode(vnode);
5687 
5688 	return newFD;
5689 }
5690 
5691 
5692 static int
5693 file_open(int fd, char* path, int openMode, bool kernel)
5694 {
5695 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5696 
5697 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5698 		fd, path, openMode, kernel));
5699 
5700 	// get the vnode matching the vnode + path combination
5701 	struct vnode* vnode;
5702 	ino_t parentID;
5703 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode,
5704 		&parentID, kernel);
5705 	if (status != B_OK)
5706 		return status;
5707 
5708 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
5709 		put_vnode(vnode);
5710 		return B_LINK_LIMIT;
5711 	}
5712 
5713 	// open the vnode
5714 	int newFD = open_vnode(vnode, openMode, kernel);
5715 	if (newFD >= 0) {
5716 		// The vnode reference has been transferred to the FD
5717 		cache_node_opened(vnode, FDTYPE_FILE, vnode->cache,
5718 			vnode->device, parentID, vnode->id, NULL);
5719 	} else
5720 		put_vnode(vnode);
5721 
5722 	return newFD;
5723 }
5724 
5725 
5726 static status_t
5727 file_close(struct file_descriptor* descriptor)
5728 {
5729 	struct vnode* vnode = descriptor->u.vnode;
5730 	status_t status = B_OK;
5731 
5732 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5733 
5734 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5735 		vnode->id);
5736 	if (HAS_FS_CALL(vnode, close)) {
5737 		status = FS_CALL(vnode, close, descriptor->cookie);
5738 	}
5739 
5740 	if (status == B_OK) {
5741 		// remove all outstanding locks for this team
5742 		if (HAS_FS_CALL(vnode, release_lock))
5743 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5744 		else
5745 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5746 	}
5747 	return status;
5748 }
5749 
5750 
5751 static void
5752 file_free_fd(struct file_descriptor* descriptor)
5753 {
5754 	struct vnode* vnode = descriptor->u.vnode;
5755 
5756 	if (vnode != NULL) {
5757 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5758 		put_vnode(vnode);
5759 	}
5760 }
5761 
5762 
5763 static status_t
5764 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5765 	size_t* length)
5766 {
5767 	struct vnode* vnode = descriptor->u.vnode;
5768 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5769 		pos, length, *length));
5770 
5771 	if (S_ISDIR(vnode->Type()))
5772 		return B_IS_A_DIRECTORY;
5773 
5774 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5775 }
5776 
5777 
5778 static status_t
5779 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5780 	size_t* length)
5781 {
5782 	struct vnode* vnode = descriptor->u.vnode;
5783 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5784 		length));
5785 
5786 	if (S_ISDIR(vnode->Type()))
5787 		return B_IS_A_DIRECTORY;
5788 	if (!HAS_FS_CALL(vnode, write))
5789 		return B_READ_ONLY_DEVICE;
5790 
5791 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5792 }
5793 
5794 
5795 static off_t
5796 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5797 {
5798 	struct vnode* vnode = descriptor->u.vnode;
5799 	off_t offset;
5800 	bool isDevice = false;
5801 
5802 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5803 		seekType));
5804 
5805 	// some kinds of files are not seekable
5806 	switch (vnode->Type() & S_IFMT) {
5807 		case S_IFIFO:
5808 		case S_IFSOCK:
5809 			return ESPIPE;
5810 
5811 		// drivers publish block devices as character devices, so handle both
5812 		case S_IFBLK:
5813 		case S_IFCHR:
5814 			isDevice = true;
5815 			break;
5816 		// The Open Group Base Specs don't single out any file types besides
5817 		// pipes, FIFOs, and sockets, so we allow seeking all other types.
5818 		case S_IFREG:
5819 		case S_IFDIR:
5820 		case S_IFLNK:
5821 			break;
5822 	}
5823 
5824 	switch (seekType) {
5825 		case SEEK_SET:
5826 			offset = 0;
5827 			break;
5828 		case SEEK_CUR:
5829 			offset = descriptor->pos;
5830 			break;
5831 		case SEEK_END:
5832 		{
5833 			// stat() the node
5834 			if (!HAS_FS_CALL(vnode, read_stat))
5835 				return B_UNSUPPORTED;
5836 
5837 			struct stat stat;
5838 			status_t status = FS_CALL(vnode, read_stat, &stat);
5839 			if (status != B_OK)
5840 				return status;
5841 
5842 			offset = stat.st_size;
5843 
5844 			if (offset == 0 && isDevice) {
5845 				// stat() on regular drivers doesn't report size
5846 				device_geometry geometry;
5847 
5848 				if (HAS_FS_CALL(vnode, ioctl)) {
5849 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5850 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5851 					if (status == B_OK)
5852 						offset = (off_t)geometry.bytes_per_sector
5853 							* geometry.sectors_per_track
5854 							* geometry.cylinder_count
5855 							* geometry.head_count;
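							// sector size * sectors per track * cylinders
							// * heads = raw device capacity in bytes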
5856 				}
5857 			}
5858 
5859 			break;
5860 		}
5861 		case SEEK_DATA:
5862 		case SEEK_HOLE:
5863 		{
5864 			status_t status = B_BAD_VALUE;
5865 			if (HAS_FS_CALL(vnode, ioctl)) {
5866 				offset = pos;
5867 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5868 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5869 					&offset, sizeof(offset));
5870 				if (status == B_OK) {
5871 					if (offset > pos)
5872 						offset -= pos;
5873 					break;
5874 				}
5875 			}
5876 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5877 				return status;
5878 
5879 			// fall back to a basic implementation that stat()s the node
5880 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5881 				return B_BAD_VALUE;
5882 
5883 			struct stat stat;
5884 			status = FS_CALL(vnode, read_stat, &stat);
5885 			if (status != B_OK)
5886 				return status;
5887 
5888 			off_t end = stat.st_size;
5889 			if (pos >= end)
5890 				return ENXIO;
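			// Without FS support the whole file counts as one data region:
			// SEEK_DATA keeps the given position, SEEK_HOLE lands at the end
			// of the file.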
5891 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5892 			break;
5893 		}
5894 		default:
5895 			return B_BAD_VALUE;
5896 	}
5897 
5898 	// assumes off_t is 64 bits wide
5899 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5900 		return B_BUFFER_OVERFLOW;
5901 
5902 	pos += offset;
5903 	if (pos < 0)
5904 		return B_BAD_VALUE;
5905 
5906 	return descriptor->pos = pos;
5907 }
5908 
5909 
5910 static status_t
5911 file_select(struct file_descriptor* descriptor, uint8 event,
5912 	struct selectsync* sync)
5913 {
5914 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5915 
5916 	struct vnode* vnode = descriptor->u.vnode;
5917 
5918 	// If the FS has no select() hook, notify select() now.
5919 	if (!HAS_FS_CALL(vnode, select)) {
5920 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5921 			return notify_select_event(sync, event);
5922 		else
5923 			return B_OK;
5924 	}
5925 
5926 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5927 }
5928 
5929 
5930 static status_t
5931 file_deselect(struct file_descriptor* descriptor, uint8 event,
5932 	struct selectsync* sync)
5933 {
5934 	struct vnode* vnode = descriptor->u.vnode;
5935 
5936 	if (!HAS_FS_CALL(vnode, deselect))
5937 		return B_OK;
5938 
5939 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5940 }
5941 
5942 
5943 static status_t
5944 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5945 	bool kernel)
5946 {
5947 	struct vnode* vnode;
5948 	status_t status;
5949 
5950 	if (name == NULL || *name == '\0')
5951 		return B_BAD_VALUE;
5952 
5953 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5954 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5955 
5956 	status = get_vnode(mountID, parentID, &vnode, true, false);
5957 	if (status != B_OK)
5958 		return status;
5959 
5960 	if (HAS_FS_CALL(vnode, create_dir))
5961 		status = FS_CALL(vnode, create_dir, name, perms);
5962 	else
5963 		status = B_READ_ONLY_DEVICE;
5964 
5965 	put_vnode(vnode);
5966 	return status;
5967 }
5968 
5969 
5970 static status_t
5971 dir_create(int fd, char* path, int perms, bool kernel)
5972 {
5973 	char filename[B_FILE_NAME_LENGTH];
5974 	struct vnode* vnode;
5975 	status_t status;
5976 
5977 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5978 		kernel));
5979 
5980 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
5981 	if (status < 0)
5982 		return status;
5983 
5984 	if (HAS_FS_CALL(vnode, create_dir)) {
5985 		status = FS_CALL(vnode, create_dir, filename, perms);
5986 	} else
5987 		status = B_READ_ONLY_DEVICE;
5988 
5989 	put_vnode(vnode);
5990 	return status;
5991 }
5992 
5993 
5994 static int
5995 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5996 {
5997 	FUNCTION(("dir_open_entry_ref()\n"));
5998 
5999 	if (name && name[0] == '\0')
6000 		return B_BAD_VALUE;
6001 
6002 	// get the vnode matching the entry_ref/node_ref
6003 	struct vnode* vnode;
6004 	status_t status;
6005 	if (name) {
6006 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
6007 			&vnode);
6008 	} else
6009 		status = get_vnode(mountID, parentID, &vnode, true, false);
6010 	if (status != B_OK)
6011 		return status;
6012 
6013 	int newFD = open_dir_vnode(vnode, kernel);
6014 	if (newFD >= 0) {
6015 		// The vnode reference has been transferred to the FD
6016 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, mountID, parentID,
6017 			vnode->id, name);
6018 	} else
6019 		put_vnode(vnode);
6020 
6021 	return newFD;
6022 }
6023 
6024 
6025 static int
6026 dir_open(int fd, char* path, bool kernel)
6027 {
6028 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
6029 		kernel));
6030 
6031 	// get the vnode matching the vnode + path combination
6032 	struct vnode* vnode = NULL;
6033 	ino_t parentID;
6034 	status_t status = fd_and_path_to_vnode(fd, path, true, &vnode, &parentID,
6035 		kernel);
6036 	if (status != B_OK)
6037 		return status;
6038 
6039 	// open the dir
6040 	int newFD = open_dir_vnode(vnode, kernel);
6041 	if (newFD >= 0) {
6042 		// The vnode reference has been transferred to the FD
6043 		cache_node_opened(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6044 			parentID, vnode->id, NULL);
6045 	} else
6046 		put_vnode(vnode);
6047 
6048 	return newFD;
6049 }
6050 
6051 
6052 static status_t
6053 dir_close(struct file_descriptor* descriptor)
6054 {
6055 	struct vnode* vnode = descriptor->u.vnode;
6056 
6057 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
6058 
6059 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
6060 		vnode->id);
6061 	if (HAS_FS_CALL(vnode, close_dir))
6062 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6063 
6064 	return B_OK;
6065 }
6066 
6067 
6068 static void
6069 dir_free_fd(struct file_descriptor* descriptor)
6070 {
6071 	struct vnode* vnode = descriptor->u.vnode;
6072 
6073 	if (vnode != NULL) {
6074 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6075 		put_vnode(vnode);
6076 	}
6077 }
6078 
6079 
6080 static status_t
6081 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6082 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6083 {
6084 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6085 		bufferSize, _count);
6086 }
6087 
6088 
6089 static status_t
6090 fix_dirent(struct vnode* parent, struct dirent* entry,
6091 	struct io_context* ioContext)
6092 {
6093 	// set d_pdev and d_pino
6094 	entry->d_pdev = parent->device;
6095 	entry->d_pino = parent->id;
6096 
6097 	// If this is the ".." entry and the directory is covering another vnode,
6098 	// we need to replace d_dev and d_ino with the actual values.
6099 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6100 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6101 			ioContext);
6102 	}
6103 
6104 	// resolve covered vnodes
6105 	ReadLocker _(&sVnodeLock);
6106 
6107 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6108 	if (vnode != NULL && vnode->covered_by != NULL) {
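		// with stacked mounts a vnode can be covered more than once; report
		// the topmost covering vnode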
6109 		do {
6110 			vnode = vnode->covered_by;
6111 		} while (vnode->covered_by != NULL);
6112 
6113 		entry->d_dev = vnode->device;
6114 		entry->d_ino = vnode->id;
6115 	}
6116 
6117 	return B_OK;
6118 }
6119 
6120 
6121 static status_t
6122 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6123 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6124 {
6125 	if (!HAS_FS_CALL(vnode, read_dir))
6126 		return B_UNSUPPORTED;
6127 
6128 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6129 		_count);
6130 	if (error != B_OK)
6131 		return error;
6132 
6133 	// we need to adjust the read dirents
6134 	uint32 count = *_count;
6135 	for (uint32 i = 0; i < count; i++) {
6136 		error = fix_dirent(vnode, buffer, ioContext);
6137 		if (error != B_OK)
6138 			return error;
6139 
6140 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
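			// dirents are variable-length; d_reclen is the stride from one
			// entry to the next within the buffer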
6141 	}
6142 
6143 	return error;
6144 }
6145 
6146 
6147 static status_t
6148 dir_rewind(struct file_descriptor* descriptor)
6149 {
6150 	struct vnode* vnode = descriptor->u.vnode;
6151 
6152 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6153 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6154 	}
6155 
6156 	return B_UNSUPPORTED;
6157 }
6158 
6159 
6160 static status_t
6161 dir_remove(int fd, char* path, bool kernel)
6162 {
6163 	char name[B_FILE_NAME_LENGTH];
6164 	struct vnode* directory;
6165 	status_t status;
6166 
6167 	if (path != NULL) {
6168 		// we need to make sure our path name doesn't end with "/", ".",
6169 		// or ".."
6170 		char* lastSlash;
6171 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6172 			char* leaf = lastSlash + 1;
6173 			if (!strcmp(leaf, ".."))
6174 				return B_NOT_ALLOWED;
6175 
6176 			// omit multiple slashes
6177 			while (lastSlash > path && lastSlash[-1] == '/')
6178 				lastSlash--;
6179 
6180 			if (leaf[0]
6181 				&& strcmp(leaf, ".")) {
6182 				break;
6183 			}
6184 			// "name/" -> "name", or "name/." -> "name"
6185 			lastSlash[0] = '\0';
6186 		}
6187 
6188 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6189 			return B_NOT_ALLOWED;
6190 	}
6191 
6192 	status = fd_and_path_to_dir_vnode(fd, path, &directory, name, kernel);
6193 	if (status != B_OK)
6194 		return status;
6195 
6196 	if (HAS_FS_CALL(directory, remove_dir))
6197 		status = FS_CALL(directory, remove_dir, name);
6198 	else
6199 		status = B_READ_ONLY_DEVICE;
6200 
6201 	put_vnode(directory);
6202 	return status;
6203 }
6204 
6205 
6206 static status_t
6207 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6208 	size_t length)
6209 {
6210 	struct vnode* vnode = descriptor->u.vnode;
6211 
6212 	if (HAS_FS_CALL(vnode, ioctl))
6213 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6214 
6215 	return B_DEV_INVALID_IOCTL;
6216 }
6217 
6218 
6219 static status_t
6220 common_fcntl(int fd, int op, size_t argument, bool kernel)
6221 {
6222 	struct flock flock;
6223 
6224 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6225 		fd, op, argument, kernel ? "kernel" : "user"));
6226 
6227 	struct io_context* context = get_current_io_context(kernel);
6228 
6229 	struct file_descriptor* descriptor = get_fd(context, fd);
6230 	if (descriptor == NULL)
6231 		return B_FILE_ERROR;
6232 
6233 	struct vnode* vnode = fd_vnode(descriptor);
6234 
6235 	status_t status = B_OK;
6236 
6237 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
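		// the lock ops take a struct flock argument; copy it in from the
		// caller before going any further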
6238 		if (descriptor->type != FDTYPE_FILE)
6239 			status = B_BAD_VALUE;
6240 		else if (kernel)
6241 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6242 		else if (user_memcpy(&flock, (struct flock*)argument,
6243 				sizeof(struct flock)) != B_OK)
6244 			status = B_BAD_ADDRESS;
6245 		if (status != B_OK) {
6246 			put_fd(descriptor);
6247 			return status;
6248 		}
6249 	}
6250 
6251 	switch (op) {
6252 		case F_SETFD:
6253 		{
6254 			// Set file descriptor flags
6255 
6256 			// O_CLOEXEC is the only flag available at this time
6257 			mutex_lock(&context->io_mutex);
6258 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6259 			mutex_unlock(&context->io_mutex);
6260 
6261 			status = B_OK;
6262 			break;
6263 		}
6264 
6265 		case F_GETFD:
6266 		{
6267 			// Get file descriptor flags
6268 			mutex_lock(&context->io_mutex);
6269 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6270 			mutex_unlock(&context->io_mutex);
6271 			break;
6272 		}
6273 
6274 		case F_SETFL:
6275 			// Set file descriptor open mode
6276 
6277 			// we only accept changes to O_APPEND and O_NONBLOCK
6278 			argument &= O_APPEND | O_NONBLOCK;
6279 			if (descriptor->ops->fd_set_flags != NULL) {
6280 				status = descriptor->ops->fd_set_flags(descriptor, argument);
6281 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6282 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6283 					(int)argument);
6284 			} else
6285 				status = B_UNSUPPORTED;
6286 
6287 			if (status == B_OK) {
6288 				// update this descriptor's open_mode field
6289 				descriptor->open_mode = (descriptor->open_mode
6290 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6291 			}
6292 
6293 			break;
6294 
6295 		case F_GETFL:
6296 			// Get file descriptor open mode
6297 			status = descriptor->open_mode;
6298 			break;
6299 
6300 		case F_DUPFD:
6301 		case F_DUPFD_CLOEXEC:
6302 		{
6303 			status = new_fd_etc(context, descriptor, (int)argument);
6304 			if (status >= 0) {
6305 				mutex_lock(&context->io_mutex);
6306 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6307 				mutex_unlock(&context->io_mutex);
6308 
6309 				atomic_add(&descriptor->ref_count, 1);
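					// the duplicated FD slot holds its own reference to the
					// descriptor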
6310 			}
6311 			break;
6312 		}
6313 
6314 		case F_GETLK:
6315 			if (vnode != NULL) {
6316 				struct flock normalizedLock;
6317 
6318 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6319 				status = normalize_flock(descriptor, &normalizedLock);
6320 				if (status != B_OK)
6321 					break;
6322 
6323 				if (HAS_FS_CALL(vnode, test_lock)) {
6324 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6325 						&normalizedLock);
6326 				} else
6327 					status = test_advisory_lock(vnode, &normalizedLock);
6328 				if (status == B_OK) {
6329 					if (normalizedLock.l_type == F_UNLCK) {
6330 						// no conflicting lock found, copy back the same struct
6331 						// we were given except change type to F_UNLCK
6332 						flock.l_type = F_UNLCK;
6333 						if (kernel) {
6334 							memcpy((struct flock*)argument, &flock,
6335 								sizeof(struct flock));
6336 						} else {
6337 							status = user_memcpy((struct flock*)argument,
6338 								&flock, sizeof(struct flock));
6339 						}
6340 					} else {
6341 						// a conflicting lock was found, copy back its range and
6342 						// type
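						// (l_len == OFF_MAX is the internal encoding for
						// "lock until end of file"; userland expects 0)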
6343 						if (normalizedLock.l_len == OFF_MAX)
6344 							normalizedLock.l_len = 0;
6345 
6346 						if (kernel) {
6347 							memcpy((struct flock*)argument,
6348 								&normalizedLock, sizeof(struct flock));
6349 						} else {
6350 							status = user_memcpy((struct flock*)argument,
6351 								&normalizedLock, sizeof(struct flock));
6352 						}
6353 					}
6354 				}
6355 			} else
6356 				status = B_BAD_VALUE;
6357 			break;
6358 
6359 		case F_SETLK:
6360 		case F_SETLKW:
6361 			status = normalize_flock(descriptor, &flock);
6362 			if (status != B_OK)
6363 				break;
6364 
6365 			if (vnode == NULL) {
6366 				status = B_BAD_VALUE;
6367 			} else if (flock.l_type == F_UNLCK) {
6368 				if (HAS_FS_CALL(vnode, release_lock)) {
6369 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6370 						&flock);
6371 				} else {
6372 					status = release_advisory_lock(vnode, context, NULL,
6373 						&flock);
6374 				}
6375 			} else {
6376 				// the open mode must match the lock type
6377 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6378 						&& flock.l_type == F_WRLCK)
6379 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6380 						&& flock.l_type == F_RDLCK))
6381 					status = B_FILE_ERROR;
6382 				else {
6383 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6384 						status = FS_CALL(vnode, acquire_lock,
6385 							descriptor->cookie, &flock, op == F_SETLKW);
6386 					} else {
6387 						status = acquire_advisory_lock(vnode, context, NULL,
6388 							&flock, op == F_SETLKW);
6389 					}
6390 				}
6391 			}
6392 			break;
6393 
6394 		// ToDo: add support for more ops?
6395 
6396 		default:
6397 			status = B_BAD_VALUE;
6398 	}
6399 
6400 	put_fd(descriptor);
6401 	return status;
6402 }
6403 
6404 
6405 static status_t
6406 common_sync(int fd, bool kernel)
6407 {
6408 	struct file_descriptor* descriptor;
6409 	struct vnode* vnode;
6410 	status_t status;
6411 
6412 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6413 
6414 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6415 	if (descriptor == NULL)
6416 		return B_FILE_ERROR;
6417 
6418 	if (HAS_FS_CALL(vnode, fsync))
6419 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6420 	else
6421 		status = B_UNSUPPORTED;
6422 
6423 	put_fd(descriptor);
6424 	return status;
6425 }
6426 
6427 
6428 static status_t
6429 common_lock_node(int fd, bool kernel)
6430 {
6431 	struct file_descriptor* descriptor;
6432 	struct vnode* vnode;
6433 
6434 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6435 	if (descriptor == NULL)
6436 		return B_FILE_ERROR;
6437 
6438 	status_t status = B_OK;
6439 
6440 	// We need to set the locking atomically - someone
6441 	// else might set one at the same time
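	// (atomic_pointer_test_and_set() stores the new value only if the
	// current value still equals the expected one and returns the previous
	// value - i.e. a compare-and-swap)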
6442 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by, descriptor,
6443 			(file_descriptor*)NULL) != NULL)
6444 		status = B_BUSY;
6445 
6446 	put_fd(descriptor);
6447 	return status;
6448 }
6449 
6450 
6451 static status_t
6452 common_unlock_node(int fd, bool kernel)
6453 {
6454 	struct file_descriptor* descriptor;
6455 	struct vnode* vnode;
6456 
6457 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6458 	if (descriptor == NULL)
6459 		return B_FILE_ERROR;
6460 
6461 	status_t status = B_OK;
6462 
6463 	// We need to clear the lock atomically - someone
6464 	// else might set or clear one at the same time
6465 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6466 			(file_descriptor*)NULL, descriptor) != descriptor)
6467 		status = B_BAD_VALUE;
6468 
6469 	put_fd(descriptor);
6470 	return status;
6471 }
6472 
6473 
6474 static status_t
6475 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6476 {
6477 	struct file_descriptor* descriptor;
6478 	struct vnode* vnode;
6479 
6480 	if (offset < 0 || length == 0)
6481 		return B_BAD_VALUE;
6482 	if (offset > OFF_MAX - length)
6483 		return B_FILE_TOO_LARGE;
6484 
6485 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
6486 	if (descriptor == NULL || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6487 		return B_FILE_ERROR;
6488 
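	// preallocation only makes sense for regular files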
6489 	switch (vnode->Type() & S_IFMT) {
6490 		case S_IFIFO:
6491 		case S_IFSOCK:
6492 			return ESPIPE;
6493 
6494 		case S_IFBLK:
6495 		case S_IFCHR:
6496 		case S_IFDIR:
6497 		case S_IFLNK:
6498 			return B_DEVICE_NOT_FOUND;
6499 
6500 		case S_IFREG:
6501 			break;
6502 	}
6503 
6504 	status_t status = B_OK;
6505 	if (HAS_FS_CALL(vnode, preallocate)) {
6506 		status = FS_CALL(vnode, preallocate, offset, length);
6507 	} else {
6508 		status = HAS_FS_CALL(vnode, write)
6509 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6510 	}
6511 
6512 	return status;
6513 }
6514 
6515 
6516 static status_t
6517 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6518 	bool kernel)
6519 {
6520 	struct vnode* vnode;
6521 	status_t status;
6522 
6523 	status = fd_and_path_to_vnode(fd, path, false, &vnode, NULL, kernel);
6524 	if (status != B_OK)
6525 		return status;
6526 
6527 	if (HAS_FS_CALL(vnode, read_symlink)) {
6528 		status = FS_CALL(vnode, read_symlink, buffer, _bufferSize);
6529 	} else
6530 		status = B_BAD_VALUE;
6531 
6532 	put_vnode(vnode);
6533 	return status;
6534 }
6535 
6536 
6537 static status_t
6538 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6539 	bool kernel)
6540 {
6541 	// path validity checks have to be in the calling function!
6542 	char name[B_FILE_NAME_LENGTH];
6543 	struct vnode* vnode;
6544 	status_t status;
6545 
6546 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6547 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6548 
6549 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, name, kernel);
6550 	if (status != B_OK)
6551 		return status;
6552 
6553 	if (HAS_FS_CALL(vnode, create_symlink))
6554 		status = FS_CALL(vnode, create_symlink, name, toPath, mode);
6555 	else {
6556 		status = HAS_FS_CALL(vnode, write)
6557 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6558 	}
6559 
6560 	put_vnode(vnode);
6561 
6562 	return status;
6563 }
6564 
6565 
6566 static status_t
6567 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6568 	bool traverseLeafLink, bool kernel)
6569 {
6570 	// path validity checks have to be in the calling function!
6571 
6572 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6573 		toPath, kernel));
6574 
6575 	char name[B_FILE_NAME_LENGTH];
6576 	struct vnode* directory;
6577 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, &directory, name,
6578 		kernel);
6579 	if (status != B_OK)
6580 		return status;
6581 
6582 	struct vnode* vnode;
6583 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, &vnode, NULL,
6584 		kernel);
6585 	if (status != B_OK)
6586 		goto err;
6587 
6588 	if (directory->mount != vnode->mount) {
6589 		status = B_CROSS_DEVICE_LINK;
6590 		goto err1;
6591 	}
6592 
6593 	if (HAS_FS_CALL(directory, link))
6594 		status = FS_CALL(directory, link, name, vnode);
6595 	else
6596 		status = B_READ_ONLY_DEVICE;
6597 
6598 err1:
6599 	put_vnode(vnode);
6600 err:
6601 	put_vnode(directory);
6602 
6603 	return status;
6604 }
6605 
6606 
6607 static status_t
6608 common_unlink(int fd, char* path, bool kernel)
6609 {
6610 	char filename[B_FILE_NAME_LENGTH];
6611 	struct vnode* vnode;
6612 	status_t status;
6613 
6614 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6615 		kernel));
6616 
6617 	status = fd_and_path_to_dir_vnode(fd, path, &vnode, filename, kernel);
6618 	if (status < 0)
6619 		return status;
6620 
6621 	if (HAS_FS_CALL(vnode, unlink))
6622 		status = FS_CALL(vnode, unlink, filename);
6623 	else
6624 		status = B_READ_ONLY_DEVICE;
6625 
6626 	put_vnode(vnode);
6627 
6628 	return status;
6629 }
6630 
6631 
6632 static status_t
6633 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6634 {
6635 	struct vnode* vnode;
6636 	status_t status;
6637 
6638 	// TODO: honor effectiveUserGroup argument
6639 
6640 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
6641 	if (status != B_OK)
6642 		return status;
6643 
6644 	if (HAS_FS_CALL(vnode, access))
6645 		status = FS_CALL(vnode, access, mode);
6646 	else
6647 		status = B_OK;
6648 
6649 	put_vnode(vnode);
6650 
6651 	return status;
6652 }
6653 
6654 
6655 static status_t
6656 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6657 {
6658 	struct vnode* fromVnode;
6659 	struct vnode* toVnode;
6660 	char fromName[B_FILE_NAME_LENGTH];
6661 	char toName[B_FILE_NAME_LENGTH];
6662 	status_t status;
6663 
6664 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6665 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6666 
6667 	status = fd_and_path_to_dir_vnode(fd, path, &fromVnode, fromName, kernel);
6668 	if (status != B_OK)
6669 		return status;
6670 
6671 	status = fd_and_path_to_dir_vnode(newFD, newPath, &toVnode, toName, kernel);
6672 	if (status != B_OK)
6673 		goto err1;
6674 
6675 	if (fromVnode->device != toVnode->device) {
6676 		status = B_CROSS_DEVICE_LINK;
6677 		goto err2;
6678 	}
6679 
6680 	if (fromName[0] == '\0' || toName[0] == '\0'
6681 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6682 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6683 		|| (fromVnode == toVnode && !strcmp(fromName, toName))) {
6684 		status = B_BAD_VALUE;
6685 		goto err2;
6686 	}
6687 
6688 	if (HAS_FS_CALL(fromVnode, rename))
6689 		status = FS_CALL(fromVnode, rename, fromName, toVnode, toName);
6690 	else
6691 		status = B_READ_ONLY_DEVICE;
6692 
6693 err2:
6694 	put_vnode(toVnode);
6695 err1:
6696 	put_vnode(fromVnode);
6697 
6698 	return status;
6699 }
6700 
6701 
6702 static status_t
6703 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6704 {
6705 	struct vnode* vnode = descriptor->u.vnode;
6706 
6707 	FUNCTION(("common_read_stat: stat %p\n", stat));
6708 
6709 	// TODO: remove this once all file systems properly set them!
6710 	stat->st_crtim.tv_nsec = 0;
6711 	stat->st_ctim.tv_nsec = 0;
6712 	stat->st_mtim.tv_nsec = 0;
6713 	stat->st_atim.tv_nsec = 0;
6714 
6715 	return vfs_stat_vnode(vnode, stat);
6716 }
6717 
6718 
6719 static status_t
6720 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6721 	int statMask)
6722 {
6723 	struct vnode* vnode = descriptor->u.vnode;
6724 
6725 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6726 		vnode, stat, statMask));
6727 
6728 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6729 		&& (statMask & B_STAT_SIZE) != 0) {
6730 		return B_BAD_VALUE;
6731 	}
6732 
6733 	if (!HAS_FS_CALL(vnode, write_stat))
6734 		return B_READ_ONLY_DEVICE;
6735 
6736 	return FS_CALL(vnode, write_stat, stat, statMask);
6737 }
6738 
6739 
6740 static status_t
6741 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6742 	struct stat* stat, bool kernel)
6743 {
6744 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6745 		stat));
6746 
6747 	struct vnode* vnode;
6748 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6749 		NULL, kernel);
6750 	if (status != B_OK)
6751 		return status;
6752 
6753 	status = vfs_stat_vnode(vnode, stat);
6754 
6755 	put_vnode(vnode);
6756 	return status;
6757 }
6758 
6759 
6760 static status_t
6761 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6762 	const struct stat* stat, int statMask, bool kernel)
6763 {
6764 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask "
6765 		"%d, kernel %d\n", fd, path, stat, statMask, kernel));
6766 
6767 	struct vnode* vnode;
6768 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6769 		NULL, kernel);
6770 	if (status != B_OK)
6771 		return status;
6772 
6773 	if (HAS_FS_CALL(vnode, write_stat))
6774 		status = FS_CALL(vnode, write_stat, stat, statMask);
6775 	else
6776 		status = B_READ_ONLY_DEVICE;
6777 
6778 	put_vnode(vnode);
6779 
6780 	return status;
6781 }
6782 
6783 
6784 static int
6785 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6786 {
6787 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6788 		kernel));
6789 
6790 	struct vnode* vnode;
6791 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, &vnode,
6792 		NULL, kernel);
6793 	if (status != B_OK)
6794 		return status;
6795 
6796 	status = open_attr_dir_vnode(vnode, kernel);
6797 	if (status < 0)
6798 		put_vnode(vnode);
6799 
6800 	return status;
6801 }
6802 
6803 
6804 static status_t
6805 attr_dir_close(struct file_descriptor* descriptor)
6806 {
6807 	struct vnode* vnode = descriptor->u.vnode;
6808 
6809 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6810 
6811 	if (HAS_FS_CALL(vnode, close_attr_dir))
6812 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6813 
6814 	return B_OK;
6815 }
6816 
6817 
6818 static void
6819 attr_dir_free_fd(struct file_descriptor* descriptor)
6820 {
6821 	struct vnode* vnode = descriptor->u.vnode;
6822 
6823 	if (vnode != NULL) {
6824 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6825 		put_vnode(vnode);
6826 	}
6827 }
6828 
6829 
6830 static status_t
6831 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6832 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6833 {
6834 	struct vnode* vnode = descriptor->u.vnode;
6835 
6836 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6837 
6838 	if (HAS_FS_CALL(vnode, read_attr_dir))
6839 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6840 			bufferSize, _count);
6841 
6842 	return B_UNSUPPORTED;
6843 }
6844 
6845 
6846 static status_t
6847 attr_dir_rewind(struct file_descriptor* descriptor)
6848 {
6849 	struct vnode* vnode = descriptor->u.vnode;
6850 
6851 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6852 
6853 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6854 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6855 
6856 	return B_UNSUPPORTED;
6857 }
6858 
6859 
6860 static int
6861 attr_create(int fd, char* path, const char* name, uint32 type,
6862 	int openMode, bool kernel)
6863 {
6864 	if (name == NULL || *name == '\0')
6865 		return B_BAD_VALUE;
6866 
6867 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6868 	struct vnode* vnode;
6869 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6870 		kernel);
6871 	if (status != B_OK)
6872 		return status;
6873 
6874 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6875 		status = B_LINK_LIMIT;
6876 		goto err;
6877 	}
6878 
6879 	if (!HAS_FS_CALL(vnode, create_attr)) {
6880 		status = B_READ_ONLY_DEVICE;
6881 		goto err;
6882 	}
6883 
6884 	void* cookie;
6885 	status = FS_CALL(vnode, create_attr, name, type, openMode, &cookie);
6886 	if (status != B_OK)
6887 		goto err;
6888 
6889 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6890 	if (fd >= 0)
6891 		return fd;
6892 
6893 	status = fd;
6894 
6895 	FS_CALL(vnode, close_attr, cookie);
6896 	FS_CALL(vnode, free_attr_cookie, cookie);
6897 
6898 	FS_CALL(vnode, remove_attr, name);
6899 
6900 err:
6901 	put_vnode(vnode);
6902 
6903 	return status;
6904 }
6905 
6906 
6907 static int
6908 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6909 {
6910 	if (name == NULL || *name == '\0')
6911 		return B_BAD_VALUE;
6912 
6913 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6914 	struct vnode* vnode;
6915 	status_t status = fd_and_path_to_vnode(fd, path, traverse, &vnode, NULL,
6916 		kernel);
6917 	if (status != B_OK)
6918 		return status;
6919 
6920 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type())) {
6921 		status = B_LINK_LIMIT;
6922 		goto err;
6923 	}
6924 
6925 	if (!HAS_FS_CALL(vnode, open_attr)) {
6926 		status = B_UNSUPPORTED;
6927 		goto err;
6928 	}
6929 
6930 	void* cookie;
6931 	status = FS_CALL(vnode, open_attr, name, openMode, &cookie);
6932 	if (status != B_OK)
6933 		goto err;
6934 
6935 	// now we only need a file descriptor for this attribute and we're done
6936 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode, cookie, openMode, kernel);
6937 	if (fd >= 0)
6938 		return fd;
6939 
6940 	status = fd;
6941 
6942 	FS_CALL(vnode, close_attr, cookie);
6943 	FS_CALL(vnode, free_attr_cookie, cookie);
6944 
6945 err:
6946 	put_vnode(vnode);
6947 
6948 	return status;
6949 }
6950 
6951 
6952 static status_t
6953 attr_close(struct file_descriptor* descriptor)
6954 {
6955 	struct vnode* vnode = descriptor->u.vnode;
6956 
6957 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6958 
6959 	if (HAS_FS_CALL(vnode, close_attr))
6960 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6961 
6962 	return B_OK;
6963 }
6964 
6965 
6966 static void
6967 attr_free_fd(struct file_descriptor* descriptor)
6968 {
6969 	struct vnode* vnode = descriptor->u.vnode;
6970 
6971 	if (vnode != NULL) {
6972 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6973 		put_vnode(vnode);
6974 	}
6975 }
6976 
6977 
6978 static status_t
6979 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6980 	size_t* length)
6981 {
6982 	struct vnode* vnode = descriptor->u.vnode;
6983 
6984 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6985 		pos, length, *length));
6986 
6987 	if (!HAS_FS_CALL(vnode, read_attr))
6988 		return B_UNSUPPORTED;
6989 
6990 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6991 }
6992 
6993 
6994 static status_t
6995 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6996 	size_t* length)
6997 {
6998 	struct vnode* vnode = descriptor->u.vnode;
6999 
7000 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
7001 		length));
7002 
7003 	if (!HAS_FS_CALL(vnode, write_attr))
7004 		return B_UNSUPPORTED;
7005 
7006 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
7007 }
7008 
7009 
7010 static off_t
7011 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
7012 {
7013 	off_t offset;
7014 
7015 	switch (seekType) {
7016 		case SEEK_SET:
7017 			offset = 0;
7018 			break;
7019 		case SEEK_CUR:
7020 			offset = descriptor->pos;
7021 			break;
7022 		case SEEK_END:
7023 		{
7024 			struct vnode* vnode = descriptor->u.vnode;
7025 			if (!HAS_FS_CALL(vnode, read_attr_stat))
7026 				return B_UNSUPPORTED;
7027 
7028 			struct stat stat;
7029 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
7030 				&stat);
7031 			if (status != B_OK)
7032 				return status;
7033 
7034 			offset = stat.st_size;
7035 			break;
7036 		}
7037 		default:
7038 			return B_BAD_VALUE;
7039 	}
7040 
7041 	// assumes off_t is 64 bits wide
7042 	if (offset > 0 && LONGLONG_MAX - offset < pos)
7043 		return B_BUFFER_OVERFLOW;
7044 
7045 	pos += offset;
7046 	if (pos < 0)
7047 		return B_BAD_VALUE;
7048 
7049 	return descriptor->pos = pos;
7050 }
7051 
7052 
7053 static status_t
7054 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7055 {
7056 	struct vnode* vnode = descriptor->u.vnode;
7057 
7058 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
7059 
7060 	if (!HAS_FS_CALL(vnode, read_attr_stat))
7061 		return B_UNSUPPORTED;
7062 
7063 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
7064 }
7065 
7066 
7067 static status_t
7068 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
7069 	int statMask)
7070 {
7071 	struct vnode* vnode = descriptor->u.vnode;
7072 
7073 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
7074 
7075 	if (!HAS_FS_CALL(vnode, write_attr_stat))
7076 		return B_READ_ONLY_DEVICE;
7077 
7078 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
7079 }
7080 
7081 
7082 static status_t
7083 attr_remove(int fd, const char* name, bool kernel)
7084 {
7085 	struct file_descriptor* descriptor;
7086 	struct vnode* vnode;
7087 	status_t status;
7088 
7089 	if (name == NULL || *name == '\0')
7090 		return B_BAD_VALUE;
7091 
7092 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
7093 		kernel));
7094 
7095 	descriptor = get_fd_and_vnode(fd, &vnode, kernel);
7096 	if (descriptor == NULL)
7097 		return B_FILE_ERROR;
7098 
7099 	if (HAS_FS_CALL(vnode, remove_attr))
7100 		status = FS_CALL(vnode, remove_attr, name);
7101 	else
7102 		status = B_READ_ONLY_DEVICE;
7103 
7104 	put_fd(descriptor);
7105 
7106 	return status;
7107 }
7108 
7109 
7110 static status_t
7111 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
7112 	bool kernel)
7113 {
7114 	struct file_descriptor* fromDescriptor;
7115 	struct file_descriptor* toDescriptor;
7116 	struct vnode* fromVnode;
7117 	struct vnode* toVnode;
7118 	status_t status;
7119 
7120 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7121 		|| *toName == '\0')
7122 		return B_BAD_VALUE;
7123 
7124 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7125 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7126 
7127 	fromDescriptor = get_fd_and_vnode(fromFD, &fromVnode, kernel);
7128 	if (fromDescriptor == NULL)
7129 		return B_FILE_ERROR;
7130 
7131 	toDescriptor = get_fd_and_vnode(toFD, &toVnode, kernel);
7132 	if (toDescriptor == NULL) {
7133 		status = B_FILE_ERROR;
7134 		goto err;
7135 	}
7136 
7137 	// are the files on the same volume?
7138 	if (fromVnode->device != toVnode->device) {
7139 		status = B_CROSS_DEVICE_LINK;
7140 		goto err1;
7141 	}
7142 
7143 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7144 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7145 	} else
7146 		status = B_READ_ONLY_DEVICE;
7147 
7148 err1:
7149 	put_fd(toDescriptor);
7150 err:
7151 	put_fd(fromDescriptor);
7152 
7153 	return status;
7154 }
7155 
7156 
7157 static int
7158 index_dir_open(dev_t mountID, bool kernel)
7159 {
7160 	struct fs_mount* mount;
7161 	void* cookie;
7162 
7163 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7164 		kernel));
7165 
7166 	status_t status = get_mount(mountID, &mount);
7167 	if (status != B_OK)
7168 		return status;
7169 
7170 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7171 		status = B_UNSUPPORTED;
7172 		goto error;
7173 	}
7174 
7175 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7176 	if (status != B_OK)
7177 		goto error;
7178 
7179 	// get fd for the index directory
7180 	int fd;
7181 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7182 	if (fd >= 0)
7183 		return fd;
7184 
7185 	// something went wrong
7186 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7187 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7188 
7189 	status = fd;
7190 
7191 error:
7192 	put_mount(mount);
7193 	return status;
7194 }
7195 
7196 
7197 static status_t
7198 index_dir_close(struct file_descriptor* descriptor)
7199 {
7200 	struct fs_mount* mount = descriptor->u.mount;
7201 
7202 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7203 
7204 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7205 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7206 
7207 	return B_OK;
7208 }
7209 
7210 
7211 static void
7212 index_dir_free_fd(struct file_descriptor* descriptor)
7213 {
7214 	struct fs_mount* mount = descriptor->u.mount;
7215 
7216 	if (mount != NULL) {
7217 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7218 		put_mount(mount);
7219 	}
7220 }
7221 
7222 
7223 static status_t
7224 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7225 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7226 {
7227 	struct fs_mount* mount = descriptor->u.mount;
7228 
7229 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7230 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7231 			bufferSize, _count);
7232 	}
7233 
7234 	return B_UNSUPPORTED;
7235 }
7236 
7237 
7238 static status_t
7239 index_dir_rewind(struct file_descriptor* descriptor)
7240 {
7241 	struct fs_mount* mount = descriptor->u.mount;
7242 
7243 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7244 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7245 
7246 	return B_UNSUPPORTED;
7247 }
7248 
7249 
7250 static status_t
7251 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7252 	bool kernel)
7253 {
7254 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7255 		mountID, name, kernel));
7256 
7257 	struct fs_mount* mount;
7258 	status_t status = get_mount(mountID, &mount);
7259 	if (status != B_OK)
7260 		return status;
7261 
7262 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7263 		status = B_READ_ONLY_DEVICE;
7264 		goto out;
7265 	}
7266 
7267 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7268 
7269 out:
7270 	put_mount(mount);
7271 	return status;
7272 }
7273 
7274 
7275 #if 0
7276 static status_t
7277 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7278 {
7279 	struct vnode* vnode = descriptor->u.vnode;
7280 
7281 	// ToDo: currently unused!
7282 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7283 	if (!HAS_FS_CALL(vnode, read_index_stat))
7284 		return B_UNSUPPORTED;
7285 
7286 	return B_UNSUPPORTED;
7287 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7288 }
7289 
7290 
7291 static void
7292 index_free_fd(struct file_descriptor* descriptor)
7293 {
7294 	struct vnode* vnode = descriptor->u.vnode;
7295 
7296 	if (vnode != NULL) {
7297 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7298 		put_vnode(vnode);
7299 	}
7300 }
7301 #endif
7302 
7303 
7304 static status_t
7305 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7306 	bool kernel)
7307 {
7308 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7309 		"kernel = %d)\n", mountID, name, kernel));
7310 
7311 	struct fs_mount* mount;
7312 	status_t status = get_mount(mountID, &mount);
7313 	if (status != B_OK)
7314 		return status;
7315 
7316 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7317 		status = B_UNSUPPORTED;
7318 		goto out;
7319 	}
7320 
7321 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7322 
7323 out:
7324 	put_mount(mount);
7325 	return status;
7326 }
7327 
7328 
7329 static status_t
7330 index_remove(dev_t mountID, const char* name, bool kernel)
7331 {
7332 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7333 		mountID, name, kernel));
7334 
7335 	struct fs_mount* mount;
7336 	status_t status = get_mount(mountID, &mount);
7337 	if (status != B_OK)
7338 		return status;
7339 
7340 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7341 		status = B_READ_ONLY_DEVICE;
7342 		goto out;
7343 	}
7344 
7345 	status = FS_MOUNT_CALL(mount, remove_index, name);
7346 
7347 out:
7348 	put_mount(mount);
7349 	return status;
7350 }
7351 
7352 
7353 /*!	TODO: the query FS API is still pretty much the same as in R5.
7354 		It would be nice if file systems got some more kernel support
7355 		for queries.
7356 		For example, query parsing should be moved into the kernel.
7357 */
7358 static int
7359 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7360 	int32 token, bool kernel)
7361 {
7362 	struct fs_mount* mount;
7363 	void* cookie;
7364 
7365 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7366 		device, query, kernel));
7367 
7368 	status_t status = get_mount(device, &mount);
7369 	if (status != B_OK)
7370 		return status;
7371 
7372 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7373 		status = B_UNSUPPORTED;
7374 		goto error;
7375 	}
7376 
7377 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7378 		&cookie);
7379 	if (status != B_OK)
7380 		goto error;
7381 
7382 	// get fd for the query
7383 	int fd;
7384 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7385 	if (fd >= 0)
7386 		return fd;
7387 
7388 	status = fd;
7389 
7390 	// something went wrong
7391 	FS_MOUNT_CALL(mount, close_query, cookie);
7392 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7393 
7394 error:
7395 	put_mount(mount);
7396 	return status;
7397 }
7398 
7399 
7400 static status_t
7401 query_close(struct file_descriptor* descriptor)
7402 {
7403 	struct fs_mount* mount = descriptor->u.mount;
7404 
7405 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7406 
7407 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7408 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7409 
7410 	return B_OK;
7411 }
7412 
7413 
7414 static void
7415 query_free_fd(struct file_descriptor* descriptor)
7416 {
7417 	struct fs_mount* mount = descriptor->u.mount;
7418 
7419 	if (mount != NULL) {
7420 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7421 		put_mount(mount);
7422 	}
7423 }
7424 
7425 
7426 static status_t
7427 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7428 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7429 {
7430 	struct fs_mount* mount = descriptor->u.mount;
7431 
7432 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7433 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7434 			bufferSize, _count);
7435 	}
7436 
7437 	return B_UNSUPPORTED;
7438 }
7439 
7440 
7441 static status_t
7442 query_rewind(struct file_descriptor* descriptor)
7443 {
7444 	struct fs_mount* mount = descriptor->u.mount;
7445 
7446 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7447 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7448 
7449 	return B_UNSUPPORTED;
7450 }
7451 
7452 
7453 //	#pragma mark - General File System functions
7454 
7455 
7456 static dev_t
7457 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7458 	const char* args, bool kernel)
7459 {
7460 	struct ::fs_mount* mount;
7461 	status_t status = B_OK;
7462 	fs_volume* volume = NULL;
7463 	int32 layer = 0;
7464 	Vnode* coveredNode = NULL;
7465 
7466 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7467 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7468 
7469 	// The path is always safe; we just have to make sure that fsName is
7470 	// at least superficially valid - we can't make any assumptions about
7471 	// args, though. A NULL fsName is OK if a device was given and the FS is
7472 	// not virtual; we'll get the name from the DDM later.
7473 	if (fsName == NULL) {
7474 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7475 			return B_BAD_VALUE;
7476 	} else if (fsName[0] == '\0')
7477 		return B_BAD_VALUE;
7478 
7479 	RecursiveLocker mountOpLocker(sMountOpLock);
7480 
7481 	// Helper to delete a newly created file device on failure.
7482 	// Not exactly beautiful, but helps to keep the code below cleaner.
7483 	struct FileDeviceDeleter {
7484 		FileDeviceDeleter() : id(-1) {}
7485 		~FileDeviceDeleter()
7486 		{
7487 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7488 		}
7489 
7490 		partition_id id;
7491 	} fileDeviceDeleter;
7492 
7493 	// If the file system is not a "virtual" one, the device argument should
7494 	// point to a real file/device (if given at all).
7495 	// get the partition
7496 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7497 	KPartition* partition = NULL;
7498 	KPath normalizedDevice;
7499 	bool newlyCreatedFileDevice = false;
7500 
7501 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7502 		// normalize the device path
7503 		status = normalizedDevice.SetTo(device, true);
7504 		if (status != B_OK)
7505 			return status;
7506 
7507 		// get a corresponding partition from the DDM
7508 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7509 		if (partition == NULL) {
7510 			// Partition not found: this either means the user supplied
7511 			// an invalid path, or the path refers to an image file. We try
7512 			// to let the DDM create a file device for the path.
7513 			partition_id deviceID = ddm->CreateFileDevice(
7514 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7515 			if (deviceID >= 0) {
7516 				partition = ddm->RegisterPartition(deviceID);
7517 				if (newlyCreatedFileDevice)
7518 					fileDeviceDeleter.id = deviceID;
7519 			}
7520 		}
7521 
7522 		if (!partition) {
7523 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7524 				normalizedDevice.Path()));
7525 			return B_ENTRY_NOT_FOUND;
7526 		}
7527 
7528 		device = normalizedDevice.Path();
7529 			// correct path to file device
7530 	}
7531 	PartitionRegistrar partitionRegistrar(partition, true);
7532 
7533 	// Write lock the partition's device. For the time being, we keep the lock
7534 	// until we're done mounting -- not nice, but it ensures that no one
7535 	// interferes.
7536 	// TODO: Just mark the partition busy while mounting!
7537 	KDiskDevice* diskDevice = NULL;
7538 	if (partition) {
7539 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7540 		if (!diskDevice) {
7541 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7542 			return B_ERROR;
7543 		}
7544 	}
7545 
7546 	DeviceWriteLocker writeLocker(diskDevice, true);
7547 		// this takes over the write lock acquired before
7548 
7549 	if (partition != NULL) {
7550 		// make sure that the partition is not busy
7551 		if (partition->IsBusy()) {
7552 			TRACE(("fs_mount(): Partition is busy.\n"));
7553 			return B_BUSY;
7554 		}
7555 
7556 		// if no FS name had been supplied, we get it from the partition
7557 		if (fsName == NULL) {
7558 			KDiskSystem* diskSystem = partition->DiskSystem();
7559 			if (!diskSystem) {
7560 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7561 					"recognize it.\n"));
7562 				return B_BAD_VALUE;
7563 			}
7564 
7565 			if (!diskSystem->IsFileSystem()) {
7566 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7567 					"partitioning system.\n"));
7568 				return B_BAD_VALUE;
7569 			}
7570 
7571 			// The disk system name will not change, and the KDiskSystem
7572 			// object will not go away while the disk device is locked (and
7573 			// the partition has a reference to it), so this is safe.
7574 			fsName = diskSystem->Name();
7575 		}
7576 	}
7577 
7578 	mount = new(std::nothrow) (struct ::fs_mount);
7579 	if (mount == NULL)
7580 		return B_NO_MEMORY;
7581 
7582 	mount->device_name = strdup(device);
7583 		// "device" can be NULL
7584 
7585 	status = mount->entry_cache.Init();
7586 	if (status != B_OK)
7587 		goto err1;
7588 
7589 	// initialize structure
7590 	mount->id = sNextMountID++;
7591 	mount->partition = NULL;
7592 	mount->root_vnode = NULL;
7593 	mount->covers_vnode = NULL;
7594 	mount->unmounting = false;
7595 	mount->owns_file_device = false;
7596 	mount->volume = NULL;
7597 
7598 	// build up the volume(s)
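	// (a layered FS gets one fs_volume per layer; mount->volume always
	// points to the most recently added, i.e. topmost, layer, and the
	// super_volume/sub_volume pointers chain the layers together)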
7599 	while (true) {
7600 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7601 		if (layerFSName == NULL) {
7602 			if (layer == 0) {
7603 				status = B_NO_MEMORY;
7604 				goto err1;
7605 			}
7606 
7607 			break;
7608 		}
7609 		MemoryDeleter layerFSNameDeleter(layerFSName);
7610 
7611 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7612 		if (volume == NULL) {
7613 			status = B_NO_MEMORY;
7614 			goto err1;
7615 		}
7616 
7617 		volume->id = mount->id;
7618 		volume->partition = partition != NULL ? partition->ID() : -1;
7619 		volume->layer = layer++;
7620 		volume->private_volume = NULL;
7621 		volume->ops = NULL;
7622 		volume->sub_volume = NULL;
7623 		volume->super_volume = NULL;
7624 		volume->file_system = NULL;
7625 		volume->file_system_name = NULL;
7626 
7627 		volume->file_system_name = get_file_system_name(layerFSName);
7628 		if (volume->file_system_name == NULL) {
7629 			status = B_NO_MEMORY;
7630 			free(volume);
7631 			goto err1;
7632 		}
7633 
7634 		volume->file_system = get_file_system(layerFSName);
7635 		if (volume->file_system == NULL) {
7636 			status = B_DEVICE_NOT_FOUND;
7637 			free(volume->file_system_name);
7638 			free(volume);
7639 			goto err1;
7640 		}
7641 
7642 		if (mount->volume == NULL)
7643 			mount->volume = volume;
7644 		else {
7645 			volume->super_volume = mount->volume;
7646 			mount->volume->sub_volume = volume;
7647 			mount->volume = volume;
7648 		}
7649 	}
7650 
7651 	// insert mount struct into list before we call FS's mount() function
7652 	// so that vnodes can be created for this mount
7653 	rw_lock_write_lock(&sMountLock);
7654 	sMountsTable->Insert(mount);
7655 	rw_lock_write_unlock(&sMountLock);
7656 
7657 	ino_t rootID;
7658 
7659 	if (!sRoot) {
7660 		// we haven't mounted anything yet
7661 		if (strcmp(path, "/") != 0) {
7662 			status = B_ERROR;
7663 			goto err2;
7664 		}
7665 
7666 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7667 			args, &rootID);
7668 		if (status != B_OK || mount->volume->ops == NULL)
7669 			goto err2;
7670 	} else {
7671 		status = path_to_vnode(path, true, &coveredNode, NULL, kernel);
7672 		if (status != B_OK)
7673 			goto err2;
7674 
7675 		mount->covers_vnode = coveredNode;
7676 
7677 		// make sure coveredNode is a directory
7678 		if (!S_ISDIR(coveredNode->Type())) {
7679 			status = B_NOT_A_DIRECTORY;
7680 			goto err3;
7681 		}
7682 
7683 		if (coveredNode->IsCovered()) {
7684 			// this is already a covered vnode
7685 			status = B_BUSY;
7686 			goto err3;
7687 		}
7688 
7689 		// mount it/them
7690 		fs_volume* volume = mount->volume;
7691 		while (volume) {
7692 			status = volume->file_system->mount(volume, device, flags, args,
7693 				&rootID);
7694 			if (status != B_OK || volume->ops == NULL) {
7695 				if (status == B_OK && volume->ops == NULL)
7696 					panic("fs_mount: mount() succeeded but ops is NULL!");
7697 				if (volume->sub_volume)
7698 					goto err4;
7699 				goto err3;
7700 			}
7701 
7702 			volume = volume->super_volume;
7703 		}
7704 
7705 		volume = mount->volume;
7706 		while (volume) {
7707 			if (volume->ops->all_layers_mounted != NULL)
7708 				volume->ops->all_layers_mounted(volume);
7709 			volume = volume->super_volume;
7710 		}
7711 	}
7712 
7713 	// the root node is supposed to be owned by the file system - it must
7714 	// exist at this point
7715 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7716 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7717 		panic("fs_mount: file system does not own its root node!\n");
7718 		status = B_ERROR;
7719 		goto err4;
7720 	}
7721 
7722 	// set up the links between the root vnode and the vnode it covers
7723 	rw_lock_write_lock(&sVnodeLock);
7724 	if (coveredNode != NULL) {
7725 		if (coveredNode->IsCovered()) {
7726 			// the vnode is covered now
7727 			status = B_BUSY;
7728 			rw_lock_write_unlock(&sVnodeLock);
7729 			goto err4;
7730 		}
7731 
7732 		mount->root_vnode->covers = coveredNode;
7733 		mount->root_vnode->SetCovering(true);
7734 
7735 		coveredNode->covered_by = mount->root_vnode;
7736 		coveredNode->SetCovered(true);
7737 	}
7738 	rw_lock_write_unlock(&sVnodeLock);
7739 
7740 	if (!sRoot) {
7741 		sRoot = mount->root_vnode;
7742 		mutex_lock(&sIOContextRootLock);
7743 		get_current_io_context(true)->root = sRoot;
7744 		mutex_unlock(&sIOContextRootLock);
7745 		inc_vnode_ref_count(sRoot);
7746 	}
7747 
7748 	// supply the partition (if any) with the mount cookie and mark it mounted
7749 	if (partition) {
7750 		partition->SetMountCookie(mount->volume->private_volume);
7751 		partition->SetVolumeID(mount->id);
7752 
7753 		// keep a partition reference as long as the partition is mounted
7754 		partitionRegistrar.Detach();
7755 		mount->partition = partition;
7756 		mount->owns_file_device = newlyCreatedFileDevice;
7757 		fileDeviceDeleter.id = -1;
7758 	}
7759 
7760 	notify_mount(mount->id,
7761 		coveredNode != NULL ? coveredNode->device : -1,
7762 		coveredNode ? coveredNode->id : -1);
7763 
7764 	return mount->id;
7765 
7766 err4:
7767 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7768 err3:
7769 	if (coveredNode != NULL)
7770 		put_vnode(coveredNode);
7771 err2:
7772 	rw_lock_write_lock(&sMountLock);
7773 	sMountsTable->Remove(mount);
7774 	rw_lock_write_unlock(&sMountLock);
7775 err1:
7776 	delete mount;
7777 
7778 	return status;
7779 }
7780 
7781 
7782 static status_t
7783 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7784 {
7785 	struct fs_mount* mount;
7786 	status_t err;
7787 
7788 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d)\n", path,
7789 		mountID, kernel));
7790 
7791 	struct vnode* pathVnode = NULL;
7792 	if (path != NULL) {
7793 		err = path_to_vnode(path, true, &pathVnode, NULL, kernel);
7794 		if (err != B_OK)
7795 			return B_ENTRY_NOT_FOUND;
7796 	}
7797 
7798 	RecursiveLocker mountOpLocker(sMountOpLock);
7799 	ReadLocker mountLocker(sMountLock);
7800 
7801 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7802 	if (mount == NULL) {
7803 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7804 			pathVnode);
7805 	}
7806 
7807 	mountLocker.Unlock();
7808 
7809 	if (path != NULL) {
7810 		put_vnode(pathVnode);
7811 
7812 		if (mount->root_vnode != pathVnode) {
7813 			// not a mountpoint
7814 			return B_BAD_VALUE;
7815 		}
7816 	}
7817 
7818 	// if the volume is associated with a partition, lock the device of the
7819 	// partition as long as we are unmounting
7820 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7821 	KPartition* partition = mount->partition;
7822 	KDiskDevice* diskDevice = NULL;
7823 	if (partition != NULL) {
7824 		if (partition->Device() == NULL) {
7825 			dprintf("fs_unmount(): There is no device!\n");
7826 			return B_ERROR;
7827 		}
7828 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7829 		if (!diskDevice) {
7830 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7831 			return B_ERROR;
7832 		}
7833 	}
7834 	DeviceWriteLocker writeLocker(diskDevice, true);
7835 
7836 	// make sure that the partition is not busy
7837 	if (partition != NULL) {
7838 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7839 			TRACE(("fs_unmount(): Partition is busy.\n"));
7840 			return B_BUSY;
7841 		}
7842 	}
7843 
7844 	// grab the vnode master mutex to keep someone from creating
7845 	// a vnode while we're figuring out if we can continue
7846 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7847 
7848 	bool disconnectedDescriptors = false;
7849 
7850 	while (true) {
7851 		bool busy = false;
7852 
7853 		// cycle through the list of vnodes associated with this mount and
7854 		// make sure none of them is busy or still referenced
7855 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7856 		while (struct vnode* vnode = iterator.Next()) {
7857 			if (vnode->IsBusy()) {
7858 				busy = true;
7859 				break;
7860 			}
7861 
7862 			// check the vnode's ref count -- subtract additional references for
7863 			// covering
7864 			int32 refCount = vnode->ref_count;
7865 			if (vnode->covers != NULL)
7866 				refCount--;
7867 			if (vnode->covered_by != NULL)
7868 				refCount--;
7869 
7870 			if (refCount != 0) {
7871 				// there are still vnodes in use on this mount, so we cannot
7872 				// unmount yet
7873 				busy = true;
7874 				break;
7875 			}
7876 		}
7877 
7878 		if (!busy)
7879 			break;
7880 
7881 		if ((flags & B_FORCE_UNMOUNT) == 0)
7882 			return B_BUSY;
7883 
7884 		if (disconnectedDescriptors) {
7885 			// wait a bit until the last access is finished, and then try again
7886 			vnodesWriteLocker.Unlock();
7887 			snooze(100000);
7888 			// TODO: if there is some kind of bug that prevents the ref counts
7889 			// from getting back to zero, this will fall into an endless loop...
7890 			vnodesWriteLocker.Lock();
7891 			continue;
7892 		}
7893 
7894 		// the file system is still busy - but we're forced to unmount it,
7895 		// so let's disconnect all open file descriptors
7896 
7897 		mount->unmounting = true;
7898 			// prevent new vnodes from being created
7899 
7900 		vnodesWriteLocker.Unlock();
7901 
7902 		disconnect_mount_or_vnode_fds(mount, NULL);
7903 		disconnectedDescriptors = true;
7904 
7905 		vnodesWriteLocker.Lock();
7906 	}
7907 
7908 	// We can safely continue. Mark all of the vnodes busy and put this
7909 	// mount structure into unmounting state. Also undo the vnode
7910 	// covers/covered_by links.
7911 	mount->unmounting = true;
7912 
7913 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7914 	while (struct vnode* vnode = iterator.Next()) {
7915 		// Remove all covers/covered_by links from other mounts' nodes to this
7916 		// vnode and adjust the node ref count accordingly. We will release the
7917 		// references to the external vnodes below.
7918 		if (Vnode* coveredNode = vnode->covers) {
7919 			if (Vnode* coveringNode = vnode->covered_by) {
7920 				// We have both covered and covering vnodes, so just remove us
7921 				// from the chain.
7922 				coveredNode->covered_by = coveringNode;
7923 				coveringNode->covers = coveredNode;
7924 				vnode->ref_count -= 2;
7925 
7926 				vnode->covered_by = NULL;
7927 				vnode->covers = NULL;
7928 				vnode->SetCovering(false);
7929 				vnode->SetCovered(false);
7930 			} else {
7931 				// We only have a covered vnode. Remove its link to us.
7932 				coveredNode->covered_by = NULL;
7933 				coveredNode->SetCovered(false);
7934 				vnode->ref_count--;
7935 
7936 				// If the other node is an external vnode, we keep its link
7937 				// around so we can put the reference later on. Otherwise
7938 				// we get rid of it right now.
7939 				if (coveredNode->mount == mount) {
7940 					vnode->covers = NULL;
7941 					coveredNode->ref_count--;
7942 				}
7943 			}
7944 		} else if (Vnode* coveringNode = vnode->covered_by) {
7945 			// We only have a covering vnode. Remove its link to us.
7946 			coveringNode->covers = NULL;
7947 			coveringNode->SetCovering(false);
7948 			vnode->ref_count--;
7949 
7950 			// If the other node is an external vnode, we keep its link
7951 			// around so we can put the reference later on. Otherwise
7952 			// we get rid of it right now.
7953 			if (coveringNode->mount == mount) {
7954 				vnode->covered_by = NULL;
7955 				coveringNode->ref_count--;
7956 			}
7957 		}
7958 
7959 		vnode->SetBusy(true);
7960 		vnode_to_be_freed(vnode);
7961 	}
7962 
7963 	vnodesWriteLocker.Unlock();
7964 
7965 	// Free all vnodes associated with this mount.
7966 	// They will be removed from the mount list by free_vnode(), so
7967 	// we don't have to do that ourselves.
7968 	while (struct vnode* vnode = mount->vnodes.Head()) {
7969 		// Put the references to external covered/covering vnodes we kept above.
7970 		if (Vnode* coveredNode = vnode->covers)
7971 			put_vnode(coveredNode);
7972 		if (Vnode* coveringNode = vnode->covered_by)
7973 			put_vnode(coveringNode);
7974 
7975 		free_vnode(vnode, false);
7976 	}
7977 
7978 	// remove the mount structure from the hash table
7979 	rw_lock_write_lock(&sMountLock);
7980 	sMountsTable->Remove(mount);
7981 	rw_lock_write_unlock(&sMountLock);
7982 
7983 	mountOpLocker.Unlock();
7984 
7985 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7986 	notify_unmount(mount->id);
7987 
7988 	// dereference the partition and mark it unmounted
7989 	if (partition) {
7990 		partition->SetVolumeID(-1);
7991 		partition->SetMountCookie(NULL);
7992 
7993 		if (mount->owns_file_device)
7994 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7995 		partition->Unregister();
7996 	}
7997 
7998 	delete mount;
7999 	return B_OK;
8000 }
8001 
8002 
8003 static status_t
8004 fs_sync(dev_t device)
8005 {
8006 	struct fs_mount* mount;
8007 	status_t status = get_mount(device, &mount);
8008 	if (status != B_OK)
8009 		return status;
8010 
8011 	struct vnode marker;
8012 	memset(&marker, 0, sizeof(marker));
8013 	marker.SetBusy(true);
8014 	marker.SetRemoved(true);
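	// The marker is flagged busy so that other code skips it, and "removed"
	// initially: IsRemoved() doubles as the flag telling us below whether
	// the marker is currently linked into the mount's vnode list.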
8015 
8016 	// First, synchronize all file caches
8017 
8018 	while (true) {
8019 		WriteLocker locker(sVnodeLock);
8020 			// Note: That's the easy way, which is probably OK for sync(),
8021 			// since it's a relatively rare call and doesn't need to allow for
8022 			// a lot of concurrency. Using a read lock would be possible, but
8023 			// also more involved, since we would have to lock the individual
8024 			// nodes and take care of the locking order, which we might not
8025 			// want to do while holding fs_mount::lock.
8026 
8027 		// synchronize access to vnode list
8028 		mutex_lock(&mount->lock);
8029 
8030 		struct vnode* vnode;
8031 		if (!marker.IsRemoved()) {
8032 			vnode = mount->vnodes.GetNext(&marker);
8033 			mount->vnodes.Remove(&marker);
8034 			marker.SetRemoved(true);
8035 		} else
8036 			vnode = mount->vnodes.First();
8037 
8038 		while (vnode != NULL && (vnode->cache == NULL
8039 			|| vnode->IsRemoved() || vnode->IsBusy())) {
8040 			// TODO: we could track writes (and writable mapped vnodes)
8041 			//	and have a simple flag that we could test for here
8042 			vnode = mount->vnodes.GetNext(vnode);
8043 		}
8044 
8045 		if (vnode != NULL) {
8046 			// insert marker vnode again
8047 			mount->vnodes.Insert(mount->vnodes.GetNext(vnode), &marker);
8048 			marker.SetRemoved(false);
8049 		}
8050 
8051 		mutex_unlock(&mount->lock);
8052 
8053 		if (vnode == NULL)
8054 			break;
8055 
8056 		vnode = lookup_vnode(mount->id, vnode->id);
8057 		if (vnode == NULL || vnode->IsBusy())
8058 			continue;
8059 
8060 		if (vnode->ref_count == 0) {
8061 			// this vnode has been unused before
8062 			vnode_used(vnode);
8063 		}
8064 		inc_vnode_ref_count(vnode);
8065 
8066 		locker.Unlock();
8067 
8068 		if (vnode->cache != NULL && !vnode->IsRemoved())
8069 			vnode->cache->WriteModified();
8070 
8071 		put_vnode(vnode);
8072 	}
8073 
8074 	// Let the file systems do their synchronizing work
8075 	if (HAS_FS_MOUNT_CALL(mount, sync))
8076 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
8077 
8078 	// Finally, flush the underlying device's write cache (if possible).
8079 	if (mount->partition != NULL && mount->partition->Device() != NULL)
8080 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
8081 
8082 	put_mount(mount);
8083 	return status;
8084 }
8085 
8086 
8087 static status_t
8088 fs_read_info(dev_t device, struct fs_info* info)
8089 {
8090 	struct fs_mount* mount;
8091 	status_t status = get_mount(device, &mount);
8092 	if (status != B_OK)
8093 		return status;
8094 
8095 	memset(info, 0, sizeof(struct fs_info));
8096 
8097 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
8098 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
8099 
8100 	// fill in info the file system doesn't (have to) know about
8101 	if (status == B_OK) {
8102 		info->dev = mount->id;
8103 		info->root = mount->root_vnode->id;
8104 
8105 		fs_volume* volume = mount->volume;
8106 		while (volume->super_volume != NULL)
8107 			volume = volume->super_volume;
8108 
8109 		strlcpy(info->fsh_name, volume->file_system_name,
8110 			sizeof(info->fsh_name));
8111 		if (mount->device_name != NULL) {
8112 			strlcpy(info->device_name, mount->device_name,
8113 				sizeof(info->device_name));
8114 		}
8115 	}
8116 
8117 	// even if the call is not supported by the file system, the parts we
8118 	// filled in ourselves above are still returned
8119 
8120 	put_mount(mount);
8121 	return status;
8122 }
8123 
8124 
8125 static status_t
8126 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8127 {
8128 	struct fs_mount* mount;
8129 	status_t status = get_mount(device, &mount);
8130 	if (status != B_OK)
8131 		return status;
8132 
8133 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8134 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8135 	else
8136 		status = B_READ_ONLY_DEVICE;
8137 
8138 	put_mount(mount);
8139 	return status;
8140 }
8141 
8142 
8143 static dev_t
8144 fs_next_device(int32* _cookie)
8145 {
8146 	struct fs_mount* mount = NULL;
8147 	dev_t device = *_cookie;
8148 
8149 	rw_lock_read_lock(&sMountLock);
8150 
8151 	// Since device IDs are assigned sequentially, this algorithm
8152 	// works well enough. It makes sure that the device list
8153 	// returned is sorted, and that no device is skipped when an
8154 	// already visited device gets unmounted.
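	// For example, with mounted devices {1, 2, 5} and *_cookie == 3, the
	// loop below stops at device 5 and sets the cookie to 6 -- so even if
	// device 5 is unmounted afterwards, the next call won't skip device 6.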
8155 
8156 	while (device < sNextMountID) {
8157 		mount = find_mount(device++);
8158 		if (mount != NULL && mount->volume->private_volume != NULL)
8159 			break;
8160 	}
8161 
8162 	*_cookie = device;
8163 
8164 	if (mount != NULL)
8165 		device = mount->id;
8166 	else
8167 		device = B_BAD_VALUE;
8168 
8169 	rw_lock_read_unlock(&sMountLock);
8170 
8171 	return device;
8172 }
8173 
8174 
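/*!	Kernel-side counterpart to the fs_read_attr() API: reads the attribute
	\a attribute of the node referred to by \a fd. Note that \a type is
	accepted for API compatibility, but is not used by this implementation.
	\return The number of bytes read, or an error code.
*/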
8175 ssize_t
8176 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8177 	void *buffer, size_t readBytes)
8178 {
8179 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8180 	if (attrFD < 0)
8181 		return attrFD;
8182 
8183 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8184 
8185 	_kern_close(attrFD);
8186 
8187 	return bytesRead;
8188 }
8189 
8190 
8191 static status_t
8192 get_cwd(char* buffer, size_t size, bool kernel)
8193 {
8194 	// Get current working directory from io context
8195 	struct io_context* context = get_current_io_context(kernel);
8196 	status_t status;
8197 
8198 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
8199 
8200 	mutex_lock(&context->io_mutex);
8201 
8202 	struct vnode* vnode = context->cwd;
8203 	if (vnode)
8204 		inc_vnode_ref_count(vnode);
8205 
8206 	mutex_unlock(&context->io_mutex);
8207 
8208 	if (vnode) {
8209 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8210 		put_vnode(vnode);
8211 	} else
8212 		status = B_ERROR;
8213 
8214 	return status;
8215 }
8216 
8217 
8218 static status_t
8219 set_cwd(int fd, char* path, bool kernel)
8220 {
8221 	struct io_context* context;
8222 	struct vnode* vnode = NULL;
8223 	struct vnode* oldDirectory;
8224 	status_t status;
8225 
8226 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8227 
8228 	// Get vnode for passed path, and bail if it failed
8229 	status = fd_and_path_to_vnode(fd, path, true, &vnode, NULL, kernel);
8230 	if (status < 0)
8231 		return status;
8232 
8233 	if (!S_ISDIR(vnode->Type())) {
8234 		// nope, can't cwd to here
8235 		status = B_NOT_A_DIRECTORY;
8236 		goto err;
8237 	}
8238 
8239 	// We need to have the permission to enter the directory, too
8240 	if (HAS_FS_CALL(vnode, access)) {
8241 		status = FS_CALL(vnode, access, X_OK);
8242 		if (status != B_OK)
8243 			goto err;
8244 	}
8245 
8246 	// Get current io context and lock
8247 	context = get_current_io_context(kernel);
8248 	mutex_lock(&context->io_mutex);
8249 
8250 	// save the old current working directory first
8251 	oldDirectory = context->cwd;
8252 	context->cwd = vnode;
8253 
8254 	mutex_unlock(&context->io_mutex);
8255 
8256 	if (oldDirectory)
8257 		put_vnode(oldDirectory);
8258 
8259 	return B_OK;
8260 
8261 err:
8262 	put_vnode(vnode);
8263 	return status;
8264 }
8265 
8266 
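/*!	Copies a NUL-terminated name from userland into \a to, which must be able
	to hold \a length bytes. Returns \c B_NAME_TOO_LONG if the string
	(including its terminating \c NUL) does not fit into the buffer, or the
	(negative) error returned by user_strlcpy().
*/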
8267 static status_t
8268 user_copy_name(char* to, const char* from, size_t length)
8269 {
8270 	ssize_t len = user_strlcpy(to, from, length);
8271 	if (len < 0)
8272 		return len;
8273 	if (len >= (ssize_t)length)
8274 		return B_NAME_TOO_LONG;
8275 	return B_OK;
8276 }
8277 
8278 
8279 //	#pragma mark - kernel mirrored syscalls
8280 
8281 
8282 dev_t
8283 _kern_mount(const char* path, const char* device, const char* fsName,
8284 	uint32 flags, const char* args, size_t argsLength)
8285 {
8286 	KPath pathBuffer(path);
8287 	if (pathBuffer.InitCheck() != B_OK)
8288 		return B_NO_MEMORY;
8289 
8290 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8291 }
8292 
8293 
8294 status_t
8295 _kern_unmount(const char* path, uint32 flags)
8296 {
8297 	KPath pathBuffer(path);
8298 	if (pathBuffer.InitCheck() != B_OK)
8299 		return B_NO_MEMORY;
8300 
8301 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8302 }
8303 
8304 
8305 status_t
8306 _kern_read_fs_info(dev_t device, struct fs_info* info)
8307 {
8308 	if (info == NULL)
8309 		return B_BAD_VALUE;
8310 
8311 	return fs_read_info(device, info);
8312 }
8313 
8314 
8315 status_t
8316 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8317 {
8318 	if (info == NULL)
8319 		return B_BAD_VALUE;
8320 
8321 	return fs_write_info(device, info, mask);
8322 }
8323 
8324 
8325 status_t
8326 _kern_sync(void)
8327 {
8328 	// Note: _kern_sync() is also called from _user_sync()
8329 	int32 cookie = 0;
8330 	dev_t device;
8331 	while ((device = next_dev(&cookie)) >= 0) {
8332 		status_t status = fs_sync(device);
8333 		if (status != B_OK && status != B_BAD_VALUE) {
8334 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8335 				strerror(status));
8336 		}
8337 	}
8338 
8339 	return B_OK;
8340 }
8341 
8342 
8343 dev_t
8344 _kern_next_device(int32* _cookie)
8345 {
8346 	return fs_next_device(_cookie);
8347 }
8348 
8349 
8350 status_t
8351 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8352 	size_t infoSize)
8353 {
8354 	if (infoSize != sizeof(fd_info))
8355 		return B_BAD_VALUE;
8356 
8357 	// get the team
8358 	Team* team = Team::Get(teamID);
8359 	if (team == NULL)
8360 		return B_BAD_TEAM_ID;
8361 	BReference<Team> teamReference(team, true);
8362 
8363 	// now that we have a team reference, its I/O context won't go away
8364 	io_context* context = team->io_context;
8365 	MutexLocker contextLocker(context->io_mutex);
8366 
8367 	uint32 slot = *_cookie;
8368 
8369 	struct file_descriptor* descriptor;
8370 	while (slot < context->table_size
8371 		&& (descriptor = context->fds[slot]) == NULL) {
8372 		slot++;
8373 	}
8374 
8375 	if (slot >= context->table_size)
8376 		return B_ENTRY_NOT_FOUND;
8377 
8378 	info->number = slot;
8379 	info->open_mode = descriptor->open_mode;
8380 
8381 	struct vnode* vnode = fd_vnode(descriptor);
8382 	if (vnode != NULL) {
8383 		info->device = vnode->device;
8384 		info->node = vnode->id;
8385 	} else if (descriptor->u.mount != NULL) {
8386 		info->device = descriptor->u.mount->id;
8387 		info->node = -1;
8388 	}
8389 
8390 	*_cookie = slot + 1;
8391 	return B_OK;
8392 }
8393 
8394 
8395 int
8396 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8397 	int perms)
8398 {
8399 	if ((openMode & O_CREAT) != 0) {
8400 		return file_create_entry_ref(device, inode, name, openMode, perms,
8401 			true);
8402 	}
8403 
8404 	return file_open_entry_ref(device, inode, name, openMode, true);
8405 }
8406 
8407 
8408 /*!	\brief Opens a node specified by a FD + path pair.
8409 
8410 	At least one of \a fd and \a path must be specified.
8411 	If only \a fd is given, the function opens the node identified by this
8412 	FD. If only a path is given, this path is opened. If both are given and
8413 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8414 	of the directory (!) identified by \a fd.
8415 
8416 	\param fd The FD. May be < 0.
8417 	\param path The absolute or relative path. May be \c NULL.
8418 	\param openMode The open mode.
8419 	\return A FD referring to the newly opened node, or an error code,
8420 			if an error occurs.
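	A minimal usage sketch (the path below is purely illustrative):
	\code
	int fd = _kern_open(-1, "/boot/home/notes.txt", O_RDWR | O_CREAT, 0644);
	if (fd >= 0)
		_kern_close(fd);
	\endcode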
8421 */
8422 int
8423 _kern_open(int fd, const char* path, int openMode, int perms)
8424 {
8425 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8426 	if (pathBuffer.InitCheck() != B_OK)
8427 		return B_NO_MEMORY;
8428 
8429 	if ((openMode & O_CREAT) != 0)
8430 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8431 
8432 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8433 }
8434 
8435 
8436 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8437 
8438 	The supplied name may be \c NULL, in which case the directory identified
8439 	by \a device and \a inode will be opened. Otherwise \a device and
8440 	\a inode identify the parent directory of the directory to be opened
8441 	and \a name its entry name.
8442 
8443 	\param device If \a name is specified the ID of the device the parent
8444 		   directory of the directory to be opened resides on, otherwise
8445 		   the device of the directory itself.
8446 	\param inode If \a name is specified the node ID of the parent
8447 		   directory of the directory to be opened, otherwise the node ID of the
8448 		   directory itself.
8449 	\param name The entry name of the directory to be opened. If \c NULL,
8450 		   the \a device + \a inode pair identify the node to be opened.
8451 	\return The FD of the newly opened directory or an error code, if
8452 			something went wrong.
8453 */
8454 int
8455 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8456 {
8457 	return dir_open_entry_ref(device, inode, name, true);
8458 }
8459 
8460 
8461 /*!	\brief Opens a directory specified by a FD + path pair.
8462 
8463 	At least one of \a fd and \a path must be specified.
8464 	If only \a fd is given, the function opens the directory identified by this
8465 	FD. If only a path is given, this path is opened. If both are given and
8466 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8467 	of the directory (!) identified by \a fd.
8468 
8469 	\param fd The FD. May be < 0.
8470 	\param path The absolute or relative path. May be \c NULL.
8471 	\return A FD referring to the newly opened directory, or an error code,
8472 			if an error occurs.
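	For example, \code _kern_open_dir(fd, "subdir") \endcode opens the
	(illustratively named) subdirectory "subdir" of the directory referred
	to by \a fd, while \code _kern_open_dir(-1, "/boot") \endcode ignores
	the FD, the path being absolute.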
8473 */
8474 int
8475 _kern_open_dir(int fd, const char* path)
8476 {
8477 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8478 	if (pathBuffer.InitCheck() != B_OK)
8479 		return B_NO_MEMORY;
8480 
8481 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8482 }
8483 
8484 
8485 status_t
8486 _kern_fcntl(int fd, int op, size_t argument)
8487 {
8488 	return common_fcntl(fd, op, argument, true);
8489 }
8490 
8491 
8492 status_t
8493 _kern_fsync(int fd)
8494 {
8495 	return common_sync(fd, true);
8496 }
8497 
8498 
8499 status_t
8500 _kern_lock_node(int fd)
8501 {
8502 	return common_lock_node(fd, true);
8503 }
8504 
8505 
8506 status_t
8507 _kern_unlock_node(int fd)
8508 {
8509 	return common_unlock_node(fd, true);
8510 }
8511 
8512 
8513 status_t
8514 _kern_preallocate(int fd, off_t offset, off_t length)
8515 {
8516 	return common_preallocate(fd, offset, length, true);
8517 }
8518 
8519 
8520 status_t
8521 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8522 	int perms)
8523 {
8524 	return dir_create_entry_ref(device, inode, name, perms, true);
8525 }
8526 
8527 
8528 /*!	\brief Creates a directory specified by a FD + path pair.
8529 
8530 	\a path must always be specified (it contains the name of the new directory
8531 	at least). If only a path is given, this path identifies the location at
8532 	which the directory shall be created. If both \a fd and \a path are given
8533 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8534 	of the directory (!) identified by \a fd.
8535 
8536 	\param fd The FD. May be < 0.
8537 	\param path The absolute or relative path. Must not be \c NULL.
8538 	\param perms The access permissions the new directory shall have.
8539 	\return \c B_OK, if the directory has been created successfully, another
8540 			error code otherwise.
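	For example, \code _kern_create_dir(-1, "/tmp/cache", 0755) \endcode
	creates the (illustrative) directory "/tmp/cache" with rwxr-xr-x
	permissions.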
8541 */
8542 status_t
8543 _kern_create_dir(int fd, const char* path, int perms)
8544 {
8545 	KPath pathBuffer(path, KPath::DEFAULT);
8546 	if (pathBuffer.InitCheck() != B_OK)
8547 		return B_NO_MEMORY;
8548 
8549 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8550 }
8551 
8552 
8553 status_t
8554 _kern_remove_dir(int fd, const char* path)
8555 {
8556 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8557 	if (pathBuffer.InitCheck() != B_OK)
8558 		return B_NO_MEMORY;
8559 
8560 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8561 }
8562 
8563 
8564 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8565 
8566 	At least one of \a fd and \a path must be specified.
8567 	If only \a fd is given, the symlink to be read is the node
8568 	identified by this FD. If only a path is given, this path identifies the
8569 	symlink to be read. If both are given and the path is absolute, \a fd is
8570 	ignored; a relative path is reckoned off of the directory (!) identified
8571 	by \a fd.
8572 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8573 	will still be updated to reflect the required buffer size.
8574 
8575 	\param fd The FD. May be < 0.
8576 	\param path The absolute or relative path. May be \c NULL.
8577 	\param buffer The buffer into which the contents of the symlink shall be
8578 		   written.
8579 	\param _bufferSize A pointer to the size of the supplied buffer.
8580 	\return The length of the link on success or an appropriate error code
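	A sketch of the buffer size protocol (the path is illustrative):
	\code
	char buffer[B_PATH_NAME_LENGTH];
	size_t bufferSize = sizeof(buffer);
	status_t status = _kern_read_link(-1, "/boot/home/link", buffer,
		&bufferSize);
		// on B_BUFFER_OVERFLOW, bufferSize holds the size actually needed
	\endcode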
8581 */
8582 status_t
8583 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8584 {
8585 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8586 	if (pathBuffer.InitCheck() != B_OK)
8587 		return B_NO_MEMORY;
8588 
8589 	return common_read_link(fd, pathBuffer.LockBuffer(),
8590 		buffer, _bufferSize, true);
8591 }
8592 
8593 
8594 /*!	\brief Creates a symlink specified by a FD + path pair.
8595 
8596 	\a path must always be specified (it contains the name of the new symlink
8597 	at least). If only a path is given, this path identifies the location at
8598 	which the symlink shall be created. If both \a fd and \a path are given and
8599 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8600 	of the directory (!) identified by \a fd.
8601 
8602 	\param fd The FD. May be < 0.
8603 	\param path The absolute or relative path of the symlink to be created.
		   Must not be \c NULL.
	\param toPath The path the symlink shall point to.
8604 	\param mode The access permissions the new symlink shall have.
8605 	\return \c B_OK, if the symlink has been created successfully, another
8606 			error code otherwise.
8607 */
8608 status_t
8609 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8610 {
8611 	KPath pathBuffer(path);
8612 	if (pathBuffer.InitCheck() != B_OK)
8613 		return B_NO_MEMORY;
8614 
8615 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8616 		toPath, mode, true);
8617 }
8618 
8619 
8620 status_t
8621 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8622 	bool traverseLeafLink)
8623 {
8624 	KPath pathBuffer(path);
8625 	KPath toPathBuffer(toPath);
8626 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8627 		return B_NO_MEMORY;
8628 
8629 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8630 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8631 }
8632 
8633 
8634 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8635 
8636 	\a path must always be specified (it contains at least the name of the entry
8637 	to be deleted). If only a path is given, this path identifies the entry
8638 	directly. If both \a fd and \a path are given and the path is absolute,
8639 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8640 	identified by \a fd.
8641 
8642 	\param fd The FD. May be < 0.
8643 	\param path The absolute or relative path. Must not be \c NULL.
8644 	\return \c B_OK, if the entry has been removed successfully, another
8645 			error code otherwise.
8646 */
8647 status_t
8648 _kern_unlink(int fd, const char* path)
8649 {
8650 	KPath pathBuffer(path);
8651 	if (pathBuffer.InitCheck() != B_OK)
8652 		return B_NO_MEMORY;
8653 
8654 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8655 }
8656 
8657 
8658 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8659 		   by another FD + path pair.
8660 
8661 	\a oldPath and \a newPath must always be specified (they contain at least
8662 	the name of the entry). If only a path is given, this path identifies the
8663 	entry directly. If both a FD and a path are given and the path is absolute,
8664 	the FD is ignored; a relative path is reckoned off of the directory (!)
8665 	identified by the respective FD.
8666 
8667 	\param oldFD The FD of the old location. May be < 0.
8668 	\param oldPath The absolute or relative path of the old location. Must not
8669 		   be \c NULL.
8670 	\param newFD The FD of the new location. May be < 0.
8671 	\param newPath The absolute or relative path of the new location. Must not
8672 		   be \c NULL.
8673 	\return \c B_OK, if the entry has been moved successfully, another
8674 			error code otherwise.
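	For example, \code _kern_rename(-1, "/tmp/a", -1, "/tmp/b") \endcode
	moves the (illustrative) entry "a" to "b"; both paths being absolute,
	the FDs are ignored.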
8675 */
8676 status_t
8677 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8678 {
8679 	KPath oldPathBuffer(oldPath);
8680 	KPath newPathBuffer(newPath);
8681 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8682 		return B_NO_MEMORY;
8683 
8684 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8685 		newFD, newPathBuffer.LockBuffer(), true);
8686 }
8687 
8688 
8689 status_t
8690 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8691 {
8692 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8693 	if (pathBuffer.InitCheck() != B_OK)
8694 		return B_NO_MEMORY;
8695 
8696 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8697 		true);
8698 }
8699 
8700 
8701 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8702 
8703 	If only \a fd is given, the stat operation associated with the type
8704 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8705 	given, this path identifies the entry for whose node to retrieve the
8706 	stat data. If both \a fd and \a path are given and the path is absolute,
8707 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8708 	identified by \a fd and specifies the entry whose stat data shall be
8709 	retrieved.
8710 
8711 	\param fd The FD. May be < 0.
8712 	\param path The absolute or relative path. May be \c NULL.
8713 	\param traverseLeafLink If \a path is given, \c true specifies that the
8714 		   function shall not stick to symlinks, but traverse them.
8715 	\param stat The buffer the stat data shall be written into.
8716 	\param statSize The size of the supplied stat buffer.
8717 	\return \c B_OK, if the stat data have been read successfully, another
8718 			error code otherwise.
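	If \a statSize is smaller than \c sizeof(struct stat) -- e.g. for a
	caller compiled against an older, shorter stat structure -- the stat is
	performed into a complete structure and only the first \a statSize bytes
	are copied back, as the implementation below shows.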
8719 */
8720 status_t
8721 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8722 	struct stat* stat, size_t statSize)
8723 {
8724 	struct stat completeStat;
8725 	struct stat* originalStat = NULL;
8726 	status_t status;
8727 
8728 	if (statSize > sizeof(struct stat))
8729 		return B_BAD_VALUE;
8730 
8731 	// this supports different stat extensions
8732 	if (statSize < sizeof(struct stat)) {
8733 		originalStat = stat;
8734 		stat = &completeStat;
8735 	}
8736 
8737 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8738 
8739 	if (status == B_OK && originalStat != NULL)
8740 		memcpy(originalStat, stat, statSize);
8741 
8742 	return status;
8743 }
8744 
8745 
8746 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8747 
8748 	If only \a fd is given, the stat operation associated with the type
8749 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8750 	given, this path identifies the entry for whose node to write the
8751 	stat data. If both \a fd and \a path are given and the path is absolute,
8752 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8753 	identified by \a fd and specifies the entry whose stat data shall be
8754 	written.
8755 
8756 	\param fd The FD. May be < 0.
8757 	\param path The absolute or relative path. May be \c NULL.
8758 	\param traverseLeafLink If \a path is given, \c true specifies that the
8759 		   function shall not stick to symlinks, but traverse them.
8760 	\param stat The buffer containing the stat data to be written.
8761 	\param statSize The size of the supplied stat buffer.
8762 	\param statMask A mask specifying which parts of the stat data shall be
8763 		   written.
8764 	\return \c B_OK, if the stat data have been written successfully,
8765 			another error code otherwise.
8766 */
8767 status_t
8768 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8769 	const struct stat* stat, size_t statSize, int statMask)
8770 {
8771 	struct stat completeStat;
8772 
8773 	if (statSize > sizeof(struct stat))
8774 		return B_BAD_VALUE;
8775 
8776 	// this supports different stat extensions
8777 	if (statSize < sizeof(struct stat)) {
8778 		memset((uint8*)&completeStat + statSize, 0,
8779 			sizeof(struct stat) - statSize);
8780 		memcpy(&completeStat, stat, statSize);
8781 		stat = &completeStat;
8782 	}
8783 
8784 	status_t status;
8785 
8786 	if (path != NULL) {
8787 		// path given: write the stat of the node referred to by (fd, path)
8788 		KPath pathBuffer(path);
8789 		if (pathBuffer.InitCheck() != B_OK)
8790 			return B_NO_MEMORY;
8791 
8792 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8793 			traverseLeafLink, stat, statMask, true);
8794 	} else {
8795 		// no path given: get the FD and use the FD operation
8796 		struct file_descriptor* descriptor
8797 			= get_fd(get_current_io_context(true), fd);
8798 		if (descriptor == NULL)
8799 			return B_FILE_ERROR;
8800 
8801 		if (descriptor->ops->fd_write_stat)
8802 			status = descriptor->ops->fd_write_stat(descriptor, stat, statMask);
8803 		else
8804 			status = B_UNSUPPORTED;
8805 
8806 		put_fd(descriptor);
8807 	}
8808 
8809 	return status;
8810 }
8811 
8812 
8813 int
8814 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8815 {
8816 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8817 	if (pathBuffer.InitCheck() != B_OK)
8818 		return B_NO_MEMORY;
8819 
8820 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8821 }
8822 
8823 
8824 int
8825 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8826 	int openMode)
8827 {
8828 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8829 	if (pathBuffer.InitCheck() != B_OK)
8830 		return B_NO_MEMORY;
8831 
8832 	if ((openMode & O_CREAT) != 0) {
8833 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8834 			true);
8835 	}
8836 
8837 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8838 }
8839 
8840 
8841 status_t
8842 _kern_remove_attr(int fd, const char* name)
8843 {
8844 	return attr_remove(fd, name, true);
8845 }
8846 
8847 
8848 status_t
8849 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8850 	const char* toName)
8851 {
8852 	return attr_rename(fromFile, fromName, toFile, toName, true);
8853 }
8854 
8855 
8856 int
8857 _kern_open_index_dir(dev_t device)
8858 {
8859 	return index_dir_open(device, true);
8860 }
8861 
8862 
8863 status_t
8864 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8865 {
8866 	return index_create(device, name, type, flags, true);
8867 }
8868 
8869 
8870 status_t
8871 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8872 {
8873 	return index_name_read_stat(device, name, stat, true);
8874 }
8875 
8876 
8877 status_t
8878 _kern_remove_index(dev_t device, const char* name)
8879 {
8880 	return index_remove(device, name, true);
8881 }
8882 
8883 
8884 status_t
8885 _kern_getcwd(char* buffer, size_t size)
8886 {
8887 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8888 
8889 	// Call vfs to get current working directory
8890 	return get_cwd(buffer, size, true);
8891 }
8892 
8893 
8894 status_t
8895 _kern_setcwd(int fd, const char* path)
8896 {
8897 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8898 	if (pathBuffer.InitCheck() != B_OK)
8899 		return B_NO_MEMORY;
8900 
8901 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8902 }
8903 
8904 
8905 //	#pragma mark - userland syscalls
8906 
8907 
8908 dev_t
8909 _user_mount(const char* userPath, const char* userDevice,
8910 	const char* userFileSystem, uint32 flags, const char* userArgs,
8911 	size_t argsLength)
8912 {
8913 	char fileSystem[B_FILE_NAME_LENGTH];
8914 	KPath path, device;
8915 	char* args = NULL;
8916 	status_t status;
8917 
8918 	if (!IS_USER_ADDRESS(userPath))
8919 		return B_BAD_ADDRESS;
8920 
8921 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8922 		return B_NO_MEMORY;
8923 
8924 	status = user_copy_name(path.LockBuffer(), userPath,
8925 		B_PATH_NAME_LENGTH);
8926 	if (status != B_OK)
8927 		return status;
8928 	path.UnlockBuffer();
8929 
8930 	if (userFileSystem != NULL) {
8931 		if (!IS_USER_ADDRESS(userFileSystem))
8932 			return B_BAD_ADDRESS;
8933 
8934 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8935 		if (status != B_OK)
8936 			return status;
8937 	}
8938 
8939 	if (userDevice != NULL) {
8940 		if (!IS_USER_ADDRESS(userDevice))
8941 			return B_BAD_ADDRESS;
8942 
8943 		status = user_copy_name(device.LockBuffer(), userDevice,
8944 			B_PATH_NAME_LENGTH);
8945 		if (status != B_OK)
8946 			return status;
8947 		device.UnlockBuffer();
8948 	}
8949 
8950 	if (userArgs != NULL && argsLength > 0) {
8951 		if (!IS_USER_ADDRESS(userArgs))
8952 			return B_BAD_ADDRESS;
8953 
8954 		// this is a safety restriction
8955 		if (argsLength >= 65536)
8956 			return B_NAME_TOO_LONG;
8957 
8958 		args = (char*)malloc(argsLength + 1);
8959 		if (args == NULL)
8960 			return B_NO_MEMORY;
8961 
8962 		status = user_copy_name(args, userArgs, argsLength + 1);
8963 		if (status != B_OK) {
8964 			free(args);
8965 			return status;
8966 		}
8967 	}
8968 
8969 	status = fs_mount(path.LockBuffer(),
8970 		userDevice != NULL ? device.Path() : NULL,
8971 		userFileSystem ? fileSystem : NULL, flags, args, false);
8972 
8973 	free(args);
8974 	return status;
8975 }
8976 
8977 
8978 status_t
8979 _user_unmount(const char* userPath, uint32 flags)
8980 {
8981 	if (!IS_USER_ADDRESS(userPath))
8982 		return B_BAD_ADDRESS;
8983 
8984 	KPath pathBuffer;
8985 	if (pathBuffer.InitCheck() != B_OK)
8986 		return B_NO_MEMORY;
8987 
8988 	char* path = pathBuffer.LockBuffer();
8989 
8990 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8991 	if (status != B_OK)
8992 		return status;
8993 
8994 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8995 }
8996 
8997 
8998 status_t
8999 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
9000 {
9001 	struct fs_info info;
9002 	status_t status;
9003 
9004 	if (userInfo == NULL)
9005 		return B_BAD_VALUE;
9006 
9007 	if (!IS_USER_ADDRESS(userInfo))
9008 		return B_BAD_ADDRESS;
9009 
9010 	status = fs_read_info(device, &info);
9011 	if (status != B_OK)
9012 		return status;
9013 
9014 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
9015 		return B_BAD_ADDRESS;
9016 
9017 	return B_OK;
9018 }
9019 
9020 
9021 status_t
9022 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
9023 {
9024 	struct fs_info info;
9025 
9026 	if (userInfo == NULL)
9027 		return B_BAD_VALUE;
9028 
9029 	if (!IS_USER_ADDRESS(userInfo)
9030 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
9031 		return B_BAD_ADDRESS;
9032 
9033 	return fs_write_info(device, &info, mask);
9034 }
9035 
9036 
9037 dev_t
9038 _user_next_device(int32* _userCookie)
9039 {
9040 	int32 cookie;
9041 	dev_t device;
9042 
9043 	if (!IS_USER_ADDRESS(_userCookie)
9044 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
9045 		return B_BAD_ADDRESS;
9046 
9047 	device = fs_next_device(&cookie);
9048 
9049 	if (device >= B_OK) {
9050 		// update user cookie
9051 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
9052 			return B_BAD_ADDRESS;
9053 	}
9054 
9055 	return device;
9056 }
9057 
9058 
9059 status_t
9060 _user_sync(void)
9061 {
9062 	return _kern_sync();
9063 }
9064 
9065 
9066 status_t
9067 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
9068 	size_t infoSize)
9069 {
9070 	struct fd_info info;
9071 	uint32 cookie;
9072 
9073 	// only root can do this
9074 	if (geteuid() != 0)
9075 		return B_NOT_ALLOWED;
9076 
9077 	if (infoSize != sizeof(fd_info))
9078 		return B_BAD_VALUE;
9079 
9080 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
9081 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
9082 		return B_BAD_ADDRESS;
9083 
9084 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
9085 	if (status != B_OK)
9086 		return status;
9087 
9088 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
9089 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
9090 		return B_BAD_ADDRESS;
9091 
9092 	return status;
9093 }
9094 
9095 
9096 status_t
9097 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
9098 	char* userPath, size_t pathLength)
9099 {
9100 	if (!IS_USER_ADDRESS(userPath))
9101 		return B_BAD_ADDRESS;
9102 
9103 	KPath path;
9104 	if (path.InitCheck() != B_OK)
9105 		return B_NO_MEMORY;
9106 
9107 	// copy the leaf name onto the stack
9108 	char stackLeaf[B_FILE_NAME_LENGTH];
9109 	if (leaf != NULL) {
9110 		if (!IS_USER_ADDRESS(leaf))
9111 			return B_BAD_ADDRESS;
9112 
9113 		int status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
9114 		if (status != B_OK)
9115 			return status;
9116 
9117 		leaf = stackLeaf;
9118 	}
9119 
9120 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
9121 		false, path.LockBuffer(), path.BufferSize());
9122 	if (status != B_OK)
9123 		return status;
9124 
9125 	path.UnlockBuffer();
9126 
9127 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9128 	if (length < 0)
9129 		return length;
9130 	if (length >= (int)pathLength)
9131 		return B_BUFFER_OVERFLOW;
9132 
9133 	return B_OK;
9134 }
9135 
9136 
9137 status_t
9138 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9139 {
9140 	if (userPath == NULL || buffer == NULL)
9141 		return B_BAD_VALUE;
9142 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9143 		return B_BAD_ADDRESS;
9144 
9145 	// copy path from userland
9146 	KPath pathBuffer;
9147 	if (pathBuffer.InitCheck() != B_OK)
9148 		return B_NO_MEMORY;
9149 	char* path = pathBuffer.LockBuffer();
9150 
9151 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9152 	if (status != B_OK)
9153 		return status;
9154 
9155 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9156 		false);
9157 	if (error != B_OK)
9158 		return error;
9159 
9160 	// copy back to userland
9161 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9162 	if (len < 0)
9163 		return len;
9164 	if (len >= B_PATH_NAME_LENGTH)
9165 		return B_BUFFER_OVERFLOW;
9166 
9167 	return B_OK;
9168 }
9169 
9170 
9171 int
9172 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9173 	int openMode, int perms)
9174 {
9175 	char name[B_FILE_NAME_LENGTH];
9176 
9177 	if (userName == NULL || device < 0 || inode < 0)
9178 		return B_BAD_VALUE;
9179 	if (!IS_USER_ADDRESS(userName))
9180 		return B_BAD_ADDRESS;
9181 	status_t status = user_copy_name(name, userName, sizeof(name));
9182 	if (status != B_OK)
9183 		return status;
9184 
9185 	if ((openMode & O_CREAT) != 0) {
9186 		return file_create_entry_ref(device, inode, name, openMode, perms,
9187 			false);
9188 	}
9189 
9190 	return file_open_entry_ref(device, inode, name, openMode, false);
9191 }
9192 
9193 
9194 int
9195 _user_open(int fd, const char* userPath, int openMode, int perms)
9196 {
9197 	KPath path;
9198 	if (path.InitCheck() != B_OK)
9199 		return B_NO_MEMORY;
9200 
9201 	char* buffer = path.LockBuffer();
9202 
9203 	if (!IS_USER_ADDRESS(userPath))
9204 		return B_BAD_ADDRESS;
9205 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9206 	if (status != B_OK)
9207 		return status;
9208 
9209 	if ((openMode & O_CREAT) != 0)
9210 		return file_create(fd, buffer, openMode, perms, false);
9211 
9212 	return file_open(fd, buffer, openMode, false);
9213 }
9214 
9215 
9216 int
9217 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9218 {
9219 	if (userName != NULL) {
9220 		char name[B_FILE_NAME_LENGTH];
9221 
9222 		if (!IS_USER_ADDRESS(userName))
9223 			return B_BAD_ADDRESS;
9224 		status_t status = user_copy_name(name, userName, sizeof(name));
9225 		if (status != B_OK)
9226 			return status;
9227 
9228 		return dir_open_entry_ref(device, inode, name, false);
9229 	}
9230 	return dir_open_entry_ref(device, inode, NULL, false);
9231 }
9232 
9233 
9234 int
9235 _user_open_dir(int fd, const char* userPath)
9236 {
9237 	if (userPath == NULL)
9238 		return dir_open(fd, NULL, false);
9239 
9240 	KPath path;
9241 	if (path.InitCheck() != B_OK)
9242 		return B_NO_MEMORY;
9243 
9244 	char* buffer = path.LockBuffer();
9245 
9246 	if (!IS_USER_ADDRESS(userPath))
9247 		return B_BAD_ADDRESS;
9248 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9249 	if (status != B_OK)
9250 		return status;
9251 
9252 	return dir_open(fd, buffer, false);
9253 }
9254 
9255 
9256 /*!	\brief Opens a directory's parent directory and returns the entry name
9257 		   of the former.
9258 
9259 	Aside from returning the directory's entry name, this method is
9260 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9261 	equivalent if \a userName is \c NULL.
9262 
9263 	If a name buffer is supplied and the name does not fit the buffer, the
9264 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9265 
9266 	\param fd A FD referring to a directory.
9267 	\param userName Buffer the directory's entry name shall be written into.
9268 		   May be \c NULL.
9269 	\param nameLength Size of the name buffer.
9270 	\return The file descriptor of the opened parent directory, if everything
9271 			went fine, an error code otherwise.
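	A userland-perspective sketch (reached through the syscall layer; \c
	dirFD is an illustrative descriptor, the buffer size follows the
	recommendation above):
	\code
	char name[B_FILE_NAME_LENGTH];
	int parentFD = _user_open_parent_dir(dirFD, name, sizeof(name));
		// on success, parentFD refers to the parent directory and name
		// holds the directory's entry name within it
	\endcode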
9272 */
9273 int
9274 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9275 {
9276 	bool kernel = false;
9277 
9278 	if (userName && !IS_USER_ADDRESS(userName))
9279 		return B_BAD_ADDRESS;
9280 
9281 	// open the parent dir
9282 	int parentFD = dir_open(fd, (char*)"..", kernel);
9283 	if (parentFD < 0)
9284 		return parentFD;
9285 	FDCloser fdCloser(parentFD, kernel);
9286 
9287 	if (userName) {
9288 		// get the vnodes
9289 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9290 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9291 		VNodePutter parentVNodePutter(parentVNode);
9292 		VNodePutter dirVNodePutter(dirVNode);
9293 		if (!parentVNode || !dirVNode)
9294 			return B_FILE_ERROR;
9295 
9296 		// get the vnode name
9297 		char _buffer[sizeof(struct dirent) + B_FILE_NAME_LENGTH];
9298 		struct dirent* buffer = (struct dirent*)_buffer;
9299 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9300 			sizeof(_buffer), get_current_io_context(false));
9301 		if (status != B_OK)
9302 			return status;
9303 
9304 		// copy the name to the userland buffer
9305 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9306 		if (len < 0)
9307 			return len;
9308 		if (len >= (int)nameLength)
9309 			return B_BUFFER_OVERFLOW;
9310 	}
9311 
9312 	return fdCloser.Detach();
9313 }
9314 
9315 
9316 status_t
9317 _user_fcntl(int fd, int op, size_t argument)
9318 {
9319 	status_t status = common_fcntl(fd, op, argument, false);
9320 	if (op == F_SETLKW)
9321 		syscall_restart_handle_post(status);
9322 
9323 	return status;
9324 }
9325 
9326 
9327 status_t
9328 _user_fsync(int fd)
9329 {
9330 	return common_sync(fd, false);
9331 }
9332 
9333 
9334 status_t
9335 _user_flock(int fd, int operation)
9336 {
9337 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9338 
9339 	// Check if the operation is valid
9340 	switch (operation & ~LOCK_NB) {
9341 		case LOCK_UN:
9342 		case LOCK_SH:
9343 		case LOCK_EX:
9344 			break;
9345 
9346 		default:
9347 			return B_BAD_VALUE;
9348 	}
9349 
9350 	struct file_descriptor* descriptor;
9351 	struct vnode* vnode;
9352 	descriptor = get_fd_and_vnode(fd, &vnode, false);
9353 	if (descriptor == NULL)
9354 		return B_FILE_ERROR;
9355 
9356 	if (descriptor->type != FDTYPE_FILE) {
9357 		put_fd(descriptor);
9358 		return B_BAD_VALUE;
9359 	}
9360 
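	// Translate the flock() request into an advisory lock spanning the
	// whole file (l_start 0, l_len OFF_MAX).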
9361 	struct flock flock;
9362 	flock.l_start = 0;
9363 	flock.l_len = OFF_MAX;
9364 	flock.l_whence = 0;
9365 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9366 
9367 	status_t status;
9368 	if ((operation & LOCK_UN) != 0) {
9369 		if (HAS_FS_CALL(vnode, release_lock))
9370 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9371 		else
9372 			status = release_advisory_lock(vnode, NULL, descriptor, &flock);
9373 	} else {
9374 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9375 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9376 				(operation & LOCK_NB) == 0);
9377 		} else {
9378 			status = acquire_advisory_lock(vnode, NULL, descriptor, &flock,
9379 				(operation & LOCK_NB) == 0);
9380 		}
9381 	}
9382 
9383 	syscall_restart_handle_post(status);
9384 
9385 	put_fd(descriptor);
9386 	return status;
9387 }
9388 
9389 
9390 status_t
9391 _user_lock_node(int fd)
9392 {
9393 	return common_lock_node(fd, false);
9394 }
9395 
9396 
9397 status_t
9398 _user_unlock_node(int fd)
9399 {
9400 	return common_unlock_node(fd, false);
9401 }
9402 
9403 
9404 status_t
9405 _user_preallocate(int fd, off_t offset, off_t length)
9406 {
9407 	return common_preallocate(fd, offset, length, false);
9408 }
9409 
9410 
9411 status_t
9412 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9413 	int perms)
9414 {
9415 	char name[B_FILE_NAME_LENGTH];
9416 	status_t status;
9417 
9418 	if (!IS_USER_ADDRESS(userName))
9419 		return B_BAD_ADDRESS;
9420 
9421 	status = user_copy_name(name, userName, sizeof(name));
9422 	if (status != B_OK)
9423 		return status;
9424 
9425 	return dir_create_entry_ref(device, inode, name, perms, false);
9426 }
9427 
9428 
9429 status_t
9430 _user_create_dir(int fd, const char* userPath, int perms)
9431 {
9432 	KPath pathBuffer;
9433 	if (pathBuffer.InitCheck() != B_OK)
9434 		return B_NO_MEMORY;
9435 
9436 	char* path = pathBuffer.LockBuffer();
9437 
9438 	if (!IS_USER_ADDRESS(userPath))
9439 		return B_BAD_ADDRESS;
9440 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9441 	if (status != B_OK)
9442 		return status;
9443 
9444 	return dir_create(fd, path, perms, false);
9445 }
9446 
9447 
9448 status_t
9449 _user_remove_dir(int fd, const char* userPath)
9450 {
9451 	KPath pathBuffer;
9452 	if (pathBuffer.InitCheck() != B_OK)
9453 		return B_NO_MEMORY;
9454 
9455 	char* path = pathBuffer.LockBuffer();
9456 
9457 	if (userPath != NULL) {
9458 		if (!IS_USER_ADDRESS(userPath))
9459 			return B_BAD_ADDRESS;
9460 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9461 		if (status != B_OK)
9462 			return status;
9463 	}
9464 
9465 	return dir_remove(fd, userPath ? path : NULL, false);
9466 }
9467 
9468 
9469 status_t
9470 _user_read_link(int fd, const char* userPath, char* userBuffer,
9471 	size_t* userBufferSize)
9472 {
9473 	KPath pathBuffer, linkBuffer;
9474 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9475 		return B_NO_MEMORY;
9476 
9477 	size_t bufferSize;
9478 
9479 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9480 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9481 		return B_BAD_ADDRESS;
9482 
9483 	char* path = pathBuffer.LockBuffer();
9484 	char* buffer = linkBuffer.LockBuffer();
9485 
9486 	if (userPath) {
9487 		if (!IS_USER_ADDRESS(userPath))
9488 			return B_BAD_ADDRESS;
9489 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9490 		if (status != B_OK)
9491 			return status;
9492 
9493 		if (bufferSize > B_PATH_NAME_LENGTH)
9494 			bufferSize = B_PATH_NAME_LENGTH;
9495 	}
9496 
9497 	size_t newBufferSize = bufferSize;
9498 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9499 		&newBufferSize, false);
9500 
9501 	// we also update the bufferSize in case of errors
9502 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9503 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9504 		return B_BAD_ADDRESS;
9505 
9506 	if (status != B_OK)
9507 		return status;
9508 
9509 	bufferSize = min_c(newBufferSize, bufferSize);
9510 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9511 		return B_BAD_ADDRESS;
9512 
9513 	return B_OK;
9514 }
9515 
9516 
9517 status_t
9518 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9519 	int mode)
9520 {
9521 	KPath pathBuffer;
9522 	KPath toPathBuffer;
9523 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9524 		return B_NO_MEMORY;
9525 
9526 	char* path = pathBuffer.LockBuffer();
9527 	char* toPath = toPathBuffer.LockBuffer();
9528 
9529 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9530 		return B_BAD_ADDRESS;
9531 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9532 	if (status != B_OK)
9533 		return status;
9534 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9535 	if (status != B_OK)
9536 		return status;
9537 
9538 	return common_create_symlink(fd, path, toPath, mode, false);
9539 }
9540 
9541 
9542 status_t
9543 _user_create_link(int pathFD, const char* userPath, int toFD,
9544 	const char* userToPath, bool traverseLeafLink)
9545 {
9546 	KPath pathBuffer;
9547 	KPath toPathBuffer;
9548 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9549 		return B_NO_MEMORY;
9550 
9551 	char* path = pathBuffer.LockBuffer();
9552 	char* toPath = toPathBuffer.LockBuffer();
9553 
9554 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9555 		return B_BAD_ADDRESS;
9556 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9557 	if (status != B_OK)
9558 		return status;
9559 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9560 	if (status != B_OK)
9561 		return status;
9562 
9563 	status = check_path(toPath);
9564 	if (status != B_OK)
9565 		return status;
9566 
9567 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9568 		false);
9569 }
9570 
9571 
9572 status_t
9573 _user_unlink(int fd, const char* userPath)
9574 {
9575 	KPath pathBuffer;
9576 	if (pathBuffer.InitCheck() != B_OK)
9577 		return B_NO_MEMORY;
9578 
9579 	char* path = pathBuffer.LockBuffer();
9580 
9581 	if (!IS_USER_ADDRESS(userPath))
9582 		return B_BAD_ADDRESS;
9583 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9584 	if (status != B_OK)
9585 		return status;
9586 
9587 	return common_unlink(fd, path, false);
9588 }
9589 
9590 
9591 status_t
9592 _user_rename(int oldFD, const char* userOldPath, int newFD,
9593 	const char* userNewPath)
9594 {
9595 	KPath oldPathBuffer;
9596 	KPath newPathBuffer;
9597 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9598 		return B_NO_MEMORY;
9599 
9600 	char* oldPath = oldPathBuffer.LockBuffer();
9601 	char* newPath = newPathBuffer.LockBuffer();
9602 
9603 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9604 		return B_BAD_ADDRESS;
9605 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9606 	if (status != B_OK)
9607 		return status;
9608 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9609 	if (status != B_OK)
9610 		return status;
9611 
9612 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9613 }
9614 
9615 
9616 status_t
9617 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9618 {
9619 	KPath pathBuffer;
9620 	if (pathBuffer.InitCheck() != B_OK)
9621 		return B_NO_MEMORY;
9622 
9623 	char* path = pathBuffer.LockBuffer();
9624 
9625 	if (!IS_USER_ADDRESS(userPath))
9626 		return B_BAD_ADDRESS;
9627 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9628 	if (status != B_OK)
9629 		return status;
9630 
9631 	// split into directory vnode and filename path
9632 	char filename[B_FILE_NAME_LENGTH];
9633 	struct vnode* dir;
9634 	status = fd_and_path_to_dir_vnode(fd, path, &dir, filename, false);
9635 	if (status != B_OK)
9636 		return status;
9637 
9638 	VNodePutter _(dir);
9639 
9640 	// the underlying FS needs to support creating FIFOs
9641 	if (!HAS_FS_CALL(dir, create_special_node))
9642 		return B_UNSUPPORTED;
9643 
9644 	// create the entry -- the FIFO sub node is set up automatically
9645 	fs_vnode superVnode;
9646 	ino_t nodeID;
9647 	status = FS_CALL(dir, create_special_node, filename, NULL,
9648 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9649 
9650 	// create_special_node() acquired a reference for us that we don't need.
9651 	if (status == B_OK)
9652 		put_vnode(dir->mount->volume, nodeID);
9653 
9654 	return status;
9655 }
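
/*	Userland reaches this through the POSIX wrapper -- a minimal sketch
	(path and permissions are only examples):

		#include <sys/stat.h>

		if (mkfifo("/tmp/my_fifo", 0644) == 0) {
			// one process may now open the node O_RDONLY, another
			// O_WRONLY, and stream data through it
		}
*/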
9656 
9657 
9658 status_t
9659 _user_create_pipe(int* userFDs)
9660 {
9661 	// rootfs should support creating FIFOs, but let's be sure
9662 	if (!HAS_FS_CALL(sRoot, create_special_node))
9663 		return B_UNSUPPORTED;
9664 
9665 	// create the node -- the FIFO sub node is set up automatically
9666 	fs_vnode superVnode;
9667 	ino_t nodeID;
9668 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9669 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9670 	if (status != B_OK)
9671 		return status;
9672 
9673 	// We've got one reference to the node and need another one.
9674 	struct vnode* vnode;
9675 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9676 	if (status != B_OK) {
9677 		// that should not happen
9678 		// this should not happen
9679 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9680 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9681 	}
9682 
9683 	// Everything looks good so far. Open two FDs, one for reading and one
9684 	// for writing.
9685 	int fds[2];
9686 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9687 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9688 
9689 	FDCloser closer0(fds[0], false);
9690 	FDCloser closer1(fds[1], false);
9691 
9692 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9693 
9694 	// copy FDs to userland
9695 	if (status == B_OK) {
9696 		if (!IS_USER_ADDRESS(userFDs)
9697 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9698 			status = B_BAD_ADDRESS;
9699 		}
9700 	}
9701 
9702 	// keep the FDs if everything went fine
9703 	if (status == B_OK) {
9704 		closer0.Detach();
9705 		closer1.Detach();
9706 	}
9707 
9708 	return status;
9709 }
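
/*	The FD pair ends up in userland via pipe() -- a minimal sketch with
	the error handling elided:

		#include <unistd.h>

		int fds[2];
		if (pipe(fds) == 0) {
			write(fds[1], "ping", 4);	// fds[1] is the write end
			char buffer[5] = {};
			read(fds[0], buffer, 4);	// fds[0] is the read end
			close(fds[0]);
			close(fds[1]);
		}
*/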
9710 
9711 
9712 status_t
9713 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9714 {
9715 	KPath pathBuffer;
9716 	if (pathBuffer.InitCheck() != B_OK)
9717 		return B_NO_MEMORY;
9718 
9719 	char* path = pathBuffer.LockBuffer();
9720 
9721 	if (!IS_USER_ADDRESS(userPath))
9722 		return B_BAD_ADDRESS;
9723 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9724 	if (status != B_OK)
9725 		return status;
9726 
9727 	return common_access(fd, path, mode, effectiveUserGroup, false);
9728 }
9729 
9730 
9731 status_t
9732 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9733 	struct stat* userStat, size_t statSize)
9734 {
9735 	struct stat stat = {0};
9736 	status_t status;
9737 
9738 	if (statSize > sizeof(struct stat))
9739 		return B_BAD_VALUE;
9740 
9741 	if (!IS_USER_ADDRESS(userStat))
9742 		return B_BAD_ADDRESS;
9743 
9744 	if (userPath != NULL) {
9745 		// path given: get the stat of the node referred to by (fd, path)
9746 		if (!IS_USER_ADDRESS(userPath))
9747 			return B_BAD_ADDRESS;
9748 
9749 		KPath pathBuffer;
9750 		if (pathBuffer.InitCheck() != B_OK)
9751 			return B_NO_MEMORY;
9752 
9753 		char* path = pathBuffer.LockBuffer();
9754 
9755 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9756 		if (status != B_OK)
9757 			return status;
9758 
9759 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9760 	} else {
9761 		// no path given: get the FD and use the FD operation
9762 		struct file_descriptor* descriptor
9763 			= get_fd(get_current_io_context(false), fd);
9764 		if (descriptor == NULL)
9765 			return B_FILE_ERROR;
9766 
9767 		if (descriptor->ops->fd_read_stat)
9768 			status = descriptor->ops->fd_read_stat(descriptor, &stat);
9769 		else
9770 			status = B_UNSUPPORTED;
9771 
9772 		put_fd(descriptor);
9773 	}
9774 
9775 	if (status != B_OK)
9776 		return status;
9777 
9778 	return user_memcpy(userStat, &stat, statSize);
9779 }
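
/*	The statSize parameter is a compatibility measure: the kernel fills a
	complete struct stat but copies back only as many bytes as the caller's
	struct has room for, so binaries built against an older, smaller
	struct stat keep working. A sketch of the expected call from the
	libroot side, using the stub declared in syscalls.h (the path is only
	an example):

		struct stat st;
		status_t error = _kern_read_stat(-1, "/boot/home", true, &st,
			sizeof(st));
*/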
9780 
9781 
9782 status_t
9783 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9784 	const struct stat* userStat, size_t statSize, int statMask)
9785 {
9786 	if (statSize > sizeof(struct stat))
9787 		return B_BAD_VALUE;
9788 
9789 	struct stat stat;
9790 
9791 	if (!IS_USER_ADDRESS(userStat)
9792 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9793 		return B_BAD_ADDRESS;
9794 
9795 	// clear additional stat fields
9796 	if (statSize < sizeof(struct stat))
9797 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9798 
9799 	status_t status;
9800 
9801 	if (userPath != NULL) {
9802 		// path given: write the stat of the node referred to by (fd, path)
9803 		if (!IS_USER_ADDRESS(userPath))
9804 			return B_BAD_ADDRESS;
9805 
9806 		KPath pathBuffer;
9807 		if (pathBuffer.InitCheck() != B_OK)
9808 			return B_NO_MEMORY;
9809 
9810 		char* path = pathBuffer.LockBuffer();
9811 
9812 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9813 		if (status != B_OK)
9814 			return status;
9815 
9816 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9817 			statMask, false);
9818 	} else {
9819 		// no path given: get the FD and use the FD operation
9820 		struct file_descriptor* descriptor
9821 			= get_fd(get_current_io_context(false), fd);
9822 		if (descriptor == NULL)
9823 			return B_FILE_ERROR;
9824 
9825 		if (descriptor->ops->fd_write_stat) {
9826 			status = descriptor->ops->fd_write_stat(descriptor, &stat,
9827 				statMask);
9828 		} else
9829 			status = B_UNSUPPORTED;
9830 
9831 		put_fd(descriptor);
9832 	}
9833 
9834 	return status;
9835 }
9836 
9837 
9838 int
9839 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9840 {
9841 	KPath pathBuffer;
9842 	if (pathBuffer.InitCheck() != B_OK)
9843 		return B_NO_MEMORY;
9844 
9845 	char* path = pathBuffer.LockBuffer();
9846 
9847 	if (userPath != NULL) {
9848 		if (!IS_USER_ADDRESS(userPath))
9849 			return B_BAD_ADDRESS;
9850 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9851 		if (status != B_OK)
9852 			return status;
9853 	}
9854 
9855 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9856 }
9857 
9858 
9859 ssize_t
9860 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9861 	size_t readBytes)
9862 {
9863 	char attribute[B_FILE_NAME_LENGTH];
9864 
9865 	if (userAttribute == NULL)
9866 		return B_BAD_VALUE;
9867 	if (!IS_USER_ADDRESS(userAttribute))
9868 		return B_BAD_ADDRESS;
9869 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9870 	if (status != B_OK)
9871 		return status;
9872 
9873 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9874 	if (attr < 0)
9875 		return attr;
9876 
9877 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9878 	_user_close(attr);
9879 
9880 	return bytes;
9881 }
9882 
9883 
9884 ssize_t
9885 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9886 	const void* buffer, size_t writeBytes)
9887 {
9888 	char attribute[B_FILE_NAME_LENGTH];
9889 
9890 	if (userAttribute == NULL)
9891 		return B_BAD_VALUE;
9892 	if (!IS_USER_ADDRESS(userAttribute))
9893 		return B_BAD_ADDRESS;
9894 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9895 	if (status != B_OK)
9896 		return status;
9897 
9898 	// Try to support the BeOS-typical truncation as well as the position
9899 	// argument
9900 	int attr = attr_create(fd, NULL, attribute, type,
9901 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9902 	if (attr < 0)
9903 		return attr;
9904 
9905 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9906 	_user_close(attr);
9907 
9908 	return bytes;
9909 }
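
/*	A write/read round trip through the public API in <fs_attr.h> -- a
	minimal sketch (file path and attribute name are only examples).
	Note the truncation rule above: a write at pos 0 replaces the
	attribute's contents, a write at pos != 0 leaves them in place and
	writes at that offset.

		#include <fcntl.h>
		#include <unistd.h>
		#include <fs_attr.h>
		#include <TypeConstants.h>

		int fd = open("/boot/home/file", O_RDWR);
		const char version[] = "1.0.2";
		fs_write_attr(fd, "app:version", B_STRING_TYPE, 0, version,
			sizeof(version));
		char buffer[16];
		fs_read_attr(fd, "app:version", B_STRING_TYPE, 0, buffer,
			sizeof(buffer));
		close(fd);
*/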
9910 
9911 
9912 status_t
9913 _user_stat_attr(int fd, const char* userAttribute,
9914 	struct attr_info* userAttrInfo)
9915 {
9916 	char attribute[B_FILE_NAME_LENGTH];
9917 
9918 	if (userAttribute == NULL || userAttrInfo == NULL)
9919 		return B_BAD_VALUE;
9920 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9921 		return B_BAD_ADDRESS;
9922 	status_t status = user_copy_name(attribute, userAttribute,
9923 		sizeof(attribute));
9924 	if (status != B_OK)
9925 		return status;
9926 
9927 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9928 	if (attr < 0)
9929 		return attr;
9930 
9931 	struct file_descriptor* descriptor
9932 		= get_fd(get_current_io_context(false), attr);
9933 	if (descriptor == NULL) {
9934 		_user_close(attr);
9935 		return B_FILE_ERROR;
9936 	}
9937 
9938 	struct stat stat;
9939 	if (descriptor->ops->fd_read_stat)
9940 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9941 	else
9942 		status = B_UNSUPPORTED;
9943 
9944 	put_fd(descriptor);
9945 	_user_close(attr);
9946 
9947 	if (status == B_OK) {
9948 		attr_info info;
9949 		info.type = stat.st_type;
9950 		info.size = stat.st_size;
9951 
9952 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9953 			return B_BAD_ADDRESS;
9954 	}
9955 
9956 	return status;
9957 }
9958 
9959 
9960 int
9961 _user_open_attr(int fd, const char* userPath, const char* userName,
9962 	uint32 type, int openMode)
9963 {
9964 	char name[B_FILE_NAME_LENGTH];
9965 
9966 	if (!IS_USER_ADDRESS(userName))
9967 		return B_BAD_ADDRESS;
9968 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9969 	if (status != B_OK)
9970 		return status;
9971 
9972 	KPath pathBuffer;
9973 	if (pathBuffer.InitCheck() != B_OK)
9974 		return B_NO_MEMORY;
9975 
9976 	char* path = pathBuffer.LockBuffer();
9977 
9978 	if (userPath != NULL) {
9979 		if (!IS_USER_ADDRESS(userPath))
9980 			return B_BAD_ADDRESS;
9981 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9982 		if (status != B_OK)
9983 			return status;
9984 	}
9985 
9986 	if ((openMode & O_CREAT) != 0) {
9987 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9988 			false);
9989 	}
9990 
9991 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9992 }
9993 
9994 
9995 status_t
9996 _user_remove_attr(int fd, const char* userName)
9997 {
9998 	char name[B_FILE_NAME_LENGTH];
9999 
10000 	if (!IS_USER_ADDRESS(userName))
10001 		return B_BAD_ADDRESS;
10002 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10003 	if (status != B_OK)
10004 		return status;
10005 
10006 	return attr_remove(fd, name, false);
10007 }
10008 
10009 
10010 status_t
10011 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
10012 	const char* userToName)
10013 {
10014 	if (!IS_USER_ADDRESS(userFromName)
10015 		|| !IS_USER_ADDRESS(userToName))
10016 		return B_BAD_ADDRESS;
10017 
10018 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
10019 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
10020 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
10021 		return B_NO_MEMORY;
10022 
10023 	char* fromName = fromNameBuffer.LockBuffer();
10024 	char* toName = toNameBuffer.LockBuffer();
10025 
10026 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
10027 	if (status != B_OK)
10028 		return status;
10029 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
10030 	if (status != B_OK)
10031 		return status;
10032 
10033 	return attr_rename(fromFile, fromName, toFile, toName, false);
10034 }
10035 
10036 
10037 int
10038 _user_open_index_dir(dev_t device)
10039 {
10040 	return index_dir_open(device, false);
10041 }
10042 
10043 
10044 status_t
10045 _user_create_index(dev_t device, const char* userName, uint32 type,
10046 	uint32 flags)
10047 {
10048 	char name[B_FILE_NAME_LENGTH];
10049 
10050 	if (!IS_USER_ADDRESS(userName))
10051 		return B_BAD_ADDRESS;
10052 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10053 	if (status != B_OK)
10054 		return status;
10055 
10056 	return index_create(device, name, type, flags, false);
10057 }
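
/*	The matching public call is fs_create_index() from <fs_index.h> -- a
	minimal sketch (volume and index name are only examples):

		#include <fs_index.h>
		#include <fs_info.h>
		#include <TypeConstants.h>

		dev_t device = dev_for_path("/boot");
		fs_create_index(device, "MAIL:subject", B_STRING_TYPE, 0);
*/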
10058 
10059 
10060 status_t
10061 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
10062 {
10063 	char name[B_FILE_NAME_LENGTH];
10064 	struct stat stat = {0};
10065 	status_t status;
10066 
10067 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
10068 		return B_BAD_ADDRESS;
10069 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10070 	if (status != B_OK)
10071 		return status;
10072 
10073 	status = index_name_read_stat(device, name, &stat, false);
10074 	if (status == B_OK) {
10075 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
10076 			return B_BAD_ADDRESS;
10077 	}
10078 
10079 	return status;
10080 }
10081 
10082 
10083 status_t
10084 _user_remove_index(dev_t device, const char* userName)
10085 {
10086 	char name[B_FILE_NAME_LENGTH];
10087 
10088 	if (!IS_USER_ADDRESS(userName))
10089 		return B_BAD_ADDRESS;
10090 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
10091 	if (status != B_OK)
10092 		return status;
10093 
10094 	return index_remove(device, name, false);
10095 }
10096 
10097 
10098 status_t
10099 _user_getcwd(char* userBuffer, size_t size)
10100 {
10101 	if (size == 0)
10102 		return B_BAD_VALUE;
10103 	if (!IS_USER_ADDRESS(userBuffer))
10104 		return B_BAD_ADDRESS;
10105 
10106 	if (size > kMaxPathLength)
10107 		size = kMaxPathLength;
10108 
10109 	KPath pathBuffer(size);
10110 	if (pathBuffer.InitCheck() != B_OK)
10111 		return B_NO_MEMORY;
10112 
10113 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
10114 
10115 	char* path = pathBuffer.LockBuffer();
10116 
10117 	status_t status = get_cwd(path, size, false);
10118 	if (status != B_OK)
10119 		return status;
10120 
10121 	// Copy back the result
10122 	if (user_strlcpy(userBuffer, path, size) < B_OK)
10123 		return B_BAD_ADDRESS;
10124 
10125 	return status;
10126 }
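
/*	Note the POSIX-level consequence of the code above: since get_cwd()
	fails rather than truncates, getcwd() with a too-small buffer returns
	NULL (errno ERANGE) instead of a clipped path -- a minimal sketch:

		#include <stdio.h>
		#include <unistd.h>
		#include <StorageDefs.h>

		char buffer[B_PATH_NAME_LENGTH];
		if (getcwd(buffer, sizeof(buffer)) != NULL)
			printf("cwd: %s\n", buffer);
*/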
10127 
10128 
10129 status_t
10130 _user_setcwd(int fd, const char* userPath)
10131 {
10132 	TRACE(("user_setcwd: path = %p\n", userPath));
10133 
10134 	KPath pathBuffer;
10135 	if (pathBuffer.InitCheck() != B_OK)
10136 		return B_NO_MEMORY;
10137 
10138 	char* path = pathBuffer.LockBuffer();
10139 
10140 	if (userPath != NULL) {
10141 		if (!IS_USER_ADDRESS(userPath))
10142 			return B_BAD_ADDRESS;
10143 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10144 		if (status != B_OK)
10145 			return status;
10146 	}
10147 
10148 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10149 }
10150 
10151 
10152 status_t
10153 _user_change_root(const char* userPath)
10154 {
10155 	// only root is allowed to chroot()
10156 	if (geteuid() != 0)
10157 		return B_NOT_ALLOWED;
10158 
10159 	// alloc path buffer
10160 	KPath pathBuffer;
10161 	if (pathBuffer.InitCheck() != B_OK)
10162 		return B_NO_MEMORY;
10163 
10164 	// copy userland path to kernel
10165 	char* path = pathBuffer.LockBuffer();
10166 	if (userPath != NULL) {
10167 		if (!IS_USER_ADDRESS(userPath))
10168 			return B_BAD_ADDRESS;
10169 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10170 		if (status != B_OK)
10171 			return status;
10172 	}
10173 
10174 	// get the vnode
10175 	struct vnode* vnode;
10176 	status_t status = path_to_vnode(path, true, &vnode, NULL, false);
10177 	if (status != B_OK)
10178 		return status;
10179 
10180 	// set the new root
10181 	struct io_context* context = get_current_io_context(false);
10182 	mutex_lock(&sIOContextRootLock);
10183 	struct vnode* oldRoot = context->root;
10184 	context->root = vnode;
10185 	mutex_unlock(&sIOContextRootLock);
10186 
10187 	put_vnode(oldRoot);
10188 
10189 	return B_OK;
10190 }
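
/*	Only the superuser gets here; through the POSIX wrapper the usual
	pattern looks like this minimal sketch (the jail path is only an
	example; the chdir("/") keeps the cwd from pointing outside the
	new root):

		#include <unistd.h>

		if (chroot("/boot/home/jail") == 0 && chdir("/") == 0) {
			// all absolute paths now resolve inside the jail
		}
*/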
10191 
10192 
10193 int
10194 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10195 	uint32 flags, port_id port, int32 token)
10196 {
10197 	if (device < 0 || userQuery == NULL || queryLength == 0)
10198 		return B_BAD_VALUE;
10199 
10200 	if (!IS_USER_ADDRESS(userQuery))
10201 		return B_BAD_ADDRESS;
10202 
10203 	// this is a safety restriction
10204 	if (queryLength >= 65536)
10205 		return B_NAME_TOO_LONG;
10206 
10207 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10208 	if (!query.IsValid())
10209 		return B_NO_MEMORY;
10210 
10211 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10212 		return B_BAD_ADDRESS;
10213 
10214 	return query_open(device, query, flags, port, token, false);
10215 }
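
/*	The port/token pair is used for live-query updates; for a one-shot
	query the public API in <fs_query.h> boils down to this minimal
	sketch (predicate and volume are only examples):

		#include <stdio.h>
		#include <fs_query.h>
		#include <fs_info.h>

		DIR* query = fs_open_query(dev_for_path("/boot"),
			"name==\"*.cpp\"", 0);
		if (query != NULL) {
			while (struct dirent* entry = fs_read_query(query))
				puts(entry->d_name);
			fs_close_query(query);
		}
*/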
10216 
10217 
10218 #include "vfs_request_io.cpp"
10219