xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 13581b3d2a71545960b98fefebc5225b5bf29072)
/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
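

// Example: how a VFS routine invokes an optional FS hook through the macros
// above. HAS_FS_CALL() guards against hooks the FS doesn't implement; with
// KDEBUG, FS_CALL() itself panics on a NULL hook. This is an illustrative
// sketch only (hypothetical caller, not part of the call graph):
#if 0
static status_t
example_read_symlink(struct vnode* vnode, char* buffer, size_t* _bufferSize)
{
	if (!HAS_FS_CALL(vnode, read_symlink))
		return B_BAD_VALUE;

	// expands to vnode->ops->read_symlink(vnode->mount->volume, vnode,
	// buffer, _bufferSize)
	return FS_CALL(vnode, read_symlink, buffer, _bufferSize);
}
#endif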


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd(); this does not depend
	// on PATH_MAX).


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is ensured that it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold this lock during their whole operation.
	That is, locking it ensures that no FS is mounted or unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are
	  immutable after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount) to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, and unused flags, and the vnode's type can also
	be write-accessed when holding a read lock to sVnodeLock *and* having the
	vnode locked. Write access to covered_by and covers requires write-locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;
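

// The policy struct above tells BOpenHashTable how to hash, compare, and
// intrusively chain vnodes (via vnode::next). A lookup thus needs only a
// (device, vnode ID) key, as lookup_vnode() below does. Illustrative sketch:
#if 0
	vnode_hash_key key;
	key.device = mountID;
	key.vnode = vnodeID;
	struct vnode* vnode = sVnodeTable->Lookup(key);
		// uses VnodeHash::HashKey() to pick the bucket, then
		// VnodeHash::Compare() on each chained entry
#endif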


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes: BUSY_VNODE_RETRIES retries with a
// BUSY_VNODE_DELAY of 5 ms each, i.e. 2000 * 5000 µs = 10 s in total
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, bool kernel,
	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);


static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

} // namespace
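

// FDCloser is a scope guard for freshly allocated file descriptors: when an
// error path leaves the scope, the FD is closed with the matching
// kernel/userland close function; on success, Detach() hands ownership back
// to the caller. Illustrative sketch (hypothetical helper calls):
#if 0
	int fd = create_some_fd();	// hypothetical
	if (fd < 0)
		return fd;
	FDCloser fdCloser(fd, kernel);

	status_t status = more_setup();	// hypothetical
	if (status != B_OK)
		return status;	// fdCloser closes fd automatically

	return fdCloser.Detach();	// success -- keep the FD open
#endif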


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING


/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold sMountLock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}


static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/")) != 0) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length) != 0) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
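

// For illustration: both get_file_system_name("bfs") and
// get_file_system_name("file_systems/bfs/v1") return a heap-allocated "bfs";
// the caller has to free() the result.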


/*!	Accepts a list of file system names separated by a colon, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
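

// For illustration: with fsNames = "ntfs:write_overlay",
// get_file_system_name_for_layer(fsNames, 0) returns "ntfs", layer 1 returns
// "write_overlay", and any higher layer yields NULL.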


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);

	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}


/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	If the vnode should still be waited for, this also waits for
	BUSY_VNODE_DELAY before returning.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// the vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}
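

// The intended calling pattern (cf. get_vnode() below): initialize the retry
// budget once, then loop while the vnode stays busy. Illustrative sketch:
#if 0
	int32 tries = BUSY_VNODE_RETRIES;
	while (vnode->IsBusy()) {
		if (!retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;	// gave up after ~10 seconds
	}
#endif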


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- though not if an error occurs -- the function
	write-locks \c sVnodeLock and keeps it locked for the caller when
	returning. On error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		object_cache_free(sVnodeCache, vnode, 0);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		object_cache_free(sVnodeCache, vnode, 0);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now will cause the next-to-last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	object_cache_free(sVnodeCache, vnode, 0);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read-locking sVnodeLock)
	  or by holding sVnodeLock write-locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is: 1. this function must not be invoked when the
	node is busy in the first place, and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait Whether to wait (and retry) when the vnode is busy, rather
		   than failing with \c B_BUSY right away.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		// vnodes in the Removed state (except ones still Unpublished)
		// which are also Busy will disappear soon, so we do not wait for them.
		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			object_cache_free(sVnodeCache, vnode, 0);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}


static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we'd rather not free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any), which
	the caller is responsible for releasing.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}


/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place -- otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success -- even if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
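

// Worked example (for illustration): a lock covering bytes [100, 199] and a
// flock with l_start = 150, l_len = 100 (i.e. bytes [150, 249]) intersect:
// 100 <= 150 - 1 + 100 = 249 and 199 >= 150. Both ranges are treated as
// closed intervals of byte offsets.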


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = lock->end;
					// the second lock keeps the original end...
				secondLock->shared = lock->shared;

				lock->end = flock->l_start - 1;
					// ...and only then is the first lock truncated

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			delete lock;
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available in case of collisions; if
	\a wait is \c false, it returns B_WOULD_BLOCK for flock() style locks
	and B_PERMISSION_DENIED for POSIX locks instead.

	If \a descriptor is NULL, POSIX semantics are used for this lock.
	Otherwise, BSD flock() semantics are used, that is, all children can
	unlock the file in question (we even allow parents to remove the lock,
	which seems to be in line with what the BSDs do).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
1808 
1809 
1810 /*!	Normalizes the \a flock structure to make it easier to compare the
1811 	structure with others. The l_start and l_len fields are set to absolute
1812 	values according to the l_whence field.
1813 */
1814 static status_t
1815 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1816 {
1817 	switch (flock->l_whence) {
1818 		case SEEK_SET:
1819 			break;
1820 		case SEEK_CUR:
1821 			flock->l_start += descriptor->pos;
1822 			break;
1823 		case SEEK_END:
1824 		{
1825 			struct vnode* vnode = descriptor->u.vnode;
1826 			struct stat stat;
1827 			status_t status;
1828 
1829 			if (!HAS_FS_CALL(vnode, read_stat))
1830 				return B_UNSUPPORTED;
1831 
1832 			status = FS_CALL(vnode, read_stat, &stat);
1833 			if (status != B_OK)
1834 				return status;
1835 
1836 			flock->l_start += stat.st_size;
1837 			break;
1838 		}
1839 		default:
1840 			return B_BAD_VALUE;
1841 	}
1842 
1843 	if (flock->l_start < 0)
1844 		flock->l_start = 0;
1845 	if (flock->l_len == 0)
1846 		flock->l_len = OFF_MAX;
1847 
1848 	// don't let the offset and length overflow
1849 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1850 		flock->l_len = OFF_MAX - flock->l_start;
1851 
1852 	if (flock->l_len < 0) {
1853 		// a negative length reverses the region
1854 		flock->l_start += flock->l_len;
1855 		flock->l_len = -flock->l_len;
1856 	}
1857 
1858 	return B_OK;
1859 }
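
// Worked example (editor's note): assuming descriptor->pos == 100 and an
// input of { l_whence = SEEK_CUR, l_start = -10, l_len = -20 },
// normalize_flock() first makes the start absolute (l_start = 90), then
// the negative length reverses the region to l_start = 70, l_len = 20,
// i.e. bytes [70, 89] - matching the lock->end = l_start - 1 + l_len
// computation used when installing the lock above.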
1860 
1861 
1862 static void
1863 replace_vnode_if_disconnected(struct fs_mount* mount,
1864 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1865 	struct vnode* fallBack, bool lockRootLock)
1866 {
1867 	struct vnode* givenVnode = vnode;
1868 	bool vnodeReplaced = false;
1869 
1870 	ReadLocker vnodeReadLocker(sVnodeLock);
1871 
1872 	if (lockRootLock)
1873 		mutex_lock(&sIOContextRootLock);
1874 
1875 	while (vnode != NULL && vnode->mount == mount
1876 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1877 		if (vnode->covers != NULL) {
1878 			// redirect the vnode to the covered vnode
1879 			vnode = vnode->covers;
1880 		} else
1881 			vnode = fallBack;
1882 
1883 		vnodeReplaced = true;
1884 	}
1885 
1886 	// If we've replaced the node, grab a reference for the new one.
1887 	if (vnodeReplaced && vnode != NULL)
1888 		inc_vnode_ref_count(vnode);
1889 
1890 	if (lockRootLock)
1891 		mutex_unlock(&sIOContextRootLock);
1892 
1893 	vnodeReadLocker.Unlock();
1894 
1895 	if (vnodeReplaced)
1896 		put_vnode(givenVnode);
1897 }
1898 
1899 
1900 /*!	Disconnects all file descriptors that are associated with the
1901 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1902 	\a mount object.
1903 
1904 	Note, after you've called this function, there might still be ongoing
1905 	accesses - those already in progress won't be interrupted.
1906 	However, any subsequent access will fail.
1907 
1908 	This is not a cheap function and should be used with care and rarely.
1909 	TODO: there is currently no means to stop a blocking read/write!
1910 */
1911 static void
1912 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1913 	struct vnode* vnodeToDisconnect)
1914 {
1915 	// iterate over all teams and peek into their file descriptors
1916 	TeamListIterator teamIterator;
1917 	while (Team* team = teamIterator.Next()) {
1918 		BReference<Team> teamReference(team, true);
1919 		TeamLocker teamLocker(team);
1920 
1921 		// lock the I/O context
1922 		io_context* context = team->io_context;
1923 		if (context == NULL)
1924 			continue;
1925 		MutexLocker contextLocker(context->io_mutex);
1926 
1927 		teamLocker.Unlock();
1928 
1929 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1930 			sRoot, true);
1931 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1932 			sRoot, false);
1933 
1934 		for (uint32 i = 0; i < context->table_size; i++) {
1935 			struct file_descriptor* descriptor = context->fds[i];
1936 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1937 				continue;
1938 
1939 			inc_fd_ref_count(descriptor);
1940 
1941 			// if this descriptor points at this mount, we
1942 			// need to disconnect it to be able to unmount
1943 			struct vnode* vnode = fd_vnode(descriptor);
1944 			if (vnodeToDisconnect != NULL) {
1945 				if (vnode == vnodeToDisconnect)
1946 					disconnect_fd(descriptor);
1947 			} else if ((vnode != NULL && vnode->mount == mount)
1948 				|| (vnode == NULL && descriptor->u.mount == mount))
1949 				disconnect_fd(descriptor);
1950 
1951 			put_fd(descriptor);
1952 		}
1953 	}
1954 }
1955 
1956 
1957 /*!	\brief Gets the root node of the current IO context.
1958 	If \a kernel is \c true, the kernel IO context will be used.
1959 	The caller obtains a reference to the returned node.
1960 */
1961 struct vnode*
1962 get_root_vnode(bool kernel)
1963 {
1964 	if (!kernel) {
1965 		// Get the root vnode from the IO context
1966 		struct io_context* context = get_current_io_context(kernel);
1967 
1968 		mutex_lock(&sIOContextRootLock);
1969 
1970 		struct vnode* root = context->root;
1971 		if (root != NULL)
1972 			inc_vnode_ref_count(root);
1973 
1974 		mutex_unlock(&sIOContextRootLock);
1975 
1976 		if (root != NULL)
1977 			return root;
1978 
1979 		// That should never happen.
1980 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1981 			"have a root\n", team_get_current_team_id());
1982 	}
1983 
1984 	inc_vnode_ref_count(sRoot);
1985 	return sRoot;
1986 }
1987 
1988 
1989 /*!	\brief Gets the directory path and leaf name for a given path.
1990 
1991 	The supplied \a path is transformed to refer to the directory part of
1992 	the entry identified by the original path, and the leaf name of the
1993 	original entry is written into the buffer \a filename.
1994 	Neither the returned path nor the leaf name can be expected to be
1995 	canonical.
1996 
1997 	\param path The path to be analyzed. Must be able to store at least one
1998 		   additional character.
1999 	\param filename The buffer into which the leaf name will be written.
2000 		   Must be of size B_FILE_NAME_LENGTH at least.
2001 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2002 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2003 		   if the given path name is empty.
2004 */
2005 static status_t
2006 get_dir_path_and_leaf(char* path, char* filename)
2007 {
2008 	if (*path == '\0')
2009 		return B_ENTRY_NOT_FOUND;
2010 
2011 	char* last = strrchr(path, '/');
2012 		// '/' is not allowed in file names!
2013 
2014 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2015 
2016 	if (last == NULL) {
2017 		// this path is single segment with no '/' in it
2018 		// ex. "foo"
2019 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2020 			return B_NAME_TOO_LONG;
2021 
2022 		strcpy(path, ".");
2023 	} else {
2024 		last++;
2025 		if (last[0] == '\0') {
2026 			// special case: the path ends in one or more '/' - remove them
2027 			while (*--last == '/' && last != path);
2028 			last[1] = '\0';
2029 
2030 			if (last == path && last[0] == '/') {
2031 				// This path points to the root of the file system
2032 				strcpy(filename, ".");
2033 				return B_OK;
2034 			}
2035 			for (; last != path && *(last - 1) != '/'; last--);
2036 				// rewind to the start of the leaf before the '/'
2037 		}
2038 
2039 		// normal leaf: replace the leaf portion of the path with a '.'
2040 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2041 			return B_NAME_TOO_LONG;
2042 
2043 		last[0] = '.';
2044 		last[1] = '\0';
2045 	}
2046 	return B_OK;
2047 }
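
// Examples (editor's note): get_dir_path_and_leaf() rewrites its buffer in
// place, roughly as follows:
//
//	"foo"                 -> path ".",            filename "foo"
//	"/boot/home/Desktop"  -> path "/boot/home/.", filename "Desktop"
//	"/boot/home/Desktop/" -> path "/boot/home/.", filename "Desktop"
//	"/"                   -> path "/",            filename "."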
2048 
2049 
2050 static status_t
2051 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2052 	bool traverse, bool kernel, VnodePutter& _vnode)
2053 {
2054 	char clonedName[B_FILE_NAME_LENGTH + 1];
2055 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2056 		return B_NAME_TOO_LONG;
2057 
2058 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2059 	struct vnode* directory;
2060 
2061 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2062 	if (status < 0)
2063 		return status;
2064 
2065 	return vnode_path_to_vnode(directory, clonedName, traverse, kernel,
2066 		_vnode, NULL);
2067 }
2068 
2069 
2070 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2071 	and returns the respective vnode.
2072 	On success a reference to the vnode is acquired for the caller.
2073 */
2074 static status_t
2075 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2076 {
2077 	ino_t id;
2078 	bool missing;
2079 
2080 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2081 		return missing ? B_ENTRY_NOT_FOUND
2082 			: get_vnode(dir->device, id, _vnode, true, false);
2083 	}
2084 
2085 	status_t status = FS_CALL(dir, lookup, name, &id);
2086 	if (status != B_OK)
2087 		return status;
2088 
2089 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2090 	// have a reference and just need to look the node up.
2091 	rw_lock_read_lock(&sVnodeLock);
2092 	*_vnode = lookup_vnode(dir->device, id);
2093 	rw_lock_read_unlock(&sVnodeLock);
2094 
2095 	if (*_vnode == NULL) {
2096 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2097 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2098 		return B_ENTRY_NOT_FOUND;
2099 	}
2100 
2101 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2102 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2103 //		(*_vnode)->mount->id, (*_vnode)->id);
2104 
2105 	return B_OK;
2106 }
2107 
2108 
2109 /*!	Returns the vnode for the relative \a path starting at the specified
2110 	\a start vnode.
2111 
2112 	\param[in,out] path The relative path being searched. Must not be NULL.
2113 	If the function returns successfully, \a path contains the name of the
2114 	last path component. This function clobbers the buffer pointed to by
2115 	\a path only if it contains more than one component.
2116 
2117 	Note, this reduces the ref_count of the starting \a start vnode, no
2118 	matter if it is successful or not!
2119 
2120 	\param[out] _vnode If the function returns \c B_OK: set to the found
2121 		node, with a reference acquired for the caller.
2122 	\param[out] _vnode If the function fails and \a leafName is not NULL:
2123 		set to the last existing directory in the path. The caller then has
2124 		the responsibility to release it using put_vnode().
2125 	\param[out] _vnode If the function fails and \a leafName is NULL: not
2126 		used.
2127 */
2128 static status_t
2129 vnode_path_to_vnode(struct vnode* start, char* path, bool traverseLeafLink,
2130 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2131 	ino_t* _parentID, char* leafName)
2132 {
2133 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", start, path));
2134 	ASSERT(!_vnode.IsSet());
2135 
2136 	VnodePutter vnode(start);
2137 
2138 	if (path == NULL)
2139 		return B_BAD_VALUE;
2140 	if (*path == '\0')
2141 		return B_ENTRY_NOT_FOUND;
2142 
2143 	status_t status = B_OK;
2144 	ino_t lastParentID = vnode->id;
2145 	while (true) {
2146 		char* nextPath;
2147 
2148 		TRACE(("vnode_path_to_vnode: top of loop. path = %p, path = '%s'\n",
2149 			path, path));
2150 
2151 		// done?
2152 		if (path[0] == '\0')
2153 			break;
2154 
2155 		// walk to find the next path component ("path" will point to a single
2156 		// path component), and filter out multiple slashes
2157 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2158 				nextPath++);
2159 
2160 		bool directoryFound = false;
2161 		if (*nextPath == '/') {
2162 			directoryFound = true;
2163 			*nextPath = '\0';
2164 			do
2165 				nextPath++;
2166 			while (*nextPath == '/');
2167 		}
2168 
2169 		// If the ".." is at a covering vnode, move to the covered
2170 		// vnode, so we pass the ".." path to the underlying file system.
2171 		// Also prevent breaking out of the root of the IO context.
2172 		if (strcmp("..", path) == 0) {
2173 			if (vnode.Get() == ioContext->root) {
2174 				// Attempted prison break! Keep it contained.
2175 				path = nextPath;
2176 				continue;
2177 			}
2178 
2179 			if (Vnode* coveredVnode = get_covered_vnode(vnode.Get()))
2180 				vnode.SetTo(coveredVnode);
2181 		}
2182 
2183 		// check if vnode is really a directory
2184 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2185 			status = B_NOT_A_DIRECTORY;
2186 
2187 		// Check if we have the right to search the current directory vnode.
2188 		// If a file system doesn't have the access() function, we assume that
2189 		// searching a directory is always allowed
2190 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2191 			status = FS_CALL(vnode.Get(), access, X_OK);
2192 
2193 		// Tell the filesystem to get the vnode of this path component (if we
2194 		// got the permission from the call above)
2195 		VnodePutter nextVnode;
2196 		if (status == B_OK) {
2197 			struct vnode* temp = NULL;
2198 			status = lookup_dir_entry(vnode.Get(), path, &temp);
2199 			nextVnode.SetTo(temp);
2200 		}
2201 
2202 		if (status != B_OK) {
2203 			if (leafName != NULL) {
2204 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2205 				_vnode.SetTo(vnode.Detach());
2206 			}
2207 			return status;
2208 		}
2209 
2210 		// If the new node is a symbolic link, resolve it (if we've been told
2211 		// to do it)
2212 		if (S_ISLNK(nextVnode->Type())
2213 			&& (traverseLeafLink || directoryFound)) {
2214 			size_t bufferSize;
2215 			char* buffer;
2216 
2217 			TRACE(("traverse link\n"));
2218 
2219 			if (count + 1 > B_MAX_SYMLINKS)
2220 				return B_LINK_LIMIT;
2221 
2222 			bufferSize = B_PATH_NAME_LENGTH;
2223 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2224 			if (buffer == NULL)
2225 				return B_NO_MEMORY;
2226 
2227 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2228 				bufferSize--;
2229 				status = FS_CALL(nextVnode.Get(), read_symlink, buffer, &bufferSize);
2230 				// null-terminate
2231 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2232 					buffer[bufferSize] = '\0';
2233 			} else
2234 				status = B_BAD_VALUE;
2235 
2236 			if (status != B_OK) {
2237 				object_cache_free(sPathNameCache, buffer, 0);
2238 				return status;
2239 			}
2240 			nextVnode.Unset();
2241 
2242 			// Check if we start from the root directory or the current
2243 			// directory ("vnode" still points to that one).
2244 			// Cut off all leading slashes if it's the root directory
2245 			path = buffer;
2246 			bool absoluteSymlink = false;
2247 			if (path[0] == '/') {
2248 				// we don't need the old directory anymore
2249 				vnode.Unset();
2250 
2251 				while (*++path == '/')
2252 					;
2253 
2254 				mutex_lock(&sIOContextRootLock);
2255 				vnode.SetTo(ioContext->root);
2256 				inc_vnode_ref_count(vnode.Get());
2257 				mutex_unlock(&sIOContextRootLock);
2258 
2259 				absoluteSymlink = true;
2260 			}
2261 
2262 			inc_vnode_ref_count(vnode.Get());
2263 				// balance the next recursion - we will decrement the
2264 				// ref_count of the vnode, no matter if we succeeded or not
2265 
2266 			if (absoluteSymlink && *path == '\0') {
2267 				// symlink was just "/"
2268 				nextVnode.SetTo(vnode.Get());
2269 			} else {
2270 				status = vnode_path_to_vnode(vnode.Get(), path, true, count + 1,
2271 					ioContext, nextVnode, &lastParentID, leafName);
2272 			}
2273 
2274 			object_cache_free(sPathNameCache, buffer, 0);
2275 
2276 			if (status != B_OK) {
2277 				if (leafName != NULL)
2278 					_vnode.SetTo(nextVnode.Detach());
2279 				return status;
2280 			}
2281 		} else
2282 			lastParentID = vnode->id;
2283 
2284 		// decrease the ref count on the old dir we just looked up into
2285 		vnode.Unset();
2286 
2287 		path = nextPath;
2288 		vnode.SetTo(nextVnode.Detach());
2289 
2290 		// see if we hit a covered node
2291 		if (Vnode* coveringNode = get_covering_vnode(vnode.Get()))
2292 			vnode.SetTo(coveringNode);
2293 	}
2294 
2295 	_vnode.SetTo(vnode.Detach());
2296 	if (_parentID)
2297 		*_parentID = lastParentID;
2298 
2299 	return B_OK;
2300 }
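
// Example (editor's note): resolving "a//b/../c" from some starting vnode
// proceeds one component at a time: "a" is looked up, the duplicate slash
// is skipped, "b" is looked up, ".." walks back up (moving to the covered
// vnode at mount points, and never above ioContext->root), and finally
// "c" is looked up. A symlink encountered along the way restarts the walk
// with the link contents, recursing at most B_MAX_SYMLINKS levels deep.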
2301 
2302 
2303 static status_t
2304 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2305 	bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2306 {
2307 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0,
2308 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2309 }
2310 
2311 
2312 static status_t
2313 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2314 	ino_t* _parentID, bool kernel)
2315 {
2316 	struct vnode* start = NULL;
2317 
2318 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2319 
2320 	if (!path)
2321 		return B_BAD_VALUE;
2322 
2323 	if (*path == '\0')
2324 		return B_ENTRY_NOT_FOUND;
2325 
2326 	// figure out if we need to start at root or at cwd
2327 	if (*path == '/') {
2328 		if (sRoot == NULL) {
2329 			// we're a bit early, aren't we?
2330 			return B_ERROR;
2331 		}
2332 
2333 		while (*++path == '/')
2334 			;
2335 		start = get_root_vnode(kernel);
2336 
2337 		if (*path == '\0') {
2338 			_vnode.SetTo(start);
2339 			return B_OK;
2340 		}
2341 
2342 	} else {
2343 		struct io_context* context = get_current_io_context(kernel);
2344 
2345 		mutex_lock(&context->io_mutex);
2346 		start = context->cwd;
2347 		if (start != NULL)
2348 			inc_vnode_ref_count(start);
2349 		mutex_unlock(&context->io_mutex);
2350 
2351 		if (start == NULL)
2352 			return B_ERROR;
2353 	}
2354 
2355 	return vnode_path_to_vnode(start, path, traverseLink, kernel, _vnode,
2356 		_parentID);
2357 }
2358 
2359 
2360 /*! Returns the vnode for the next-to-last segment of the path, and returns
2361 	the last portion in \a filename.
2362 	The path buffer must be able to store at least one additional character.
2363 */
2364 static status_t
2365 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2366 	bool kernel)
2367 {
2368 	status_t status = get_dir_path_and_leaf(path, filename);
2369 	if (status != B_OK)
2370 		return status;
2371 
2372 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2373 }
2374 
2375 
2376 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2377 		   to by a FD + path pair.
2378 
2379 	\a path must be given in either case. \a fd might be omitted, in which
2380 	case \a path is either an absolute path or one relative to the current
2381 	directory. If both are supplied and \a path is relative, it is reckoned
2382 	off of the directory referred to by \a fd. If \a path is absolute, \a fd is
2383 	ignored.
2384 
2385 	The caller has the responsibility to call put_vnode() on the returned
2386 	directory vnode.
2387 
2388 	\param fd The FD. May be < 0.
2389 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2390 	       is modified by this function. It must have at least room for a
2391 	       string one character longer than the path it contains.
2392 	\param _vnode A pointer to a variable the directory vnode shall be written
2393 		   into.
2394 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2395 		   the leaf name of the specified entry will be written.
2396 	\param kernel \c true, if invoked from inside the kernel, \c false if
2397 		   invoked from userland.
2398 	\return \c B_OK, if everything went fine, another error code otherwise.
2399 */
2400 static status_t
2401 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2402 	char* filename, bool kernel)
2403 {
2404 	if (!path)
2405 		return B_BAD_VALUE;
2406 	if (*path == '\0')
2407 		return B_ENTRY_NOT_FOUND;
2408 	if (fd < 0)
2409 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2410 
2411 	status_t status = get_dir_path_and_leaf(path, filename);
2412 	if (status != B_OK)
2413 		return status;
2414 
2415 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2416 }
2417 
2418 
2419 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2420 		   to by a vnode + path pair.
2421 
2422 	\a path must be given in either case. \a vnode might be omitted, in which
2423 	case \a path is either an absolute path or one relative to the current
2424 	directory. If both are supplied and \a path is relative, it is reckoned
2425 	off of the directory referred to by \a vnode. If \a path is absolute, \a vnode is
2426 	ignored.
2427 
2428 	The caller has the responsibility to call put_vnode() on the returned
2429 	directory vnode.
2430 
2431 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2432 	it is successful or not.
2433 
2434 	\param vnode The vnode. May be \c NULL.
2435 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2436 	       is modified by this function. It must have at least room for a
2437 	       string one character longer than the path it contains.
2438 	\param _vnode A pointer to a variable the directory vnode shall be written
2439 		   into.
2440 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2441 		   the leaf name of the specified entry will be written.
2442 	\param kernel \c true, if invoked from inside the kernel, \c false if
2443 		   invoked from userland.
2444 	\return \c B_OK, if everything went fine, another error code otherwise.
2445 */
2446 static status_t
2447 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2448 	VnodePutter& _vnode, char* filename, bool kernel)
2449 {
2450 	VnodePutter vnodePutter(vnode);
2451 
2452 	if (!path)
2453 		return B_BAD_VALUE;
2454 	if (*path == '\0')
2455 		return B_ENTRY_NOT_FOUND;
2456 	if (vnode == NULL || path[0] == '/')
2457 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2458 
2459 	status_t status = get_dir_path_and_leaf(path, filename);
2460 	if (status != B_OK)
2461 		return status;
2462 
2463 	vnodePutter.Detach();
2464 	return vnode_path_to_vnode(vnode, path, true, kernel, _vnode, NULL);
2465 }
2466 
2467 
2468 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2469 */
2470 static status_t
2471 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2472 	size_t bufferSize, struct io_context* ioContext)
2473 {
2474 	if (bufferSize < sizeof(struct dirent))
2475 		return B_BAD_VALUE;
2476 
2477 	// See if the vnode is covering another vnode and move to the covered
2478 	// vnode so we get the underlying file system
2479 	VnodePutter vnodePutter;
2480 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2481 		vnode = coveredVnode;
2482 		vnodePutter.SetTo(vnode);
2483 	}
2484 
2485 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2486 		// The FS supports getting the name of a vnode.
2487 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2488 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2489 			return B_OK;
2490 	}
2491 
2492 	// The FS doesn't support getting the name of a vnode. So we search the
2493 	// parent directory for the vnode, if the caller let us.
2494 
2495 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2496 		return B_UNSUPPORTED;
2497 
2498 	void* cookie;
2499 
2500 	status_t status = FS_CALL(parent, open_dir, &cookie);
2501 	if (status >= B_OK) {
2502 		while (true) {
2503 			uint32 num = 1;
2504 			// We use the FS hook directly instead of dir_read(), since we don't
2505 			// want the entries to be fixed. We have already resolved vnode to
2506 			// the covered node.
2507 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2508 				&num);
2509 			if (status != B_OK)
2510 				break;
2511 			if (num == 0) {
2512 				status = B_ENTRY_NOT_FOUND;
2513 				break;
2514 			}
2515 
2516 			if (vnode->id == buffer->d_ino) {
2517 				// found correct entry!
2518 				break;
2519 			}
2520 		}
2521 
2522 		FS_CALL(parent, close_dir, cookie);
2523 		FS_CALL(parent, free_dir_cookie, cookie);
2524 	}
2525 	return status;
2526 }
2527 
2528 
2529 static status_t
2530 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2531 	size_t nameSize, bool kernel)
2532 {
2533 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2534 	struct dirent* dirent = (struct dirent*)buffer;
2535 
2536 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2537 		get_current_io_context(kernel));
2538 	if (status != B_OK)
2539 		return status;
2540 
2541 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2542 		return B_BUFFER_OVERFLOW;
2543 
2544 	return B_OK;
2545 }
2546 
2547 
2548 /*!	Gets the full path to a given directory vnode.
2549 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2550 	file system doesn't support this call, it will fall back to iterating
2551 	through the parent directory to get the name of the child.
2552 
2553 	To protect against circular loops, it supports a maximum tree depth
2554 	of 256 levels.
2555 
2556 	Note that the path may no longer be correct by the time this function
2557 	returns! It doesn't use any locking to guarantee a correct path, as
2558 	paths aren't stable anyway: the path to a file can change at any time.
2559 
2560 	It might be a good idea, though, for the calling function to check
2561 	whether the returned path exists (it's not done here for efficiency reasons).
2562 */
2563 static status_t
2564 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2565 	bool kernel)
2566 {
2567 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2568 
2569 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2570 		return B_BAD_VALUE;
2571 
2572 	if (!S_ISDIR(vnode->Type()))
2573 		return B_NOT_A_DIRECTORY;
2574 
2575 	char* path = buffer;
2576 	int32 insert = bufferSize;
2577 	int32 maxLevel = 256;
2578 	int32 length;
2579 	status_t status = B_OK;
2580 	struct io_context* ioContext = get_current_io_context(kernel);
2581 
2582 	// we don't use get_vnode() here because this call is more
2583 	// efficient and does all we need from get_vnode()
2584 	inc_vnode_ref_count(vnode);
2585 
2586 	path[--insert] = '\0';
2587 		// the path is filled right to left
2588 
2589 	while (true) {
2590 		// If the node is the context's root, bail out. Otherwise resolve mount
2591 		// points.
2592 		if (vnode == ioContext->root)
2593 			break;
2594 
2595 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2596 			put_vnode(vnode);
2597 			vnode = coveredVnode;
2598 		}
2599 
2600 		// lookup the parent vnode
2601 		struct vnode* parentVnode;
2602 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2603 		if (status != B_OK)
2604 			goto out;
2605 
2606 		if (parentVnode == vnode) {
2607 			// The caller apparently got their hands on a node outside of their
2608 			// context's root. Now we've hit the global root.
2609 			put_vnode(parentVnode);
2610 			break;
2611 		}
2612 
2613 		// get the node's name
2614 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2615 			// also used for fs_read_dir()
2616 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2617 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2618 			sizeof(nameBuffer), ioContext);
2619 
2620 		// release the current vnode, we only need its parent from now on
2621 		put_vnode(vnode);
2622 		vnode = parentVnode;
2623 
2624 		if (status != B_OK)
2625 			goto out;
2626 
2627 		// TODO: add an explicit check for loops about every 10 levels to do
2628 		// real loop detection
2629 
2630 		// don't go deeper than 'maxLevel' to prevent circular loops
2631 		if (maxLevel-- < 0) {
2632 			status = B_LINK_LIMIT;
2633 			goto out;
2634 		}
2635 
2636 		// add the name in front of the current path
2637 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2638 		length = strlen(name);
2639 		insert -= length;
2640 		if (insert <= 0) {
2641 			status = B_RESULT_NOT_REPRESENTABLE;
2642 			goto out;
2643 		}
2644 		memcpy(path + insert, name, length);
2645 		path[--insert] = '/';
2646 	}
2647 
2648 	// the root dir will result in an empty path: fix it
2649 	if (path[insert] == '\0')
2650 		path[--insert] = '/';
2651 
2652 	TRACE(("  path is: %s\n", path + insert));
2653 
2654 	// move the path to the start of the buffer
2655 	length = bufferSize - insert;
2656 	memmove(buffer, path + insert, length);
2657 
2658 out:
2659 	put_vnode(vnode);
2660 	return status;
2661 }
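
// Example (editor's note): the path is assembled right to left. For a
// vnode at "/boot/home" the loop first inserts "home", then '/', then
// "boot", then '/', leaving "/boot/home" at (path + insert), which is
// finally memmove()d to the start of the buffer.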
2662 
2663 
2664 /*!	Checks the length of every path component, and adds a '.'
2665 	if the path ends in a slash.
2666 	The given path buffer must be able to store at least one
2667 	additional character.
2668 */
2669 static status_t
2670 check_path(char* to)
2671 {
2672 	int32 length = 0;
2673 
2674 	// check length of every path component
2675 
2676 	while (*to) {
2677 		char* begin;
2678 		if (*to == '/')
2679 			to++, length++;
2680 
2681 		begin = to;
2682 		while (*to != '/' && *to)
2683 			to++, length++;
2684 
2685 		if (to - begin > B_FILE_NAME_LENGTH)
2686 			return B_NAME_TOO_LONG;
2687 	}
2688 
2689 	if (length == 0)
2690 		return B_ENTRY_NOT_FOUND;
2691 
2692 	// complete path if there is a slash at the end
2693 
2694 	if (*(to - 1) == '/') {
2695 		if (length > B_PATH_NAME_LENGTH - 2)
2696 			return B_NAME_TOO_LONG;
2697 
2698 		to[0] = '.';
2699 		to[1] = '\0';
2700 	}
2701 
2702 	return B_OK;
2703 }
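
// Examples (editor's note): check_path() leaves "a/b" unchanged, completes
// "a/b/" to "a/b/.", returns B_ENTRY_NOT_FOUND for an empty path, and
// returns B_NAME_TOO_LONG if any single component exceeds
// B_FILE_NAME_LENGTH characters.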
2704 
2705 
2706 static struct file_descriptor*
2707 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2708 {
2709 	struct file_descriptor* descriptor
2710 		= get_fd(get_current_io_context(kernel), fd);
2711 	if (descriptor == NULL)
2712 		return NULL;
2713 
2714 	struct vnode* vnode = fd_vnode(descriptor);
2715 	if (vnode == NULL) {
2716 		put_fd(descriptor);
2717 		return NULL;
2718 	}
2719 
2720 	// ToDo: when we can close a file descriptor at any point, investigate
2721 	//	if this is still valid to do (accessing the vnode without ref_count
2722 	//	or locking)
2723 	*_vnode = vnode;
2724 	return descriptor;
2725 }
2726 
2727 
2728 static struct vnode*
2729 get_vnode_from_fd(int fd, bool kernel)
2730 {
2731 	struct file_descriptor* descriptor;
2732 	struct vnode* vnode;
2733 
2734 	descriptor = get_fd(get_current_io_context(kernel), fd);
2735 	if (descriptor == NULL)
2736 		return NULL;
2737 
2738 	vnode = fd_vnode(descriptor);
2739 	if (vnode != NULL)
2740 		inc_vnode_ref_count(vnode);
2741 
2742 	put_fd(descriptor);
2743 	return vnode;
2744 }
2745 
2746 
2747 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2748 	only the path will be considered. In this case, the \a path must not be
2749 	NULL.
2750 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2751 	and should be NULL for files.
2752 */
2753 static status_t
2754 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2755 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2756 {
2757 	if (fd < 0 && !path)
2758 		return B_BAD_VALUE;
2759 
2760 	if (path != NULL && *path == '\0')
2761 		return B_ENTRY_NOT_FOUND;
2762 
2763 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2764 		// no FD or absolute path
2765 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2766 	}
2767 
2768 	// FD only, or FD + relative path
2769 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2770 	if (vnode == NULL)
2771 		return B_FILE_ERROR;
2772 
2773 	if (path != NULL) {
2774 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, kernel,
2775 			_vnode, _parentID);
2776 	}
2777 
2778 	// there is no relative path to take into account
2779 
2780 	_vnode.SetTo(vnode);
2781 	if (_parentID)
2782 		*_parentID = -1;
2783 
2784 	return B_OK;
2785 }
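
// Example (editor's note): the FD + path semantics mirror the POSIX *at()
// functions. A hypothetical in-kernel caller could resolve a name relative
// to an open directory like this:
//
//	VnodePutter vnode;
//	char pathBuffer[B_PATH_NAME_LENGTH] = "subdir/file";
//	status_t status = fd_and_path_to_vnode(dirFD, pathBuffer, true, vnode,
//		NULL, true);
//
// With dirFD < 0 the path would be resolved against the current working
// directory instead; an absolute path ignores dirFD entirely.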
2786 
2787 
2788 static int
2789 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2790 	void* cookie, int openMode, bool kernel)
2791 {
2792 	struct file_descriptor* descriptor;
2793 	int fd;
2794 
2795 	// If the vnode is mandatory-locked, we don't allow creating a new
2796 	// file or directory descriptor for it
2797 	if (vnode && vnode->mandatory_locked_by != NULL
2798 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2799 		return B_BUSY;
2800 
2801 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2802 		return B_BAD_VALUE;
2803 
2804 	descriptor = alloc_fd();
2805 	if (!descriptor)
2806 		return B_NO_MEMORY;
2807 
2808 	if (vnode)
2809 		descriptor->u.vnode = vnode;
2810 	else
2811 		descriptor->u.mount = mount;
2812 	descriptor->cookie = cookie;
2813 
2814 	switch (type) {
2815 		// vnode types
2816 		case FDTYPE_FILE:
2817 			descriptor->ops = &sFileOps;
2818 			break;
2819 		case FDTYPE_DIR:
2820 			descriptor->ops = &sDirectoryOps;
2821 			break;
2822 		case FDTYPE_ATTR:
2823 			descriptor->ops = &sAttributeOps;
2824 			break;
2825 		case FDTYPE_ATTR_DIR:
2826 			descriptor->ops = &sAttributeDirectoryOps;
2827 			break;
2828 
2829 		// mount types
2830 		case FDTYPE_INDEX_DIR:
2831 			descriptor->ops = &sIndexDirectoryOps;
2832 			break;
2833 		case FDTYPE_QUERY:
2834 			descriptor->ops = &sQueryOps;
2835 			break;
2836 
2837 		default:
2838 			panic("get_new_fd() called with unknown type %d\n", type);
2839 			break;
2840 	}
2841 	descriptor->type = type;
2842 	descriptor->open_mode = openMode;
2843 
2844 	if (descriptor->ops->fd_seek != NULL) {
2845 		// some kinds of files are not seekable
2846 		switch (vnode->Type() & S_IFMT) {
2847 			case S_IFIFO:
2848 			case S_IFSOCK:
2849 				ASSERT(descriptor->pos == -1);
2850 				break;
2851 
2852 			// The Open Group Base Specs don't mention any file types besides pipes,
2853 			// FIFOs, and sockets specially, so we allow seeking all others.
2854 			default:
2855 				descriptor->pos = 0;
2856 				break;
2857 		}
2858 	}
2859 
2860 	io_context* context = get_current_io_context(kernel);
2861 	fd = new_fd(context, descriptor);
2862 	if (fd < 0) {
2863 		descriptor->ops = NULL;
2864 		put_fd(descriptor);
2865 		return B_NO_MORE_FDS;
2866 	}
2867 
2868 	mutex_lock(&context->io_mutex);
2869 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2870 	mutex_unlock(&context->io_mutex);
2871 
2872 	return fd;
2873 }
2874 
2875 
2876 /*!	Normalizes \a path in place. It's otherwise semantically equivalent to
2877 	vfs_normalize_path(). See there for more documentation.
2878 */
2879 static status_t
2880 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2881 {
2882 	VnodePutter dir;
2883 	status_t error;
2884 
2885 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2886 		// get dir vnode + leaf name
2887 		char leaf[B_FILE_NAME_LENGTH];
2888 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2889 		if (error != B_OK)
2890 			return error;
2891 		strcpy(path, leaf);
2892 
2893 		// get file vnode, if we shall resolve links
2894 		bool fileExists = false;
2895 		VnodePutter fileVnode;
2896 		if (traverseLink) {
2897 			inc_vnode_ref_count(dir.Get());
2898 			if (vnode_path_to_vnode(dir.Get(), path, false, kernel, fileVnode,
2899 					NULL) == B_OK) {
2900 				fileExists = true;
2901 			}
2902 		}
2903 
2904 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2905 			// we're done -- construct the path
2906 			bool hasLeaf = true;
2907 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2908 				// special cases "." and ".." -- get the dir, forget the leaf
2909 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, kernel,
2910 					dir, NULL);
2911 				if (error != B_OK)
2912 					return error;
2913 				hasLeaf = false;
2914 			}
2915 
2916 			// get the directory path
2917 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2918 			if (error != B_OK)
2919 				return error;
2920 
2921 			// append the leaf name
2922 			if (hasLeaf) {
2923 				// insert a directory separator if this is not the file system
2924 				// root
2925 				if ((strcmp(path, "/") != 0
2926 					&& strlcat(path, "/", pathSize) >= pathSize)
2927 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2928 					return B_NAME_TOO_LONG;
2929 				}
2930 			}
2931 
2932 			return B_OK;
2933 		}
2934 
2935 		// read link
2936 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2937 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2938 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2939 			if (error != B_OK)
2940 				return error;
2941 			if (bufferSize < B_PATH_NAME_LENGTH)
2942 				path[bufferSize] = '\0';
2943 		} else
2944 			return B_BAD_VALUE;
2945 	}
2946 
2947 	return B_LINK_LIMIT;
2948 }
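
// Example (editor's note): assuming "/boot/home/config" is a symlink to
// "/boot/system/settings", normalize_path() with traverseLink == true
// rewrites the buffer "/boot/home/config" to "/boot/system/settings",
// looping at most B_MAX_SYMLINKS times before giving up with B_LINK_LIMIT.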
2949 
2950 
2951 static status_t
2952 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2953 	struct io_context* ioContext)
2954 {
2955 	// Make sure the IO context root is not bypassed.
2956 	if (parent == ioContext->root) {
2957 		*_device = parent->device;
2958 		*_node = parent->id;
2959 		return B_OK;
2960 	}
2961 
2962 	inc_vnode_ref_count(parent);
2963 		// vnode_path_to_vnode() puts the node
2964 
2965 	// ".." is guaranteed not to be clobbered by this call
2966 	VnodePutter vnode;
2967 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false,
2968 		ioContext, vnode, NULL);
2969 	if (status == B_OK) {
2970 		*_device = vnode->device;
2971 		*_node = vnode->id;
2972 	}
2973 
2974 	return status;
2975 }
2976 
2977 
2978 #ifdef ADD_DEBUGGER_COMMANDS
2979 
2980 
2981 static void
2982 _dump_advisory_locking(advisory_locking* locking)
2983 {
2984 	if (locking == NULL)
2985 		return;
2986 
2987 	kprintf("   lock:        %" B_PRId32 "\n", locking->lock);
2988 	kprintf("   wait_sem:    %" B_PRId32 "\n", locking->wait_sem);
2989 
2990 	int32 index = 0;
2991 	LockList::Iterator iterator = locking->locks.GetIterator();
2992 	while (iterator.HasNext()) {
2993 		struct advisory_lock* lock = iterator.Next();
2994 
2995 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2996 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
2997 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
2998 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
2999 	}
3000 }
3001 
3002 
3003 static void
3004 _dump_mount(struct fs_mount* mount)
3005 {
3006 	kprintf("MOUNT: %p\n", mount);
3007 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3008 	kprintf(" device_name:   %s\n", mount->device_name);
3009 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3010 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3011 	kprintf(" partition:     %p\n", mount->partition);
3012 	kprintf(" lock:          %p\n", &mount->lock);
3013 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3014 		mount->owns_file_device ? " owns_file_device" : "");
3015 
3016 	fs_volume* volume = mount->volume;
3017 	while (volume != NULL) {
3018 		kprintf(" volume %p:\n", volume);
3019 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3020 		kprintf("  private_volume:   %p\n", volume->private_volume);
3021 		kprintf("  ops:              %p\n", volume->ops);
3022 		kprintf("  file_system:      %p\n", volume->file_system);
3023 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3024 		volume = volume->super_volume;
3025 	}
3026 
3027 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3028 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3029 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3030 	set_debug_variable("_partition", (addr_t)mount->partition);
3031 }
3032 
3033 
3034 static bool
3035 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3036 	const char* name)
3037 {
3038 	bool insertSlash = buffer[bufferSize] != '\0';
3039 	size_t nameLength = strlen(name);
3040 
3041 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3042 		return false;
3043 
3044 	if (insertSlash)
3045 		buffer[--bufferSize] = '/';
3046 
3047 	bufferSize -= nameLength;
3048 	memcpy(buffer + bufferSize, name, nameLength);
3049 
3050 	return true;
3051 }
3052 
3053 
3054 static bool
3055 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3056 	ino_t nodeID)
3057 {
3058 	if (bufferSize == 0)
3059 		return false;
3060 
3061 	bool insertSlash = buffer[bufferSize] != '\0';
3062 	if (insertSlash)
3063 		buffer[--bufferSize] = '/';
3064 
3065 	size_t size = snprintf(buffer, bufferSize,
3066 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3067 	if (size > bufferSize) {
3068 		if (insertSlash)
3069 			bufferSize++;
3070 		return false;
3071 	}
3072 
3073 	if (size < bufferSize)
3074 		memmove(buffer + bufferSize - size, buffer, size);
3075 
3076 	bufferSize -= size;
3077 	return true;
3078 }
3079 
3080 
3081 static char*
3082 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3083 	bool& _truncated)
3084 {
3085 	// null-terminate the path
3086 	buffer[--bufferSize] = '\0';
3087 
3088 	while (true) {
3089 		while (vnode->covers != NULL)
3090 			vnode = vnode->covers;
3091 
3092 		if (vnode == sRoot) {
3093 			_truncated = bufferSize == 0;
3094 			if (!_truncated)
3095 				buffer[--bufferSize] = '/';
3096 			return buffer + bufferSize;
3097 		}
3098 
3099 		// resolve the name
3100 		ino_t dirID;
3101 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3102 			vnode->id, dirID);
3103 		if (name == NULL) {
3104 			// Failed to resolve the name -- prepend "<dev,node>/".
3105 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3106 				vnode->mount->id, vnode->id);
3107 			return buffer + bufferSize;
3108 		}
3109 
3110 		// prepend the name
3111 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3112 			_truncated = true;
3113 			return buffer + bufferSize;
3114 		}
3115 
3116 		// resolve the directory node
3117 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3118 		if (nextVnode == NULL) {
3119 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3120 				vnode->mount->id, dirID);
3121 			return buffer + bufferSize;
3122 		}
3123 
3124 		vnode = nextVnode;
3125 	}
3126 }
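
// Example (editor's note): walking up via the entry cache, the result for
// a fully resolvable vnode might be "/boot/home/Desktop". As soon as a
// name (or the parent directory's vnode) cannot be resolved, a
// "<dev,node>" placeholder is prepended instead and the walk stops,
// yielding e.g. "<2,12345>/home/Desktop"; _truncated is set when the
// buffer runs out of space.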
3127 
3128 
3129 static void
3130 _dump_vnode(struct vnode* vnode, bool printPath)
3131 {
3132 	kprintf("VNODE: %p\n", vnode);
3133 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3134 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3135 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3136 	kprintf(" private_node:  %p\n", vnode->private_node);
3137 	kprintf(" mount:         %p\n", vnode->mount);
3138 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3139 	kprintf(" covers:        %p\n", vnode->covers);
3140 	kprintf(" cache:         %p\n", vnode->cache);
3141 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3142 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3143 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3144 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3145 
3146 	_dump_advisory_locking(vnode->advisory_locking);
3147 
3148 	if (printPath) {
3149 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3150 		if (buffer != NULL) {
3151 			bool truncated;
3152 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3153 				B_PATH_NAME_LENGTH, truncated);
3154 			if (path != NULL) {
3155 				kprintf(" path:          ");
3156 				if (truncated)
3157 					kputs("<truncated>/");
3158 				kputs(path);
3159 				kputs("\n");
3160 			} else
3161 				kprintf("Failed to resolve vnode path.\n");
3162 
3163 			debug_free(buffer);
3164 		} else
3165 			kprintf("Failed to allocate memory for constructing the path.\n");
3166 	}
3167 
3168 	set_debug_variable("_node", (addr_t)vnode->private_node);
3169 	set_debug_variable("_mount", (addr_t)vnode->mount);
3170 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3171 	set_debug_variable("_covers", (addr_t)vnode->covers);
3172 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3173 }
3174 
3175 
3176 static int
3177 dump_mount(int argc, char** argv)
3178 {
3179 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3180 		kprintf("usage: %s [id|address]\n", argv[0]);
3181 		return 0;
3182 	}
3183 
3184 	ulong val = parse_expression(argv[1]);
3185 	uint32 id = val;
3186 
3187 	struct fs_mount* mount = sMountsTable->Lookup(id);
3188 	if (mount == NULL) {
3189 		if (IS_USER_ADDRESS(val)) {
3190 			kprintf("fs_mount not found\n");
3191 			return 0;
3192 		}
3193 		mount = (fs_mount*)val;
3194 	}
3195 
3196 	_dump_mount(mount);
3197 	return 0;
3198 }
3199 
3200 
3201 static int
3202 dump_mounts(int argc, char** argv)
3203 {
3204 	if (argc != 1) {
3205 		kprintf("usage: %s\n", argv[0]);
3206 		return 0;
3207 	}
3208 
3209 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3210 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3211 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3212 
3213 	struct fs_mount* mount;
3214 
3215 	MountTable::Iterator iterator(sMountsTable);
3216 	while (iterator.HasNext()) {
3217 		mount = iterator.Next();
3218 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3219 			mount->root_vnode->covers, mount->volume->private_volume,
3220 			mount->volume->file_system_name);
3221 
3222 		fs_volume* volume = mount->volume;
3223 		while (volume->super_volume != NULL) {
3224 			volume = volume->super_volume;
3225 			kprintf("                                     %p %s\n",
3226 				volume->private_volume, volume->file_system_name);
3227 		}
3228 	}
3229 
3230 	return 0;
3231 }
3232 
3233 
3234 static int
3235 dump_vnode(int argc, char** argv)
3236 {
3237 	bool printPath = false;
3238 	int argi = 1;
3239 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3240 		printPath = true;
3241 		argi++;
3242 	}
3243 
3244 	if (argi >= argc || argi + 2 < argc) {
3245 		print_debugger_command_usage(argv[0]);
3246 		return 0;
3247 	}
3248 
3249 	struct vnode* vnode = NULL;
3250 
3251 	if (argi + 1 == argc) {
3252 		vnode = (struct vnode*)parse_expression(argv[argi]);
3253 		if (IS_USER_ADDRESS(vnode)) {
3254 			kprintf("invalid vnode address\n");
3255 			return 0;
3256 		}
3257 		_dump_vnode(vnode, printPath);
3258 		return 0;
3259 	}
3260 
3261 	dev_t device = parse_expression(argv[argi]);
3262 	ino_t id = parse_expression(argv[argi + 1]);
3263 
3264 	VnodeTable::Iterator iterator(sVnodeTable);
3265 	while (iterator.HasNext()) {
3266 		vnode = iterator.Next();
3267 		if (vnode->id != id || vnode->device != device)
3268 			continue;
3269 
3270 		_dump_vnode(vnode, printPath);
3271 	}
3272 
3273 	return 0;
3274 }
3275 
3276 
3277 static int
3278 dump_vnodes(int argc, char** argv)
3279 {
3280 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3281 		kprintf("usage: %s [device]\n", argv[0]);
3282 		return 0;
3283 	}
3284 
3285 	// restrict dumped nodes to a certain device if requested
3286 	dev_t device = parse_expression(argv[1]);
3287 
3288 	struct vnode* vnode;
3289 
3290 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3291 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3292 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3293 
3294 	VnodeTable::Iterator iterator(sVnodeTable);
3295 	while (iterator.HasNext()) {
3296 		vnode = iterator.Next();
3297 		if (vnode->device != device)
3298 			continue;
3299 
3300 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3301 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3302 			vnode->private_node, vnode->advisory_locking,
3303 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3304 			vnode->IsUnpublished() ? "u" : "-");
3305 	}
3306 
3307 	return 0;
3308 }
3309 
3310 
3311 static int
3312 dump_vnode_caches(int argc, char** argv)
3313 {
3314 	struct vnode* vnode;
3315 
3316 	if (argc > 2 || (argc == 2 && strcmp(argv[1], "--help") == 0)) {
3317 		kprintf("usage: %s [device]\n", argv[0]);
3318 		return 0;
3319 	}
3320 
3321 	// restrict dumped nodes to a certain device if requested
3322 	dev_t device = -1;
3323 	if (argc > 1)
3324 		device = parse_expression(argv[1]);
3325 
3326 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3327 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3328 
3329 	VnodeTable::Iterator iterator(sVnodeTable);
3330 	while (iterator.HasNext()) {
3331 		vnode = iterator.Next();
3332 		if (vnode->cache == NULL)
3333 			continue;
3334 		if (device != -1 && vnode->device != device)
3335 			continue;
3336 
3337 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3338 			vnode, vnode->device, vnode->id, vnode->cache,
3339 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3340 			vnode->cache->page_count);
3341 	}
3342 
3343 	return 0;
3344 }
3345 
3346 
3347 int
3348 dump_io_context(int argc, char** argv)
3349 {
3350 	if (argc > 2 || (argc == 2 && strcmp(argv[1], "--help") == 0)) {
3351 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3352 		return 0;
3353 	}
3354 
3355 	struct io_context* context = NULL;
3356 
3357 	if (argc > 1) {
3358 		ulong num = parse_expression(argv[1]);
3359 		if (IS_KERNEL_ADDRESS(num))
3360 			context = (struct io_context*)num;
3361 		else {
3362 			Team* team = team_get_team_struct_locked(num);
3363 			if (team == NULL) {
3364 				kprintf("could not find team with ID %lu\n", num);
3365 				return 0;
3366 			}
3367 			context = (struct io_context*)team->io_context;
3368 		}
3369 	} else
3370 		context = get_current_io_context(true);
3371 
3372 	kprintf("I/O CONTEXT: %p\n", context);
3373 	kprintf(" root vnode:\t%p\n", context->root);
3374 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3375 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3376 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3377 
3378 	if (context->num_used_fds) {
3379 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3380 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3381 	}
3382 
3383 	for (uint32 i = 0; i < context->table_size; i++) {
3384 		struct file_descriptor* fd = context->fds[i];
3385 		if (fd == NULL)
3386 			continue;
3387 
3388 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3389 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3390 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3391 			fd->pos, fd->cookie,
3392 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3393 				? "mount" : "vnode",
3394 			fd->u.vnode);
3395 	}
3396 
3397 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3398 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3399 
3400 	set_debug_variable("_cwd", (addr_t)context->cwd);
3401 
3402 	return 0;
3403 }
3404 
3405 
3406 int
3407 dump_vnode_usage(int argc, char** argv)
3408 {
3409 	if (argc != 1) {
3410 		kprintf("usage: %s\n", argv[0]);
3411 		return 0;
3412 	}
3413 
3414 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3415 		sUnusedVnodes, kMaxUnusedVnodes);
3416 
3417 	uint32 count = sVnodeTable->CountElements();
3418 
3419 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3420 		count - sUnusedVnodes);
3421 	return 0;
3422 }
3423 
3424 #endif	// ADD_DEBUGGER_COMMANDS
3425 
3426 
3427 /*!	Clears memory specified by an iovec array.
3428 */
3429 static void
3430 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3431 {
3432 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3433 		size_t length = std::min(vecs[i].iov_len, bytes);
3434 		memset(vecs[i].iov_base, 0, length);
3435 		bytes -= length;
3436 	}
3437 }
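
// Example (editor's note): with vecs = { { base0, 4096 }, { base1, 4096 } }
// and bytes = 6000, zero_iovecs() clears all 4096 bytes of the first vec
// and the first 1904 bytes of the second. It is used below to materialize
// sparse file regions as zeros when reading.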
3438 
3439 
3440 /*!	Does the dirty work of combining the file_io_vecs with the iovecs
3441 	and calls the file system hooks to read/write the request to disk.
3442 */
3443 static status_t
3444 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3445 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3446 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3447 	bool doWrite)
3448 {
3449 	if (fileVecCount == 0) {
3450 		// There are no file vecs at this offset, so we're obviously trying
3451 		// to access the file outside of its bounds
3452 		return B_BAD_VALUE;
3453 	}
3454 
3455 	size_t numBytes = *_numBytes;
3456 	uint32 fileVecIndex;
3457 	size_t vecOffset = *_vecOffset;
3458 	uint32 vecIndex = *_vecIndex;
3459 	status_t status;
3460 	size_t size;
3461 
3462 	if (!doWrite && vecOffset == 0) {
3463 		// now directly read the data from the device
3464 		// the first file_io_vec can be read directly
3465 		// TODO: we could also write directly
3466 
3467 		if (fileVecs[0].length < (off_t)numBytes)
3468 			size = fileVecs[0].length;
3469 		else
3470 			size = numBytes;
3471 
3472 		if (fileVecs[0].offset >= 0) {
3473 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3474 				&vecs[vecIndex], vecCount - vecIndex, &size);
3475 		} else {
3476 			// sparse read
3477 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3478 			status = B_OK;
3479 		}
3480 		if (status != B_OK)
3481 			return status;
3482 
3483 		ASSERT((off_t)size <= fileVecs[0].length);
3484 
3485 		// If the file portion was contiguous, we're already done now
3486 		if (size == numBytes)
3487 			return B_OK;
3488 
3489 		// if we reached the end of the file, we can return as well
3490 		if ((off_t)size != fileVecs[0].length) {
3491 			*_numBytes = size;
3492 			return B_OK;
3493 		}
3494 
3495 		fileVecIndex = 1;
3496 
3497 		// first, find out where we have to continue in our iovecs
3498 		for (; vecIndex < vecCount; vecIndex++) {
3499 			if (size < vecs[vecIndex].iov_len)
3500 				break;
3501 
3502 			size -= vecs[vecIndex].iov_len;
3503 		}
3504 
3505 		vecOffset = size;
3506 	} else {
3507 		fileVecIndex = 0;
3508 		size = 0;
3509 	}
3510 
3511 	// Too bad, let's process the rest of the file_io_vecs
3512 
3513 	size_t totalSize = size;
3514 	size_t bytesLeft = numBytes - size;
3515 
3516 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3517 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3518 		off_t fileOffset = fileVec.offset;
3519 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3520 
3521 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3522 			fileLeft));
3523 
3524 		// process the complete fileVec
3525 		while (fileLeft > 0) {
3526 			iovec tempVecs[MAX_TEMP_IO_VECS];
3527 			uint32 tempCount = 0;
3528 
3529 			// size tracks how much of what is left of the current fileVec
3530 			// (fileLeft) has been assigned to tempVecs
3531 			size = 0;
3532 
3533 			// assign what is left of the current fileVec to the tempVecs
3534 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3535 					&& tempCount < MAX_TEMP_IO_VECS;) {
3536 				// try to satisfy one iovec per iteration (or as much as
3537 				// possible)
3538 
3539 				// bytes left of the current iovec
3540 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3541 				if (vecLeft == 0) {
3542 					vecOffset = 0;
3543 					vecIndex++;
3544 					continue;
3545 				}
3546 
3547 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3548 					vecIndex, vecOffset, size));
3549 
3550 				// actually available bytes
3551 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3552 
3553 				tempVecs[tempCount].iov_base
3554 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3555 				tempVecs[tempCount].iov_len = tempVecSize;
3556 				tempCount++;
3557 
3558 				size += tempVecSize;
3559 				vecOffset += tempVecSize;
3560 			}
3561 
3562 			size_t bytes = size;
3563 
3564 			if (fileOffset == -1) {
3565 				if (doWrite) {
3566 					panic("sparse write attempt: vnode %p", vnode);
3567 					status = B_IO_ERROR;
3568 				} else {
3569 					// sparse read
3570 					zero_iovecs(tempVecs, tempCount, bytes);
3571 					status = B_OK;
3572 				}
3573 			} else if (doWrite) {
3574 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3575 					tempVecs, tempCount, &bytes);
3576 			} else {
3577 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3578 					tempVecs, tempCount, &bytes);
3579 			}
3580 			if (status != B_OK)
3581 				return status;
3582 
3583 			totalSize += bytes;
3584 			bytesLeft -= size;
3585 			if (fileOffset >= 0)
3586 				fileOffset += size;
3587 			fileLeft -= size;
3588 			//dprintf("-> file left = %Lu\n", fileLeft);
3589 
3590 			if (size != bytes || vecIndex >= vecCount) {
3591 				// there are no more bytes or iovecs, let's bail out
3592 				*_numBytes = totalSize;
3593 				return B_OK;
3594 			}
3595 		}
3596 	}
3597 
3598 	*_vecIndex = vecIndex;
3599 	*_vecOffset = vecOffset;
3600 	*_numBytes = totalSize;
3601 	return B_OK;
3602 }
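
// Worked example (editor's note): reading 1000 bytes with
// fileVecs = { { offset 4096, length 600 }, { offset -1, length 400 } }
// and a single 1000-byte iovec: the first 600 bytes are read directly from
// device offset 4096 via the read_pages() hook; the remaining 400 bytes
// fall into the sparse vec (offset == -1) and are simply zeroed in the
// buffer.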
3603 
3604 
3605 static bool
3606 is_user_in_group(gid_t gid)
3607 {
3608 	if (gid == getegid())
3609 		return true;
3610 
3611 	gid_t groups[NGROUPS_MAX];
3612 	int groupCount = getgroups(NGROUPS_MAX, groups);
3613 	for (int i = 0; i < groupCount; i++) {
3614 		if (gid == groups[i])
3615 			return true;
3616 	}
3617 
3618 	return false;
3619 }
3620 
3621 
3622 static status_t
3623 free_io_context(io_context* context)
3624 {
3625 	uint32 i;
3626 
3627 	TIOC(FreeIOContext(context));
3628 
3629 	if (context->root)
3630 		put_vnode(context->root);
3631 
3632 	if (context->cwd)
3633 		put_vnode(context->cwd);
3634 
3635 	mutex_lock(&context->io_mutex);
3636 
3637 	for (i = 0; i < context->table_size; i++) {
3638 		if (struct file_descriptor* descriptor = context->fds[i]) {
3639 			close_fd(context, descriptor);
3640 			put_fd(descriptor);
3641 		}
3642 	}
3643 
3644 	mutex_destroy(&context->io_mutex);
3645 
3646 	remove_node_monitors(context);
3647 	free(context->fds);
3648 	free(context);
3649 
3650 	return B_OK;
3651 }
3652 
3653 
3654 static status_t
3655 resize_monitor_table(struct io_context* context, const int newSize)
3656 {
3657 	status_t status = B_OK;
3658 
3659 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3660 		return B_BAD_VALUE;
3661 
3662 	mutex_lock(&context->io_mutex);
3663 
3664 	if ((size_t)newSize < context->num_monitors) {
3665 		status = B_BUSY;
3666 		goto out;
3667 	}
3668 	context->max_monitors = newSize;
3669 
3670 out:
3671 	mutex_unlock(&context->io_mutex);
3672 	return status;
3673 }
3674 
3675 
3676 //	#pragma mark - public API for file systems
3677 
3678 
3679 extern "C" status_t
3680 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3681 	fs_vnode_ops* ops)
3682 {
3683 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3684 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3685 
3686 	if (privateNode == NULL)
3687 		return B_BAD_VALUE;
3688 
3689 	int32 tries = BUSY_VNODE_RETRIES;
3690 restart:
3691 	// create the node
3692 	bool nodeCreated;
3693 	struct vnode* vnode;
3694 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3695 		nodeCreated);
3696 	if (status != B_OK)
3697 		return status;
3698 
3699 	WriteLocker nodeLocker(sVnodeLock, true);
3700 		// create_new_vnode_and_lock() has locked for us
3701 
3702 	if (!nodeCreated && vnode->IsBusy()) {
3703 		nodeLocker.Unlock();
3704 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3705 			return B_BUSY;
3706 		goto restart;
3707 	}
3708 
3709 	// file system integrity check:
3710 	// test if the vnode already exists and bail out if this is the case!
3711 	if (!nodeCreated) {
3712 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3713 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3714 			vnode->private_node);
3715 		return B_ERROR;
3716 	}
3717 
3718 	vnode->private_node = privateNode;
3719 	vnode->ops = ops;
3720 	vnode->SetUnpublished(true);
3721 
3722 	TRACE(("returns: %s\n", strerror(status)));
3723 
3724 	return status;
3725 }
3726 
3727 
3728 extern "C" status_t
3729 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3730 	fs_vnode_ops* ops, int type, uint32 flags)
3731 {
3732 	FUNCTION(("publish_vnode()\n"));
3733 
3734 	int32 tries = BUSY_VNODE_RETRIES;
3735 restart:
3736 	WriteLocker locker(sVnodeLock);
3737 
3738 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3739 
3740 	bool nodeCreated = false;
3741 	if (vnode == NULL) {
3742 		if (privateNode == NULL)
3743 			return B_BAD_VALUE;
3744 
3745 		// create the node
3746 		locker.Unlock();
3747 			// create_new_vnode_and_lock() will re-lock for us on success
3748 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3749 			nodeCreated);
3750 		if (status != B_OK)
3751 			return status;
3752 
3753 		locker.SetTo(sVnodeLock, true);
3754 	}
3755 
3756 	if (nodeCreated) {
3757 		vnode->private_node = privateNode;
3758 		vnode->ops = ops;
3759 		vnode->SetUnpublished(true);
3760 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3761 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3762 		// already known, but not published
3763 	} else if (vnode->IsBusy()) {
3764 		locker.Unlock();
3765 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3766 			return B_BUSY;
3767 		goto restart;
3768 	} else
3769 		return B_BAD_VALUE;
3770 
3771 	bool publishSpecialSubNode = false;
3772 
3773 	vnode->SetType(type);
3774 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3775 	publishSpecialSubNode = is_special_node_type(type)
3776 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3777 
3778 	status_t status = B_OK;
3779 
3780 	// create sub vnodes, if necessary
3781 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3782 		locker.Unlock();
3783 
3784 		fs_volume* subVolume = volume;
3785 		if (volume->sub_volume != NULL) {
3786 			while (status == B_OK && subVolume->sub_volume != NULL) {
3787 				subVolume = subVolume->sub_volume;
3788 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3789 					vnode);
3790 			}
3791 		}
3792 
3793 		if (status == B_OK && publishSpecialSubNode)
3794 			status = create_special_sub_node(vnode, flags);
3795 
3796 		if (status != B_OK) {
3797 			// error -- clean up the created sub vnodes
3798 			while (subVolume->super_volume != volume) {
3799 				subVolume = subVolume->super_volume;
3800 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3801 			}
3802 		}
3803 
3804 		if (status == B_OK) {
3805 			ReadLocker vnodesReadLocker(sVnodeLock);
3806 			AutoLocker<Vnode> nodeLocker(vnode);
3807 			vnode->SetBusy(false);
3808 			vnode->SetUnpublished(false);
3809 		} else {
3810 			locker.Lock();
3811 			sVnodeTable->Remove(vnode);
3812 			remove_vnode_from_mount_list(vnode, vnode->mount);
3813 			object_cache_free(sVnodeCache, vnode, 0);
3814 		}
3815 	} else {
3816 		// we still hold the write lock -- mark the node unbusy and published
3817 		vnode->SetBusy(false);
3818 		vnode->SetUnpublished(false);
3819 	}
3820 
3821 	TRACE(("returns: %s\n", strerror(status)));
3822 
3823 	return status;
3824 }
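

/*	Usage sketch (illustrative only; MyNode and gMyVnodeOps are hypothetical):
	a file system typically makes a freshly created node known to the VFS in
	two steps -- or in a single publish_vnode() call, which creates the vnode
	itself if it doesn't exist yet:

		MyNode* node = ...;
		status_t status = new_vnode(volume, node->id, node, &gMyVnodeOps);
		if (status == B_OK) {
			// finish initialization; the vnode is still busy and unpublished
			status = publish_vnode(volume, node->id, node, &gMyVnodeOps,
				S_IFREG, 0);
		}
*/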
3825 
3826 
3827 extern "C" status_t
3828 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3829 {
3830 	struct vnode* vnode;
3831 
3832 	if (volume == NULL)
3833 		return B_BAD_VALUE;
3834 
3835 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3836 	if (status != B_OK)
3837 		return status;
3838 
3839 	// If this is a layered FS, we need to get the node cookie for the requested
3840 	// layer.
3841 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3842 		fs_vnode resolvedNode;
3843 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3844 			&resolvedNode);
3845 		if (status != B_OK) {
3846 			panic("get_vnode(): Failed to get super node for vnode %p, "
3847 				"volume: %p", vnode, volume);
3848 			put_vnode(vnode);
3849 			return status;
3850 		}
3851 
3852 		if (_privateNode != NULL)
3853 			*_privateNode = resolvedNode.private_node;
3854 	} else if (_privateNode != NULL)
3855 		*_privateNode = vnode->private_node;
3856 
3857 	return B_OK;
3858 }
3859 
3860 
3861 extern "C" status_t
3862 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3863 {
3864 	ReadLocker nodeLocker(sVnodeLock);
3865 
3866 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3867 	if (vnode == NULL)
3868 		return B_BAD_VALUE;
3869 
3870 	inc_vnode_ref_count(vnode);
3871 	return B_OK;
3872 }
3873 
3874 
3875 extern "C" status_t
3876 put_vnode(fs_volume* volume, ino_t vnodeID)
3877 {
3878 	struct vnode* vnode;
3879 
3880 	rw_lock_read_lock(&sVnodeLock);
3881 	vnode = lookup_vnode(volume->id, vnodeID);
3882 	rw_lock_read_unlock(&sVnodeLock);
3883 
3884 	if (vnode == NULL)
3885 		return B_BAD_VALUE;
3886 
3887 	dec_vnode_ref_count(vnode, false, true);
3888 	return B_OK;
3889 }
3890 
3891 
3892 extern "C" status_t
3893 remove_vnode(fs_volume* volume, ino_t vnodeID)
3894 {
3895 	ReadLocker locker(sVnodeLock);
3896 
3897 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3898 	if (vnode == NULL)
3899 		return B_ENTRY_NOT_FOUND;
3900 
3901 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3902 		// this vnode is in use
3903 		return B_BUSY;
3904 	}
3905 
3906 	vnode->Lock();
3907 
3908 	vnode->SetRemoved(true);
3909 	bool removeUnpublished = false;
3910 
3911 	if (vnode->IsUnpublished()) {
3912 		// prepare the vnode for deletion
3913 		removeUnpublished = true;
3914 		vnode->SetBusy(true);
3915 	}
3916 
3917 	vnode->Unlock();
3918 	locker.Unlock();
3919 
3920 	if (removeUnpublished) {
3921 		// If the vnode hasn't been published yet, we delete it here
3922 		atomic_add(&vnode->ref_count, -1);
3923 		free_vnode(vnode, true);
3924 	}
3925 
3926 	return B_OK;
3927 }
3928 
3929 
3930 extern "C" status_t
3931 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3932 {
3933 	struct vnode* vnode;
3934 
3935 	rw_lock_read_lock(&sVnodeLock);
3936 
3937 	vnode = lookup_vnode(volume->id, vnodeID);
3938 	if (vnode) {
3939 		AutoLocker<Vnode> nodeLocker(vnode);
3940 		vnode->SetRemoved(false);
3941 	}
3942 
3943 	rw_lock_read_unlock(&sVnodeLock);
3944 	return B_OK;
3945 }
3946 
3947 
3948 extern "C" status_t
3949 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3950 {
3951 	ReadLocker _(sVnodeLock);
3952 
3953 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3954 		if (_removed != NULL)
3955 			*_removed = vnode->IsRemoved();
3956 		return B_OK;
3957 	}
3958 
3959 	return B_BAD_VALUE;
3960 }
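

/*	Usage sketch (illustrative only): an FS's unlink hook typically marks the
	node for deletion once the last entry referring to it is gone, and a later
	recreation of the entry can undo that:

		status_t status = remove_vnode(volume, nodeID);
		// ...
		unremove_vnode(volume, nodeID);
			// only if the entry was recreated in the meantime

	get_vnode_removed() can be used to query whether a node is still marked
	removed.
*/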
3961 
3962 
3963 extern "C" fs_volume*
3964 volume_for_vnode(fs_vnode* _vnode)
3965 {
3966 	if (_vnode == NULL)
3967 		return NULL;
3968 
3969 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3970 	return vnode->mount->volume;
3971 }
3972 
3973 
3974 extern "C" status_t
3975 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3976 	uid_t nodeUserID)
3977 {
3978 	// get node permissions
3979 	int userPermissions = (mode & S_IRWXU) >> 6;
3980 	int groupPermissions = (mode & S_IRWXG) >> 3;
3981 	int otherPermissions = mode & S_IRWXO;
3982 
3983 	// get the node permissions for this uid/gid
3984 	int permissions = 0;
3985 	uid_t uid = geteuid();
3986 
3987 	if (uid == 0) {
3988 		// user is root
3989 		// root has always read/write permission, but at least one of the
3990 		// X bits must be set for execute permission
3991 		permissions = userPermissions | groupPermissions | otherPermissions
3992 			| S_IROTH | S_IWOTH;
3993 		if (S_ISDIR(mode))
3994 			permissions |= S_IXOTH;
3995 	} else if (uid == nodeUserID) {
3996 		// user is node owner
3997 		permissions = userPermissions;
3998 	} else if (is_user_in_group(nodeGroupID)) {
3999 		// user is in owning group
4000 		permissions = groupPermissions;
4001 	} else {
4002 		// user is one of the others
4003 		permissions = otherPermissions;
4004 	}
4005 
4006 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4007 }
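

/*	Example (illustrative only; MyNode is a hypothetical private node type):
	an FS's access() hook can forward to this helper directly. \a accessMode
	takes the R_OK/W_OK/X_OK bits, which line up with the "other" permission
	bits of \a mode:

		static status_t
		my_fs_access(fs_volume* volume, fs_vnode* vnode, int accessMode)
		{
			MyNode* node = (MyNode*)vnode->private_node;
			return check_access_permissions(accessMode, node->mode,
				node->gid, node->uid);
		}
*/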
4008 
4009 
4010 #if 0
4011 extern "C" status_t
4012 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4013 	size_t* _numBytes)
4014 {
4015 	struct file_descriptor* descriptor;
4016 	struct vnode* vnode;
4017 
4018 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4019 	if (descriptor == NULL)
4020 		return B_FILE_ERROR;
4021 
4022 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4023 		count, 0, _numBytes);
4024 
4025 	put_fd(descriptor);
4026 	return status;
4027 }
4028 
4029 
4030 extern "C" status_t
4031 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4032 	size_t* _numBytes)
4033 {
4034 	struct file_descriptor* descriptor;
4035 	struct vnode* vnode;
4036 
4037 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4038 	if (descriptor == NULL)
4039 		return B_FILE_ERROR;
4040 
4041 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4042 		count, 0, _numBytes);
4043 
4044 	put_fd(descriptor);
4045 	return status;
4046 }
4047 #endif
4048 
4049 
4050 extern "C" status_t
4051 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4052 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4053 	size_t* _bytes)
4054 {
4055 	struct vnode* vnode;
4056 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4057 	if (!descriptor.IsSet())
4058 		return B_FILE_ERROR;
4059 
4060 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4061 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4062 		false);
4063 
4064 	return status;
4065 }
4066 
4067 
4068 extern "C" status_t
4069 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4070 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4071 	size_t* _bytes)
4072 {
4073 	struct vnode* vnode;
4074 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4075 	if (!descriptor.IsSet())
4076 		return B_FILE_ERROR;
4077 
4078 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4079 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4080 		true);
4081 
4082 	return status;
4083 }
4084 
4085 
4086 extern "C" status_t
4087 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4088 {
4089 	// lookup mount -- the caller is required to make sure that the mount
4090 	// won't go away
4091 	ReadLocker locker(sMountLock);
4092 	struct fs_mount* mount = find_mount(mountID);
4093 	if (mount == NULL)
4094 		return B_BAD_VALUE;
4095 	locker.Unlock();
4096 
4097 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4098 }
4099 
4100 
4101 extern "C" status_t
4102 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4103 {
4104 	// lookup mount -- the caller is required to make sure that the mount
4105 	// won't go away
4106 	ReadLocker locker(sMountLock);
4107 	struct fs_mount* mount = find_mount(mountID);
4108 	if (mount == NULL)
4109 		return B_BAD_VALUE;
4110 	locker.Unlock();
4111 
4112 	return mount->entry_cache.Add(dirID, name, -1, true);
4113 }
4114 
4115 
4116 extern "C" status_t
4117 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4118 {
4119 	// lookup mount -- the caller is required to make sure that the mount
4120 	// won't go away
4121 	ReadLocker locker(sMountLock);
4122 	struct fs_mount* mount = find_mount(mountID);
4123 	if (mount == NULL)
4124 		return B_BAD_VALUE;
4125 	locker.Unlock();
4126 
4127 	return mount->entry_cache.Remove(dirID, name);
4128 }
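

/*	Usage sketch (illustrative only): file systems keep the entry cache
	coherent from their directory modifying hooks, e.g.:

		entry_cache_add(volume->id, dirID, "file.txt", nodeID);
		entry_cache_add_missing(volume->id, dirID, "gone.txt");
			// negative entry -- lookups can fail fast without asking the FS
		entry_cache_remove(volume->id, dirID, "file.txt");
*/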
4129 
4130 
4131 //	#pragma mark - private VFS API
4132 //	Functions the VFS exports for other parts of the kernel
4133 
4134 
4135 /*! Acquires another reference to the vnode that has to be released
4136 	by calling vfs_put_vnode().
4137 */
4138 void
4139 vfs_acquire_vnode(struct vnode* vnode)
4140 {
4141 	inc_vnode_ref_count(vnode);
4142 }
4143 
4144 
4145 /*! This is currently called from file_cache_create() only.
4146 	It's probably a temporary solution as long as devfs requires that
4147 	fs_read_pages()/fs_write_pages() are called with the standard
4148 	open cookie and not with a device cookie.
4149 	If that's done differently, remove this call; it has no other
4150 	purpose.
4151 */
4152 extern "C" status_t
4153 vfs_get_cookie_from_fd(int fd, void** _cookie)
4154 {
4155 	struct file_descriptor* descriptor;
4156 
4157 	descriptor = get_fd(get_current_io_context(true), fd);
4158 	if (descriptor == NULL)
4159 		return B_FILE_ERROR;
4160 
4161 	*_cookie = descriptor->cookie;
4162 	return B_OK;
4163 }
4164 
4165 
4166 extern "C" status_t
4167 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4168 {
4169 	*vnode = get_vnode_from_fd(fd, kernel);
4170 
4171 	if (*vnode == NULL)
4172 		return B_FILE_ERROR;
4173 
4174 	return B_OK;
4175 }
4176 
4177 
4178 extern "C" status_t
4179 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4180 {
4181 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4182 		path, kernel));
4183 
4184 	KPath pathBuffer;
4185 	if (pathBuffer.InitCheck() != B_OK)
4186 		return B_NO_MEMORY;
4187 
4188 	char* buffer = pathBuffer.LockBuffer();
4189 	strlcpy(buffer, path, pathBuffer.BufferSize());
4190 
4191 	VnodePutter vnode;
4192 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4193 	if (status != B_OK)
4194 		return status;
4195 
4196 	*_vnode = vnode.Detach();
4197 	return B_OK;
4198 }
4199 
4200 
4201 extern "C" status_t
4202 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4203 {
4204 	struct vnode* vnode = NULL;
4205 
4206 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4207 	if (status != B_OK)
4208 		return status;
4209 
4210 	*_vnode = vnode;
4211 	return B_OK;
4212 }
4213 
4214 
4215 extern "C" status_t
4216 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4217 	const char* name, struct vnode** _vnode)
4218 {
4219 	VnodePutter vnode;
4220 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4221 	*_vnode = vnode.Detach();
4222 	return status;
4223 }
4224 
4225 
4226 extern "C" void
4227 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4228 {
4229 	*_mountID = vnode->device;
4230 	*_vnodeID = vnode->id;
4231 }
4232 
4233 
4234 /*!
4235 	Helper function abstracting the process of "converting" a given
4236 	vnode-pointer to a fs_vnode-pointer.
4237 	Currently only used in bindfs.
4238 */
4239 extern "C" fs_vnode*
4240 vfs_fsnode_for_vnode(struct vnode* vnode)
4241 {
4242 	return vnode;
4243 }
4244 
4245 
4246 /*!
4247 	Calls fs_open() on the given vnode and returns a new
4248 	file descriptor for it
4249 */
4250 int
4251 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4252 {
4253 	return open_vnode(vnode, openMode, kernel);
4254 }
4255 
4256 
4257 /*!	Looks up a vnode with the given mount and vnode ID.
4258 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4259 	to the node.
4260 	It's currently only used by file_cache_create().
4261 */
4262 extern "C" status_t
4263 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4264 {
4265 	rw_lock_read_lock(&sVnodeLock);
4266 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4267 	rw_lock_read_unlock(&sVnodeLock);
4268 
4269 	if (vnode == NULL)
4270 		return B_ERROR;
4271 
4272 	*_vnode = vnode;
4273 	return B_OK;
4274 }
4275 
4276 
4277 extern "C" status_t
4278 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4279 	bool traverseLeafLink, bool kernel, void** _node)
4280 {
4281 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4282 		volume, path, kernel));
4283 
4284 	KPath pathBuffer;
4285 	if (pathBuffer.InitCheck() != B_OK)
4286 		return B_NO_MEMORY;
4287 
4288 	fs_mount* mount;
4289 	status_t status = get_mount(volume->id, &mount);
4290 	if (status != B_OK)
4291 		return status;
4292 
4293 	char* buffer = pathBuffer.LockBuffer();
4294 	strlcpy(buffer, path, pathBuffer.BufferSize());
4295 
4296 	VnodePutter vnode;
4297 
4298 	if (buffer[0] == '/')
4299 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4300 	else {
4301 		inc_vnode_ref_count(mount->root_vnode);
4302 			// vnode_path_to_vnode() releases a reference to the starting vnode
4303 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink,
4304 			kernel, vnode, NULL);
4305 	}
4306 
4307 	put_mount(mount);
4308 
4309 	if (status != B_OK)
4310 		return status;
4311 
4312 	if (vnode->device != volume->id) {
4313 		// wrong mount ID - must not gain access on foreign file system nodes
4314 		return B_BAD_VALUE;
4315 	}
4316 
4317 	// Use get_vnode() to resolve the cookie for the right layer.
4318 	status = get_vnode(volume, vnode->id, _node);
4319 
4320 	return status;
4321 }
4322 
4323 
4324 status_t
4325 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4326 	struct stat* stat, bool kernel)
4327 {
4328 	status_t status;
4329 
4330 	if (path != NULL) {
4331 		// path given: get the stat of the node referred to by (fd, path)
4332 		KPath pathBuffer(path);
4333 		if (pathBuffer.InitCheck() != B_OK)
4334 			return B_NO_MEMORY;
4335 
4336 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4337 			traverseLeafLink, stat, kernel);
4338 	} else {
4339 		// no path given: get the FD and use the FD operation
4340 		FileDescriptorPutter descriptor
4341 			(get_fd(get_current_io_context(kernel), fd));
4342 		if (!descriptor.IsSet())
4343 			return B_FILE_ERROR;
4344 
4345 		if (descriptor->ops->fd_read_stat)
4346 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4347 		else
4348 			status = B_UNSUPPORTED;
4349 	}
4350 
4351 	return status;
4352 }
4353 
4354 
4355 /*!	Finds the full path to the file that contains the module \a moduleName,
4356 	puts it into \a pathBuffer, and returns \c B_OK for success.
4357 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW, or
4358 	\c B_ENTRY_NOT_FOUND if no file could be found.
4359 	\a pathBuffer is clobbered in any case and must not be relied on if this
4360 	function returns unsuccessfully.
4361 	\a basePath and \a pathBuffer must not point to the same space.
4362 */
4363 status_t
4364 vfs_get_module_path(const char* basePath, const char* moduleName,
4365 	char* pathBuffer, size_t bufferSize)
4366 {
4367 	status_t status;
4368 	size_t length;
4369 	char* path;
4370 
4371 	if (bufferSize == 0
4372 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4373 		return B_BUFFER_OVERFLOW;
4374 
4375 	VnodePutter dir;
4376 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4377 	if (status != B_OK)
4378 		return status;
4379 
4380 	// the path buffer had been clobbered by the above call
4381 	length = strlcpy(pathBuffer, basePath, bufferSize);
4382 	if (pathBuffer[length - 1] != '/')
4383 		pathBuffer[length++] = '/';
4384 
4385 	path = pathBuffer + length;
4386 	bufferSize -= length;
4387 
4388 	VnodePutter file;
4389 	while (moduleName) {
4390 		char* nextPath = strchr(moduleName, '/');
4391 		if (nextPath == NULL)
4392 			length = strlen(moduleName);
4393 		else {
4394 			length = nextPath - moduleName;
4395 			nextPath++;
4396 		}
4397 
4398 		if (length + 1 >= bufferSize)
4399 			return B_BUFFER_OVERFLOW;
4400 
4401 		memcpy(path, moduleName, length);
4402 		path[length] = '\0';
4403 		moduleName = nextPath;
4404 
4405 		// vnode_path_to_vnode() assumes ownership of the passed dir
4406 		status = vnode_path_to_vnode(dir.Detach(), path, true, true, file, NULL);
4407 		if (status != B_OK)
4408 			return status;
4409 
4410 		if (S_ISDIR(file->Type())) {
4411 			// go to the next directory
4412 			path[length] = '/';
4413 			path[length + 1] = '\0';
4414 			path += length + 1;
4415 			bufferSize -= length + 1;
4416 
4417 			dir.SetTo(file.Detach());
4418 		} else if (S_ISREG(file->Type())) {
4419 			// it's a file so it should be what we've searched for
4420 			return B_OK;
4421 		} else {
4422 			TRACE(("vfs_get_module_path(): something is strange here: "
4423 				"0x%08" B_PRIx32 "...\n", file->Type()));
4424 			return B_ERROR;
4425 		}
4426 	}
4427 
4428 	// if we got here, the moduleName just pointed to a directory, not to
4429 	// a real module - what should we do in this case?
4430 	return B_ENTRY_NOT_FOUND;
4431 }
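

/*	Example (illustrative only, with hypothetical arguments):

		char path[B_PATH_NAME_LENGTH];
		status_t status = vfs_get_module_path("/boot/system/add-ons/kernel",
			"bus_managers/pci/v1", path, sizeof(path));
		// on success, path names the regular file below the base path that
		// contains the module
*/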
4432 
4433 
4434 /*!	\brief Normalizes a given path.
4435 
4436 	The path must refer to an existing or non-existing entry in an existing
4437 	directory, that is chopping off the leaf component the remaining path must
4438 	refer to an existing directory.
4439 
4440 	The returned path will be canonical in that it will be absolute, will not
4441 	contain any "." or ".." components or duplicate occurrences of '/'s,
4442 	and none of the directory components will be symbolic links.
4443 
4444 	Any two paths referring to the same entry will result in the same
4445 	normalized path (well, that is pretty much the definition of `normalized',
4446 	isn't it :-).
4447 
4448 	\param path The path to be normalized.
4449 	\param buffer The buffer into which the normalized path will be written.
4450 		   May be the same one as \a path.
4451 	\param bufferSize The size of \a buffer.
4452 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4453 	\param kernel \c true, if the IO context of the kernel shall be used,
4454 		   otherwise that of the team this thread belongs to. Only relevant,
4455 		   if the path is relative (to get the CWD).
4456 	\return \c B_OK if everything went fine, another error code otherwise.
4457 */
4458 status_t
4459 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4460 	bool traverseLink, bool kernel)
4461 {
4462 	if (!path || !buffer || bufferSize < 1)
4463 		return B_BAD_VALUE;
4464 
4465 	if (path != buffer) {
4466 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4467 			return B_BUFFER_OVERFLOW;
4468 	}
4469 
4470 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4471 }
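

/*	Example (illustrative only): normalizing in place is fine, since \a buffer
	may alias \a path. Assuming no symlinks along the way:

		char path[B_PATH_NAME_LENGTH] = "/boot/./home//Desktop/../file";
		if (vfs_normalize_path(path, path, sizeof(path), true, true) == B_OK)
			;	// path is now "/boot/home/file"
*/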
4472 
4473 
4474 /*!	\brief Gets the parent of the passed in node.
4475 
4476 	Gets the parent of the passed in node, and correctly resolves covered
4477 	nodes.
4478 */
4479 extern "C" status_t
4480 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4481 {
4482 	return resolve_covered_parent(parent, device, node,
4483 		get_current_io_context(true));
4484 }
4485 
4486 
4487 /*!	\brief Creates a special node in the file system.
4488 
4489 	The caller gets a reference to the newly created node (which is passed
4490 	back through \a _createdVnode) and is responsible for releasing it.
4491 
4492 	\param path The path where to create the entry for the node. Can be \c NULL,
4493 		in which case the node is created without an entry in the root FS -- it
4494 		will automatically be deleted when the last reference has been released.
4495 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4496 		the target file system will just create the node with its standard
4497 		operations. Depending on the type of the node a subnode might be created
4498 		automatically, though.
4499 	\param mode The type and permissions for the node to be created.
4500 	\param flags Flags to be passed to the creating FS.
4501 	\param kernel \c true, if called in the kernel context (relevant only if
4502 		\a path is not \c NULL and not absolute).
4503 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4504 		file system creating the node, with the private data pointer and
4505 		operations for the super node. Can be \c NULL.
4506 	\param _createdVnode Pointer to pre-allocated storage where to store the
4507 		pointer to the newly created node.
4508 	\return \c B_OK, if everything went fine, another error code otherwise.
4509 */
4510 status_t
4511 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4512 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4513 	struct vnode** _createdVnode)
4514 {
4515 	VnodePutter dirNode;
4516 	char _leaf[B_FILE_NAME_LENGTH];
4517 	char* leaf = NULL;
4518 
4519 	if (path) {
4520 		// We've got a path. Get the dir vnode and the leaf name.
4521 		KPath tmpPathBuffer;
4522 		if (tmpPathBuffer.InitCheck() != B_OK)
4523 			return B_NO_MEMORY;
4524 
4525 		char* tmpPath = tmpPathBuffer.LockBuffer();
4526 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4527 			return B_NAME_TOO_LONG;
4528 
4529 		// get the dir vnode and the leaf name
4530 		leaf = _leaf;
4531 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4532 		if (error != B_OK)
4533 			return error;
4534 	} else {
4535 		// No path. Create the node in the root FS.
4536 		dirNode.SetTo(sRoot);
4537 		inc_vnode_ref_count(dirNode.Get());
4538 	}
4539 
4540 	// check support for creating special nodes
4541 	if (!HAS_FS_CALL(dirNode, create_special_node))
4542 		return B_UNSUPPORTED;
4543 
4544 	// create the node
4545 	fs_vnode superVnode;
4546 	ino_t nodeID;
4547 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4548 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4549 	if (status != B_OK)
4550 		return status;
4551 
4552 	// lookup the node
4553 	rw_lock_read_lock(&sVnodeLock);
4554 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4555 	rw_lock_read_unlock(&sVnodeLock);
4556 
4557 	if (*_createdVnode == NULL) {
4558 		panic("vfs_create_special_node(): lookup of node failed");
4559 		return B_ERROR;
4560 	}
4561 
4562 	return B_OK;
4563 }
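

/*	Usage sketch (illustrative only): creating an unnamed special node in the
	root FS -- no entry is created, so the node goes away with the last
	reference:

		struct vnode* createdVnode;
		status_t status = vfs_create_special_node(NULL, NULL, S_IFIFO | 0600,
			0, true, NULL, &createdVnode);
		if (status == B_OK)
			vfs_put_vnode(createdVnode);	// release when done with it
*/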
4564 
4565 
4566 extern "C" void
4567 vfs_put_vnode(struct vnode* vnode)
4568 {
4569 	put_vnode(vnode);
4570 }
4571 
4572 
4573 extern "C" status_t
4574 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4575 {
4576 	// Get current working directory from io context
4577 	struct io_context* context = get_current_io_context(false);
4578 	status_t status = B_OK;
4579 
4580 	mutex_lock(&context->io_mutex);
4581 
4582 	if (context->cwd != NULL) {
4583 		*_mountID = context->cwd->device;
4584 		*_vnodeID = context->cwd->id;
4585 	} else
4586 		status = B_ERROR;
4587 
4588 	mutex_unlock(&context->io_mutex);
4589 	return status;
4590 }
4591 
4592 
4593 status_t
4594 vfs_unmount(dev_t mountID, uint32 flags)
4595 {
4596 	return fs_unmount(NULL, mountID, flags, true);
4597 }
4598 
4599 
4600 extern "C" status_t
4601 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4602 {
4603 	struct vnode* vnode;
4604 
4605 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4606 	if (status != B_OK)
4607 		return status;
4608 
4609 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4610 	put_vnode(vnode);
4611 	return B_OK;
4612 }
4613 
4614 
4615 extern "C" void
4616 vfs_free_unused_vnodes(int32 level)
4617 {
4618 	vnode_low_resource_handler(NULL,
4619 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4620 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4621 		level);
4622 }
4623 
4624 
4625 extern "C" bool
4626 vfs_can_page(struct vnode* vnode, void* cookie)
4627 {
4628 	FUNCTION(("vfs_canpage: vnode %p\n", vnode));
4629 
4630 	if (HAS_FS_CALL(vnode, can_page))
4631 		return FS_CALL(vnode, can_page, cookie);
4632 	return false;
4633 }
4634 
4635 
4636 extern "C" status_t
4637 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4638 	const generic_io_vec* vecs, size_t count, uint32 flags,
4639 	generic_size_t* _numBytes)
4640 {
4641 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4642 		vecs, pos));
4643 
4644 #if VFS_PAGES_IO_TRACING
4645 	generic_size_t bytesRequested = *_numBytes;
4646 #endif
4647 
4648 	IORequest request;
4649 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4650 	if (status == B_OK) {
4651 		status = vfs_vnode_io(vnode, cookie, &request);
4652 		if (status == B_OK)
4653 			status = request.Wait();
4654 		*_numBytes = request.TransferredBytes();
4655 	}
4656 
4657 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4658 		status, *_numBytes));
4659 
4660 	return status;
4661 }
4662 
4663 
4664 extern "C" status_t
4665 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4666 	const generic_io_vec* vecs, size_t count, uint32 flags,
4667 	generic_size_t* _numBytes)
4668 {
4669 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4670 		vecs, pos));
4671 
4672 #if VFS_PAGES_IO_TRACING
4673 	generic_size_t bytesRequested = *_numBytes;
4674 #endif
4675 
4676 	IORequest request;
4677 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4678 	if (status == B_OK) {
4679 		status = vfs_vnode_io(vnode, cookie, &request);
4680 		if (status == B_OK)
4681 			status = request.Wait();
4682 		*_numBytes = request.TransferredBytes();
4683 	}
4684 
4685 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4686 		status, *_numBytes));
4687 
4688 	return status;
4689 }
4690 
4691 
4692 /*!	Gets the vnode's VMCache object. If it doesn't have one yet, it will be
4693 	created if \a allocate is \c true.
4694 	In case it's successful, it will also grab a reference to the cache
4695 	it returns.
4696 */
4697 extern "C" status_t
4698 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4699 {
4700 	if (vnode->cache != NULL) {
4701 		vnode->cache->AcquireRef();
4702 		*_cache = vnode->cache;
4703 		return B_OK;
4704 	}
4705 
4706 	rw_lock_read_lock(&sVnodeLock);
4707 	vnode->Lock();
4708 
4709 	status_t status = B_OK;
4710 
4711 	// The cache could have been created in the meantime
4712 	if (vnode->cache == NULL) {
4713 		if (allocate) {
4714 			// TODO: actually the vnode needs to be busy already here, or
4715 			//	else this won't work...
4716 			bool wasBusy = vnode->IsBusy();
4717 			vnode->SetBusy(true);
4718 
4719 			vnode->Unlock();
4720 			rw_lock_read_unlock(&sVnodeLock);
4721 
4722 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4723 
4724 			rw_lock_read_lock(&sVnodeLock);
4725 			vnode->Lock();
4726 			vnode->SetBusy(wasBusy);
4727 		} else
4728 			status = B_BAD_VALUE;
4729 	}
4730 
4731 	vnode->Unlock();
4732 	rw_lock_read_unlock(&sVnodeLock);
4733 
4734 	if (status == B_OK) {
4735 		vnode->cache->AcquireRef();
4736 		*_cache = vnode->cache;
4737 	}
4738 
4739 	return status;
4740 }
4741 
4742 
4743 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4744 	their own.
4745 	On success, it will also acquire a reference to the given cache.
4747 */
4748 extern "C" status_t
4749 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4750 {
4751 	rw_lock_read_lock(&sVnodeLock);
4752 	vnode->Lock();
4753 
4754 	status_t status = B_OK;
4755 	if (vnode->cache != NULL) {
4756 		status = B_NOT_ALLOWED;
4757 	} else {
4758 		vnode->cache = _cache;
4759 		_cache->AcquireRef();
4760 	}
4761 
4762 	vnode->Unlock();
4763 	rw_lock_read_unlock(&sVnodeLock);
4764 	return status;
4765 }
4766 
4767 
4768 status_t
4769 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4770 	file_io_vec* vecs, size_t* _count)
4771 {
4772 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4773 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4774 
4775 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4776 }
4777 
4778 
4779 status_t
4780 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4781 {
4782 	status_t status = FS_CALL(vnode, read_stat, stat);
4783 
4784 	// fill in the st_dev and st_ino fields
4785 	if (status == B_OK) {
4786 		stat->st_dev = vnode->device;
4787 		stat->st_ino = vnode->id;
4788 		// the rdev field must stay unset for non-special files
4789 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4790 			stat->st_rdev = -1;
4791 	}
4792 
4793 	return status;
4794 }
4795 
4796 
4797 status_t
4798 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4799 {
4800 	struct vnode* vnode;
4801 	status_t status = get_vnode(device, inode, &vnode, true, false);
4802 	if (status != B_OK)
4803 		return status;
4804 
4805 	status = vfs_stat_vnode(vnode, stat);
4806 
4807 	put_vnode(vnode);
4808 	return status;
4809 }
4810 
4811 
4812 status_t
4813 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4814 {
4815 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4816 }
4817 
4818 
4819 status_t
4820 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4821 	bool kernel, char* path, size_t pathLength)
4822 {
4823 	VnodePutter vnode;
4824 	status_t status;
4825 
4826 	// filter invalid leaf names
4827 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4828 		return B_BAD_VALUE;
4829 
4830 	// get the vnode matching the dir's node_ref
4831 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4832 		// special cases "." and "..": we can directly get the vnode of the
4833 		// referenced directory
4834 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4835 		leaf = NULL;
4836 	} else {
4837 		struct vnode* temp = NULL;
4838 		status = get_vnode(device, inode, &temp, true, false);
4839 		vnode.SetTo(temp);
4840 	}
4841 	if (status != B_OK)
4842 		return status;
4843 
4844 	// get the directory path
4845 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4846 	vnode.Unset();
4847 		// we don't need the vnode anymore
4848 	if (status != B_OK)
4849 		return status;
4850 
4851 	// append the leaf name
4852 	if (leaf) {
4853 		// insert a directory separator if this is not the file system root
4854 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4855 				>= pathLength)
4856 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4857 			return B_NAME_TOO_LONG;
4858 		}
4859 	}
4860 
4861 	return B_OK;
4862 }
4863 
4864 
4865 /*!	If the given descriptor locked its vnode, that lock will be released. */
4866 void
4867 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4868 {
4869 	struct vnode* vnode = fd_vnode(descriptor);
4870 
4871 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4872 		vnode->mandatory_locked_by = NULL;
4873 }
4874 
4875 
4876 /*!	Releases any POSIX locks on the file descriptor. */
4877 status_t
4878 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4879 {
4880 	struct vnode* vnode = descriptor->u.vnode;
4881 	if (vnode == NULL)
4882 		return B_OK;
4883 
4884 	if (HAS_FS_CALL(vnode, release_lock))
4885 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4886 
4887 	return release_advisory_lock(vnode, context, NULL, NULL);
4888 }
4889 
4890 
4891 /*!	Closes all file descriptors of the specified I/O context that
4892 	have the O_CLOEXEC flag set.
4893 */
4894 void
4895 vfs_exec_io_context(io_context* context)
4896 {
4897 	uint32 i;
4898 
4899 	for (i = 0; i < context->table_size; i++) {
4900 		mutex_lock(&context->io_mutex);
4901 
4902 		struct file_descriptor* descriptor = context->fds[i];
4903 		bool remove = false;
4904 
4905 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4906 			context->fds[i] = NULL;
4907 			context->num_used_fds--;
4908 
4909 			remove = true;
4910 		}
4911 
4912 		mutex_unlock(&context->io_mutex);
4913 
4914 		if (remove) {
4915 			close_fd(context, descriptor);
4916 			put_fd(descriptor);
4917 		}
4918 	}
4919 }
4920 
4921 
4922 /*! Sets up a new io_context structure, and inherits the properties
4923 	of the parent io_context if it is given.
4924 */
4925 io_context*
4926 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4927 {
4928 	io_context* context = (io_context*)malloc(sizeof(io_context));
4929 	if (context == NULL)
4930 		return NULL;
4931 
4932 	TIOC(NewIOContext(context, parentContext));
4933 
4934 	memset(context, 0, sizeof(io_context));
4935 	context->ref_count = 1;
4936 
4937 	MutexLocker parentLocker;
4938 
4939 	size_t tableSize;
4940 	if (parentContext != NULL) {
4941 		parentLocker.SetTo(parentContext->io_mutex, false);
4942 		tableSize = parentContext->table_size;
4943 	} else
4944 		tableSize = DEFAULT_FD_TABLE_SIZE;
4945 
4946 	// allocate space for FDs and their close-on-exec flag
4947 	context->fds = (file_descriptor**)malloc(
4948 		sizeof(struct file_descriptor*) * tableSize
4949 		+ sizeof(struct select_info**) * tableSize
4950 		+ (tableSize + 7) / 8);
4951 	if (context->fds == NULL) {
4952 		free(context);
4953 		return NULL;
4954 	}
4955 
4956 	context->select_infos = (select_info**)(context->fds + tableSize);
4957 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
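	// Layout of the single allocation above (for illustration):
	//   fds:               file_descriptor* [tableSize]
	//   select_infos:      select_info*     [tableSize]
	//   fds_close_on_exec: uint8            [(tableSize + 7) / 8]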
4958 
4959 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4960 		+ sizeof(struct select_info**) * tableSize
4961 		+ (tableSize + 7) / 8);
4962 
4963 	mutex_init(&context->io_mutex, "I/O context");
4964 
4965 	// Copy all parent file descriptors
4966 
4967 	if (parentContext != NULL) {
4968 		size_t i;
4969 
4970 		mutex_lock(&sIOContextRootLock);
4971 		context->root = parentContext->root;
4972 		if (context->root)
4973 			inc_vnode_ref_count(context->root);
4974 		mutex_unlock(&sIOContextRootLock);
4975 
4976 		context->cwd = parentContext->cwd;
4977 		if (context->cwd)
4978 			inc_vnode_ref_count(context->cwd);
4979 
4980 		if (parentContext->inherit_fds) {
4981 			for (i = 0; i < tableSize; i++) {
4982 				struct file_descriptor* descriptor = parentContext->fds[i];
4983 
4984 				if (descriptor != NULL
4985 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4986 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4987 					if (closeOnExec && purgeCloseOnExec)
4988 						continue;
4989 
4990 					TFD(InheritFD(context, i, descriptor, parentContext));
4991 
4992 					context->fds[i] = descriptor;
4993 					context->num_used_fds++;
4994 					atomic_add(&descriptor->ref_count, 1);
4995 					atomic_add(&descriptor->open_count, 1);
4996 
4997 					if (closeOnExec)
4998 						fd_set_close_on_exec(context, i, true);
4999 				}
5000 			}
5001 		}
5002 
5003 		parentLocker.Unlock();
5004 	} else {
5005 		context->root = sRoot;
5006 		context->cwd = sRoot;
5007 
5008 		if (context->root)
5009 			inc_vnode_ref_count(context->root);
5010 
5011 		if (context->cwd)
5012 			inc_vnode_ref_count(context->cwd);
5013 	}
5014 
5015 	context->table_size = tableSize;
5016 	context->inherit_fds = parentContext != NULL;
5017 
5018 	list_init(&context->node_monitors);
5019 	context->max_monitors = DEFAULT_NODE_MONITORS;
5020 
5021 	return context;
5022 }
5023 
5024 
5025 void
5026 vfs_get_io_context(io_context* context)
5027 {
5028 	atomic_add(&context->ref_count, 1);
5029 }
5030 
5031 
5032 void
5033 vfs_put_io_context(io_context* context)
5034 {
5035 	if (atomic_add(&context->ref_count, -1) == 1)
5036 		free_io_context(context);
5037 }
5038 
5039 
5040 status_t
5041 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5042 {
5043 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5044 		return B_BAD_VALUE;
5045 
5046 	TIOC(ResizeIOContext(context, newSize));
5047 
5048 	MutexLocker _(context->io_mutex);
5049 
5050 	uint32 oldSize = context->table_size;
5051 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5052 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5053 
5054 	// If the tables shrink, make sure none of the fds being dropped are in use.
5055 	if (newSize < oldSize) {
5056 		for (uint32 i = oldSize; i-- > newSize;) {
5057 			if (context->fds[i])
5058 				return B_BUSY;
5059 		}
5060 	}
5061 
5062 	// store pointers to the old tables
5063 	file_descriptor** oldFDs = context->fds;
5064 	select_info** oldSelectInfos = context->select_infos;
5065 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5066 
5067 	// allocate new tables
5068 	file_descriptor** newFDs = (file_descriptor**)malloc(
5069 		sizeof(struct file_descriptor*) * newSize
5070 		+ sizeof(struct select_info**) * newSize
5071 		+ newCloseOnExecBitmapSize);
5072 	if (newFDs == NULL)
5073 		return B_NO_MEMORY;
5074 
5075 	context->fds = newFDs;
5076 	context->select_infos = (select_info**)(context->fds + newSize);
5077 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5078 	context->table_size = newSize;
5079 
5080 	// copy entries from old tables
5081 	uint32 toCopy = min_c(oldSize, newSize);
5082 
5083 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5084 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5085 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5086 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5087 
5088 	// clear additional entries, if the tables grow
5089 	if (newSize > oldSize) {
5090 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5091 		memset(context->select_infos + oldSize, 0,
5092 			sizeof(void*) * (newSize - oldSize));
5093 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5094 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5095 	}
5096 
5097 	free(oldFDs);
5098 
5099 	return B_OK;
5100 }
5101 
5102 
5103 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5104 
5105 	Given an arbitrary vnode (identified by mount and node ID), the function
5106 	checks whether the vnode is covered by another vnode. If it is, the
5107 	function returns the mount and node ID of the covering vnode. Otherwise
5108 	it simply returns the supplied mount and node ID.
5109 
5110 	In case of error (e.g. the supplied node could not be found) the variables
5111 	for storing the resolved mount and node ID remain untouched and an error
5112 	code is returned.
5113 
5114 	\param mountID The mount ID of the vnode in question.
5115 	\param nodeID The node ID of the vnode in question.
5116 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5117 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5118 	\return
5119 	- \c B_OK, if everything went fine,
5120 	- another error code, if something went wrong.
5121 */
5122 status_t
5123 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5124 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5125 {
5126 	// get the node
5127 	struct vnode* node;
5128 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5129 	if (error != B_OK)
5130 		return error;
5131 
5132 	// resolve the node
5133 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5134 		put_vnode(node);
5135 		node = coveringNode;
5136 	}
5137 
5138 	// set the return values
5139 	*resolvedMountID = node->device;
5140 	*resolvedNodeID = node->id;
5141 
5142 	put_vnode(node);
5143 
5144 	return B_OK;
5145 }
5146 
5147 
5148 status_t
5149 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5150 	ino_t* _mountPointNodeID)
5151 {
5152 	ReadLocker nodeLocker(sVnodeLock);
5153 	ReadLocker mountLocker(sMountLock);
5154 
5155 	struct fs_mount* mount = find_mount(mountID);
5156 	if (mount == NULL)
5157 		return B_BAD_VALUE;
5158 
5159 	Vnode* mountPoint = mount->covers_vnode;
5160 
5161 	*_mountPointMountID = mountPoint->device;
5162 	*_mountPointNodeID = mountPoint->id;
5163 
5164 	return B_OK;
5165 }
5166 
5167 
5168 status_t
5169 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5170 	ino_t coveredNodeID)
5171 {
5172 	// get the vnodes
5173 	Vnode* vnode;
5174 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5175 	if (error != B_OK)
5176 		return B_BAD_VALUE;
5177 	VnodePutter vnodePutter(vnode);
5178 
5179 	Vnode* coveredVnode;
5180 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5181 		false);
5182 	if (error != B_OK)
5183 		return B_BAD_VALUE;
5184 	VnodePutter coveredVnodePutter(coveredVnode);
5185 
5186 	// establish the covered/covering links
5187 	WriteLocker locker(sVnodeLock);
5188 
5189 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5190 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5191 		return B_BUSY;
5192 	}
5193 
5194 	vnode->covers = coveredVnode;
5195 	vnode->SetCovering(true);
5196 
5197 	coveredVnode->covered_by = vnode;
5198 	coveredVnode->SetCovered(true);
5199 
5200 	// the vnodes now reference each other
5201 	inc_vnode_ref_count(vnode);
5202 	inc_vnode_ref_count(coveredVnode);
5203 
5204 	return B_OK;
5205 }
5206 
5207 
5208 int
5209 vfs_getrlimit(int resource, struct rlimit* rlp)
5210 {
5211 	if (!rlp)
5212 		return B_BAD_ADDRESS;
5213 
5214 	switch (resource) {
5215 		case RLIMIT_NOFILE:
5216 		{
5217 			struct io_context* context = get_current_io_context(false);
5218 			MutexLocker _(context->io_mutex);
5219 
5220 			rlp->rlim_cur = context->table_size;
5221 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5222 			return 0;
5223 		}
5224 
5225 		case RLIMIT_NOVMON:
5226 		{
5227 			struct io_context* context = get_current_io_context(false);
5228 			MutexLocker _(context->io_mutex);
5229 
5230 			rlp->rlim_cur = context->max_monitors;
5231 			rlp->rlim_max = MAX_NODE_MONITORS;
5232 			return 0;
5233 		}
5234 
5235 		default:
5236 			return B_BAD_VALUE;
5237 	}
5238 }
5239 
5240 
5241 int
5242 vfs_setrlimit(int resource, const struct rlimit* rlp)
5243 {
5244 	if (!rlp)
5245 		return B_BAD_ADDRESS;
5246 
5247 	switch (resource) {
5248 		case RLIMIT_NOFILE:
5249 			/* TODO: check getuid() */
5250 			if (rlp->rlim_max != RLIM_SAVED_MAX
5251 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5252 				return B_NOT_ALLOWED;
5253 
5254 			return vfs_resize_fd_table(get_current_io_context(false),
5255 				rlp->rlim_cur);
5256 
5257 		case RLIMIT_NOVMON:
5258 			/* TODO: check getuid() */
5259 			if (rlp->rlim_max != RLIM_SAVED_MAX
5260 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5261 				return B_NOT_ALLOWED;
5262 
5263 			return resize_monitor_table(get_current_io_context(false),
5264 				rlp->rlim_cur);
5265 
5266 		default:
5267 			return B_BAD_VALUE;
5268 	}
5269 }
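

/*	Example (illustrative only): these two functions back the POSIX
	getrlimit()/setrlimit() calls for the VFS limits, e.g. growing the FD
	table from userland:

		struct rlimit rl;
		getrlimit(RLIMIT_NOFILE, &rl);	// rlim_cur is the current table size
		rl.rlim_cur = 512;
		setrlimit(RLIMIT_NOFILE, &rl);	// resizes via vfs_resize_fd_table()
*/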
5270 
5271 
5272 status_t
5273 vfs_init(kernel_args* args)
5274 {
5275 	vnode::StaticInit();
5276 
5277 	sVnodeTable = new(std::nothrow) VnodeTable();
5278 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5279 		panic("vfs_init: error creating vnode hash table\n");
5280 
5281 	struct vnode dummy_vnode;
5282 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5283 
5284 	struct fs_mount dummyMount;
5285 	sMountsTable = new(std::nothrow) MountTable();
5286 	if (sMountsTable == NULL
5287 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5288 		panic("vfs_init: error creating mounts hash table\n");
5289 
5290 	sPathNameCache = create_object_cache("vfs path names",
5291 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5292 	if (sPathNameCache == NULL)
5293 		panic("vfs_init: error creating path name object_cache\n");
5294 
5295 	sVnodeCache = create_object_cache("vfs vnodes",
5296 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5297 	if (sVnodeCache == NULL)
5298 		panic("vfs_init: error creating vnode object_cache\n");
5299 
5300 	sFileDescriptorCache = create_object_cache("vfs fds",
5301 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5302 	if (sFileDescriptorCache == NULL)
5303 		panic("vfs_init: error creating file descriptor object_cache\n");
5304 
5305 	node_monitor_init();
5306 
5307 	sRoot = NULL;
5308 
5309 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5310 
5311 	if (block_cache_init() != B_OK)
5312 		return B_ERROR;
5313 
5314 #ifdef ADD_DEBUGGER_COMMANDS
5315 	// add some debugger commands
5316 	add_debugger_command_etc("vnode", &dump_vnode,
5317 		"Print info about the specified vnode",
5318 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5319 		"Prints information about the vnode specified by address <vnode> or\n"
5320 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5321 		"constructed and printed. It might not be possible to construct a\n"
5322 		"complete path, though.\n",
5323 		0);
5324 	add_debugger_command("vnodes", &dump_vnodes,
5325 		"list all vnodes (from the specified device)");
5326 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5327 		"list all vnode caches");
5328 	add_debugger_command("mount", &dump_mount,
5329 		"info about the specified fs_mount");
5330 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5331 	add_debugger_command("io_context", &dump_io_context,
5332 		"info about the I/O context");
5333 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5334 		"info about vnode usage");
5335 #endif
5336 
5337 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5338 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5339 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5340 		0);
5341 
5342 	fifo_init();
5343 	file_map_init();
5344 
5345 	return file_cache_init();
5346 }
5347 
5348 
5349 //	#pragma mark - fd_ops implementations
5350 
5351 
5352 /*!
5353 	Calls fs_open() on the given vnode and returns a new
5354 	file descriptor for it
5355 */
5356 static int
5357 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5358 {
5359 	void* cookie;
5360 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5361 	if (status != B_OK)
5362 		return status;
5363 
5364 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5365 	if (fd < 0) {
5366 		FS_CALL(vnode, close, cookie);
5367 		FS_CALL(vnode, free_cookie, cookie);
5368 	}
5369 	return fd;
5370 }
5371 
5372 
5373 /*!
5374 	Calls fs_open() on the given vnode and returns a new
5375 	file descriptor for it
5376 */
5377 static int
5378 create_vnode(struct vnode* directory, const char* name, int openMode,
5379 	int perms, bool kernel)
5380 {
5381 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5382 	status_t status = B_ERROR;
5383 	VnodePutter vnode, dirPutter;
5384 	void* cookie;
5385 	ino_t newID;
5386 	char clonedName[B_FILE_NAME_LENGTH + 1];
5387 
5388 	// This is somewhat tricky: If the entry already exists, the FS responsible
5389 	// for the directory might not necessarily also be the one responsible for
5390 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5391 	// we can actually never call the create() hook without O_EXCL. Instead we
5392 	// try to look the entry up first. If it already exists, we just open the
5393 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5394 	// introduces a race condition, since someone else might have created the
5395 	// entry in the meantime. We hope the respective FS returns the correct
5396 	// error code in that case, and we simply retry (up to 3 times).
5397 
5398 	for (int i = 0; i < 3 && status != B_OK; i++) {
5399 		bool create = false;
5400 
5401 		// look the node up
5402 		{
5403 			struct vnode* entry = NULL;
5404 			status = lookup_dir_entry(directory, name, &entry);
5405 			vnode.SetTo(entry);
5406 		}
5407 		if (status == B_OK) {
5408 			if ((openMode & O_EXCL) != 0)
5409 				return B_FILE_EXISTS;
5410 
5411 			// If the node is a symlink, we have to follow it, unless
5412 			// O_NOTRAVERSE is set.
5413 			if (S_ISLNK(vnode->Type()) && traverse) {
5414 				vnode.Unset();
5415 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5416 						>= B_FILE_NAME_LENGTH) {
5417 					return B_NAME_TOO_LONG;
5418 				}
5419 
5420 				inc_vnode_ref_count(directory);
5421 				dirPutter.Unset();
5422 				status = vnode_path_to_vnode(directory, clonedName, true,
5423 					kernel, vnode, NULL, clonedName);
5424 				if (status != B_OK) {
5425 					// The vnode was not found, but maybe it has a parent
5426 					// and we can create it from there. In that case,
5427 					// vnode_path_to_vnode() has set vnode to the last
5428 					// directory found in the path.
5428 					if (status == B_ENTRY_NOT_FOUND) {
5429 						directory = vnode.Detach();
5430 						dirPutter.SetTo(directory);
5431 						name = clonedName;
5432 						create = true;
5433 					} else
5434 						return status;
5435 				}
5436 			}
5437 
5438 			if (!create) {
5439 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5440 					return B_LINK_LIMIT;
5441 
5442 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5443 				// on success keep the vnode reference for the FD
5444 				if (fd >= 0)
5445 					vnode.Detach();
5446 
5447 				return fd;
5448 			}
5449 		}
5450 
5451 		// it doesn't exist yet -- try to create it
5452 
5453 		if (!HAS_FS_CALL(directory, create))
5454 			return B_READ_ONLY_DEVICE;
5455 
5456 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5457 			&cookie, &newID);
5458 		if (status != B_OK
5459 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5460 			return status;
5461 		}
5462 	}
5463 
5464 	if (status != B_OK)
5465 		return status;
5466 
5467 	// the node has been created successfully
5468 
5469 	rw_lock_read_lock(&sVnodeLock);
5470 	vnode.SetTo(lookup_vnode(directory->device, newID));
5471 	rw_lock_read_unlock(&sVnodeLock);
5472 
5473 	if (!vnode.IsSet()) {
5474 		panic("vfs: fs_create() returned success but there is no vnode, "
5475 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5476 		return B_BAD_VALUE;
5477 	}
5478 
5479 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode.Get(), cookie, openMode, kernel);
5480 	if (fd >= 0) {
5481 		vnode.Detach();
5482 		return fd;
5483 	}
5484 
5485 	status = fd;
5486 
5487 	// something went wrong, clean up
5488 
5489 	FS_CALL(vnode.Get(), close, cookie);
5490 	FS_CALL(vnode.Get(), free_cookie, cookie);
5491 
5492 	FS_CALL(directory, unlink, name);
5493 
5494 	return status;
5495 }
5496 
5497 
5498 /*! Calls fs open_dir() on the given vnode and returns a new
5499 	file descriptor for it
5500 */
5501 static int
5502 open_dir_vnode(struct vnode* vnode, bool kernel)
5503 {
5504 	if (!HAS_FS_CALL(vnode, open_dir))
5505 		return B_UNSUPPORTED;
5506 
5507 	void* cookie;
5508 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5509 	if (status != B_OK)
5510 		return status;
5511 
5512 	// directory is opened, create a fd
5513 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5514 	if (status >= 0)
5515 		return status;
5516 
5517 	FS_CALL(vnode, close_dir, cookie);
5518 	FS_CALL(vnode, free_dir_cookie, cookie);
5519 
5520 	return status;
5521 }
5522 
5523 
5524 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5525 	file descriptor for it.
5526 	Used by attr_dir_open() and attr_dir_open_fd().
5527 */
5528 static int
5529 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5530 {
5531 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5532 		return B_UNSUPPORTED;
5533 
5534 	void* cookie;
5535 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5536 	if (status != B_OK)
5537 		return status;
5538 
5539 	// directory is opened, create a fd
5540 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5541 		kernel);
5542 	if (status >= 0)
5543 		return status;
5544 
5545 	FS_CALL(vnode, close_attr_dir, cookie);
5546 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5547 
5548 	return status;
5549 }
5550 
5551 
5552 static int
5553 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5554 	int openMode, int perms, bool kernel)
5555 {
5556 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5557 		"kernel %d\n", name, openMode, perms, kernel));
5558 
5559 	// get directory to put the new file in
5560 	struct vnode* directory;
5561 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5562 	if (status != B_OK)
5563 		return status;
5564 
5565 	status = create_vnode(directory, name, openMode, perms, kernel);
5566 	put_vnode(directory);
5567 
5568 	return status;
5569 }
5570 
5571 
5572 static int
5573 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5574 {
5575 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5576 		openMode, perms, kernel));
5577 
5578 	// get directory to put the new file in
5579 	char name[B_FILE_NAME_LENGTH];
5580 	VnodePutter directory;
5581 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5582 		kernel);
5583 	if (status < 0)
5584 		return status;
5585 
5586 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5587 }
5588 
5589 
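/*!	Opens the file specified by the given entry_ref. Symlinks are only
	traversed if neither O_NOTRAVERSE nor O_NOFOLLOW is set in openMode;
	with O_NOFOLLOW set, opening a symlink fails with B_LINK_LIMIT.
	On success, the vnode reference is transferred to the returned FD.
*/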
5590 static int
5591 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5592 	int openMode, bool kernel)
5593 {
5594 	if (name == NULL || *name == '\0')
5595 		return B_BAD_VALUE;
5596 
5597 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5598 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5599 
5600 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5601 
5602 	// get the vnode matching the entry_ref
5603 	VnodePutter vnode;
5604 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5605 		kernel, vnode);
5606 	if (status != B_OK)
5607 		return status;
5608 
5609 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5610 		return B_LINK_LIMIT;
5611 
5612 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5613 	if (newFD >= 0) {
5614 		cache_node_opened(vnode.Get(), FDTYPE_FILE, vnode->cache, mountID,
5615 			directoryID, vnode->id, name);
5616 
5617 		// The vnode reference has been transferred to the FD
5618 		vnode.Detach();
5619 	}
5620 
5621 	return newFD;
5622 }
5623 
5624 
5625 static int
5626 file_open(int fd, char* path, int openMode, bool kernel)
5627 {
5628 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5629 
5630 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5631 		fd, path, openMode, kernel));
5632 
5633 	// get the vnode matching the vnode + path combination
5634 	VnodePutter vnode;
5635 	ino_t parentID;
5636 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5637 		&parentID, kernel);
5638 	if (status != B_OK)
5639 		return status;
5640 
5641 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5642 		return B_LINK_LIMIT;
5643 
5644 	// open the vnode
5645 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5646 	if (newFD >= 0) {
5647 		cache_node_opened(vnode.Get(), FDTYPE_FILE, vnode->cache,
5648 			vnode->device, parentID, vnode->id, NULL);
5649 
5650 		// The vnode reference has been transferred to the FD
5651 		vnode.Detach();
5652 	}
5653 
5654 	return newFD;
5655 }
5656 
5657 
5658 static status_t
5659 file_close(struct file_descriptor* descriptor)
5660 {
5661 	struct vnode* vnode = descriptor->u.vnode;
5662 	status_t status = B_OK;
5663 
5664 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5665 
5666 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5667 		vnode->id);
5668 	if (HAS_FS_CALL(vnode, close)) {
5669 		status = FS_CALL(vnode, close, descriptor->cookie);
5670 	}
5671 
5672 	if (status == B_OK) {
5673 		// remove all outstanding locks for this team
5674 		if (HAS_FS_CALL(vnode, release_lock))
5675 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5676 		else
5677 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5678 	}
5679 	return status;
5680 }
5681 
5682 
5683 static void
5684 file_free_fd(struct file_descriptor* descriptor)
5685 {
5686 	struct vnode* vnode = descriptor->u.vnode;
5687 
5688 	if (vnode != NULL) {
5689 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5690 		put_vnode(vnode);
5691 	}
5692 }
5693 
5694 
5695 static status_t
5696 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5697 	size_t* length)
5698 {
5699 	struct vnode* vnode = descriptor->u.vnode;
5700 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5701 		pos, length, *length));
5702 
5703 	if (S_ISDIR(vnode->Type()))
5704 		return B_IS_A_DIRECTORY;
5705 	if (pos != -1 && descriptor->pos == -1)
5706 		return ESPIPE;
5707 
5708 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5709 }
5710 
5711 
5712 static status_t
5713 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5714 	size_t* length)
5715 {
5716 	struct vnode* vnode = descriptor->u.vnode;
5717 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5718 		length));
5719 
5720 	if (S_ISDIR(vnode->Type()))
5721 		return B_IS_A_DIRECTORY;
5722 	if (pos != -1 && descriptor->pos == -1)
5723 		return ESPIPE;
5724 
5725 	if (!HAS_FS_CALL(vnode, write))
5726 		return B_READ_ONLY_DEVICE;
5727 
5728 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5729 }
5730 
5731 
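/*!	Implements seeking on a file descriptor. SEEK_END stat()s the node;
	for devices that report a zero size, the size is computed from the
	B_GET_GEOMETRY ioctl instead. SEEK_DATA/SEEK_HOLE are forwarded to the
	file system via the FIOSEEKDATA/FIOSEEKHOLE ioctls where available,
	with a stat()-based fallback that treats the file as a single data
	extent followed by the hole at its end. Returns the new position, or
	an error code; descriptors without a position (pos == -1) yield
	ESPIPE.
*/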
5732 static off_t
5733 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5734 {
5735 	struct vnode* vnode = descriptor->u.vnode;
5736 	off_t offset;
5737 	bool isDevice = false;
5738 
5739 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5740 		seekType));
5741 
5742 	if (descriptor->pos == -1)
5743 		return ESPIPE;
5744 
5745 	switch (vnode->Type() & S_IFMT) {
5746 		// drivers publish block devices as character devices, so check both
5747 		case S_IFBLK:
5748 		case S_IFCHR:
5749 			isDevice = true;
5750 			break;
5751 	}
5752 
5753 	switch (seekType) {
5754 		case SEEK_SET:
5755 			offset = 0;
5756 			break;
5757 		case SEEK_CUR:
5758 			offset = descriptor->pos;
5759 			break;
5760 		case SEEK_END:
5761 		{
5762 			// stat() the node
5763 			if (!HAS_FS_CALL(vnode, read_stat))
5764 				return B_UNSUPPORTED;
5765 
5766 			struct stat stat;
5767 			status_t status = FS_CALL(vnode, read_stat, &stat);
5768 			if (status != B_OK)
5769 				return status;
5770 
5771 			offset = stat.st_size;
5772 
5773 			if (offset == 0 && isDevice) {
5774 				// stat() on regular drivers doesn't report size
5775 				device_geometry geometry;
5776 
5777 				if (HAS_FS_CALL(vnode, ioctl)) {
5778 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5779 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5780 					if (status == B_OK)
5781 						offset = (off_t)geometry.bytes_per_sector
5782 							* geometry.sectors_per_track
5783 							* geometry.cylinder_count
5784 							* geometry.head_count;
5785 				}
5786 			}
5787 
5788 			break;
5789 		}
5790 		case SEEK_DATA:
5791 		case SEEK_HOLE:
5792 		{
5793 			status_t status = B_BAD_VALUE;
5794 			if (HAS_FS_CALL(vnode, ioctl)) {
5795 				offset = pos;
5796 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5797 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5798 					&offset, sizeof(offset));
5799 				if (status == B_OK) {
5800 					if (offset > pos)
5801 						offset -= pos;
5802 					break;
5803 				}
5804 			}
5805 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5806 				return status;
5807 
5808 			// fallback implementation that stat()s the node
5809 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5810 				return B_BAD_VALUE;
5811 
5812 			struct stat stat;
5813 			status = FS_CALL(vnode, read_stat, &stat);
5814 			if (status != B_OK)
5815 				return status;
5816 
5817 			off_t end = stat.st_size;
5818 			if (pos >= end)
5819 				return ENXIO;
5820 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5821 			break;
5822 		}
5823 		default:
5824 			return B_BAD_VALUE;
5825 	}
5826 
5827 	// assumes off_t is 64 bits wide
5828 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5829 		return B_BUFFER_OVERFLOW;
5830 
5831 	pos += offset;
5832 	if (pos < 0)
5833 		return B_BAD_VALUE;
5834 
5835 	return descriptor->pos = pos;
5836 }
5837 
5838 
5839 static status_t
5840 file_select(struct file_descriptor* descriptor, uint8 event,
5841 	struct selectsync* sync)
5842 {
5843 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5844 
5845 	struct vnode* vnode = descriptor->u.vnode;
5846 
5847 	// If the FS has no select() hook, notify select() now.
5848 	if (!HAS_FS_CALL(vnode, select)) {
5849 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5850 			return notify_select_event(sync, event);
5851 		else
5852 			return B_OK;
5853 	}
5854 
5855 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5856 }
5857 
5858 
5859 static status_t
5860 file_deselect(struct file_descriptor* descriptor, uint8 event,
5861 	struct selectsync* sync)
5862 {
5863 	struct vnode* vnode = descriptor->u.vnode;
5864 
5865 	if (!HAS_FS_CALL(vnode, deselect))
5866 		return B_OK;
5867 
5868 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5869 }
5870 
5871 
5872 static status_t
5873 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5874 	bool kernel)
5875 {
5876 	struct vnode* vnode;
5877 	status_t status;
5878 
5879 	if (name == NULL || *name == '\0')
5880 		return B_BAD_VALUE;
5881 
5882 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5883 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5884 
5885 	status = get_vnode(mountID, parentID, &vnode, true, false);
5886 	if (status != B_OK)
5887 		return status;
5888 
5889 	if (HAS_FS_CALL(vnode, create_dir))
5890 		status = FS_CALL(vnode, create_dir, name, perms);
5891 	else
5892 		status = B_READ_ONLY_DEVICE;
5893 
5894 	put_vnode(vnode);
5895 	return status;
5896 }
5897 
5898 
5899 static status_t
5900 dir_create(int fd, char* path, int perms, bool kernel)
5901 {
5902 	char filename[B_FILE_NAME_LENGTH];
5903 	status_t status;
5904 
5905 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5906 		kernel));
5907 
5908 	VnodePutter vnode;
5909 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5910 	if (status < 0)
5911 		return status;
5912 
5913 	if (HAS_FS_CALL(vnode, create_dir)) {
5914 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5915 	} else
5916 		status = B_READ_ONLY_DEVICE;
5917 
5918 	return status;
5919 }
5920 
5921 
5922 static int
5923 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5924 {
5925 	FUNCTION(("dir_open_entry_ref()\n"));
5926 
5927 	if (name && name[0] == '\0')
5928 		return B_BAD_VALUE;
5929 
5930 	// get the vnode matching the entry_ref/node_ref
5931 	VnodePutter vnode;
5932 	status_t status;
5933 	if (name) {
5934 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5935 			vnode);
5936 	} else {
5937 		struct vnode* temp = NULL;
5938 		status = get_vnode(mountID, parentID, &temp, true, false);
5939 		vnode.SetTo(temp);
5940 	}
5941 	if (status != B_OK)
5942 		return status;
5943 
5944 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5945 	if (newFD >= 0) {
5946 		cache_node_opened(vnode.Get(), FDTYPE_DIR, vnode->cache, mountID, parentID,
5947 			vnode->id, name);
5948 
5949 		// The vnode reference has been transferred to the FD
5950 		vnode.Detach();
5951 	}
5952 
5953 	return newFD;
5954 }
5955 
5956 
5957 static int
5958 dir_open(int fd, char* path, bool kernel)
5959 {
5960 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5961 		kernel));
5962 
5963 	// get the vnode matching the vnode + path combination
5964 	VnodePutter vnode;
5965 	ino_t parentID;
5966 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
5967 		kernel);
5968 	if (status != B_OK)
5969 		return status;
5970 
5971 	// open the dir
5972 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5973 	if (newFD >= 0) {
5974 		cache_node_opened(vnode.Get(), FDTYPE_DIR, vnode->cache, vnode->device,
5975 			parentID, vnode->id, NULL);
5976 
5977 		// The vnode reference has been transferred to the FD
5978 		vnode.Detach();
5979 	}
5980 
5981 	return newFD;
5982 }
5983 
5984 
5985 static status_t
5986 dir_close(struct file_descriptor* descriptor)
5987 {
5988 	struct vnode* vnode = descriptor->u.vnode;
5989 
5990 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5991 
5992 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5993 		vnode->id);
5994 	if (HAS_FS_CALL(vnode, close_dir))
5995 		return FS_CALL(vnode, close_dir, descriptor->cookie);
5996 
5997 	return B_OK;
5998 }
5999 
6000 
6001 static void
6002 dir_free_fd(struct file_descriptor* descriptor)
6003 {
6004 	struct vnode* vnode = descriptor->u.vnode;
6005 
6006 	if (vnode != NULL) {
6007 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6008 		put_vnode(vnode);
6009 	}
6010 }
6011 
6012 
6013 static status_t
6014 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6015 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6016 {
6017 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6018 		bufferSize, _count);
6019 }
6020 
6021 
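/*!	Post-processes a dirent returned by a file system: fills in the
	parent device/node IDs (d_pdev/d_pino), resolves ".." across mount
	boundaries for covering directories, and replaces the IDs of covered
	vnodes with those of the topmost covering vnode.
*/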
6022 static status_t
6023 fix_dirent(struct vnode* parent, struct dirent* entry,
6024 	struct io_context* ioContext)
6025 {
6026 	// set d_pdev and d_pino
6027 	entry->d_pdev = parent->device;
6028 	entry->d_pino = parent->id;
6029 
6030 	// If this is the ".." entry and the directory is covering another vnode,
6031 	// we need to replace d_dev and d_ino with the actual values.
6032 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6033 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6034 			ioContext);
6035 	}
6036 
6037 	// resolve covered vnodes
6038 	ReadLocker _(&sVnodeLock);
6039 
6040 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6041 	if (vnode != NULL && vnode->covered_by != NULL) {
6042 		do {
6043 			vnode = vnode->covered_by;
6044 		} while (vnode->covered_by != NULL);
6045 
6046 		entry->d_dev = vnode->device;
6047 		entry->d_ino = vnode->id;
6048 	}
6049 
6050 	return B_OK;
6051 }
6052 
6053 
6054 static status_t
6055 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6056 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6057 {
6058 	if (!HAS_FS_CALL(vnode, read_dir))
6059 		return B_UNSUPPORTED;
6060 
6061 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6062 		_count);
6063 	if (error != B_OK)
6064 		return error;
6065 
6066 	// we need to adjust the read dirents
6067 	uint32 count = *_count;
6068 	for (uint32 i = 0; i < count; i++) {
6069 		error = fix_dirent(vnode, buffer, ioContext);
6070 		if (error != B_OK)
6071 			return error;
6072 
6073 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6074 	}
6075 
6076 	return error;
6077 }
6078 
6079 
6080 static status_t
6081 dir_rewind(struct file_descriptor* descriptor)
6082 {
6083 	struct vnode* vnode = descriptor->u.vnode;
6084 
6085 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6086 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6087 	}
6088 
6089 	return B_UNSUPPORTED;
6090 }
6091 
6092 
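/*!	Removes the directory specified by fd + path. Trailing slashes and
	"." components are stripped from the path first; attempts to remove
	"." or ".." are rejected with B_NOT_ALLOWED.
*/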
6093 static status_t
6094 dir_remove(int fd, char* path, bool kernel)
6095 {
6096 	char name[B_FILE_NAME_LENGTH];
6097 	status_t status;
6098 
6099 	if (path != NULL) {
6100 		// we need to make sure our path name doesn't end in "/", ".",
6101 		// or ".."
6102 		char* lastSlash;
6103 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6104 			char* leaf = lastSlash + 1;
6105 			if (!strcmp(leaf, ".."))
6106 				return B_NOT_ALLOWED;
6107 
6108 			// omit multiple slashes
6109 			while (lastSlash > path && lastSlash[-1] == '/')
6110 				lastSlash--;
6111 
6112 			if (leaf[0] != '\0'
6113 				&& strcmp(leaf, ".") != 0) {
6114 				break;
6115 			}
6116 			// "name/" -> "name", or "name/." -> "name"
6117 			lastSlash[0] = '\0';
6118 		}
6119 
6120 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6121 			return B_NOT_ALLOWED;
6122 	}
6123 
6124 	VnodePutter directory;
6125 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6126 	if (status != B_OK)
6127 		return status;
6128 
6129 	if (HAS_FS_CALL(directory, remove_dir))
6130 		status = FS_CALL(directory.Get(), remove_dir, name);
6131 	else
6132 		status = B_READ_ONLY_DEVICE;
6133 
6134 	return status;
6135 }
6136 
6137 
6138 static status_t
6139 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6140 	size_t length)
6141 {
6142 	struct vnode* vnode = descriptor->u.vnode;
6143 
6144 	if (HAS_FS_CALL(vnode, ioctl))
6145 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6146 
6147 	return B_DEV_INVALID_IOCTL;
6148 }
6149 
6150 
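/*!	Backend for fcntl(): implements F_SETFD/F_GETFD (close-on-exec flag),
	F_SETFL/F_GETFL (the modifiable part of the open mode, i.e. O_APPEND
	and O_NONBLOCK), F_DUPFD/F_DUPFD_CLOEXEC, and the advisory locking
	ops F_GETLK/F_SETLK/F_SETLKW.
	A typical userland call that ends up here with op == F_SETLKW would
	look like this (illustrative only):
		struct flock lock = {};
		lock.l_type = F_WRLCK;
		lock.l_whence = SEEK_SET;
		lock.l_start = 0;
		lock.l_len = 0;	// lock the whole file
		fcntl(fd, F_SETLKW, &lock);
*/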
6151 static status_t
6152 common_fcntl(int fd, int op, size_t argument, bool kernel)
6153 {
6154 	struct flock flock;
6155 
6156 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6157 		fd, op, argument, kernel ? "kernel" : "user"));
6158 
6159 	struct io_context* context = get_current_io_context(kernel);
6160 
6161 	FileDescriptorPutter descriptor(get_fd(context, fd));
6162 	if (!descriptor.IsSet())
6163 		return B_FILE_ERROR;
6164 
6165 	struct vnode* vnode = fd_vnode(descriptor.Get());
6166 
6167 	status_t status = B_OK;
6168 
6169 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6170 		if (descriptor->type != FDTYPE_FILE)
6171 			status = B_BAD_VALUE;
6172 		else if (kernel)
6173 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6174 		else if (user_memcpy(&flock, (struct flock*)argument,
6175 				sizeof(struct flock)) != B_OK)
6176 			status = B_BAD_ADDRESS;
6177 		if (status != B_OK)
6178 			return status;
6179 	}
6180 
6181 	switch (op) {
6182 		case F_SETFD:
6183 		{
6184 			// Set file descriptor flags
6185 
6186 			// FD_CLOEXEC is the only flag available at this time
6187 			mutex_lock(&context->io_mutex);
6188 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6189 			mutex_unlock(&context->io_mutex);
6190 
6191 			status = B_OK;
6192 			break;
6193 		}
6194 
6195 		case F_GETFD:
6196 		{
6197 			// Get file descriptor flags
6198 			mutex_lock(&context->io_mutex);
6199 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6200 			mutex_unlock(&context->io_mutex);
6201 			break;
6202 		}
6203 
6204 		case F_SETFL:
6205 		{
6206 			// Set file descriptor open mode
6207 
6208 			// we only accept changes to certain flags
6209 			const int32 modifiableFlags = O_APPEND | O_NONBLOCK;
6210 			argument &= modifiableFlags;
6211 
6212 			if (descriptor->ops->fd_set_flags != NULL) {
6213 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6214 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6215 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6216 					(int)argument);
6217 			} else
6218 				status = B_UNSUPPORTED;
6219 
6220 			if (status == B_OK) {
6221 				// update this descriptor's open_mode field
6222 				descriptor->open_mode = (descriptor->open_mode
6223 					& ~modifiableFlags) | argument;
6224 			}
6225 
6226 			break;
6227 		}
6228 
6229 		case F_GETFL:
6230 			// Get file descriptor open mode
6231 			status = descriptor->open_mode;
6232 			break;
6233 
6234 		case F_DUPFD:
6235 		case F_DUPFD_CLOEXEC:
6236 		{
6237 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6238 			if (status >= 0) {
6239 				mutex_lock(&context->io_mutex);
6240 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6241 				mutex_unlock(&context->io_mutex);
6242 
6243 				atomic_add(&descriptor->ref_count, 1);
6244 			}
6245 			break;
6246 		}
6247 
6248 		case F_GETLK:
6249 			if (vnode != NULL) {
6250 				struct flock normalizedLock;
6251 
6252 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6253 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6254 				if (status != B_OK)
6255 					break;
6256 
6257 				if (HAS_FS_CALL(vnode, test_lock)) {
6258 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6259 						&normalizedLock);
6260 				} else
6261 					status = test_advisory_lock(vnode, &normalizedLock);
6262 				if (status == B_OK) {
6263 					if (normalizedLock.l_type == F_UNLCK) {
6264 						// no conflicting lock found, copy back the same struct
6265 						// we were given except change type to F_UNLCK
6266 						flock.l_type = F_UNLCK;
6267 						if (kernel) {
6268 							memcpy((struct flock*)argument, &flock,
6269 								sizeof(struct flock));
6270 						} else {
6271 							status = user_memcpy((struct flock*)argument,
6272 								&flock, sizeof(struct flock));
6273 						}
6274 					} else {
6275 						// a conflicting lock was found, copy back its range and
6276 						// type
6277 						if (normalizedLock.l_len == OFF_MAX)
6278 							normalizedLock.l_len = 0;
6279 
6280 						if (kernel) {
6281 							memcpy((struct flock*)argument,
6282 								&normalizedLock, sizeof(struct flock));
6283 						} else {
6284 							status = user_memcpy((struct flock*)argument,
6285 								&normalizedLock, sizeof(struct flock));
6286 						}
6287 					}
6288 				}
6289 			} else
6290 				status = B_BAD_VALUE;
6291 			break;
6292 
6293 		case F_SETLK:
6294 		case F_SETLKW:
6295 			status = normalize_flock(descriptor.Get(), &flock);
6296 			if (status != B_OK)
6297 				break;
6298 
6299 			if (vnode == NULL) {
6300 				status = B_BAD_VALUE;
6301 			} else if (flock.l_type == F_UNLCK) {
6302 				if (HAS_FS_CALL(vnode, release_lock)) {
6303 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6304 						&flock);
6305 				} else {
6306 					status = release_advisory_lock(vnode, context, NULL,
6307 						&flock);
6308 				}
6309 			} else {
6310 				// the open mode must match the lock type
6311 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6312 						&& flock.l_type == F_WRLCK)
6313 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6314 						&& flock.l_type == F_RDLCK))
6315 					status = B_FILE_ERROR;
6316 				else {
6317 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6318 						status = FS_CALL(vnode, acquire_lock,
6319 							descriptor->cookie, &flock, op == F_SETLKW);
6320 					} else {
6321 						status = acquire_advisory_lock(vnode, context, NULL,
6322 							&flock, op == F_SETLKW);
6323 					}
6324 				}
6325 			}
6326 			break;
6327 
6328 		// ToDo: add support for more ops?
6329 
6330 		default:
6331 			status = B_BAD_VALUE;
6332 	}
6333 
6334 	return status;
6335 }
6336 
6337 
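/*!	Backend for fsync(): forwards to the file system's fsync() hook, or
	returns B_UNSUPPORTED if the FS doesn't provide one.
*/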
6338 static status_t
6339 common_sync(int fd, bool kernel)
6340 {
6341 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6342 
6343 	struct vnode* vnode;
6344 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6345 	if (!descriptor.IsSet())
6346 		return B_FILE_ERROR;
6347 
6348 	status_t status;
6349 	if (HAS_FS_CALL(vnode, fsync))
6350 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6351 	else
6352 		status = B_UNSUPPORTED;
6353 
6354 	return status;
6355 }
6356 
6357 
6358 static status_t
6359 common_lock_node(int fd, bool kernel)
6360 {
6361 	struct vnode* vnode;
6362 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6363 	if (!descriptor.IsSet())
6364 		return B_FILE_ERROR;
6365 
6366 	status_t status = B_OK;
6367 
6368 	// We need to set the lock holder atomically - someone
6369 	// else might set one at the same time
6370 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6371 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6372 		status = B_BUSY;
6373 
6374 	return status;
6375 }
6376 
6377 
6378 static status_t
6379 common_unlock_node(int fd, bool kernel)
6380 {
6381 	struct vnode* vnode;
6382 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6383 	if (!descriptor.IsSet())
6384 		return B_FILE_ERROR;
6385 
6386 	status_t status = B_OK;
6387 
6388 	// We need to clear the lock holder atomically - someone
6389 	// else might change it at the same time
6390 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6391 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6392 		status = B_BAD_VALUE;
6393 
6394 	return status;
6395 }
6396 
6397 
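/*!	Preallocates "length" bytes starting at "offset" for the file
	referred to by fd (posix_fallocate()-style). Only regular files are
	supported: FIFOs and sockets yield ESPIPE, other special nodes
	B_DEVICE_NOT_FOUND. Without a preallocate() hook, writable volumes
	return B_UNSUPPORTED, read-only ones B_READ_ONLY_DEVICE.
*/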
6398 static status_t
6399 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6400 {
6401 	if (offset < 0 || length == 0)
6402 		return B_BAD_VALUE;
6403 	if (offset > OFF_MAX - length)
6404 		return B_FILE_TOO_LARGE;
6405 
6406 	struct vnode* vnode;
6407 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6408 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6409 		return B_FILE_ERROR;
6410 
6411 	switch (vnode->Type() & S_IFMT) {
6412 		case S_IFIFO:
6413 		case S_IFSOCK:
6414 			return ESPIPE;
6415 
6416 		case S_IFBLK:
6417 		case S_IFCHR:
6418 		case S_IFDIR:
6419 		case S_IFLNK:
6420 			return B_DEVICE_NOT_FOUND;
6421 
6422 		case S_IFREG:
6423 			break;
6424 	}
6425 
6426 	status_t status = B_OK;
6427 	if (HAS_FS_CALL(vnode, preallocate)) {
6428 		status = FS_CALL(vnode, preallocate, offset, length);
6429 	} else {
6430 		status = HAS_FS_CALL(vnode, write)
6431 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6432 	}
6433 
6434 	return status;
6435 }
6436 
6437 
6438 static status_t
6439 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6440 	bool kernel)
6441 {
6442 	VnodePutter vnode;
6443 	status_t status;
6444 
6445 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6446 	if (status != B_OK)
6447 		return status;
6448 
6449 	if (HAS_FS_CALL(vnode, read_symlink)) {
6450 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6451 	} else
6452 		status = B_BAD_VALUE;
6453 
6454 	return status;
6455 }
6456 
6457 
6458 static status_t
6459 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6460 	bool kernel)
6461 {
6462 	// path validity checks have to be in the calling function!
6463 	char name[B_FILE_NAME_LENGTH];
6464 	status_t status;
6465 
6466 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6467 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6468 
6469 	VnodePutter vnode;
6470 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6471 	if (status != B_OK)
6472 		return status;
6473 
6474 	if (HAS_FS_CALL(vnode, create_symlink))
6475 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6476 	else {
6477 		status = HAS_FS_CALL(vnode, write)
6478 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6479 	}
6480 
6481 	return status;
6482 }
6483 
6484 
6485 static status_t
6486 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6487 	bool traverseLeafLink, bool kernel)
6488 {
6489 	// path validity checks have to be in the calling function!
6490 
6491 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6492 		toPath, kernel));
6493 
6494 	char name[B_FILE_NAME_LENGTH];
6495 	VnodePutter directory;
6496 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6497 		kernel);
6498 	if (status != B_OK)
6499 		return status;
6500 
6501 	VnodePutter vnode;
6502 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6503 		kernel);
6504 	if (status != B_OK)
6505 		return status;
6506 
6507 	if (directory->mount != vnode->mount)
6508 		return B_CROSS_DEVICE_LINK;
6509 
6510 	if (HAS_FS_CALL(directory, link))
6511 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6512 	else
6513 		status = B_READ_ONLY_DEVICE;
6514 
6515 	return status;
6516 }
6517 
6518 
6519 static status_t
6520 common_unlink(int fd, char* path, bool kernel)
6521 {
6522 	char filename[B_FILE_NAME_LENGTH];
6523 	status_t status;
6524 
6525 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6526 		kernel));
6527 
6528 	VnodePutter vnode;
6529 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6530 	if (status < 0)
6531 		return status;
6532 
6533 	if (HAS_FS_CALL(vnode, unlink))
6534 		status = FS_CALL(vnode.Get(), unlink, filename);
6535 	else
6536 		status = B_READ_ONLY_DEVICE;
6537 
6538 	return status;
6539 }
6540 
6541 
6542 static status_t
6543 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6544 {
6545 	status_t status;
6546 
6547 	// TODO: honor effectiveUserGroup argument
6548 
6549 	VnodePutter vnode;
6550 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6551 	if (status != B_OK)
6552 		return status;
6553 
6554 	if (HAS_FS_CALL(vnode, access))
6555 		status = FS_CALL(vnode.Get(), access, mode);
6556 	else
6557 		status = B_OK;
6558 
6559 	return status;
6560 }
6561 
6562 
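/*!	Renames the entry fd + path to newFD + newPath. Both entries must
	reside on the same volume (B_CROSS_DEVICE_LINK otherwise), and
	neither leaf name may be empty, ".", or "..". Renaming an entry onto
	itself succeeds as a no-op.
*/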
6563 static status_t
6564 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6565 {
6566 	status_t status;
6567 
6568 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6569 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6570 
6571 	VnodePutter fromVnode;
6572 	char fromName[B_FILE_NAME_LENGTH];
6573 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6574 	if (status != B_OK)
6575 		return status;
6576 
6577 	VnodePutter toVnode;
6578 	char toName[B_FILE_NAME_LENGTH];
6579 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6580 	if (status != B_OK)
6581 		return status;
6582 
6583 	if (fromVnode->device != toVnode->device)
6584 		return B_CROSS_DEVICE_LINK;
6585 
6586 	if (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))
6587 		return B_OK;
6588 
6589 	if (fromName[0] == '\0' || toName[0] == '\0'
6590 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6591 		|| !strcmp(toName, ".") || !strcmp(toName, "..")) {
6592 		return B_BAD_VALUE;
6593 	}
6594 
6595 	if (HAS_FS_CALL(fromVnode, rename))
6596 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6597 	else
6598 		status = B_READ_ONLY_DEVICE;
6599 
6600 	return status;
6601 }
6602 
6603 
6604 static status_t
6605 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6606 {
6607 	struct vnode* vnode = descriptor->u.vnode;
6608 
6609 	FUNCTION(("common_read_stat: stat %p\n", stat));
6610 
6611 	// TODO: remove this once all file systems properly set them!
6612 	stat->st_crtim.tv_nsec = 0;
6613 	stat->st_ctim.tv_nsec = 0;
6614 	stat->st_mtim.tv_nsec = 0;
6615 	stat->st_atim.tv_nsec = 0;
6616 
6617 	return vfs_stat_vnode(vnode, stat);
6618 }
6619 
6620 
6621 static status_t
6622 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6623 	int statMask)
6624 {
6625 	struct vnode* vnode = descriptor->u.vnode;
6626 
6627 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6628 		vnode, stat, statMask));
6629 
6630 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6631 		&& (statMask & B_STAT_SIZE) != 0) {
6632 		return B_BAD_VALUE;
6633 	}
6634 
6635 	if (!HAS_FS_CALL(vnode, write_stat))
6636 		return B_READ_ONLY_DEVICE;
6637 
6638 	return FS_CALL(vnode, write_stat, stat, statMask);
6639 }
6640 
6641 
6642 static status_t
6643 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6644 	struct stat* stat, bool kernel)
6645 {
6646 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p\n", fd, path,
6647 		stat));
6648 
6649 	VnodePutter vnode;
6650 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6651 		NULL, kernel);
6652 	if (status != B_OK)
6653 		return status;
6654 
6655 	status = vfs_stat_vnode(vnode.Get(), stat);
6656 
6657 	return status;
6658 }
6659 
6660 
6661 static status_t
6662 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6663 	const struct stat* stat, int statMask, bool kernel)
6664 {
6665 	FUNCTION(("common_path_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6666 		"kernel %d\n", fd, path, stat, statMask, kernel));
6667 
6668 	VnodePutter vnode;
6669 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6670 		NULL, kernel);
6671 	if (status != B_OK)
6672 		return status;
6673 
6674 	if (HAS_FS_CALL(vnode, write_stat))
6675 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6676 	else
6677 		status = B_READ_ONLY_DEVICE;
6678 
6679 	return status;
6680 }
6681 
6682 
6683 static int
6684 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6685 {
6686 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6687 		kernel));
6688 
6689 	VnodePutter vnode;
6690 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6691 		NULL, kernel);
6692 	if (status != B_OK)
6693 		return status;
6694 
6695 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6696 	if (status >= 0)
6697 		vnode.Detach();
6698 
6699 	return status;
6700 }
6701 
6702 
6703 static status_t
6704 attr_dir_close(struct file_descriptor* descriptor)
6705 {
6706 	struct vnode* vnode = descriptor->u.vnode;
6707 
6708 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6709 
6710 	if (HAS_FS_CALL(vnode, close_attr_dir))
6711 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6712 
6713 	return B_OK;
6714 }
6715 
6716 
6717 static void
6718 attr_dir_free_fd(struct file_descriptor* descriptor)
6719 {
6720 	struct vnode* vnode = descriptor->u.vnode;
6721 
6722 	if (vnode != NULL) {
6723 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6724 		put_vnode(vnode);
6725 	}
6726 }
6727 
6728 
6729 static status_t
6730 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6731 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6732 {
6733 	struct vnode* vnode = descriptor->u.vnode;
6734 
6735 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6736 
6737 	if (HAS_FS_CALL(vnode, read_attr_dir))
6738 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6739 			bufferSize, _count);
6740 
6741 	return B_UNSUPPORTED;
6742 }
6743 
6744 
6745 static status_t
6746 attr_dir_rewind(struct file_descriptor* descriptor)
6747 {
6748 	struct vnode* vnode = descriptor->u.vnode;
6749 
6750 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6751 
6752 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6753 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6754 
6755 	return B_UNSUPPORTED;
6756 }
6757 
6758 
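/*!	Creates and opens the attribute "name" of the given type on the node
	specified by fd + path. If creating the FD fails afterwards, the
	freshly created attribute is removed again to undo the operation.
	On success, the vnode reference is transferred to the returned FD.
*/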
6759 static int
6760 attr_create(int fd, char* path, const char* name, uint32 type,
6761 	int openMode, bool kernel)
6762 {
6763 	if (name == NULL || *name == '\0')
6764 		return B_BAD_VALUE;
6765 
6766 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6767 	VnodePutter vnode;
6768 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6769 		kernel);
6770 	if (status != B_OK)
6771 		return status;
6772 
6773 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6774 		return B_LINK_LIMIT;
6775 
6776 	if (!HAS_FS_CALL(vnode, create_attr))
6777 		return B_READ_ONLY_DEVICE;
6778 
6779 	void* cookie;
6780 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6781 	if (status != B_OK)
6782 		return status;
6783 
6784 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode.Get(), cookie, openMode, kernel);
6785 	if (fd >= 0) {
6786 		vnode.Detach();
6787 		return fd;
6788 	}
6789 
6790 	status = fd;
6791 
6792 	FS_CALL(vnode.Get(), close_attr, cookie);
6793 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6794 
6795 	FS_CALL(vnode.Get(), remove_attr, name);
6796 
6797 	return status;
6798 }
6799 
6800 
6801 static int
6802 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6803 {
6804 	if (name == NULL || *name == '\0')
6805 		return B_BAD_VALUE;
6806 
6807 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6808 	VnodePutter vnode;
6809 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6810 		kernel);
6811 	if (status != B_OK)
6812 		return status;
6813 
6814 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6815 		return B_LINK_LIMIT;
6816 
6817 	if (!HAS_FS_CALL(vnode, open_attr))
6818 		return B_UNSUPPORTED;
6819 
6820 	void* cookie;
6821 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6822 	if (status != B_OK)
6823 		return status;
6824 
6825 	// now we only need a file descriptor for this attribute and we're done
6826 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode.Get(), cookie, openMode, kernel);
6827 	if (fd >= 0) {
6828 		vnode.Detach();
6829 		return fd;
6830 	}
6831 
6832 	status = fd;
6833 
6834 	FS_CALL(vnode.Get(), close_attr, cookie);
6835 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6836 
6837 	return status;
6838 }
6839 
6840 
6841 static status_t
6842 attr_close(struct file_descriptor* descriptor)
6843 {
6844 	struct vnode* vnode = descriptor->u.vnode;
6845 
6846 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6847 
6848 	if (HAS_FS_CALL(vnode, close_attr))
6849 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6850 
6851 	return B_OK;
6852 }
6853 
6854 
6855 static void
6856 attr_free_fd(struct file_descriptor* descriptor)
6857 {
6858 	struct vnode* vnode = descriptor->u.vnode;
6859 
6860 	if (vnode != NULL) {
6861 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6862 		put_vnode(vnode);
6863 	}
6864 }
6865 
6866 
6867 static status_t
6868 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6869 	size_t* length)
6870 {
6871 	struct vnode* vnode = descriptor->u.vnode;
6872 
6873 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6874 		pos, length, *length));
6875 
6876 	if (!HAS_FS_CALL(vnode, read_attr))
6877 		return B_UNSUPPORTED;
6878 
6879 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6880 }
6881 
6882 
6883 static status_t
6884 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6885 	size_t* length)
6886 {
6887 	struct vnode* vnode = descriptor->u.vnode;
6888 
6889 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6890 		length));
6891 
6892 	if (!HAS_FS_CALL(vnode, write_attr))
6893 		return B_UNSUPPORTED;
6894 
6895 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6896 }
6897 
6898 
6899 static off_t
6900 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6901 {
6902 	off_t offset;
6903 
6904 	switch (seekType) {
6905 		case SEEK_SET:
6906 			offset = 0;
6907 			break;
6908 		case SEEK_CUR:
6909 			offset = descriptor->pos;
6910 			break;
6911 		case SEEK_END:
6912 		{
6913 			struct vnode* vnode = descriptor->u.vnode;
6914 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6915 				return B_UNSUPPORTED;
6916 
6917 			struct stat stat;
6918 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6919 				&stat);
6920 			if (status != B_OK)
6921 				return status;
6922 
6923 			offset = stat.st_size;
6924 			break;
6925 		}
6926 		default:
6927 			return B_BAD_VALUE;
6928 	}
6929 
6930 	// assumes off_t is 64 bits wide
6931 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6932 		return B_BUFFER_OVERFLOW;
6933 
6934 	pos += offset;
6935 	if (pos < 0)
6936 		return B_BAD_VALUE;
6937 
6938 	return descriptor->pos = pos;
6939 }
6940 
6941 
6942 static status_t
6943 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6944 {
6945 	struct vnode* vnode = descriptor->u.vnode;
6946 
6947 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6948 
6949 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6950 		return B_UNSUPPORTED;
6951 
6952 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6953 }
6954 
6955 
6956 static status_t
6957 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6958 	int statMask)
6959 {
6960 	struct vnode* vnode = descriptor->u.vnode;
6961 
6962 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6963 
6964 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6965 		return B_READ_ONLY_DEVICE;
6966 
6967 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6968 }
6969 
6970 
6971 static status_t
6972 attr_remove(int fd, const char* name, bool kernel)
6973 {
6974 	if (name == NULL || *name == '\0')
6975 		return B_BAD_VALUE;
6976 
6977 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6978 		kernel));
6979 
6980 	struct vnode* vnode;
6981 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6982 	if (!descriptor.IsSet())
6983 		return B_FILE_ERROR;
6984 
6985 	status_t status;
6986 	if (HAS_FS_CALL(vnode, remove_attr))
6987 		status = FS_CALL(vnode, remove_attr, name);
6988 	else
6989 		status = B_READ_ONLY_DEVICE;
6990 
6991 	return status;
6992 }
6993 
6994 
6995 static status_t
6996 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6997 	bool kernel)
6998 {
6999 	if (fromName == NULL || *fromName == '\0' || toName == NULL
7000 		|| *toName == '\0')
7001 		return B_BAD_VALUE;
7002 
7003 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7004 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7005 
7006 	struct vnode* fromVnode;
7007 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7008 	if (!fromDescriptor.IsSet())
7009 		return B_FILE_ERROR;
7010 
7011 	struct vnode* toVnode;
7012 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7013 	if (!toDescriptor.IsSet())
7014 		return B_FILE_ERROR;
7015 
7016 	// are the files on the same volume?
7017 	if (fromVnode->device != toVnode->device)
7018 		return B_CROSS_DEVICE_LINK;
7019 
7020 	status_t status;
7021 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7022 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7023 	} else
7024 		status = B_READ_ONLY_DEVICE;
7025 
7026 	return status;
7027 }
7028 
7029 
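/*!	Opens the index directory of the volume specified by mountID and
	returns a new file descriptor for it. On success, the mount reference
	acquired here is kept for the descriptor and released again in
	index_dir_free_fd().
*/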
7030 static int
7031 index_dir_open(dev_t mountID, bool kernel)
7032 {
7033 	struct fs_mount* mount;
7034 	void* cookie;
7035 
7036 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7037 		kernel));
7038 
7039 	status_t status = get_mount(mountID, &mount);
7040 	if (status != B_OK)
7041 		return status;
7042 
7043 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7044 		status = B_UNSUPPORTED;
7045 		goto error;
7046 	}
7047 
7048 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7049 	if (status != B_OK)
7050 		goto error;
7051 
7052 	// get fd for the index directory
7053 	int fd;
7054 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7055 	if (fd >= 0)
7056 		return fd;
7057 
7058 	// something went wrong
7059 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7060 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7061 
7062 	status = fd;
7063 
7064 error:
7065 	put_mount(mount);
7066 	return status;
7067 }
7068 
7069 
7070 static status_t
7071 index_dir_close(struct file_descriptor* descriptor)
7072 {
7073 	struct fs_mount* mount = descriptor->u.mount;
7074 
7075 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7076 
7077 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7078 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7079 
7080 	return B_OK;
7081 }
7082 
7083 
7084 static void
7085 index_dir_free_fd(struct file_descriptor* descriptor)
7086 {
7087 	struct fs_mount* mount = descriptor->u.mount;
7088 
7089 	if (mount != NULL) {
7090 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7091 		put_mount(mount);
7092 	}
7093 }
7094 
7095 
7096 static status_t
7097 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7098 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7099 {
7100 	struct fs_mount* mount = descriptor->u.mount;
7101 
7102 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7103 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7104 			bufferSize, _count);
7105 	}
7106 
7107 	return B_UNSUPPORTED;
7108 }
7109 
7110 
7111 static status_t
7112 index_dir_rewind(struct file_descriptor* descriptor)
7113 {
7114 	struct fs_mount* mount = descriptor->u.mount;
7115 
7116 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7117 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7118 
7119 	return B_UNSUPPORTED;
7120 }
7121 
7122 
7123 static status_t
7124 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7125 	bool kernel)
7126 {
7127 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7128 		mountID, name, kernel));
7129 
7130 	struct fs_mount* mount;
7131 	status_t status = get_mount(mountID, &mount);
7132 	if (status != B_OK)
7133 		return status;
7134 
7135 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7136 		status = B_READ_ONLY_DEVICE;
7137 		goto out;
7138 	}
7139 
7140 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7141 
7142 out:
7143 	put_mount(mount);
7144 	return status;
7145 }
7146 
7147 
7148 #if 0
7149 static status_t
7150 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7151 {
7152 	struct vnode* vnode = descriptor->u.vnode;
7153 
7154 	// ToDo: currently unused!
7155 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7156 	if (!HAS_FS_CALL(vnode, read_index_stat))
7157 		return B_UNSUPPORTED;
7158 
7159 	return B_UNSUPPORTED;
7160 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7161 }
7162 
7163 
7164 static void
7165 index_free_fd(struct file_descriptor* descriptor)
7166 {
7167 	struct vnode* vnode = descriptor->u.vnode;
7168 
7169 	if (vnode != NULL) {
7170 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7171 		put_vnode(vnode);
7172 	}
7173 }
7174 #endif
7175 
7176 
7177 static status_t
7178 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7179 	bool kernel)
7180 {
7181 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7182 		mountID, name, kernel));
7183 
7184 	struct fs_mount* mount;
7185 	status_t status = get_mount(mountID, &mount);
7186 	if (status != B_OK)
7187 		return status;
7188 
7189 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7190 		status = B_UNSUPPORTED;
7191 		goto out;
7192 	}
7193 
7194 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7195 
7196 out:
7197 	put_mount(mount);
7198 	return status;
7199 }
7200 
7201 
7202 static status_t
7203 index_remove(dev_t mountID, const char* name, bool kernel)
7204 {
7205 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7206 		mountID, name, kernel));
7207 
7208 	struct fs_mount* mount;
7209 	status_t status = get_mount(mountID, &mount);
7210 	if (status != B_OK)
7211 		return status;
7212 
7213 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7214 		status = B_READ_ONLY_DEVICE;
7215 		goto out;
7216 	}
7217 
7218 	status = FS_MOUNT_CALL(mount, remove_index, name);
7219 
7220 out:
7221 	put_mount(mount);
7222 	return status;
7223 }
7224 
7225 
7226 /*!	TODO: the query FS API is still pretty much the same as in R5.
7227 		It would be nice if queries got some more kernel support.
7228 		For example, query parsing should be moved into the
7229 		kernel.
7230 */
7231 static int
7232 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7233 	int32 token, bool kernel)
7234 {
7235 	struct fs_mount* mount;
7236 	void* cookie;
7237 
7238 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7239 		device, query, kernel));
7240 
7241 	status_t status = get_mount(device, &mount);
7242 	if (status != B_OK)
7243 		return status;
7244 
7245 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7246 		status = B_UNSUPPORTED;
7247 		goto error;
7248 	}
7249 
7250 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7251 		&cookie);
7252 	if (status != B_OK)
7253 		goto error;
7254 
7255 	// get fd for the query
7256 	int fd;
7257 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7258 	if (fd >= 0)
7259 		return fd;
7260 
7261 	status = fd;
7262 
7263 	// something went wrong
7264 	FS_MOUNT_CALL(mount, close_query, cookie);
7265 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7266 
7267 error:
7268 	put_mount(mount);
7269 	return status;
7270 }
7271 
7272 
7273 static status_t
7274 query_close(struct file_descriptor* descriptor)
7275 {
7276 	struct fs_mount* mount = descriptor->u.mount;
7277 
7278 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7279 
7280 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7281 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7282 
7283 	return B_OK;
7284 }
7285 
7286 
7287 static void
7288 query_free_fd(struct file_descriptor* descriptor)
7289 {
7290 	struct fs_mount* mount = descriptor->u.mount;
7291 
7292 	if (mount != NULL) {
7293 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7294 		put_mount(mount);
7295 	}
7296 }
7297 
7298 
7299 static status_t
7300 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7301 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7302 {
7303 	struct fs_mount* mount = descriptor->u.mount;
7304 
7305 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7306 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7307 			bufferSize, _count);
7308 	}
7309 
7310 	return B_UNSUPPORTED;
7311 }
7312 
7313 
7314 static status_t
7315 query_rewind(struct file_descriptor* descriptor)
7316 {
7317 	struct fs_mount* mount = descriptor->u.mount;
7318 
7319 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7320 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7321 
7322 	return B_UNSUPPORTED;
7323 }
7324 
7325 
7326 //	#pragma mark - General File System functions
7327 
7328 
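/*!	Mounts the file system fsName at path, optionally backed by device --
	either a real device or an image file, for which a file device is
	created on the fly. Layered file systems are supported: one fs_volume
	is created per layer and chained via super_volume/sub_volume. The
	very first mount must be the root ("/"); any later mount covers an
	existing directory vnode. Returns the new mount ID, or an error code.
*/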
7329 static dev_t
7330 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7331 	const char* args, bool kernel)
7332 {
7333 	struct ::fs_mount* mount;
7334 	status_t status = B_OK;
7335 	fs_volume* volume = NULL;
7336 	int32 layer = 0;
7337 	Vnode* coveredNode = NULL;
7338 
7339 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7340 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7341 
7342 	// The path is always safe; we just have to make sure that fsName is
7343 	// at least superficially valid - we can't make any assumptions about
7344 	// args, though. A NULL fsName is OK if a device was given and the FS
7345 	// is not virtual; we'll get the name from the DDM later.
7346 	if (fsName == NULL) {
7347 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7348 			return B_BAD_VALUE;
7349 	} else if (fsName[0] == '\0')
7350 		return B_BAD_VALUE;
7351 
7352 	RecursiveLocker mountOpLocker(sMountOpLock);
7353 
7354 	// Helper to delete a newly created file device on failure.
7355 	// Not exactly beautiful, but helps to keep the code below cleaner.
7356 	struct FileDeviceDeleter {
7357 		FileDeviceDeleter() : id(-1) {}
7358 		~FileDeviceDeleter()
7359 		{
7360 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7361 		}
7362 
7363 		partition_id id;
7364 	} fileDeviceDeleter;
7365 
7366 	// If the file system is not a "virtual" one, the device argument should
7367 	// point to a real file/device (if given at all).
7368 	// get the partition
7369 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7370 	KPartition* partition = NULL;
7371 	KPath normalizedDevice;
7372 	bool newlyCreatedFileDevice = false;
7373 
7374 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7375 		// normalize the device path
7376 		status = normalizedDevice.SetTo(device, true);
7377 		if (status != B_OK)
7378 			return status;
7379 
7380 		// get a corresponding partition from the DDM
7381 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7382 		if (partition == NULL) {
7383 			// Partition not found: This either means the user supplied
7384 			// an invalid path, or the path refers to an image file. We try
7385 			// to let the DDM create a file device for the path.
7386 			partition_id deviceID = ddm->CreateFileDevice(
7387 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7388 			if (deviceID >= 0) {
7389 				partition = ddm->RegisterPartition(deviceID);
7390 				if (newlyCreatedFileDevice)
7391 					fileDeviceDeleter.id = deviceID;
7392 			}
7393 		}
7394 
7395 		if (!partition) {
7396 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7397 				normalizedDevice.Path()));
7398 			return B_ENTRY_NOT_FOUND;
7399 		}
7400 
7401 		device = normalizedDevice.Path();
7402 			// correct path to file device
7403 	}
7404 	PartitionRegistrar partitionRegistrar(partition, true);
7405 
7406 	// Write lock the partition's device. For the time being, we keep the lock
7407 	// until we're done mounting -- not nice, but it ensures that no one
7408 	// is interfering.
7409 	// TODO: Just mark the partition busy while mounting!
7410 	KDiskDevice* diskDevice = NULL;
7411 	if (partition) {
7412 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7413 		if (!diskDevice) {
7414 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7415 			return B_ERROR;
7416 		}
7417 	}
7418 
7419 	DeviceWriteLocker writeLocker(diskDevice, true);
7420 		// this takes over the write lock acquired before
7421 
7422 	if (partition != NULL) {
7423 		// make sure that the partition is not busy
7424 		if (partition->IsBusy()) {
7425 			TRACE(("fs_mount(): Partition is busy.\n"));
7426 			return B_BUSY;
7427 		}
7428 
7429 		// if no FS name had been supplied, we get it from the partition
7430 		if (fsName == NULL) {
7431 			KDiskSystem* diskSystem = partition->DiskSystem();
7432 			if (!diskSystem) {
7433 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7434 					"recognize it.\n"));
7435 				return B_BAD_VALUE;
7436 			}
7437 
7438 			if (!diskSystem->IsFileSystem()) {
7439 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7440 					"partitioning system.\n"));
7441 				return B_BAD_VALUE;
7442 			}
7443 
7444 			// The disk system name will not change, and the KDiskSystem
7445 			// object will not go away while the disk device is locked (and
7446 			// the partition has a reference to it), so this is safe.
7447 			fsName = diskSystem->Name();
7448 		}
7449 	}
7450 
7451 	mount = new(std::nothrow) (struct ::fs_mount);
7452 	if (mount == NULL)
7453 		return B_NO_MEMORY;
7454 
7455 	mount->device_name = strdup(device);
7456 		// "device" can be NULL
7457 
7458 	status = mount->entry_cache.Init();
7459 	if (status != B_OK)
7460 		goto err1;
7461 
7462 	// initialize structure
7463 	mount->id = sNextMountID++;
7464 	mount->partition = NULL;
7465 	mount->root_vnode = NULL;
7466 	mount->covers_vnode = NULL;
7467 	mount->unmounting = false;
7468 	mount->owns_file_device = false;
7469 	mount->volume = NULL;
7470 
7471 	// build up the volume(s)
7472 	while (true) {
7473 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7474 		if (layerFSName == NULL) {
7475 			if (layer == 0) {
7476 				status = B_NO_MEMORY;
7477 				goto err1;
7478 			}
7479 
7480 			break;
7481 		}
7482 		MemoryDeleter layerFSNameDeleter(layerFSName);
7483 
7484 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7485 		if (volume == NULL) {
7486 			status = B_NO_MEMORY;
7487 			goto err1;
7488 		}
7489 
7490 		volume->id = mount->id;
7491 		volume->partition = partition != NULL ? partition->ID() : -1;
7492 		volume->layer = layer++;
7493 		volume->private_volume = NULL;
7494 		volume->ops = NULL;
7495 		volume->sub_volume = NULL;
7496 		volume->super_volume = NULL;
7497 		volume->file_system = NULL;
7498 		volume->file_system_name = NULL;
7499 
7500 		volume->file_system_name = get_file_system_name(layerFSName);
7501 		if (volume->file_system_name == NULL) {
7502 			status = B_NO_MEMORY;
7503 			free(volume);
7504 			goto err1;
7505 		}
7506 
7507 		volume->file_system = get_file_system(layerFSName);
7508 		if (volume->file_system == NULL) {
7509 			status = B_DEVICE_NOT_FOUND;
7510 			free(volume->file_system_name);
7511 			free(volume);
7512 			goto err1;
7513 		}
7514 
7515 		if (mount->volume == NULL)
7516 			mount->volume = volume;
7517 		else {
7518 			volume->super_volume = mount->volume;
7519 			mount->volume->sub_volume = volume;
7520 			mount->volume = volume;
7521 		}
7522 	}
7523 
7524 	// insert mount struct into list before we call FS's mount() function
7525 	// so that vnodes can be created for this mount
7526 	rw_lock_write_lock(&sMountLock);
7527 	sMountsTable->Insert(mount);
7528 	rw_lock_write_unlock(&sMountLock);
7529 
7530 	ino_t rootID;
7531 
7532 	if (!sRoot) {
7533 		// we haven't mounted anything yet
7534 		if (strcmp(path, "/") != 0) {
7535 			status = B_ERROR;
7536 			goto err2;
7537 		}
7538 
7539 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7540 			args, &rootID);
7541 		if (status != B_OK || mount->volume->ops == NULL) {
			// guard against a mount() that reports success without setting ops;
			// returning B_OK (0) here would look like a valid dev_t to the caller
			if (status == B_OK)
				status = B_ERROR;
7542 			goto err2;
		}
7543 	} else {
7544 		{
7545 			VnodePutter temp;
7546 			status = path_to_vnode(path, true, temp, NULL, kernel);
7547 			coveredNode = temp.Detach();
7548 		}
7549 		if (status != B_OK)
7550 			goto err2;
7551 
7552 		mount->covers_vnode = coveredNode;
7553 
7554 		// make sure covered_vnode is a directory
7555 		if (!S_ISDIR(coveredNode->Type())) {
7556 			status = B_NOT_A_DIRECTORY;
7557 			goto err3;
7558 		}
7559 
7560 		if (coveredNode->IsCovered()) {
7561 			// this is already a covered vnode
7562 			status = B_BUSY;
7563 			goto err3;
7564 		}
7565 
7566 		// mount it/them
7567 		fs_volume* volume = mount->volume;
7568 		while (volume) {
7569 			status = volume->file_system->mount(volume, device, flags, args,
7570 				&rootID);
7571 			if (status != B_OK || volume->ops == NULL) {
7572 				if (status == B_OK && volume->ops == NULL)
7573 					panic("fs_mount: mount() succeeded but ops is NULL!");
7574 				if (volume->sub_volume)
7575 					goto err4;
7576 				goto err3;
7577 			}
7578 
7579 			volume = volume->super_volume;
7580 		}
7581 
7582 		volume = mount->volume;
7583 		while (volume) {
7584 			if (volume->ops->all_layers_mounted != NULL)
7585 				volume->ops->all_layers_mounted(volume);
7586 			volume = volume->super_volume;
7587 		}
7588 	}
7589 
7590 	// the root node is supposed to be owned by the file system - it must
7591 	// exist at this point
7592 	rw_lock_write_lock(&sVnodeLock);
7593 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7594 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7595 		panic("fs_mount: file system does not own its root node!\n");
7596 		status = B_ERROR;
7597 		rw_lock_write_unlock(&sVnodeLock);
7598 		goto err4;
7599 	}
7600 
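	// A "covered" vnode redirects path resolution to the vnode "covering"
	// it, i.e. to the root of the newly mounted volume. Both links account
	// for a reference on the respective vnode, which is why fs_unmount()
	// has to undo them before the mount can go away.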
7601 	// set up the links between the root vnode and the vnode it covers
7602 	if (coveredNode != NULL) {
7603 		if (coveredNode->IsCovered()) {
7604 			// the vnode is covered now
7605 			status = B_BUSY;
7606 			rw_lock_write_unlock(&sVnodeLock);
7607 			goto err4;
7608 		}
7609 
7610 		mount->root_vnode->covers = coveredNode;
7611 		mount->root_vnode->SetCovering(true);
7612 
7613 		coveredNode->covered_by = mount->root_vnode;
7614 		coveredNode->SetCovered(true);
7615 	}
7616 	rw_lock_write_unlock(&sVnodeLock);
7617 
7618 	if (!sRoot) {
7619 		sRoot = mount->root_vnode;
7620 		mutex_lock(&sIOContextRootLock);
7621 		get_current_io_context(true)->root = sRoot;
7622 		mutex_unlock(&sIOContextRootLock);
7623 		inc_vnode_ref_count(sRoot);
7624 	}
7625 
7626 	// supply the partition (if any) with the mount cookie and mark it mounted
7627 	if (partition) {
7628 		partition->SetMountCookie(mount->volume->private_volume);
7629 		partition->SetVolumeID(mount->id);
7630 
7631 		// keep a partition reference as long as the partition is mounted
7632 		partitionRegistrar.Detach();
7633 		mount->partition = partition;
7634 		mount->owns_file_device = newlyCreatedFileDevice;
7635 		fileDeviceDeleter.id = -1;
7636 	}
7637 
7638 	notify_mount(mount->id,
7639 		coveredNode != NULL ? coveredNode->device : -1,
7640 		coveredNode ? coveredNode->id : -1);
7641 
7642 	return mount->id;
7643 
7644 err4:
7645 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7646 err3:
7647 	if (coveredNode != NULL)
7648 		put_vnode(coveredNode);
7649 err2:
7650 	rw_lock_write_lock(&sMountLock);
7651 	sMountsTable->Remove(mount);
7652 	rw_lock_write_unlock(&sMountLock);
7653 err1:
7654 	delete mount;
7655 
7656 	return status;
7657 }
7658 
7659 
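/*!	\brief Unmounts the volume either mounted at \a path or given by
	\a mountID.

	If \a path is specified, it must name the mount point (the volume's
	root). Unless \c B_FORCE_UNMOUNT is set in \a flags, the call fails
	with \c B_BUSY while any vnode of the mount is still referenced; with
	the flag set, open file descriptors on the volume are forcibly
	disconnected first.
*/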
7660 static status_t
7661 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7662 {
7663 	struct fs_mount* mount;
7664 	status_t err;
7665 
7666 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7667 		mountID, kernel));
7668 
7669 	VnodePutter pathVnode;
7670 	if (path != NULL) {
7671 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7672 		if (err != B_OK)
7673 			return B_ENTRY_NOT_FOUND;
7674 	}
7675 
7676 	RecursiveLocker mountOpLocker(sMountOpLock);
7677 	ReadLocker mountLocker(sMountLock);
7678 
7679 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7680 	if (mount == NULL) {
7681 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7682 			pathVnode.Get());
7683 	}
7684 
7685 	mountLocker.Unlock();
7686 
7687 	if (path != NULL) {
7688 		if (mount->root_vnode != pathVnode.Get()) {
7689 			// not a mount point
7690 			return B_BAD_VALUE;
7691 		}
7692 
7693 		pathVnode.Unset();
7694 	}
7695 
7696 	// if the volume is associated with a partition, lock the device of the
7697 	// partition as long as we are unmounting
7698 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7699 	KPartition* partition = mount->partition;
7700 	KDiskDevice* diskDevice = NULL;
7701 	if (partition != NULL) {
7702 		if (partition->Device() == NULL) {
7703 			dprintf("fs_unmount(): There is no device!\n");
7704 			return B_ERROR;
7705 		}
7706 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7707 		if (!diskDevice) {
7708 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7709 			return B_ERROR;
7710 		}
7711 	}
7712 	DeviceWriteLocker writeLocker(diskDevice, true);
7713 
7714 	// make sure that the partition is not busy
7715 	if (partition != NULL) {
7716 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7717 			dprintf("fs_unmount(): Partition is busy.\n");
7718 			return B_BUSY;
7719 		}
7720 	}
7721 
7722 	// grab the vnode master mutex to keep someone from creating
7723 	// a vnode while we're figuring out if we can continue
7724 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7725 
7726 	bool disconnectedDescriptors = false;
7727 
7728 	while (true) {
7729 		bool busy = false;
7730 
7731 		// cycle through the list of vnodes associated with this mount and
7732 		// make sure all of them are not busy or have refs on them
7733 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7734 		while (struct vnode* vnode = iterator.Next()) {
7735 			if (vnode->IsBusy()) {
7736 				dprintf("fs_unmount(): inode %" B_PRIdINO " is busy\n", vnode->id);
7737 				busy = true;
7738 				break;
7739 			}
7740 
7741 			// check the vnode's ref count -- subtract additional references for
7742 			// covering
7743 			int32 refCount = vnode->ref_count;
7744 			if (vnode->covers != NULL)
7745 				refCount--;
7746 			if (vnode->covered_by != NULL)
7747 				refCount--;
7748 
7749 			if (refCount != 0) {
7750 				dprintf("fs_unmount(): inode %" B_PRIdINO " is still referenced\n", vnode->id);
7751 				// there are still vnodes in use on this mount, so we cannot
7752 				// unmount yet
7753 				busy = true;
7754 				break;
7755 			}
7756 		}
7757 
7758 		if (!busy)
7759 			break;
7760 
7761 		if ((flags & B_FORCE_UNMOUNT) == 0)
7762 			return B_BUSY;
7763 
7764 		if (disconnectedDescriptors) {
7765 			// wait a bit until the last access is finished, and then try again
7766 			vnodesWriteLocker.Unlock();
7767 			snooze(100000);
7768 			// TODO: if there is some kind of bug that prevents the ref counts
7769 			// from getting back to zero, this will fall into an endless loop...
7770 			vnodesWriteLocker.Lock();
7771 			continue;
7772 		}
7773 
7774 		// the file system is still busy - but we're forced to unmount it,
7775 		// so let's disconnect all open file descriptors
7776 
7777 		mount->unmounting = true;
7778 			// prevent new vnodes from being created
7779 
7780 		vnodesWriteLocker.Unlock();
7781 
7782 		disconnect_mount_or_vnode_fds(mount, NULL);
7783 		disconnectedDescriptors = true;
7784 
7785 		vnodesWriteLocker.Lock();
7786 	}
7787 
7788 	// We can safely continue. Mark all of the vnodes busy and this mount
7789 	// structure in unmounting state. Also undo the vnode covers/covered_by
7790 	// links.
7791 	mount->unmounting = true;
7792 
7793 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7794 	while (struct vnode* vnode = iterator.Next()) {
7795 		// Remove all covers/covered_by links from other mounts' nodes to this
7796 		// vnode and adjust the node ref count accordingly. We will release the
7797 		// references to the external vnodes below.
7798 		if (Vnode* coveredNode = vnode->covers) {
7799 			if (Vnode* coveringNode = vnode->covered_by) {
7800 				// We have both covered and covering vnodes, so just remove us
7801 				// from the chain.
7802 				coveredNode->covered_by = coveringNode;
7803 				coveringNode->covers = coveredNode;
7804 				vnode->ref_count -= 2;
7805 
7806 				vnode->covered_by = NULL;
7807 				vnode->covers = NULL;
7808 				vnode->SetCovering(false);
7809 				vnode->SetCovered(false);
7810 			} else {
7811 				// We only have a covered vnode. Remove its link to us.
7812 				coveredNode->covered_by = NULL;
7813 				coveredNode->SetCovered(false);
7814 				vnode->ref_count--;
7815 
7816 				// If the other node is an external vnode, we keep its
7817 				// link around so we can put the reference later on. Otherwise
7818 				// we get rid of it right now.
7819 				if (coveredNode->mount == mount) {
7820 					vnode->covers = NULL;
7821 					coveredNode->ref_count--;
7822 				}
7823 			}
7824 		} else if (Vnode* coveringNode = vnode->covered_by) {
7825 			// We only have a covering vnode. Remove its link to us.
7826 			coveringNode->covers = NULL;
7827 			coveringNode->SetCovering(false);
7828 			vnode->ref_count--;
7829 
7830 			// If the other node is an external vnode, we keep its
7831 			// link around so we can put the reference later on. Otherwise
7832 			// we get rid of it right now.
7833 			if (coveringNode->mount == mount) {
7834 				vnode->covered_by = NULL;
7835 				coveringNode->ref_count--;
7836 			}
7837 		}
7838 
7839 		vnode->SetBusy(true);
7840 		vnode_to_be_freed(vnode);
7841 	}
7842 
7843 	vnodesWriteLocker.Unlock();
7844 
7845 	// Free all vnodes associated with this mount.
7846 	// They will be removed from the mount list by free_vnode(), so
7847 	// we don't have to do this.
7848 	while (struct vnode* vnode = mount->vnodes.Head()) {
7849 		// Put the references to external covered/covering vnodes we kept above.
7850 		if (Vnode* coveredNode = vnode->covers)
7851 			put_vnode(coveredNode);
7852 		if (Vnode* coveringNode = vnode->covered_by)
7853 			put_vnode(coveringNode);
7854 
7855 		free_vnode(vnode, false);
7856 	}
7857 
7858 	// remove the mount structure from the hash table
7859 	rw_lock_write_lock(&sMountLock);
7860 	sMountsTable->Remove(mount);
7861 	rw_lock_write_unlock(&sMountLock);
7862 
7863 	mountOpLocker.Unlock();
7864 
7865 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7866 	notify_unmount(mount->id);
7867 
7868 	// dereference the partition and mark it unmounted
7869 	if (partition) {
7870 		partition->SetVolumeID(-1);
7871 		partition->SetMountCookie(NULL);
7872 
7873 		if (mount->owns_file_device)
7874 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7875 		partition->Unregister();
7876 	}
7877 
7878 	delete mount;
7879 	return B_OK;
7880 }
7881 
7882 
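/*!	\brief Synchronizes the volume with ID \a device to disk.

	Flushes the file cache of every vnode of the mount (iterating with a
	marker node, since sVnodeLock is dropped in between), then calls the
	file system's sync() hook, and finally flushes the underlying drive's
	write cache, if there is one.
*/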
7883 static status_t
7884 fs_sync(dev_t device)
7885 {
7886 	struct fs_mount* mount;
7887 	status_t status = get_mount(device, &mount);
7888 	if (status != B_OK)
7889 		return status;
7890 
7891 	struct vnode marker;
7892 	memset(&marker, 0, sizeof(marker));
7893 	marker.SetBusy(true);
7894 	marker.SetRemoved(true);
7895 
7896 	// First, synchronize all file caches
7897 
7898 	while (true) {
7899 		WriteLocker locker(sVnodeLock);
7900 			// Note: That's the easy way, which is probably OK for sync(),
7901 			// since it's a relatively rare call and doesn't need to allow for
7902 			// a lot of concurrency. Using a read lock would be possible, but
7903 			// also more involved, since we would have to lock the individual
7904 			// nodes and take care of the locking order, which we might not
7905 			// want to do while holding fs_mount::lock.
7906 
7907 		// synchronize access to vnode list
7908 		mutex_lock(&mount->lock);
7909 
7910 		struct vnode* vnode;
7911 		if (!marker.IsRemoved()) {
7912 			vnode = mount->vnodes.GetNext(&marker);
7913 			mount->vnodes.Remove(&marker);
7914 			marker.SetRemoved(true);
7915 		} else
7916 			vnode = mount->vnodes.First();
7917 
7918 		while (vnode != NULL && (vnode->cache == NULL
7919 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7920 			// TODO: we could track writes (and writable mapped vnodes)
7921 			//	and have a simple flag that we could test for here
7922 			vnode = mount->vnodes.GetNext(vnode);
7923 		}
7924 
7925 		if (vnode != NULL) {
7926 			// insert marker vnode again
7927 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7928 			marker.SetRemoved(false);
7929 		}
7930 
7931 		mutex_unlock(&mount->lock);
7932 
7933 		if (vnode == NULL)
7934 			break;
7935 
7936 		vnode = lookup_vnode(mount->id, vnode->id);
7937 		if (vnode == NULL || vnode->IsBusy())
7938 			continue;
7939 
7940 		if (vnode->ref_count == 0) {
7941 			// this vnode has been unused before
7942 			vnode_used(vnode);
7943 		}
7944 		inc_vnode_ref_count(vnode);
7945 
7946 		locker.Unlock();
7947 
7948 		if (vnode->cache != NULL && !vnode->IsRemoved())
7949 			vnode->cache->WriteModified();
7950 
7951 		put_vnode(vnode);
7952 	}
7953 
7954 	// Let the file systems do their synchronizing work
7955 	if (HAS_FS_MOUNT_CALL(mount, sync))
7956 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7957 
7958 	// Finally, flush the underlying device's write cache (if possible).
7959 	if (mount->partition != NULL && mount->partition->Device() != NULL)
7960 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
7961 
7962 	put_mount(mount);
7963 	return status;
7964 }
7965 
7966 
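/*!	\brief Fills \a info with information about the volume with ID
	\a device, combining what the file system reports via its
	read_fs_info() hook with the fields only the VFS knows (device ID,
	root node ID, FS handler name, device name).
*/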
7967 static status_t
7968 fs_read_info(dev_t device, struct fs_info* info)
7969 {
7970 	struct fs_mount* mount;
7971 	status_t status = get_mount(device, &mount);
7972 	if (status != B_OK)
7973 		return status;
7974 
7975 	memset(info, 0, sizeof(struct fs_info));
7976 
7977 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7978 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7979 
7980 	// fill in info the file system doesn't (have to) know about
7981 	if (status == B_OK) {
7982 		info->dev = mount->id;
7983 		info->root = mount->root_vnode->id;
7984 
7985 		fs_volume* volume = mount->volume;
7986 		while (volume->super_volume != NULL)
7987 			volume = volume->super_volume;
7988 
7989 		strlcpy(info->fsh_name, volume->file_system_name,
7990 			sizeof(info->fsh_name));
7991 		if (mount->device_name != NULL) {
7992 			strlcpy(info->device_name, mount->device_name,
7993 				sizeof(info->device_name));
7994 		}
7995 	}
7996 
7997 	// if the call is not supported by the file system, there are still
7998 	// the parts that we filled out ourselves
7999 
8000 	put_mount(mount);
8001 	return status;
8002 }
8003 
8004 
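/*!	\brief Lets the file system of volume \a device update the fields of
	\a info selected by \a mask; returns \c B_READ_ONLY_DEVICE if the file
	system has no write_fs_info() hook.
*/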
8005 static status_t
8006 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8007 {
8008 	struct fs_mount* mount;
8009 	status_t status = get_mount(device, &mount);
8010 	if (status != B_OK)
8011 		return status;
8012 
8013 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8014 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8015 	else
8016 		status = B_READ_ONLY_DEVICE;
8017 
8018 	put_mount(mount);
8019 	return status;
8020 }
8021 
8022 
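/*!	\brief Iterates over all mounted volumes: returns the ID of the next
	mounted device >= \a *_cookie and advances the cookie, or
	\c B_BAD_VALUE once there are no more devices.
*/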
8023 static dev_t
8024 fs_next_device(int32* _cookie)
8025 {
8026 	struct fs_mount* mount = NULL;
8027 	dev_t device = *_cookie;
8028 
8029 	rw_lock_read_lock(&sMountLock);
8030 
8031 	// Since device IDs are assigned sequentially, this algorithm
8032 	// works well enough. It ensures that the device list returned
8033 	// is sorted, and that no device is skipped when an already
8034 	// visited device has been unmounted.
8035 
8036 	while (device < sNextMountID) {
8037 		mount = find_mount(device++);
8038 		if (mount != NULL && mount->volume->private_volume != NULL)
8039 			break;
8040 	}
8041 
8042 	*_cookie = device;
8043 
8044 	if (mount != NULL)
8045 		device = mount->id;
8046 	else
8047 		device = B_BAD_VALUE;
8048 
8049 	rw_lock_read_unlock(&sMountLock);
8050 
8051 	return device;
8052 }
8053 
8054 
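/*!	\brief Convenience function: opens the attribute \a attribute of the
	node referred to by \a fd read-only and reads up to \a readBytes bytes
	from offset \a pos into \a buffer. Returns the number of bytes read or
	an error code. Note that \a type is accepted for compatibility but is
	not used by this implementation.

	Illustrative use (attribute name and type constant are example values
	only):
	\code
	char mimeType[256];
	ssize_t length = fs_read_attr(fd, "BEOS:TYPE", B_STRING_TYPE, 0,
		mimeType, sizeof(mimeType));
	\endcode
*/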
8055 ssize_t
8056 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8057 	void *buffer, size_t readBytes)
8058 {
8059 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8060 	if (attrFD < 0)
8061 		return attrFD;
8062 
8063 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8064 
8065 	_kern_close(attrFD);
8066 
8067 	return bytesRead;
8068 }
8069 
8070 
8071 static status_t
8072 get_cwd(char* buffer, size_t size, bool kernel)
8073 {
8074 	// Get current working directory from io context
8075 	struct io_context* context = get_current_io_context(kernel);
8076 	status_t status;
8077 
8078 	FUNCTION(("vfs_get_cwd: buf %p, size %ld\n", buffer, size));
8079 
8080 	mutex_lock(&context->io_mutex);
8081 
8082 	struct vnode* vnode = context->cwd;
8083 	if (vnode)
8084 		inc_vnode_ref_count(vnode);
8085 
8086 	mutex_unlock(&context->io_mutex);
8087 
8088 	if (vnode) {
8089 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8090 		put_vnode(vnode);
8091 	} else
8092 		status = B_ERROR;
8093 
8094 	return status;
8095 }
8096 
8097 
8098 static status_t
8099 set_cwd(int fd, char* path, bool kernel)
8100 {
8101 	struct io_context* context;
8102 	struct vnode* oldDirectory;
8103 
8104 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8105 
8106 	// Get vnode for passed path, and bail if it failed
8107 	VnodePutter vnode;
8108 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8109 	if (status < 0)
8110 		return status;
8111 
8112 	if (!S_ISDIR(vnode->Type())) {
8113 		// nope, can't cwd to here
8114 		return B_NOT_A_DIRECTORY;
8115 	}
8116 
8117 	// We need to have the permission to enter the directory, too
8118 	if (HAS_FS_CALL(vnode, access)) {
8119 		status = FS_CALL(vnode.Get(), access, X_OK);
8120 		if (status != B_OK)
8121 			return status;
8122 	}
8123 
8124 	// Get current io context and lock
8125 	context = get_current_io_context(kernel);
8126 	mutex_lock(&context->io_mutex);
8127 
8128 	// save the old current working directory first
8129 	oldDirectory = context->cwd;
8130 	context->cwd = vnode.Detach();
8131 
8132 	mutex_unlock(&context->io_mutex);
8133 
8134 	if (oldDirectory)
8135 		put_vnode(oldDirectory);
8136 
8137 	return B_NO_ERROR;
8138 }
8139 
8140 
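/*!	\brief Copies a null-terminated name from userland, returning
	\c B_NAME_TOO_LONG instead of silently truncating if \a from (including
	the terminating null) does not fit into \a length bytes.
*/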
8141 static status_t
8142 user_copy_name(char* to, const char* from, size_t length)
8143 {
8144 	ssize_t len = user_strlcpy(to, from, length);
8145 	if (len < 0)
8146 		return len;
8147 	if (len >= (ssize_t)length)
8148 		return B_NAME_TOO_LONG;
8149 	return B_OK;
8150 }
8151 
8152 
8153 //	#pragma mark - kernel mirrored syscalls
8154 
8155 
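/*!	\brief Mounts the file system \a fsName (or, if \c NULL, the one the
	disk device manager recognizes on the partition) from \a device at
	\a path.

	For illustration only, an in-kernel caller might look like this (the
	device path and flag are merely example values):
	\code
	dev_t volume = _kern_mount("/mnt", "/dev/disk/scsi/0/0/0/raw", "bfs",
		B_MOUNT_READ_ONLY, NULL, 0);
	if (volume < 0)
		dprintf("mount failed: %s\n", strerror(volume));
	\endcode
*/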
8156 dev_t
8157 _kern_mount(const char* path, const char* device, const char* fsName,
8158 	uint32 flags, const char* args, size_t argsLength)
8159 {
8160 	KPath pathBuffer(path);
8161 	if (pathBuffer.InitCheck() != B_OK)
8162 		return B_NO_MEMORY;
8163 
8164 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8165 }
8166 
8167 
8168 status_t
8169 _kern_unmount(const char* path, uint32 flags)
8170 {
8171 	KPath pathBuffer(path);
8172 	if (pathBuffer.InitCheck() != B_OK)
8173 		return B_NO_MEMORY;
8174 
8175 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8176 }
8177 
8178 
8179 status_t
8180 _kern_read_fs_info(dev_t device, struct fs_info* info)
8181 {
8182 	if (info == NULL)
8183 		return B_BAD_VALUE;
8184 
8185 	return fs_read_info(device, info);
8186 }
8187 
8188 
8189 status_t
8190 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8191 {
8192 	if (info == NULL)
8193 		return B_BAD_VALUE;
8194 
8195 	return fs_write_info(device, info, mask);
8196 }
8197 
8198 
8199 status_t
8200 _kern_sync(void)
8201 {
8202 	// Note: _kern_sync() is also called from _user_sync()
8203 	int32 cookie = 0;
8204 	dev_t device;
8205 	while ((device = next_dev(&cookie)) >= 0) {
8206 		status_t status = fs_sync(device);
8207 		if (status != B_OK && status != B_BAD_VALUE) {
8208 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8209 				strerror(status));
8210 		}
8211 	}
8212 
8213 	return B_OK;
8214 }
8215 
8216 
8217 dev_t
8218 _kern_next_device(int32* _cookie)
8219 {
8220 	return fs_next_device(_cookie);
8221 }
8222 
8223 
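/*!	\brief Returns information about the next open file descriptor of team
	\a teamID, starting the search at slot \a *_cookie.

	On success \a *_cookie is advanced past the returned slot, so repeated
	calls enumerate all open descriptors of the team.
*/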
8224 status_t
8225 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8226 	size_t infoSize)
8227 {
8228 	if (infoSize != sizeof(fd_info))
8229 		return B_BAD_VALUE;
8230 
8231 	// get the team
8232 	Team* team = Team::Get(teamID);
8233 	if (team == NULL)
8234 		return B_BAD_TEAM_ID;
8235 	BReference<Team> teamReference(team, true);
8236 
8237 	// now that we have a team reference, its I/O context won't go away
8238 	io_context* context = team->io_context;
8239 	MutexLocker contextLocker(context->io_mutex);
8240 
8241 	uint32 slot = *_cookie;
8242 
8243 	struct file_descriptor* descriptor;
8244 	while (slot < context->table_size
8245 		&& (descriptor = context->fds[slot]) == NULL) {
8246 		slot++;
8247 	}
8248 
8249 	if (slot >= context->table_size)
8250 		return B_ENTRY_NOT_FOUND;
8251 
8252 	info->number = slot;
8253 	info->open_mode = descriptor->open_mode;
8254 
8255 	struct vnode* vnode = fd_vnode(descriptor);
8256 	if (vnode != NULL) {
8257 		info->device = vnode->device;
8258 		info->node = vnode->id;
8259 	} else if (descriptor->u.mount != NULL) {
8260 		info->device = descriptor->u.mount->id;
8261 		info->node = -1;
8262 	}
8263 
8264 	*_cookie = slot + 1;
8265 	return B_OK;
8266 }
8267 
8268 
8269 int
8270 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8271 	int perms)
8272 {
8273 	if ((openMode & O_CREAT) != 0) {
8274 		return file_create_entry_ref(device, inode, name, openMode, perms,
8275 			true);
8276 	}
8277 
8278 	return file_open_entry_ref(device, inode, name, openMode, true);
8279 }
8280 
8281 
8282 /*!	\brief Opens a node specified by a FD + path pair.
8283 
8284 	At least one of \a fd and \a path must be specified.
8285 	If only \a fd is given, the function opens the node identified by this
8286 	FD. If only a path is given, this path is opened. If both are given and
8287 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8288 	of the directory (!) identified by \a fd.
8289 
8290 	\param fd The FD. May be < 0.
8291 	\param path The absolute or relative path. May be \c NULL.
8292 	\param openMode The open mode.
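	\param perms The access permissions to use when the node is created
		   (i.e. when \c O_CREAT is set in \a openMode).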
8293 	\return A FD referring to the newly opened node, or an error code,
8294 			if an error occurs.
8295 */
8296 int
8297 _kern_open(int fd, const char* path, int openMode, int perms)
8298 {
8299 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8300 	if (pathBuffer.InitCheck() != B_OK)
8301 		return B_NO_MEMORY;
8302 
8303 	if ((openMode & O_CREAT) != 0)
8304 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8305 
8306 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8307 }
8308 
8309 
8310 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8311 
8312 	The supplied name may be \c NULL, in which case the directory identified
8313 	by \a device and \a inode will be opened. Otherwise \a device and
8314 	\a inode identify the parent directory of the directory to be opened
8315 	and \a name its entry name.
8316 
8317 	\param device If \a name is specified the ID of the device the parent
8318 		   directory of the directory to be opened resides on, otherwise
8319 		   the device of the directory itself.
8320 	\param inode If \a name is specified the node ID of the parent
8321 		   directory of the directory to be opened, otherwise the node ID of
8322 		   directory itself.
8323 	\param name The entry name of the directory to be opened. If \c NULL,
8324 		   the \a device + \a inode pair identify the node to be opened.
8325 	\return The FD of the newly opened directory or an error code, if
8326 			something went wrong.
8327 */
8328 int
8329 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8330 {
8331 	return dir_open_entry_ref(device, inode, name, true);
8332 }
8333 
8334 
8335 /*!	\brief Opens a directory specified by a FD + path pair.
8336 
8337 	At least one of \a fd and \a path must be specified.
8338 	If only \a fd is given, the function opens the directory identified by this
8339 	FD. If only a path is given, this path is opened. If both are given and
8340 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8341 	of the directory (!) identified by \a fd.
8342 
8343 	\param fd The FD. May be < 0.
8344 	\param path The absolute or relative path. May be \c NULL.
8345 	\return A FD referring to the newly opened directory, or an error code,
8346 			if an error occurs.
8347 */
8348 int
8349 _kern_open_dir(int fd, const char* path)
8350 {
8351 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8352 	if (pathBuffer.InitCheck() != B_OK)
8353 		return B_NO_MEMORY;
8354 
8355 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8356 }
8357 
8358 
8359 status_t
8360 _kern_fcntl(int fd, int op, size_t argument)
8361 {
8362 	return common_fcntl(fd, op, argument, true);
8363 }
8364 
8365 
8366 status_t
8367 _kern_fsync(int fd)
8368 {
8369 	return common_sync(fd, true);
8370 }
8371 
8372 
8373 status_t
8374 _kern_lock_node(int fd)
8375 {
8376 	return common_lock_node(fd, true);
8377 }
8378 
8379 
8380 status_t
8381 _kern_unlock_node(int fd)
8382 {
8383 	return common_unlock_node(fd, true);
8384 }
8385 
8386 
8387 status_t
8388 _kern_preallocate(int fd, off_t offset, off_t length)
8389 {
8390 	return common_preallocate(fd, offset, length, true);
8391 }
8392 
8393 
8394 status_t
8395 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8396 	int perms)
8397 {
8398 	return dir_create_entry_ref(device, inode, name, perms, true);
8399 }
8400 
8401 
8402 /*!	\brief Creates a directory specified by a FD + path pair.
8403 
8404 	\a path must always be specified (it contains the name of the new directory
8405 	at least). If only a path is given, this path identifies the location at
8406 	which the directory shall be created. If both \a fd and \a path are given
8407 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8408 	of the directory (!) identified by \a fd.
8409 
8410 	\param fd The FD. May be < 0.
8411 	\param path The absolute or relative path. Must not be \c NULL.
8412 	\param perms The access permissions the new directory shall have.
8413 	\return \c B_OK, if the directory has been created successfully, another
8414 			error code otherwise.
8415 */
8416 status_t
8417 _kern_create_dir(int fd, const char* path, int perms)
8418 {
8419 	KPath pathBuffer(path, KPath::DEFAULT);
8420 	if (pathBuffer.InitCheck() != B_OK)
8421 		return B_NO_MEMORY;
8422 
8423 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8424 }
8425 
8426 
8427 status_t
8428 _kern_remove_dir(int fd, const char* path)
8429 {
8430 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8431 	if (pathBuffer.InitCheck() != B_OK)
8432 		return B_NO_MEMORY;
8433 
8434 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8435 }
8436 
8437 
8438 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8439 
8440 	At least one of \a fd and \a path must be specified.
8441 	If only \a fd is given, the symlink to be read is the node
8442 	identified by this FD. If only a path is given, this path identifies the
8443 	symlink to be read. If both are given and the path is absolute, \a fd is
8444 	ignored; a relative path is reckoned off of the directory (!) identified
8445 	by \a fd.
8446 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8447 	will still be updated to reflect the required buffer size.
8448 
8449 	\param fd The FD. May be < 0.
8450 	\param path The absolute or relative path. May be \c NULL.
8451 	\param buffer The buffer into which the contents of the symlink shall be
8452 		   written.
8453 	\param _bufferSize A pointer to the size of the supplied buffer.
8454 	\return The length of the link on success or an appropriate error code.
8455 */
8456 status_t
8457 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8458 {
8459 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8460 	if (pathBuffer.InitCheck() != B_OK)
8461 		return B_NO_MEMORY;
8462 
8463 	return common_read_link(fd, pathBuffer.LockBuffer(),
8464 		buffer, _bufferSize, true);
8465 }
8466 
8467 
8468 /*!	\brief Creates a symlink specified by a FD + path pair.
8469 
8470 	\a path must always be specified (it contains the name of the new symlink
8471 	at least). If only a path is given, this path identifies the location at
8472 	which the symlink shall be created. If both \a fd and \a path are given and
8473 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8474 	of the directory (!) identified by \a fd.
8475 
8476 	\param fd The FD. May be < 0.
8477 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the new symlink shall point to.
8478 	\param mode The access permissions the new symlink shall have.
8479 	\return \c B_OK, if the symlink has been created successfully, another
8480 			error code otherwise.
8481 */
8482 status_t
8483 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8484 {
8485 	KPath pathBuffer(path);
8486 	if (pathBuffer.InitCheck() != B_OK)
8487 		return B_NO_MEMORY;
8488 
8489 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8490 		toPath, mode, true);
8491 }
8492 
8493 
8494 status_t
8495 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8496 	bool traverseLeafLink)
8497 {
8498 	KPath pathBuffer(path);
8499 	KPath toPathBuffer(toPath);
8500 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8501 		return B_NO_MEMORY;
8502 
8503 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8504 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8505 }
8506 
8507 
8508 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8509 
8510 	\a path must always be specified (it contains at least the name of the entry
8511 	to be deleted). If only a path is given, this path identifies the entry
8512 	directly. If both \a fd and \a path are given and the path is absolute,
8513 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8514 	identified by \a fd.
8515 
8516 	\param fd The FD. May be < 0.
8517 	\param path The absolute or relative path. Must not be \c NULL.
8518 	\return \c B_OK, if the entry has been removed successfully, another
8519 			error code otherwise.
8520 */
8521 status_t
8522 _kern_unlink(int fd, const char* path)
8523 {
8524 	KPath pathBuffer(path);
8525 	if (pathBuffer.InitCheck() != B_OK)
8526 		return B_NO_MEMORY;
8527 
8528 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8529 }
8530 
8531 
8532 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8533 		   by another FD + path pair.
8534 
8535 	\a oldPath and \a newPath must always be specified (they contain at least
8536 	the name of the entry). If only a path is given, this path identifies the
8537 	entry directly. If both a FD and a path are given and the path is absolute,
8538 	the FD is ignored; a relative path is reckoned off of the directory (!)
8539 	identified by the respective FD.
8540 
8541 	\param oldFD The FD of the old location. May be < 0.
8542 	\param oldPath The absolute or relative path of the old location. Must not
8543 		   be \c NULL.
8544 	\param newFD The FD of the new location. May be < 0.
8545 	\param newPath The absolute or relative path of the new location. Must not
8546 		   be \c NULL.
8547 	\return \c B_OK, if the entry has been moved successfully, another
8548 			error code otherwise.
8549 */
8550 status_t
8551 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8552 {
8553 	KPath oldPathBuffer(oldPath);
8554 	KPath newPathBuffer(newPath);
8555 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8556 		return B_NO_MEMORY;
8557 
8558 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8559 		newFD, newPathBuffer.LockBuffer(), true);
8560 }
8561 
8562 
8563 status_t
8564 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8565 {
8566 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8567 	if (pathBuffer.InitCheck() != B_OK)
8568 		return B_NO_MEMORY;
8569 
8570 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8571 		true);
8572 }
8573 
8574 
8575 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8576 
8577 	If only \a fd is given, the stat operation associated with the type
8578 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8579 	given, this path identifies the entry for whose node to retrieve the
8580 	stat data. If both \a fd and \a path are given and the path is absolute,
8581 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8582 	identified by \a fd and specifies the entry whose stat data shall be
8583 	retrieved.
8584 
8585 	\param fd The FD. May be < 0.
8586 	\param path The absolute or relative path. May be \c NULL.
8587 	\param traverseLeafLink If \a path is given, \c true specifies that the
8588 		   function shall not stick to symlinks, but traverse them.
8589 	\param stat The buffer the stat data shall be written into.
8590 	\param statSize The size of the supplied stat buffer.
8591 	\return \c B_OK, if the stat data have been read successfully, another
8592 			error code otherwise.
8593 */
8594 status_t
8595 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8596 	struct stat* stat, size_t statSize)
8597 {
8598 	struct stat completeStat;
8599 	struct stat* originalStat = NULL;
8600 	status_t status;
8601 
8602 	if (statSize > sizeof(struct stat))
8603 		return B_BAD_VALUE;
8604 
8605 	// this supports different stat extensions
8606 	if (statSize < sizeof(struct stat)) {
8607 		originalStat = stat;
8608 		stat = &completeStat;
8609 	}
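	// vfs_read_stat() below always fills in a complete struct stat; for a
	// caller using a smaller (older) stat layout we read into completeStat
	// and copy back only the first statSize bytes.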
8610 
8611 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8612 
8613 	if (status == B_OK && originalStat != NULL)
8614 		memcpy(originalStat, stat, statSize);
8615 
8616 	return status;
8617 }
8618 
8619 
8620 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8621 
8622 	If only \a fd is given, the stat operation associated with the type
8623 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8624 	given, this path identifies the entry for whose node to write the
8625 	stat data. If both \a fd and \a path are given and the path is absolute,
8626 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8627 	identified by \a fd and specifies the entry whose stat data shall be
8628 	written.
8629 
8630 	\param fd The FD. May be < 0.
8631 	\param path The absolute or relative path. May be \c NULL.
8632 	\param traverseLeafLink If \a path is given, \c true specifies that the
8633 		   function shall not stick to symlinks, but traverse them.
8634 	\param stat The buffer containing the stat data to be written.
8635 	\param statSize The size of the supplied stat buffer.
8636 	\param statMask A mask specifying which parts of the stat data shall be
8637 		   written.
8638 	\return \c B_OK, if the stat data have been written successfully,
8639 			another error code otherwise.
8640 */
8641 status_t
8642 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8643 	const struct stat* stat, size_t statSize, int statMask)
8644 {
8645 	struct stat completeStat;
8646 
8647 	if (statSize > sizeof(struct stat))
8648 		return B_BAD_VALUE;
8649 
8650 	// this supports different stat extensions
8651 	if (statSize < sizeof(struct stat)) {
8652 		memset((uint8*)&completeStat + statSize, 0,
8653 			sizeof(struct stat) - statSize);
8654 		memcpy(&completeStat, stat, statSize);
8655 		stat = &completeStat;
8656 	}
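	// The caller's (smaller) stat has been widened to a full struct stat
	// with a zeroed tail; statMask still decides which fields get applied.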
8657 
8658 	status_t status;
8659 
8660 	if (path != NULL) {
8661 		// path given: write the stat of the node referred to by (fd, path)
8662 		KPath pathBuffer(path);
8663 		if (pathBuffer.InitCheck() != B_OK)
8664 			return B_NO_MEMORY;
8665 
8666 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8667 			traverseLeafLink, stat, statMask, true);
8668 	} else {
8669 		// no path given: get the FD and use the FD operation
8670 		FileDescriptorPutter descriptor
8671 			(get_fd(get_current_io_context(true), fd));
8672 		if (!descriptor.IsSet())
8673 			return B_FILE_ERROR;
8674 
8675 		if (descriptor->ops->fd_write_stat)
8676 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8677 		else
8678 			status = B_UNSUPPORTED;
8679 	}
8680 
8681 	return status;
8682 }
8683 
8684 
8685 int
8686 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8687 {
8688 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8689 	if (pathBuffer.InitCheck() != B_OK)
8690 		return B_NO_MEMORY;
8691 
8692 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8693 }
8694 
8695 
8696 int
8697 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8698 	int openMode)
8699 {
8700 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8701 	if (pathBuffer.InitCheck() != B_OK)
8702 		return B_NO_MEMORY;
8703 
8704 	if ((openMode & O_CREAT) != 0) {
8705 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8706 			true);
8707 	}
8708 
8709 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8710 }
8711 
8712 
8713 status_t
8714 _kern_remove_attr(int fd, const char* name)
8715 {
8716 	return attr_remove(fd, name, true);
8717 }
8718 
8719 
8720 status_t
8721 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8722 	const char* toName)
8723 {
8724 	return attr_rename(fromFile, fromName, toFile, toName, true);
8725 }
8726 
8727 
8728 int
8729 _kern_open_index_dir(dev_t device)
8730 {
8731 	return index_dir_open(device, true);
8732 }
8733 
8734 
8735 status_t
8736 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8737 {
8738 	return index_create(device, name, type, flags, true);
8739 }
8740 
8741 
8742 status_t
8743 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8744 {
8745 	return index_name_read_stat(device, name, stat, true);
8746 }
8747 
8748 
8749 status_t
8750 _kern_remove_index(dev_t device, const char* name)
8751 {
8752 	return index_remove(device, name, true);
8753 }
8754 
8755 
8756 status_t
8757 _kern_getcwd(char* buffer, size_t size)
8758 {
8759 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8760 
8761 	// Call vfs to get current working directory
8762 	return get_cwd(buffer, size, true);
8763 }
8764 
8765 
8766 status_t
8767 _kern_setcwd(int fd, const char* path)
8768 {
8769 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8770 	if (pathBuffer.InitCheck() != B_OK)
8771 		return B_NO_MEMORY;
8772 
8773 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8774 }
8775 
8776 
8777 //	#pragma mark - userland syscalls
8778 
8779 
8780 dev_t
8781 _user_mount(const char* userPath, const char* userDevice,
8782 	const char* userFileSystem, uint32 flags, const char* userArgs,
8783 	size_t argsLength)
8784 {
8785 	char fileSystem[B_FILE_NAME_LENGTH];
8786 	KPath path, device;
8787 	char* args = NULL;
8788 	status_t status;
8789 
8790 	if (!IS_USER_ADDRESS(userPath))
8791 		return B_BAD_ADDRESS;
8792 
8793 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8794 		return B_NO_MEMORY;
8795 
8796 	status = user_copy_name(path.LockBuffer(), userPath,
8797 		B_PATH_NAME_LENGTH);
8798 	if (status != B_OK)
8799 		return status;
8800 	path.UnlockBuffer();
8801 
8802 	if (userFileSystem != NULL) {
8803 		if (!IS_USER_ADDRESS(userFileSystem))
8804 			return B_BAD_ADDRESS;
8805 
8806 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8807 		if (status != B_OK)
8808 			return status;
8809 	}
8810 
8811 	if (userDevice != NULL) {
8812 		if (!IS_USER_ADDRESS(userDevice))
8813 			return B_BAD_ADDRESS;
8814 
8815 		status = user_copy_name(device.LockBuffer(), userDevice,
8816 			B_PATH_NAME_LENGTH);
8817 		if (status != B_OK)
8818 			return status;
8819 		device.UnlockBuffer();
8820 	}
8821 
8822 	if (userArgs != NULL && argsLength > 0) {
8823 		if (!IS_USER_ADDRESS(userArgs))
8824 			return B_BAD_ADDRESS;
8825 
8826 		// this is a safety restriction
8827 		if (argsLength >= 65536)
8828 			return B_NAME_TOO_LONG;
8829 
8830 		args = (char*)malloc(argsLength + 1);
8831 		if (args == NULL)
8832 			return B_NO_MEMORY;
8833 
8834 		status = user_copy_name(args, userArgs, argsLength + 1);
8835 		if (status != B_OK) {
8836 			free(args);
8837 			return status;
8838 		}
8839 	}
8840 
8841 	status = fs_mount(path.LockBuffer(),
8842 		userDevice != NULL ? device.Path() : NULL,
8843 		userFileSystem ? fileSystem : NULL, flags, args, false);
8844 
8845 	free(args);
8846 	return status;
8847 }
8848 
8849 
8850 status_t
8851 _user_unmount(const char* userPath, uint32 flags)
8852 {
8853 	if (!IS_USER_ADDRESS(userPath))
8854 		return B_BAD_ADDRESS;
8855 
8856 	KPath pathBuffer;
8857 	if (pathBuffer.InitCheck() != B_OK)
8858 		return B_NO_MEMORY;
8859 
8860 	char* path = pathBuffer.LockBuffer();
8861 
8862 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8863 	if (status != B_OK)
8864 		return status;
8865 
8866 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8867 }
8868 
8869 
8870 status_t
8871 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8872 {
8873 	struct fs_info info;
8874 	status_t status;
8875 
8876 	if (userInfo == NULL)
8877 		return B_BAD_VALUE;
8878 
8879 	if (!IS_USER_ADDRESS(userInfo))
8880 		return B_BAD_ADDRESS;
8881 
8882 	status = fs_read_info(device, &info);
8883 	if (status != B_OK)
8884 		return status;
8885 
8886 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8887 		return B_BAD_ADDRESS;
8888 
8889 	return B_OK;
8890 }
8891 
8892 
8893 status_t
8894 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8895 {
8896 	struct fs_info info;
8897 
8898 	if (userInfo == NULL)
8899 		return B_BAD_VALUE;
8900 
8901 	if (!IS_USER_ADDRESS(userInfo)
8902 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8903 		return B_BAD_ADDRESS;
8904 
8905 	return fs_write_info(device, &info, mask);
8906 }
8907 
8908 
8909 dev_t
8910 _user_next_device(int32* _userCookie)
8911 {
8912 	int32 cookie;
8913 	dev_t device;
8914 
8915 	if (!IS_USER_ADDRESS(_userCookie)
8916 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8917 		return B_BAD_ADDRESS;
8918 
8919 	device = fs_next_device(&cookie);
8920 
8921 	if (device >= B_OK) {
8922 		// update user cookie
8923 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8924 			return B_BAD_ADDRESS;
8925 	}
8926 
8927 	return device;
8928 }
8929 
8930 
8931 status_t
8932 _user_sync(void)
8933 {
8934 	return _kern_sync();
8935 }
8936 
8937 
8938 status_t
8939 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8940 	size_t infoSize)
8941 {
8942 	struct fd_info info;
8943 	uint32 cookie;
8944 
8945 	// only root can do this
8946 	if (geteuid() != 0)
8947 		return B_NOT_ALLOWED;
8948 
8949 	if (infoSize != sizeof(fd_info))
8950 		return B_BAD_VALUE;
8951 
8952 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8953 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8954 		return B_BAD_ADDRESS;
8955 
8956 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8957 	if (status != B_OK)
8958 		return status;
8959 
8960 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8961 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8962 		return B_BAD_ADDRESS;
8963 
8964 	return status;
8965 }
8966 
8967 
8968 status_t
8969 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8970 	char* userPath, size_t pathLength)
8971 {
8972 	if (!IS_USER_ADDRESS(userPath))
8973 		return B_BAD_ADDRESS;
8974 
8975 	KPath path;
8976 	if (path.InitCheck() != B_OK)
8977 		return B_NO_MEMORY;
8978 
8979 	// copy the leaf name onto the stack
8980 	char stackLeaf[B_FILE_NAME_LENGTH];
8981 	if (leaf != NULL) {
8982 		if (!IS_USER_ADDRESS(leaf))
8983 			return B_BAD_ADDRESS;
8984 
8985 		status_t status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8986 		if (status != B_OK)
8987 			return status;
8988 
8989 		leaf = stackLeaf;
8990 	}
8991 
8992 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8993 		false, path.LockBuffer(), path.BufferSize());
8994 	if (status != B_OK)
8995 		return status;
8996 
8997 	path.UnlockBuffer();
8998 
8999 	int length = user_strlcpy(userPath, path.Path(), pathLength);
9000 	if (length < 0)
9001 		return length;
9002 	if (length >= (int)pathLength)
9003 		return B_BUFFER_OVERFLOW;
9004 
9005 	return B_OK;
9006 }
9007 
9008 
9009 status_t
9010 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9011 {
9012 	if (userPath == NULL || buffer == NULL)
9013 		return B_BAD_VALUE;
9014 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9015 		return B_BAD_ADDRESS;
9016 
9017 	// copy path from userland
9018 	KPath pathBuffer;
9019 	if (pathBuffer.InitCheck() != B_OK)
9020 		return B_NO_MEMORY;
9021 	char* path = pathBuffer.LockBuffer();
9022 
9023 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9024 	if (status != B_OK)
9025 		return status;
9026 
9027 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9028 		false);
9029 	if (error != B_OK)
9030 		return error;
9031 
9032 	// copy back to userland
9033 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9034 	if (len < 0)
9035 		return len;
9036 	if (len >= B_PATH_NAME_LENGTH)
9037 		return B_BUFFER_OVERFLOW;
9038 
9039 	return B_OK;
9040 }
9041 
9042 
9043 int
9044 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9045 	int openMode, int perms)
9046 {
9047 	char name[B_FILE_NAME_LENGTH];
9048 
9049 	if (userName == NULL || device < 0 || inode < 0)
9050 		return B_BAD_VALUE;
9051 	if (!IS_USER_ADDRESS(userName))
9052 		return B_BAD_ADDRESS;
9053 	status_t status = user_copy_name(name, userName, sizeof(name));
9054 	if (status != B_OK)
9055 		return status;
9056 
9057 	if ((openMode & O_CREAT) != 0) {
9058 		return file_create_entry_ref(device, inode, name, openMode, perms,
9059 			false);
9060 	}
9061 
9062 	return file_open_entry_ref(device, inode, name, openMode, false);
9063 }
9064 
9065 
9066 int
9067 _user_open(int fd, const char* userPath, int openMode, int perms)
9068 {
9069 	KPath path;
9070 	if (path.InitCheck() != B_OK)
9071 		return B_NO_MEMORY;
9072 
9073 	char* buffer = path.LockBuffer();
9074 
9075 	if (!IS_USER_ADDRESS(userPath))
9076 		return B_BAD_ADDRESS;
9077 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9078 	if (status != B_OK)
9079 		return status;
9080 
9081 	if ((openMode & O_CREAT) != 0)
9082 		return file_create(fd, buffer, openMode, perms, false);
9083 
9084 	return file_open(fd, buffer, openMode, false);
9085 }
9086 
9087 
9088 int
9089 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9090 {
9091 	if (userName != NULL) {
9092 		char name[B_FILE_NAME_LENGTH];
9093 
9094 		if (!IS_USER_ADDRESS(userName))
9095 			return B_BAD_ADDRESS;
9096 		status_t status = user_copy_name(name, userName, sizeof(name));
9097 		if (status != B_OK)
9098 			return status;
9099 
9100 		return dir_open_entry_ref(device, inode, name, false);
9101 	}
9102 	return dir_open_entry_ref(device, inode, NULL, false);
9103 }
9104 
9105 
9106 int
9107 _user_open_dir(int fd, const char* userPath)
9108 {
9109 	if (userPath == NULL)
9110 		return dir_open(fd, NULL, false);
9111 
9112 	KPath path;
9113 	if (path.InitCheck() != B_OK)
9114 		return B_NO_MEMORY;
9115 
9116 	char* buffer = path.LockBuffer();
9117 
9118 	if (!IS_USER_ADDRESS(userPath))
9119 		return B_BAD_ADDRESS;
9120 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9121 	if (status != B_OK)
9122 		return status;
9123 
9124 	return dir_open(fd, buffer, false);
9125 }
9126 
9127 
9128 /*!	\brief Opens a directory's parent directory and returns the entry name
9129 		   of the former.
9130 
9131 	Aside from returning the directory's entry name, this method is
9132 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9133 	equivalent if \a userName is \c NULL.
9134 
9135 	If a name buffer is supplied and the name does not fit the buffer, the
9136 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9137 
9138 	\param fd A FD referring to a directory.
9139 	\param userName Buffer the directory's entry name shall be written into.
9140 		   May be \c NULL.
9141 	\param nameLength Size of the name buffer.
9142 	\return The file descriptor of the opened parent directory, if everything
9143 			went fine, an error code otherwise.
9144 */
9145 int
9146 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9147 {
9148 	bool kernel = false;
9149 
9150 	if (userName && !IS_USER_ADDRESS(userName))
9151 		return B_BAD_ADDRESS;
9152 
9153 	// open the parent dir
9154 	int parentFD = dir_open(fd, (char*)"..", kernel);
9155 	if (parentFD < 0)
9156 		return parentFD;
9157 	FDCloser fdCloser(parentFD, kernel);
9158 
9159 	if (userName) {
9160 		// get the vnodes
9161 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9162 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9163 		VnodePutter parentVNodePutter(parentVNode);
9164 		VnodePutter dirVNodePutter(dirVNode);
9165 		if (!parentVNode || !dirVNode)
9166 			return B_FILE_ERROR;
9167 
9168 		// get the vnode name
9169 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9170 		struct dirent* buffer = (struct dirent*)_buffer;
9171 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9172 			sizeof(_buffer), get_current_io_context(false));
9173 		if (status != B_OK)
9174 			return status;
9175 
9176 		// copy the name to the userland buffer
9177 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9178 		if (len < 0)
9179 			return len;
9180 		if (len >= (int)nameLength)
9181 			return B_BUFFER_OVERFLOW;
9182 	}
9183 
9184 	return fdCloser.Detach();
9185 }
9186 
9187 
9188 status_t
9189 _user_fcntl(int fd, int op, size_t argument)
9190 {
9191 	status_t status = common_fcntl(fd, op, argument, false);
9192 	if (op == F_SETLKW)
9193 		syscall_restart_handle_post(status);
9194 
9195 	return status;
9196 }
9197 
9198 
9199 status_t
9200 _user_fsync(int fd)
9201 {
9202 	return common_sync(fd, false);
9203 }
9204 
9205 
9206 status_t
9207 _user_flock(int fd, int operation)
9208 {
9209 	FUNCTION(("_user_fcntl(fd = %d, op = %d)\n", fd, operation));
9210 
9211 	// Check if the operation is valid
9212 	switch (operation & ~LOCK_NB) {
9213 		case LOCK_UN:
9214 		case LOCK_SH:
9215 		case LOCK_EX:
9216 			break;
9217 
9218 		default:
9219 			return B_BAD_VALUE;
9220 	}
9221 
9222 	struct vnode* vnode;
9223 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9224 	if (!descriptor.IsSet())
9225 		return B_FILE_ERROR;
9226 
9227 	if (descriptor->type != FDTYPE_FILE)
9228 		return B_BAD_VALUE;
9229 
9230 	struct flock flock;
9231 	flock.l_start = 0;
9232 	flock.l_len = OFF_MAX;
9233 	flock.l_whence = 0;
9234 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
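	// flock() locks always cover the whole file, so emulate them with an
	// advisory byte-range lock spanning [0, OFF_MAX].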
9235 
9236 	status_t status;
9237 	if ((operation & LOCK_UN) != 0) {
9238 		if (HAS_FS_CALL(vnode, release_lock))
9239 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9240 		else
9241 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9242 	} else {
9243 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9244 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9245 				(operation & LOCK_NB) == 0);
9246 		} else {
9247 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9248 				(operation & LOCK_NB) == 0);
9249 		}
9250 	}
9251 
9252 	syscall_restart_handle_post(status);
9253 
9254 	return status;
9255 }
9256 
9257 
9258 status_t
9259 _user_lock_node(int fd)
9260 {
9261 	return common_lock_node(fd, false);
9262 }
9263 
9264 
9265 status_t
9266 _user_unlock_node(int fd)
9267 {
9268 	return common_unlock_node(fd, false);
9269 }
9270 
9271 
9272 status_t
9273 _user_preallocate(int fd, off_t offset, off_t length)
9274 {
9275 	return common_preallocate(fd, offset, length, false);
9276 }
9277 
9278 
9279 status_t
9280 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9281 	int perms)
9282 {
9283 	char name[B_FILE_NAME_LENGTH];
9284 	status_t status;
9285 
9286 	if (!IS_USER_ADDRESS(userName))
9287 		return B_BAD_ADDRESS;
9288 
9289 	status = user_copy_name(name, userName, sizeof(name));
9290 	if (status != B_OK)
9291 		return status;
9292 
9293 	return dir_create_entry_ref(device, inode, name, perms, false);
9294 }
9295 
9296 
9297 status_t
9298 _user_create_dir(int fd, const char* userPath, int perms)
9299 {
9300 	KPath pathBuffer;
9301 	if (pathBuffer.InitCheck() != B_OK)
9302 		return B_NO_MEMORY;
9303 
9304 	char* path = pathBuffer.LockBuffer();
9305 
9306 	if (!IS_USER_ADDRESS(userPath))
9307 		return B_BAD_ADDRESS;
9308 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9309 	if (status != B_OK)
9310 		return status;
9311 
9312 	return dir_create(fd, path, perms, false);
9313 }
9314 
9315 
9316 status_t
9317 _user_remove_dir(int fd, const char* userPath)
9318 {
9319 	KPath pathBuffer;
9320 	if (pathBuffer.InitCheck() != B_OK)
9321 		return B_NO_MEMORY;
9322 
9323 	char* path = pathBuffer.LockBuffer();
9324 
9325 	if (userPath != NULL) {
9326 		if (!IS_USER_ADDRESS(userPath))
9327 			return B_BAD_ADDRESS;
9328 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9329 		if (status != B_OK)
9330 			return status;
9331 	}
9332 
9333 	return dir_remove(fd, userPath ? path : NULL, false);
9334 }
9335 
9336 
9337 status_t
9338 _user_read_link(int fd, const char* userPath, char* userBuffer,
9339 	size_t* userBufferSize)
9340 {
9341 	KPath pathBuffer, linkBuffer;
9342 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9343 		return B_NO_MEMORY;
9344 
9345 	size_t bufferSize;
9346 
9347 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9348 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9349 		return B_BAD_ADDRESS;
9350 
9351 	char* path = pathBuffer.LockBuffer();
9352 	char* buffer = linkBuffer.LockBuffer();
9353 
9354 	if (userPath) {
9355 		if (!IS_USER_ADDRESS(userPath))
9356 			return B_BAD_ADDRESS;
9357 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9358 		if (status != B_OK)
9359 			return status;
9360 
9361 		if (bufferSize > B_PATH_NAME_LENGTH)
9362 			bufferSize = B_PATH_NAME_LENGTH;
9363 	}
9364 
9365 	size_t newBufferSize = bufferSize;
9366 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9367 		&newBufferSize, false);
9368 
9369 	// we also update the bufferSize in case of errors
9370 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9371 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9372 		return B_BAD_ADDRESS;
9373 
9374 	if (status != B_OK)
9375 		return status;
9376 
9377 	bufferSize = min_c(newBufferSize, bufferSize);
9378 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9379 		return B_BAD_ADDRESS;
9380 
9381 	return B_OK;
9382 }
9383 
9384 
9385 status_t
9386 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9387 	int mode)
9388 {
9389 	KPath pathBuffer;
9390 	KPath toPathBuffer;
9391 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9392 		return B_NO_MEMORY;
9393 
9394 	char* path = pathBuffer.LockBuffer();
9395 	char* toPath = toPathBuffer.LockBuffer();
9396 
9397 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9398 		return B_BAD_ADDRESS;
9399 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9400 	if (status != B_OK)
9401 		return status;
9402 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9403 	if (status != B_OK)
9404 		return status;
9405 
9406 	return common_create_symlink(fd, path, toPath, mode, false);
9407 }
9408 
9409 
9410 status_t
9411 _user_create_link(int pathFD, const char* userPath, int toFD,
9412 	const char* userToPath, bool traverseLeafLink)
9413 {
9414 	KPath pathBuffer;
9415 	KPath toPathBuffer;
9416 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9417 		return B_NO_MEMORY;
9418 
9419 	char* path = pathBuffer.LockBuffer();
9420 	char* toPath = toPathBuffer.LockBuffer();
9421 
9422 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9423 		return B_BAD_ADDRESS;
9424 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9425 	if (status != B_OK)
9426 		return status;
9427 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9428 	if (status != B_OK)
9429 		return status;
9430 
9431 	status = check_path(toPath);
9432 	if (status != B_OK)
9433 		return status;
9434 
9435 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9436 		false);
9437 }
9438 
9439 
9440 status_t
9441 _user_unlink(int fd, const char* userPath)
9442 {
9443 	KPath pathBuffer;
9444 	if (pathBuffer.InitCheck() != B_OK)
9445 		return B_NO_MEMORY;
9446 
9447 	char* path = pathBuffer.LockBuffer();
9448 
9449 	if (!IS_USER_ADDRESS(userPath))
9450 		return B_BAD_ADDRESS;
9451 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9452 	if (status != B_OK)
9453 		return status;
9454 
9455 	return common_unlink(fd, path, false);
9456 }
9457 
9458 
9459 status_t
9460 _user_rename(int oldFD, const char* userOldPath, int newFD,
9461 	const char* userNewPath)
9462 {
9463 	KPath oldPathBuffer;
9464 	KPath newPathBuffer;
9465 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9466 		return B_NO_MEMORY;
9467 
9468 	char* oldPath = oldPathBuffer.LockBuffer();
9469 	char* newPath = newPathBuffer.LockBuffer();
9470 
9471 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9472 		return B_BAD_ADDRESS;
9473 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9474 	if (status != B_OK)
9475 		return status;
9476 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9477 	if (status != B_OK)
9478 		return status;
9479 
9480 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9481 }
9482 
9483 
9484 status_t
9485 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9486 {
9487 	KPath pathBuffer;
9488 	if (pathBuffer.InitCheck() != B_OK)
9489 		return B_NO_MEMORY;
9490 
9491 	char* path = pathBuffer.LockBuffer();
9492 
9493 	if (!IS_USER_ADDRESS(userPath))
9494 		return B_BAD_ADDRESS;
9495 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9496 	if (status != B_OK)
9497 		return status;
9498 
9499 	// split the path into directory vnode and filename
9500 	char filename[B_FILE_NAME_LENGTH];
9501 	VnodePutter dir;
9502 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9503 	if (status != B_OK)
9504 		return status;
9505 
9506 	// the underlying FS needs to support creating FIFOs
9507 	if (!HAS_FS_CALL(dir, create_special_node))
9508 		return B_UNSUPPORTED;
9509 
9510 	// create the entry -- the FIFO subnode is set up automatically
9511 	fs_vnode superVnode;
9512 	ino_t nodeID;
9513 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9514 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9515 
9516 	// create_special_node() acquired a reference for us that we don't need.
9517 	if (status == B_OK)
9518 		put_vnode(dir->mount->volume, nodeID);
9519 
9520 	return status;
9521 }
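
/*	Userland presumably reaches this through the POSIX mkfifo() wrapper; a
	minimal usage sketch (the path is made up for illustration):

		if (mkfifo("/tmp/fifo", 0600) == 0) {
			int readEnd = open("/tmp/fifo", O_RDONLY | O_NONBLOCK);
			if (readEnd >= 0)
				close(readEnd);
		}

	On file systems that lack a create_special_node() hook, the call above
	fails with B_UNSUPPORTED.
*/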
9522 
9523 
9524 status_t
9525 _user_create_pipe(int* userFDs)
9526 {
9527 	// rootfs should support creating FIFOs, but let's be sure
9528 	if (!HAS_FS_CALL(sRoot, create_special_node))
9529 		return B_UNSUPPORTED;
9530 
9531 	// create the node -- the FIFO subnode is set up automatically
9532 	fs_vnode superVnode;
9533 	ino_t nodeID;
9534 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9535 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9536 	if (status != B_OK)
9537 		return status;
9538 
9539 	// We've got one reference to the node and need another one.
9540 	struct vnode* vnode;
9541 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9542 	if (status != B_OK) {
9543 		// this should not happen
9544 		dprintf("_user_create_pipe(): Failed to lookup vnode (%" B_PRIdDEV ", "
9545 			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9546 		return status;
9547 	}
9548 
9549 	// Everything looks good so far. Open one FD for reading and one for
9550 	// writing.
9551 	int fds[2];
9552 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9553 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9554 
9555 	FDCloser closer0(fds[0], false);
9556 	FDCloser closer1(fds[1], false);
9557 
9558 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9559 
9560 	// copy FDs to userland
9561 	if (status == B_OK) {
9562 		if (!IS_USER_ADDRESS(userFDs)
9563 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9564 			status = B_BAD_ADDRESS;
9565 		}
9566 	}
9567 
9568 	// keep the FDs if everything went fine
9569 	if (status == B_OK) {
9570 		closer0.Detach();
9571 		closer1.Detach();
9572 	}
9573 
9574 	return status;
9575 }
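
/*	The POSIX-level counterpart is pipe(); a minimal sketch, relying only
	on the FD order established above (fds[0] opened O_RDONLY, fds[1]
	opened O_WRONLY):

		int fds[2];
		if (pipe(fds) == 0) {
			char byte = 'x';
			write(fds[1], &byte, 1);	// write end
			read(fds[0], &byte, 1);		// read end
			close(fds[0]);
			close(fds[1]);
		}
*/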
9576 
9577 
9578 status_t
9579 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9580 {
9581 	KPath pathBuffer;
9582 	if (pathBuffer.InitCheck() != B_OK)
9583 		return B_NO_MEMORY;
9584 
9585 	char* path = pathBuffer.LockBuffer();
9586 
9587 	if (!IS_USER_ADDRESS(userPath))
9588 		return B_BAD_ADDRESS;
9589 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9590 	if (status != B_OK)
9591 		return status;
9592 
9593 	return common_access(fd, path, mode, effectiveUserGroup, false);
9594 }
9595 
9596 
9597 status_t
9598 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9599 	struct stat* userStat, size_t statSize)
9600 {
9601 	struct stat stat = {0};
9602 	status_t status;
9603 
9604 	if (statSize > sizeof(struct stat))
9605 		return B_BAD_VALUE;
9606 
9607 	if (!IS_USER_ADDRESS(userStat))
9608 		return B_BAD_ADDRESS;
9609 
9610 	if (userPath != NULL) {
9611 		// path given: get the stat of the node referred to by (fd, path)
9612 		if (!IS_USER_ADDRESS(userPath))
9613 			return B_BAD_ADDRESS;
9614 
9615 		KPath pathBuffer;
9616 		if (pathBuffer.InitCheck() != B_OK)
9617 			return B_NO_MEMORY;
9618 
9619 		char* path = pathBuffer.LockBuffer();
9620 
9621 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9622 		if (status != B_OK)
9623 			return status;
9624 
9625 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9626 	} else {
9627 		// no path given: get the FD and use the FD operation
9628 		FileDescriptorPutter descriptor
9629 			(get_fd(get_current_io_context(false), fd));
9630 		if (!descriptor.IsSet())
9631 			return B_FILE_ERROR;
9632 
9633 		if (descriptor->ops->fd_read_stat)
9634 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9635 		else
9636 			status = B_UNSUPPORTED;
9637 	}
9638 
9639 	if (status != B_OK)
9640 		return status;
9641 
9642 	return user_memcpy(userStat, &stat, statSize);
9643 }
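
/*	The statSize parameter decouples the syscall from the exact layout of
	struct stat: a caller built against an older, smaller struct stat gets
	a correct prefix copied back, while a size larger than the kernel's
	struct is rejected with B_BAD_VALUE. A sketch, assuming the matching
	_kern_read_stat() stub from syscalls.h:

		struct stat st;
		status_t error = _kern_read_stat(-1, "/boot", true, &st,
			sizeof(st));
*/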
9644 
9645 
9646 status_t
9647 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9648 	const struct stat* userStat, size_t statSize, int statMask)
9649 {
9650 	if (statSize > sizeof(struct stat))
9651 		return B_BAD_VALUE;
9652 
9653 	struct stat stat;
9654 
9655 	if (!IS_USER_ADDRESS(userStat)
9656 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9657 		return B_BAD_ADDRESS;
9658 
9659 	// clear additional stat fields
9660 	if (statSize < sizeof(struct stat))
9661 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9662 
9663 	status_t status;
9664 
9665 	if (userPath != NULL) {
9666 		// path given: write the stat of the node referred to by (fd, path)
9667 		if (!IS_USER_ADDRESS(userPath))
9668 			return B_BAD_ADDRESS;
9669 
9670 		KPath pathBuffer;
9671 		if (pathBuffer.InitCheck() != B_OK)
9672 			return B_NO_MEMORY;
9673 
9674 		char* path = pathBuffer.LockBuffer();
9675 
9676 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9677 		if (status != B_OK)
9678 			return status;
9679 
9680 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9681 			statMask, false);
9682 	} else {
9683 		// no path given: get the FD and use the FD operation
9684 		FileDescriptorPutter descriptor
9685 			(get_fd(get_current_io_context(false), fd));
9686 		if (!descriptor.IsSet())
9687 			return B_FILE_ERROR;
9688 
9689 		if (descriptor->ops->fd_write_stat) {
9690 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9691 				statMask);
9692 		} else
9693 			status = B_UNSUPPORTED;
9694 	}
9695 
9696 	return status;
9697 }
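
/*	statMask selects which of the copied stat fields are actually applied,
	which lets chmod(), chown(), truncate(), and the utime*() family share
	this one syscall. A hedged sketch of a chmod()-style update, assuming
	the _kern_write_stat() stub and the B_STAT_MODE field flag from
	NodeMonitor.h:

		struct stat st;
		st.st_mode = 0644;
		status_t error = _kern_write_stat(-1, "/tmp/file", true, &st,
			sizeof(st), B_STAT_MODE);
*/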
9698 
9699 
9700 int
9701 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9702 {
9703 	KPath pathBuffer;
9704 	if (pathBuffer.InitCheck() != B_OK)
9705 		return B_NO_MEMORY;
9706 
9707 	char* path = pathBuffer.LockBuffer();
9708 
9709 	if (userPath != NULL) {
9710 		if (!IS_USER_ADDRESS(userPath))
9711 			return B_BAD_ADDRESS;
9712 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9713 		if (status != B_OK)
9714 			return status;
9715 	}
9716 
9717 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9718 }
9719 
9720 
9721 ssize_t
9722 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9723 	size_t readBytes)
9724 {
9725 	char attribute[B_FILE_NAME_LENGTH];
9726 
9727 	if (userAttribute == NULL)
9728 		return B_BAD_VALUE;
9729 	if (!IS_USER_ADDRESS(userAttribute))
9730 		return B_BAD_ADDRESS;
9731 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9732 	if (status != B_OK)
9733 		return status;
9734 
9735 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9736 	if (attr < 0)
9737 		return attr;
9738 
9739 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9740 	_user_close(attr);
9741 
9742 	return bytes;
9743 }
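
/*	Note that reading an attribute is implemented as open + read + close
	on a transient attribute FD. The public counterpart is fs_read_attr()
	from <fs_attr.h>; a short sketch (attribute name for illustration):

		char type[64];
		ssize_t bytes = fs_read_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE,
			0, type, sizeof(type));
*/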
9744 
9745 
9746 ssize_t
9747 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9748 	const void* userBuffer, size_t writeBytes)
9749 {
9750 	char attribute[B_FILE_NAME_LENGTH];
9751 
9752 	if (userAttribute == NULL)
9753 		return B_BAD_VALUE;
9754 	if (!IS_USER_ADDRESS(userAttribute))
9755 		return B_BAD_ADDRESS;
9756 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9757 	if (status != B_OK)
9758 		return status;
9759 
9760 	// Try to support the BeOS-typical truncation as well as the position
9761 	// argument: only a write at offset 0 truncates the attribute
9762 	int attr = attr_create(fd, NULL, attribute, type,
9763 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9764 	if (attr < 0)
9765 		return attr;
9766 
9767 	ssize_t bytes = _user_write(attr, pos, userBuffer, writeBytes);
9768 	_user_close(attr);
9769 
9770 	return bytes;
9771 }
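
/*	A sketch of the matching public call, fs_write_attr() from
	<fs_attr.h>: a write at offset 0 replaces the attribute (O_TRUNC
	above), while a write at a non-zero offset patches it in place.

		const char* mime = "text/plain";
		fs_write_attr(fd, "BEOS:TYPE", B_MIME_STRING_TYPE, 0, mime,
			strlen(mime) + 1);
*/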
9772 
9773 
9774 status_t
9775 _user_stat_attr(int fd, const char* userAttribute,
9776 	struct attr_info* userAttrInfo)
9777 {
9778 	char attribute[B_FILE_NAME_LENGTH];
9779 
9780 	if (userAttribute == NULL || userAttrInfo == NULL)
9781 		return B_BAD_VALUE;
9782 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9783 		return B_BAD_ADDRESS;
9784 	status_t status = user_copy_name(attribute, userAttribute,
9785 		sizeof(attribute));
9786 	if (status != B_OK)
9787 		return status;
9788 
9789 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9790 	if (attr < 0)
9791 		return attr;
9792 
9793 	struct file_descriptor* descriptor
9794 		= get_fd(get_current_io_context(false), attr);
9795 	if (descriptor == NULL) {
9796 		_user_close(attr);
9797 		return B_FILE_ERROR;
9798 	}
9799 
9800 	struct stat stat;
9801 	if (descriptor->ops->fd_read_stat)
9802 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9803 	else
9804 		status = B_UNSUPPORTED;
9805 
9806 	put_fd(descriptor);
9807 	_user_close(attr);
9808 
9809 	if (status == B_OK) {
9810 		attr_info info;
9811 		info.type = stat.st_type;
9812 		info.size = stat.st_size;
9813 
9814 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9815 			return B_BAD_ADDRESS;
9816 	}
9817 
9818 	return status;
9819 }
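
/*	The attr_info filled in above carries just the type and size; the
	public counterpart is fs_stat_attr() from <fs_attr.h>. A sketch:

		attr_info info;
		if (fs_stat_attr(fd, "BEOS:TYPE", &info) == 0)
			printf("type %#" B_PRIx32 ", %" B_PRIdOFF " bytes\n",
				info.type, info.size);
*/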
9820 
9821 
9822 int
9823 _user_open_attr(int fd, const char* userPath, const char* userName,
9824 	uint32 type, int openMode)
9825 {
9826 	char name[B_FILE_NAME_LENGTH];
9827 
9828 	if (!IS_USER_ADDRESS(userName))
9829 		return B_BAD_ADDRESS;
9830 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9831 	if (status != B_OK)
9832 		return status;
9833 
9834 	KPath pathBuffer;
9835 	if (pathBuffer.InitCheck() != B_OK)
9836 		return B_NO_MEMORY;
9837 
9838 	char* path = pathBuffer.LockBuffer();
9839 
9840 	if (userPath != NULL) {
9841 		if (!IS_USER_ADDRESS(userPath))
9842 			return B_BAD_ADDRESS;
9843 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9844 		if (status != B_OK)
9845 			return status;
9846 	}
9847 
9848 	if ((openMode & O_CREAT) != 0) {
9849 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9850 			false);
9851 	}
9852 
9853 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9854 }
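
/*	O_CREAT is what routes a request to attr_create() instead of
	attr_open() above. A hedged sketch using the path-based public wrapper
	fs_open_attr() from <fs_attr.h> (file and attribute names made up):

		int attrFD = fs_open_attr("/tmp/file", "my:note", B_STRING_TYPE,
			O_CREAT | O_WRONLY);
		if (attrFD >= 0) {
			write(attrFD, "hello", 6);
			fs_close_attr(attrFD);
		}
*/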
9855 
9856 
9857 status_t
9858 _user_remove_attr(int fd, const char* userName)
9859 {
9860 	char name[B_FILE_NAME_LENGTH];
9861 
9862 	if (!IS_USER_ADDRESS(userName))
9863 		return B_BAD_ADDRESS;
9864 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9865 	if (status != B_OK)
9866 		return status;
9867 
9868 	return attr_remove(fd, name, false);
9869 }
9870 
9871 
9872 status_t
9873 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9874 	const char* userToName)
9875 {
9876 	if (!IS_USER_ADDRESS(userFromName)
9877 		|| !IS_USER_ADDRESS(userToName))
9878 		return B_BAD_ADDRESS;
9879 
9880 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9881 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9882 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9883 		return B_NO_MEMORY;
9884 
9885 	char* fromName = fromNameBuffer.LockBuffer();
9886 	char* toName = toNameBuffer.LockBuffer();
9887 
9888 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9889 	if (status != B_OK)
9890 		return status;
9891 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9892 	if (status != B_OK)
9893 		return status;
9894 
9895 	return attr_rename(fromFile, fromName, toFile, toName, false);
9896 }
9897 
9898 
9899 int
9900 _user_open_index_dir(dev_t device)
9901 {
9902 	return index_dir_open(device, false);
9903 }
9904 
9905 
9906 status_t
9907 _user_create_index(dev_t device, const char* userName, uint32 type,
9908 	uint32 flags)
9909 {
9910 	char name[B_FILE_NAME_LENGTH];
9911 
9912 	if (!IS_USER_ADDRESS(userName))
9913 		return B_BAD_ADDRESS;
9914 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9915 	if (status != B_OK)
9916 		return status;
9917 
9918 	return index_create(device, name, type, flags, false);
9919 }
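
/*	A sketch of the public counterpart, fs_create_index() from
	<fs_index.h> (index name for illustration); dev_for_path() from
	<fs_info.h> resolves the volume:

		dev_t device = dev_for_path("/boot");
		if (device >= 0)
			fs_create_index(device, "Person:Email", B_STRING_TYPE, 0);
*/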
9920 
9921 
9922 status_t
9923 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9924 {
9925 	char name[B_FILE_NAME_LENGTH];
9926 	struct stat stat = {0};
9927 	status_t status;
9928 
9929 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9930 		return B_BAD_ADDRESS;
9931 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9932 	if (status != B_OK)
9933 		return status;
9934 
9935 	status = index_name_read_stat(device, name, &stat, false);
9936 	if (status == B_OK) {
9937 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9938 			return B_BAD_ADDRESS;
9939 	}
9940 
9941 	return status;
9942 }
9943 
9944 
9945 status_t
9946 _user_remove_index(dev_t device, const char* userName)
9947 {
9948 	char name[B_FILE_NAME_LENGTH];
9949 
9950 	if (!IS_USER_ADDRESS(userName))
9951 		return B_BAD_ADDRESS;
9952 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9953 	if (status != B_OK)
9954 		return status;
9955 
9956 	return index_remove(device, name, false);
9957 }
9958 
9959 
9960 status_t
9961 _user_getcwd(char* userBuffer, size_t size)
9962 {
9963 	if (size == 0)
9964 		return B_BAD_VALUE;
9965 	if (!IS_USER_ADDRESS(userBuffer))
9966 		return B_BAD_ADDRESS;
9967 
9968 	if (size > kMaxPathLength)
9969 		size = kMaxPathLength;
9970 
9971 	KPath pathBuffer(size);
9972 	if (pathBuffer.InitCheck() != B_OK)
9973 		return B_NO_MEMORY;
9974 
9975 	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9976 
9977 	char* path = pathBuffer.LockBuffer();
9978 
9979 	status_t status = get_cwd(path, size, false);
9980 	if (status != B_OK)
9981 		return status;
9982 
9983 	// Copy back the result
9984 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9985 		return B_BAD_ADDRESS;
9986 
9987 	return status;
9988 }
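
/*	The POSIX getcwd() wrapper presumably builds on this syscall; a
	trivial sketch:

		char buffer[B_PATH_NAME_LENGTH];
		if (getcwd(buffer, sizeof(buffer)) != NULL)
			puts(buffer);
*/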
9989 
9990 
9991 status_t
9992 _user_setcwd(int fd, const char* userPath)
9993 {
9994 	TRACE(("user_setcwd: path = %p\n", userPath));
9995 
9996 	KPath pathBuffer;
9997 	if (pathBuffer.InitCheck() != B_OK)
9998 		return B_NO_MEMORY;
9999 
10000 	char* path = pathBuffer.LockBuffer();
10001 
10002 	if (userPath != NULL) {
10003 		if (!IS_USER_ADDRESS(userPath))
10004 			return B_BAD_ADDRESS;
10005 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10006 		if (status != B_OK)
10007 			return status;
10008 	}
10009 
10010 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10011 }
10012 
10013 
10014 status_t
10015 _user_change_root(const char* userPath)
10016 {
10017 	// only root is allowed to chroot()
10018 	if (geteuid() != 0)
10019 		return B_NOT_ALLOWED;
10020 
10021 	// alloc path buffer
10022 	KPath pathBuffer;
10023 	if (pathBuffer.InitCheck() != B_OK)
10024 		return B_NO_MEMORY;
10025 
10026 	// copy userland path to kernel
10027 	char* path = pathBuffer.LockBuffer();
10028 	if (userPath != NULL) {
10029 		if (!IS_USER_ADDRESS(userPath))
10030 			return B_BAD_ADDRESS;
10031 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10032 		if (status != B_OK)
10033 			return status;
10034 	}
10035 
10036 	// get the vnode
10037 	VnodePutter vnode;
10038 	status_t status = path_to_vnode(path, true, vnode, NULL, false);
10039 	if (status != B_OK)
10040 		return status;
10041 
10042 	// set the new root
10043 	struct io_context* context = get_current_io_context(false);
10044 	mutex_lock(&sIOContextRootLock);
10045 	struct vnode* oldRoot = context->root;
10046 	context->root = vnode.Detach();
10047 	mutex_unlock(&sIOContextRootLock);
10048 
10049 	put_vnode(oldRoot);
10050 
10051 	return B_OK;
10052 }
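
/*	The root swap above happens under sIOContextRootLock so that a
	concurrent path resolution never observes a half-updated root. From
	userland this is the plain chroot(); callers conventionally follow it
	with chdir("/") so the CWD lies inside the new root:

		if (chroot("/boot/system") == 0)
			chdir("/");
*/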
10053 
10054 
10055 int
10056 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10057 	uint32 flags, port_id port, int32 token)
10058 {
10059 	if (device < 0 || userQuery == NULL || queryLength == 0)
10060 		return B_BAD_VALUE;
10061 
10062 	if (!IS_USER_ADDRESS(userQuery))
10063 		return B_BAD_ADDRESS;
10064 
10065 	// this is a safety restriction
10066 	if (queryLength >= 65536)
10067 		return B_NAME_TOO_LONG;
10068 
10069 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10070 	if (!query.IsValid())
10071 		return B_NO_MEMORY;
10072 
10073 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10074 		return B_BAD_ADDRESS;
10075 
10076 	return query_open(device, query, flags, port, token, false);
10077 }
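
/*	Queries are consumed through the public <fs_query.h> API; a minimal
	sketch (predicate and path made up for illustration, dev_for_path()
	from <fs_info.h>):

		DIR* query = fs_open_query(dev_for_path("/boot"),
			"name==\"vfs.cpp\"", 0);
		if (query != NULL) {
			struct dirent* entry;
			while ((entry = fs_read_query(query)) != NULL)
				puts(entry->d_name);
			fs_close_query(query);
		}
*/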
10078 
10079 
10080 #include "vfs_request_io.cpp"
10081