xref: /haiku/src/system/kernel/fs/vfs.cpp (revision 1978089f7cec856677e46204e992c7273d70b9af)
/*
 * Copyright 2005-2013, Ingo Weinhold, ingo_weinhold@gmx.de.
 * Copyright 2002-2018, Axel Dörfler, axeld@pinc-software.de.
 * Distributed under the terms of the MIT License.
 *
 * Copyright 2001-2002, Travis Geiselbrecht. All rights reserved.
 * Distributed under the terms of the NewOS License.
 */


/*! Virtual File System and File System Interface Layer */


#include <ctype.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/file.h>
#include <sys/ioctl.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <unistd.h>

#include <fs_attr.h>
#include <fs_info.h>
#include <fs_interface.h>
#include <fs_volume.h>
#include <NodeMonitor.h>
#include <OS.h>
#include <StorageDefs.h>

#include <AutoDeleter.h>
#include <AutoDeleterDrivers.h>
#include <block_cache.h>
#include <boot/kernel_args.h>
#include <debug_heap.h>
#include <disk_device_manager/KDiskDevice.h>
#include <disk_device_manager/KDiskDeviceManager.h>
#include <disk_device_manager/KDiskDeviceUtils.h>
#include <disk_device_manager/KDiskSystem.h>
#include <fd.h>
#include <file_cache.h>
#include <fs/node_monitor.h>
#include <KPath.h>
#include <lock.h>
#include <low_resource_manager.h>
#include <slab/Slab.h>
#include <StackOrHeapArray.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <tracing.h>
#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/ThreadAutoLock.h>
#include <util/DoublyLinkedList.h>
#include <vfs.h>
#include <vm/vm.h>
#include <vm/VMCache.h>
#include <wait_for_objects.h>

#include "EntryCache.h"
#include "fifo.h"
#include "IORequest.h"
#include "unused_vnodes.h"
#include "vfs_tracing.h"
#include "Vnode.h"
#include "../cache/vnode_store.h"


//#define TRACE_VFS
#ifdef TRACE_VFS
#	define TRACE(x) dprintf x
#	define FUNCTION(x) dprintf x
#else
#	define TRACE(x) ;
#	define FUNCTION(x) ;
#endif

#define ADD_DEBUGGER_COMMANDS


#define HAS_FS_CALL(vnode, op)			(vnode->ops->op != NULL)
#define HAS_FS_MOUNT_CALL(mount, op)	(mount->volume->ops->op != NULL)

#if KDEBUG
#	define FS_CALL(vnode, op, params...) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode, params) \
			: (panic("FS_CALL: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_CALL_NO_PARAMS(vnode, op) \
		( HAS_FS_CALL(vnode, op) ? \
			vnode->ops->op(vnode->mount->volume, vnode) \
			: (panic("FS_CALL_NO_PARAMS: vnode %p op " #op " is NULL", vnode), 0))
#	define FS_MOUNT_CALL(mount, op, params...) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume, params) \
			: (panic("FS_MOUNT_CALL: mount %p op " #op " is NULL", mount), 0))
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
		( HAS_FS_MOUNT_CALL(mount, op) ? \
			mount->volume->ops->op(mount->volume) \
			: (panic("FS_MOUNT_CALL_NO_PARAMS: mount %p op " #op " is NULL", mount), 0))
#else
#	define FS_CALL(vnode, op, params...) \
			vnode->ops->op(vnode->mount->volume, vnode, params)
#	define FS_CALL_NO_PARAMS(vnode, op) \
			vnode->ops->op(vnode->mount->volume, vnode)
#	define FS_MOUNT_CALL(mount, op, params...) \
			mount->volume->ops->op(mount->volume, params)
#	define FS_MOUNT_CALL_NO_PARAMS(mount, op) \
			mount->volume->ops->op(mount->volume)
#endif
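
// Illustrative sketch (not part of the build): how a hypothetical caller
// dispatches through these macros. In the non-KDEBUG build,
// FS_CALL(vnode, read_stat, &st) expands to
// vnode->ops->read_stat(vnode->mount->volume, vnode, &st); the KDEBUG
// variant additionally panics when the hook is missing.
//
//	struct stat st;
//	if (HAS_FS_CALL(vnode, read_stat)) {
//		status_t error = FS_CALL(vnode, read_stat, &st);
//		...
//	}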


const static size_t kMaxPathLength = 65536;
	// The absolute maximum path length (for getcwd(); this does not depend
	// on PATH_MAX).


typedef DoublyLinkedList<vnode> VnodeList;

/*!	\brief Structure to manage a mounted file system

	Note: The root_vnode and root_vnode->covers fields (what others?) are
	initialized in fs_mount() and not changed afterwards. That is, as soon
	as the mount is mounted and it is made sure it won't be unmounted
	(e.g. by holding a reference to a vnode of that mount), (read) access
	to those fields is always safe, even without additional locking. Moreover,
	while mounted the mount holds a reference to the root_vnode->covers vnode,
	thus making the access path vnode->mount->root_vnode->covers->mount->...
	safe if a reference to vnode is held (note that for the root mount
	root_vnode->covers is NULL, though).
*/
struct fs_mount {
	fs_mount()
		:
		volume(NULL),
		device_name(NULL)
	{
		mutex_init(&lock, "mount lock");
	}

	~fs_mount()
	{
		mutex_destroy(&lock);
		free(device_name);

		while (volume) {
			fs_volume* superVolume = volume->super_volume;

			if (volume->file_system != NULL)
				put_module(volume->file_system->info.name);

			free(volume->file_system_name);
			free(volume);
			volume = superVolume;
		}
	}

	struct fs_mount* next;
	dev_t			id;
	fs_volume*		volume;
	char*			device_name;
	mutex			lock;	// guards the vnodes list
	struct vnode*	root_vnode;
	struct vnode*	covers_vnode;	// immutable
	KPartition*		partition;
	VnodeList		vnodes;
	EntryCache		entry_cache;
	bool			unmounting;
	bool			owns_file_device;
};
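
// Illustrative sketch (not part of the build): because a mounted fs_mount
// keeps a reference to root_vnode->covers, a caller owning a reference to
// `vnode` may safely follow the chain documented above without extra locks:
//
//	struct vnode* covered = vnode->mount->root_vnode->covers;
//		// NULL for the root mount, otherwise a valid, referenced vnode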


namespace {

struct advisory_lock : public DoublyLinkedListLinkImpl<advisory_lock> {
	list_link		link;
	void*			bound_to;
	team_id			team;
	pid_t			session;
	off_t			start;
	off_t			end;
	bool			shared;
};

typedef DoublyLinkedList<advisory_lock> LockList;

} // namespace


struct advisory_locking {
	sem_id			lock;
	sem_id			wait_sem;
	LockList		locks;

	advisory_locking()
		:
		lock(-1),
		wait_sem(-1)
	{
	}

	~advisory_locking()
	{
		if (lock >= 0)
			delete_sem(lock);
		if (wait_sem >= 0)
			delete_sem(wait_sem);
	}
};

/*!	\brief Guards sMountsTable.

	The holder is allowed read/write access to sMountsTable.
	Manipulation of the fs_mount structures themselves
	(and their destruction) requires different locks though.
*/
static rw_lock sMountLock = RW_LOCK_INITIALIZER("vfs_mount_lock");

/*!	\brief Guards mount/unmount operations.

	fs_mount() and fs_unmount() hold the lock during their whole operation.
	That is, holding the lock ensures that no FS is mounted/unmounted. In
	particular this means that
	- sMountsTable will not be modified,
	- the fields of the fs_mount structures in sMountsTable that are immutable
	  after initialization will not be modified.

	The thread trying to lock the lock must not hold sVnodeLock or
	sMountLock.
*/
static recursive_lock sMountOpLock;

/*!	\brief Guards sVnodeTable.

	The holder is allowed read/write access to sVnodeTable and to
	any unbusy vnode in that table, save for the immutable fields (device, id,
	private_node, mount), to which only read-only access is allowed.
	The mutable fields advisory_locking, mandatory_locked_by, and ref_count, as
	well as the busy, removed, unused flags, and the vnode's type can also be
	write accessed when holding a read lock to sVnodeLock *and* having the vnode
	locked. Write access to covered_by and covers requires write-locking
	sVnodeLock.

	The thread trying to acquire the lock must not hold sMountLock.
	You must not hold this lock when calling create_sem(), as this might call
	vfs_free_unused_vnodes() and thus cause a deadlock.
*/
static rw_lock sVnodeLock = RW_LOCK_INITIALIZER("vfs_vnode_lock");

/*!	\brief Guards io_context::root.

	Must be held when setting or getting the io_context::root field.
	The only operation allowed while holding this lock besides getting or
	setting the field is inc_vnode_ref_count() on io_context::root.
*/
static mutex sIOContextRootLock = MUTEX_INITIALIZER("io_context::root lock");
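
// Illustrative lock-ordering sketch (not part of the build), derived from
// the rules documented above: sMountOpLock must be taken with neither of
// the others held, sVnodeLock may then be acquired, and sMountLock must be
// innermost (cf. get_mount() below).
//
//	RecursiveLocker opLocker(sMountOpLock);
//	ReadLocker vnodeLocker(sVnodeLock);
//	ReadLocker mountLocker(sMountLock);	// never taken before sVnodeLock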


namespace {

struct vnode_hash_key {
	dev_t	device;
	ino_t	vnode;
};

struct VnodeHash {
	typedef vnode_hash_key	KeyType;
	typedef	struct vnode	ValueType;

#define VHASH(mountid, vnodeid) \
	(((uint32)((vnodeid) >> 32) + (uint32)(vnodeid)) ^ (uint32)(mountid))

	size_t HashKey(KeyType key) const
	{
		return VHASH(key.device, key.vnode);
	}

	size_t Hash(ValueType* vnode) const
	{
		return VHASH(vnode->device, vnode->id);
	}

#undef VHASH

	bool Compare(KeyType key, ValueType* vnode) const
	{
		return vnode->device == key.device && vnode->id == key.vnode;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<VnodeHash> VnodeTable;


struct MountHash {
	typedef dev_t			KeyType;
	typedef	struct fs_mount	ValueType;

	size_t HashKey(KeyType key) const
	{
		return key;
	}

	size_t Hash(ValueType* mount) const
	{
		return mount->id;
	}

	bool Compare(KeyType key, ValueType* mount) const
	{
		return mount->id == key;
	}

	ValueType*& GetLink(ValueType* value) const
	{
		return value->next;
	}
};

typedef BOpenHashTable<MountHash> MountTable;

} // namespace


object_cache* sPathNameCache;
object_cache* sVnodeCache;
object_cache* sFileDescriptorCache;

#define VNODE_HASH_TABLE_SIZE 1024
static VnodeTable* sVnodeTable;
static struct vnode* sRoot;

#define MOUNTS_HASH_TABLE_SIZE 16
static MountTable* sMountsTable;
static dev_t sNextMountID = 1;

#define MAX_TEMP_IO_VECS 8

// How long to wait for busy vnodes: 2000 retries with a 5000 µs delay each
// (10s in total)
#define BUSY_VNODE_RETRIES 2000
#define BUSY_VNODE_DELAY 5000

mode_t __gUmask = 022;

/* function declarations */

static void free_unused_vnodes();

// file descriptor operation prototypes
static status_t file_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t file_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t file_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void file_free_fd(struct file_descriptor* descriptor);
static status_t file_close(struct file_descriptor* descriptor);
static status_t file_select(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t file_deselect(struct file_descriptor* descriptor, uint8 event,
	struct selectsync* sync);
static status_t dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t dir_read(struct io_context* ioContext, struct vnode* vnode,
	void* cookie, struct dirent* buffer, size_t bufferSize, uint32* _count);
static status_t dir_rewind(struct file_descriptor* descriptor);
static void dir_free_fd(struct file_descriptor* descriptor);
static status_t dir_close(struct file_descriptor* descriptor);
static status_t attr_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t attr_dir_rewind(struct file_descriptor* descriptor);
static void attr_dir_free_fd(struct file_descriptor* descriptor);
static status_t attr_dir_close(struct file_descriptor* descriptor);
static status_t attr_read(struct file_descriptor* descriptor, off_t pos,
	void* buffer, size_t* _bytes);
static status_t attr_write(struct file_descriptor* descriptor, off_t pos,
	const void* buffer, size_t* _bytes);
static off_t attr_seek(struct file_descriptor* descriptor, off_t pos,
	int seekType);
static void attr_free_fd(struct file_descriptor* descriptor);
static status_t attr_close(struct file_descriptor* descriptor);
static status_t attr_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t attr_write_stat(struct file_descriptor* descriptor,
	const struct stat* stat, int statMask);
static status_t index_dir_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t index_dir_rewind(struct file_descriptor* descriptor);
static void index_dir_free_fd(struct file_descriptor* descriptor);
static status_t index_dir_close(struct file_descriptor* descriptor);
static status_t query_read(struct io_context* context,
	struct file_descriptor* descriptor, struct dirent* buffer,
	size_t bufferSize, uint32* _count);
static status_t query_rewind(struct file_descriptor* descriptor);
static void query_free_fd(struct file_descriptor* descriptor);
static status_t query_close(struct file_descriptor* descriptor);

static status_t common_ioctl(struct file_descriptor* descriptor, ulong op,
	void* buffer, size_t length);
static status_t common_read_stat(struct file_descriptor* descriptor,
	struct stat* statData);
static status_t common_write_stat(struct file_descriptor* descriptor,
	const struct stat* statData, int statMask);
static status_t common_path_read_stat(int fd, char* path, bool traverseLeafLink,
	struct stat* stat, bool kernel);

static status_t vnode_path_to_vnode(struct vnode* vnode, char* path,
	bool traverseLeafLink, int count, bool kernel,
	VnodePutter& _vnode, ino_t* _parentID, char* leafName = NULL);
static status_t dir_vnode_to_path(struct vnode* vnode, char* buffer,
	size_t bufferSize, bool kernel);
static status_t fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
	VnodePutter& _vnode, ino_t* _parentID, bool kernel);
static void inc_vnode_ref_count(struct vnode* vnode);
static status_t dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree,
	bool reenter);
static inline void put_vnode(struct vnode* vnode);
static status_t fs_unmount(char* path, dev_t mountID, uint32 flags,
	bool kernel);
static int open_vnode(struct vnode* vnode, int openMode, bool kernel);

static struct fd_ops sFileOps = {
	file_read,
	file_write,
	file_seek,
	common_ioctl,
	NULL,		// set_flags
	file_select,
	file_deselect,
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	common_read_stat,
	common_write_stat,
	file_close,
	file_free_fd
};

static struct fd_ops sDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	dir_read,
	dir_rewind,
	common_read_stat,
	common_write_stat,
	dir_close,
	dir_free_fd
};

static struct fd_ops sAttributeDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	attr_dir_read,
	attr_dir_rewind,
	common_read_stat,
	common_write_stat,
	attr_dir_close,
	attr_dir_free_fd
};

static struct fd_ops sAttributeOps = {
	attr_read,
	attr_write,
	attr_seek,
	common_ioctl,
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// read_dir()
	NULL,		// rewind_dir()
	attr_read_stat,
	attr_write_stat,
	attr_close,
	attr_free_fd
};

static struct fd_ops sIndexDirectoryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	index_dir_read,
	index_dir_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	index_dir_close,
	index_dir_free_fd
};

#if 0
static struct fd_ops sIndexOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	NULL,		// dir_read()
	NULL,		// dir_rewind()
	index_read_stat,	// read_stat()
	NULL,		// write_stat()
	NULL,		// dir_close()
	NULL		// free_fd()
};
#endif

static struct fd_ops sQueryOps = {
	NULL,		// read()
	NULL,		// write()
	NULL,		// seek()
	NULL,		// ioctl()
	NULL,		// set_flags
	NULL,		// select()
	NULL,		// deselect()
	query_read,
	query_rewind,
	NULL,		// read_stat()
	NULL,		// write_stat()
	query_close,
	query_free_fd
};


namespace {

class FDCloser {
public:
	FDCloser() : fFD(-1), fKernel(true) {}

	FDCloser(int fd, bool kernel) : fFD(fd), fKernel(kernel) {}

	~FDCloser()
	{
		Close();
	}

	void SetTo(int fd, bool kernel)
	{
		Close();
		fFD = fd;
		fKernel = kernel;
	}

	void Close()
	{
		if (fFD >= 0) {
			if (fKernel)
				_kern_close(fFD);
			else
				_user_close(fFD);
			fFD = -1;
		}
	}

	int Detach()
	{
		int fd = fFD;
		fFD = -1;
		return fd;
	}

private:
	int		fFD;
	bool	fKernel;
};

} // namespace
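
// Illustrative sketch (not part of the build): typical FDCloser usage in a
// hypothetical helper that opens a file and hands the descriptor on. The
// path is made up for the example.
//
//	FDCloser fdCloser(_kern_open(-1, "/some/path", O_RDONLY, 0), true);
//	if (somethingWentWrong)
//		return B_ERROR;			// fdCloser closes the fd on destruction
//	return fdCloser.Detach();	// success: ownership passes to the caller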


#if VFS_PAGES_IO_TRACING

namespace VFSPagesIOTracing {

class PagesIOTraceEntry : public AbstractTraceEntry {
protected:
	PagesIOTraceEntry(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		fVnode(vnode),
		fMountID(vnode->mount->id),
		fNodeID(vnode->id),
		fCookie(cookie),
		fPos(pos),
		fCount(count),
		fFlags(flags),
		fBytesRequested(bytesRequested),
		fStatus(status),
		fBytesTransferred(bytesTransferred)
	{
		fVecs = (generic_io_vec*)alloc_tracing_buffer_memcpy(vecs,
			sizeof(generic_io_vec) * count, false);
	}

	void AddDump(TraceOutput& out, const char* mode)
	{
		out.Print("vfs pages io %5s: vnode: %p (%" B_PRId32 ", %" B_PRId64 "), "
			"cookie: %p, pos: %" B_PRIdOFF ", size: %" B_PRIu64 ", vecs: {",
			mode, fVnode, fMountID, fNodeID, fCookie, fPos,
			(uint64)fBytesRequested);

		if (fVecs != NULL) {
			for (uint32 i = 0; i < fCount; i++) {
				if (i > 0)
					out.Print(", ");
				out.Print("(%" B_PRIx64 ", %" B_PRIu64 ")", (uint64)fVecs[i].base,
					(uint64)fVecs[i].length);
			}
		}

		out.Print("}, flags: %#" B_PRIx32 " -> status: %#" B_PRIx32 ", "
			"transferred: %" B_PRIu64, fFlags, fStatus,
			(uint64)fBytesTransferred);
	}

protected:
	struct vnode*	fVnode;
	dev_t			fMountID;
	ino_t			fNodeID;
	void*			fCookie;
	off_t			fPos;
	generic_io_vec*	fVecs;
	uint32			fCount;
	uint32			fFlags;
	generic_size_t	fBytesRequested;
	status_t		fStatus;
	generic_size_t	fBytesTransferred;
};


class ReadPages : public PagesIOTraceEntry {
public:
	ReadPages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "read");
	}
};


class WritePages : public PagesIOTraceEntry {
public:
	WritePages(struct vnode* vnode, void* cookie, off_t pos,
		const generic_io_vec* vecs, uint32 count, uint32 flags,
		generic_size_t bytesRequested, status_t status,
		generic_size_t bytesTransferred)
		:
		PagesIOTraceEntry(vnode, cookie, pos, vecs, count, flags,
			bytesRequested, status, bytesTransferred)
	{
		Initialized();
	}

	virtual void AddDump(TraceOutput& out)
	{
		PagesIOTraceEntry::AddDump(out, "write");
	}
};

}	// namespace VFSPagesIOTracing

#	define TPIO(x) new(std::nothrow) VFSPagesIOTracing::x;
#else
#	define TPIO(x) ;
#endif	// VFS_PAGES_IO_TRACING

/*! Finds the mounted device (the fs_mount structure) with the given ID.
	Note, you must hold sMountLock when you call this function.
*/
static struct fs_mount*
find_mount(dev_t id)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sMountLock);

	return sMountsTable->Lookup(id);
}

static status_t
get_mount(dev_t id, struct fs_mount** _mount)
{
	struct fs_mount* mount;

	ReadLocker nodeLocker(sVnodeLock);
	ReadLocker mountLocker(sMountLock);

	mount = find_mount(id);
	if (mount == NULL)
		return B_BAD_VALUE;

	struct vnode* rootNode = mount->root_vnode;
	if (mount->unmounting || rootNode == NULL || rootNode->IsBusy()
		|| rootNode->ref_count == 0) {
		// might have been called during a mount/unmount operation
		return B_BUSY;
	}

	inc_vnode_ref_count(rootNode);
	*_mount = mount;
	return B_OK;
}


static void
put_mount(struct fs_mount* mount)
{
	if (mount)
		put_vnode(mount->root_vnode);
}
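
// Illustrative sketch (not part of the build): get_mount() and put_mount()
// come in pairs. The root vnode reference taken by get_mount() keeps the
// file system from being unmounted while it is in use.
//
//	struct fs_mount* mount;
//	if (get_mount(id, &mount) == B_OK) {
//		...use mount...
//		put_mount(mount);
//	}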


/*!	Tries to open the specified file system module.
	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1".
	Returns a pointer to the file system module interface, or NULL if it
	could not open the module.
*/
static file_system_module_info*
get_file_system(const char* fsName)
{
	char name[B_FILE_NAME_LENGTH];
	if (strncmp(fsName, "file_systems/", strlen("file_systems/"))) {
		// construct the module name if we didn't get one
		// (we currently support only one API)
		snprintf(name, sizeof(name), "file_systems/%s/v1", fsName);
		fsName = NULL;
	}

	file_system_module_info* info;
	if (get_module(fsName ? fsName : name, (module_info**)&info) != B_OK)
		return NULL;

	return info;
}


/*!	Accepts a file system name of the form "bfs" or "file_systems/bfs/v1"
	and returns a compatible fs_info.fsh_name name ("bfs" in both cases).
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available.
*/
static char*
get_file_system_name(const char* fsName)
{
	const size_t length = strlen("file_systems/");

	if (strncmp(fsName, "file_systems/", length)) {
		// the name already seems to be the module's file name
		return strdup(fsName);
	}

	fsName += length;
	const char* end = strchr(fsName, '/');
	if (end == NULL) {
		// this doesn't seem to be a valid name, but well...
		return strdup(fsName);
	}

	// cut off the trailing /v1

	char* name = (char*)malloc(end + 1 - fsName);
	if (name == NULL)
		return NULL;

	strlcpy(name, fsName, end + 1 - fsName);
	return name;
}
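
// Illustrative examples (not part of the build):
//
//	char* name = get_file_system_name("file_systems/bfs/v1");
//		// -> "bfs" (trailing "/v1" cut off; caller must free())
//	char* plain = get_file_system_name("bfs");
//		// -> "bfs" (already a short name, simply strdup()ed)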


/*!	Accepts a list of file system names separated by colons, one for each
	layer, and returns the file system name for the specified layer.
	The name is allocated for you, and you have to free() it when you're
	done with it.
	Returns NULL if the required memory is not available or if there is no
	name for the specified layer.
*/
static char*
get_file_system_name_for_layer(const char* fsNames, int32 layer)
{
	while (layer >= 0) {
		const char* end = strchr(fsNames, ':');
		if (end == NULL) {
			if (layer == 0)
				return strdup(fsNames);
			return NULL;
		}

		if (layer == 0) {
			size_t length = end - fsNames + 1;
			char* result = (char*)malloc(length);
			if (result == NULL)
				return NULL;
			strlcpy(result, fsNames, length);
			return result;
		}

		fsNames = end + 1;
		layer--;
	}

	return NULL;
}
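
// Illustrative examples (not part of the build), assuming a hypothetical
// two-layer stack "write_overlay:bfs":
//
//	get_file_system_name_for_layer("write_overlay:bfs", 0);	// "write_overlay"
//	get_file_system_name_for_layer("write_overlay:bfs", 1);	// "bfs"
//	get_file_system_name_for_layer("write_overlay:bfs", 2);	// NULL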


static void
add_vnode_to_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Add(vnode);
}


static void
remove_vnode_from_mount_list(struct vnode* vnode, struct fs_mount* mount)
{
	MutexLocker _(mount->lock);
	mount->vnodes.Remove(vnode);
}


/*!	\brief Looks up a vnode by mount and node ID in the sVnodeTable.

	The caller must hold the sVnodeLock (read lock at least).

	\param mountID the mount ID.
	\param vnodeID the node ID.

	\return The vnode structure, if it was found in the hash table, \c NULL
			otherwise.
*/
static struct vnode*
lookup_vnode(dev_t mountID, ino_t vnodeID)
{
	ASSERT_READ_LOCKED_RW_LOCK(&sVnodeLock);

	struct vnode_hash_key key;

	key.device = mountID;
	key.vnode = vnodeID;

	return sVnodeTable->Lookup(key);
}

/*!	\brief Checks whether or not a busy vnode should be waited for (again).

	This will also wait for BUSY_VNODE_DELAY before returning \c true, i.e.
	when one should still wait for the vnode to become unbusy.

	\return \c true if one should retry, \c false if not.
*/
static bool
retry_busy_vnode(int32& tries, dev_t mountID, ino_t vnodeID)
{
	if (--tries < 0) {
		// vnode doesn't seem to become unbusy
		dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO
			" is not becoming unbusy!\n", mountID, vnodeID);
		return false;
	}
	snooze(BUSY_VNODE_DELAY);
	return true;
}
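
// Illustrative usage sketch (not part of the build), mirroring the retry
// loop in get_vnode(): the counter is initialized once and decremented on
// each retry until the vnode becomes unbusy or the retries are exhausted.
// `vnodeIsStillBusy` is a hypothetical condition that would be re-checked
// under the proper locks.
//
//	int32 tries = BUSY_VNODE_RETRIES;
//	while (vnodeIsStillBusy) {
//		if (!retry_busy_vnode(tries, mountID, vnodeID))
//			return B_BUSY;
//	}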


/*!	Creates a new vnode with the given mount and node ID.
	If the node already exists, it is returned instead and no new node is
	created. In either case -- but not if an error occurs -- the function
	write-locks \c sVnodeLock and keeps it locked for the caller when
	returning. On error the lock is not held on return.

	\param mountID The mount ID.
	\param vnodeID The vnode ID.
	\param _vnode Will be set to the new vnode on success.
	\param _nodeCreated Will be set to \c true when the returned vnode has
		been newly created, \c false when it already existed. Will not be
		changed on error.
	\return \c B_OK, when the vnode was successfully created and inserted or
		a node with the given ID was found, \c B_NO_MEMORY or
		\c B_ENTRY_NOT_FOUND on error.
*/
static status_t
create_new_vnode_and_lock(dev_t mountID, ino_t vnodeID, struct vnode*& _vnode,
	bool& _nodeCreated)
{
	FUNCTION(("create_new_vnode_and_lock()\n"));

	struct vnode* vnode = (struct vnode*)object_cache_alloc(sVnodeCache, 0);
	if (vnode == NULL)
		return B_NO_MEMORY;

	// initialize basic values
	memset(vnode, 0, sizeof(struct vnode));
	vnode->device = mountID;
	vnode->id = vnodeID;
	vnode->ref_count = 1;
	vnode->SetBusy(true);

	// look up the node -- it might have been added by someone else in the
	// meantime
	rw_lock_write_lock(&sVnodeLock);
	struct vnode* existingVnode = lookup_vnode(mountID, vnodeID);
	if (existingVnode != NULL) {
		object_cache_free(sVnodeCache, vnode, 0);
		_vnode = existingVnode;
		_nodeCreated = false;
		return B_OK;
	}

	// get the mount structure
	rw_lock_read_lock(&sMountLock);
	vnode->mount = find_mount(mountID);
	if (!vnode->mount || vnode->mount->unmounting) {
		rw_lock_read_unlock(&sMountLock);
		rw_lock_write_unlock(&sVnodeLock);
		object_cache_free(sVnodeCache, vnode, 0);
		return B_ENTRY_NOT_FOUND;
	}

	// add the vnode to the mount's node list and the hash table
	sVnodeTable->Insert(vnode);
	add_vnode_to_mount_list(vnode, vnode->mount);

	rw_lock_read_unlock(&sMountLock);

	_vnode = vnode;
	_nodeCreated = true;

	// keep the vnode lock locked
	return B_OK;
}


/*!	Frees the vnode and all resources it has acquired, and removes
	it from the vnode hash as well as from its mount structure.
	Will also make sure that any cache modifications are written back.
*/
static void
free_vnode(struct vnode* vnode, bool reenter)
{
	ASSERT_PRINT(vnode->ref_count == 0 && vnode->IsBusy(), "vnode: %p\n",
		vnode);
	ASSERT_PRINT(vnode->advisory_locking == NULL, "vnode: %p\n", vnode);

	// write back any changes in this vnode's cache -- but only
	// if the vnode won't be deleted, in which case the changes
	// will be discarded

	if (!vnode->IsRemoved() && HAS_FS_CALL(vnode, fsync))
		FS_CALL_NO_PARAMS(vnode, fsync);

	// Note: If this vnode has a cache attached, there will still be two
	// references to that cache at this point. The last one belongs to the vnode
	// itself (cf. vfs_get_vnode_cache()) and one belongs to the node's file
	// cache. Each but the last reference to a cache also includes a reference
	// to the vnode. The file cache, however, released its reference (cf.
	// file_cache_create()), so that this vnode's ref count has the chance to
	// ever drop to 0. Deleting the file cache now, will cause the next to last
	// cache reference to be released, which will also release a (no longer
	// existing) vnode reference. To avoid problems, we set the vnode's ref
	// count, so that it will neither become negative nor 0.
	vnode->ref_count = 2;

	if (!vnode->IsUnpublished()) {
		if (vnode->IsRemoved())
			FS_CALL(vnode, remove_vnode, reenter);
		else
			FS_CALL(vnode, put_vnode, reenter);
	}

	// If the vnode has a VMCache attached, make sure that it won't try to get
	// another reference via VMVnodeCache::AcquireUnreferencedStoreRef(). As
	// long as the vnode is busy and in the hash, that won't happen, but as
	// soon as we've removed it from the hash, it could reload the vnode -- with
	// a new cache attached!
	if (vnode->cache != NULL && vnode->cache->type == CACHE_TYPE_VNODE)
		((VMVnodeCache*)vnode->cache)->VnodeDeleted();

	// The file system has removed the resources of the vnode now, so we can
	// make it available again (by removing the busy vnode from the hash).
	rw_lock_write_lock(&sVnodeLock);
	sVnodeTable->Remove(vnode);
	rw_lock_write_unlock(&sVnodeLock);

	// if we have a VMCache attached, remove it
	if (vnode->cache)
		vnode->cache->ReleaseRef();

	vnode->cache = NULL;

	remove_vnode_from_mount_list(vnode, vnode->mount);

	object_cache_free(sVnodeCache, vnode, 0);
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
	\param alwaysFree don't move this vnode into the unused list, but really
		   delete it if possible.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system. This will be passed to file system hooks only.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
dec_vnode_ref_count(struct vnode* vnode, bool alwaysFree, bool reenter)
{
	ReadLocker locker(sVnodeLock);
	AutoLocker<Vnode> nodeLocker(vnode);

	int32 oldRefCount = atomic_add(&vnode->ref_count, -1);

	ASSERT_PRINT(oldRefCount > 0, "vnode %p\n", vnode);

	TRACE(("dec_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));

	if (oldRefCount != 1)
		return B_OK;

	if (vnode->IsBusy())
		panic("dec_vnode_ref_count: called on busy vnode %p\n", vnode);

	bool freeNode = false;
	bool freeUnusedNodes = false;

	// Just insert the vnode into an unused list if we don't need
	// to delete it
	if (vnode->IsRemoved() || alwaysFree) {
		vnode_to_be_freed(vnode);
		vnode->SetBusy(true);
		freeNode = true;
	} else
		freeUnusedNodes = vnode_unused(vnode);

	nodeLocker.Unlock();
	locker.Unlock();

	if (freeNode)
		free_vnode(vnode, reenter);
	else if (freeUnusedNodes)
		free_unused_vnodes();

	return B_OK;
}


/*!	\brief Increments the reference counter of the given vnode.

	The caller must make sure that the node isn't deleted while this function
	is called. This can be done either:
	- by ensuring that a reference to the node exists and remains in existence,
	  or
	- by holding the vnode's lock (which also requires read locking sVnodeLock)
	  or by holding sVnodeLock write locked.

	In the second case the caller is responsible for dealing with the ref count
	0 -> 1 transition. That is, 1. this function must not be invoked when the
	node is busy in the first place and 2. vnode_used() must be called for the
	node.

	\param vnode the vnode.
*/
static void
inc_vnode_ref_count(struct vnode* vnode)
{
	atomic_add(&vnode->ref_count, 1);
	TRACE(("inc_vnode_ref_count: vnode %p, ref now %" B_PRId32 "\n", vnode,
		vnode->ref_count));
}
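
// Illustrative sketch (not part of the build) of the second pattern named
// above, as used by get_vnode(): the vnode is locked, the 0 -> 1 transition
// is handled via vnode_used(), and only then is the reference taken.
//
//	ReadLocker locker(sVnodeLock);
//	AutoLocker<Vnode> nodeLocker(vnode);
//	if (!vnode->IsBusy()) {
//		if (vnode->ref_count == 0)
//			vnode_used(vnode);
//		inc_vnode_ref_count(vnode);
//	}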


static bool
is_special_node_type(int type)
{
	// at the moment only FIFOs are supported
	return S_ISFIFO(type);
}


static status_t
create_special_sub_node(struct vnode* vnode, uint32 flags)
{
	if (S_ISFIFO(vnode->Type()))
		return create_fifo_vnode(vnode->mount->volume, vnode);

	return B_BAD_VALUE;
}


/*!	\brief Retrieves a vnode for a given mount ID, node ID pair.

	If the node is not yet in memory, it will be loaded.

	The caller must not hold the sVnodeLock or the sMountLock.

	\param mountID the mount ID.
	\param vnodeID the node ID.
	\param _vnode Pointer to a vnode* variable into which the pointer to the
		   retrieved vnode structure shall be written.
	\param canWait \c true, if it is allowed to wait for a busy vnode to
		   become unbusy.
	\param reenter \c true, if this function is called (indirectly) from within
		   a file system.
	\return \c B_OK, if everything went fine, an error code otherwise.
*/
static status_t
get_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode, bool canWait,
	int reenter)
{
	FUNCTION(("get_vnode: mountid %" B_PRId32 " vnid 0x%" B_PRIx64 " %p\n",
		mountID, vnodeID, _vnode));

	rw_lock_read_lock(&sVnodeLock);

	int32 tries = BUSY_VNODE_RETRIES;
restart:
	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
	AutoLocker<Vnode> nodeLocker(vnode);

	if (vnode && vnode->IsBusy()) {
		// vnodes in the Removed state (except ones still Unpublished)
		// which are also Busy will disappear soon, so we do not wait for them.
		const bool doNotWait = vnode->IsRemoved() && !vnode->IsUnpublished();

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
		if (!canWait) {
			dprintf("vnode %" B_PRIdDEV ":%" B_PRIdINO " is busy!\n",
				mountID, vnodeID);
			return B_BUSY;
		}
		if (doNotWait || !retry_busy_vnode(tries, mountID, vnodeID))
			return B_BUSY;

		rw_lock_read_lock(&sVnodeLock);
		goto restart;
	}

	TRACE(("get_vnode: tried to lookup vnode, got %p\n", vnode));

	status_t status;

	if (vnode) {
		if (vnode->ref_count == 0) {
			// this vnode has been unused before
			vnode_used(vnode);
		}
		inc_vnode_ref_count(vnode);

		nodeLocker.Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	} else {
		// we need to create a new vnode and read it in
		rw_lock_read_unlock(&sVnodeLock);
			// unlock -- create_new_vnode_and_lock() write-locks on success
		bool nodeCreated;
		status = create_new_vnode_and_lock(mountID, vnodeID, vnode,
			nodeCreated);
		if (status != B_OK)
			return status;

		if (!nodeCreated) {
			rw_lock_read_lock(&sVnodeLock);
			rw_lock_write_unlock(&sVnodeLock);
			goto restart;
		}

		rw_lock_write_unlock(&sVnodeLock);

		int type;
		uint32 flags;
		status = FS_MOUNT_CALL(vnode->mount, get_vnode, vnodeID, vnode, &type,
			&flags, reenter);
		if (status == B_OK && vnode->private_node == NULL)
			status = B_BAD_VALUE;

		bool gotNode = status == B_OK;
		bool publishSpecialSubNode = false;
		if (gotNode) {
			vnode->SetType(type);
			publishSpecialSubNode = is_special_node_type(type)
				&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
		}

		if (gotNode && publishSpecialSubNode)
			status = create_special_sub_node(vnode, flags);

		if (status != B_OK) {
			if (gotNode)
				FS_CALL(vnode, put_vnode, reenter);

			rw_lock_write_lock(&sVnodeLock);
			sVnodeTable->Remove(vnode);
			remove_vnode_from_mount_list(vnode, vnode->mount);
			rw_lock_write_unlock(&sVnodeLock);

			object_cache_free(sVnodeCache, vnode, 0);
			return status;
		}

		rw_lock_read_lock(&sVnodeLock);
		vnode->Lock();

		vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
		vnode->SetBusy(false);

		vnode->Unlock();
		rw_lock_read_unlock(&sVnodeLock);
	}

	TRACE(("get_vnode: returning %p\n", vnode));

	*_vnode = vnode;
	return B_OK;
}


/*!	\brief Decrements the reference counter of the given vnode and deletes it,
	if the counter dropped to 0.

	The caller must, of course, own a reference to the vnode to call this
	function.
	The caller must not hold the sVnodeLock or the sMountLock.

	\param vnode the vnode.
*/
static inline void
put_vnode(struct vnode* vnode)
{
	dec_vnode_ref_count(vnode, false, false);
}

static void
free_unused_vnodes(int32 level)
{
	unused_vnodes_check_started();

	if (level == B_NO_LOW_RESOURCE) {
		unused_vnodes_check_done();
		return;
	}

	flush_hot_vnodes();

	// determine how many nodes to free
	uint32 count = 1;
	{
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);

		switch (level) {
			case B_LOW_RESOURCE_NOTE:
				count = sUnusedVnodes / 100;
				break;
			case B_LOW_RESOURCE_WARNING:
				count = sUnusedVnodes / 10;
				break;
			case B_LOW_RESOURCE_CRITICAL:
				count = sUnusedVnodes;
				break;
		}

		if (count > sUnusedVnodes)
			count = sUnusedVnodes;
	}

	// Write back the modified pages of some unused vnodes and free them.

	for (uint32 i = 0; i < count; i++) {
		ReadLocker vnodesReadLocker(sVnodeLock);

		// get the first node
		MutexLocker unusedVnodesLocker(sUnusedVnodesLock);
		struct vnode* vnode = (struct vnode*)list_get_first_item(
			&sUnusedVnodeList);
		unusedVnodesLocker.Unlock();

		if (vnode == NULL)
			break;

		// lock the node
		AutoLocker<Vnode> nodeLocker(vnode);

		// Check whether the node is still unused -- since we only append to the
		// tail of the unused queue, the vnode should still be at its head.
		// Alternatively we could check its ref count for 0 and its busy flag,
		// but if the node is no longer at the head of the queue, it means it
		// has been touched in the meantime, i.e. it is no longer the least
		// recently used unused vnode and we rather don't free it.
		unusedVnodesLocker.Lock();
		if (vnode != list_get_first_item(&sUnusedVnodeList))
			continue;
		unusedVnodesLocker.Unlock();

		ASSERT(!vnode->IsBusy());

		// grab a reference
		inc_vnode_ref_count(vnode);
		vnode_used(vnode);

		// write back changes and free the node
		nodeLocker.Unlock();
		vnodesReadLocker.Unlock();

		if (vnode->cache != NULL)
			vnode->cache->WriteModified();

		dec_vnode_ref_count(vnode, true, false);
			// this should free the vnode when it's still unused
	}

	unused_vnodes_check_done();
}


/*!	Gets the vnode the given vnode is covering.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveredNode = vnode->covers) {
		while (coveredNode->covers != NULL)
			coveredNode = coveredNode->covers;

		inc_vnode_ref_count(coveredNode);
		return coveredNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covering.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covered node shall be returned.
	\return The covered vnode, or \c NULL if the given vnode doesn't cover any
		vnode.
*/
static inline Vnode*
get_covered_vnode(Vnode* vnode)
{
	if (!vnode->IsCovering())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covered_vnode_locked(vnode);
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must have \c sVnodeLock read-locked at least.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static Vnode*
get_covering_vnode_locked(Vnode* vnode)
{
	if (Vnode* coveringNode = vnode->covered_by) {
		while (coveringNode->covered_by != NULL)
			coveringNode = coveringNode->covered_by;

		inc_vnode_ref_count(coveringNode);
		return coveringNode;
	}

	return NULL;
}


/*!	Gets the vnode the given vnode is covered by.

	The caller must not hold \c sVnodeLock. Note that this implies a race
	condition, since the situation can change at any time.

	The function returns a reference to the retrieved vnode (if any); the
	caller is responsible for releasing it.

	\param vnode The vnode whose covering node shall be returned.
	\return The covering vnode, or \c NULL if the given vnode isn't covered by
		any vnode.
*/
static inline Vnode*
get_covering_vnode(Vnode* vnode)
{
	if (!vnode->IsCovered())
		return NULL;

	ReadLocker vnodeReadLocker(sVnodeLock);
	return get_covering_vnode_locked(vnode);
}


static void
free_unused_vnodes()
{
	free_unused_vnodes(
		low_resource_state(B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
			| B_KERNEL_RESOURCE_ADDRESS_SPACE));
}


static void
vnode_low_resource_handler(void* /*data*/, uint32 resources, int32 level)
{
	TRACE(("vnode_low_resource_handler(level = %" B_PRId32 ")\n", level));

	free_unused_vnodes(level);
}


static inline void
put_advisory_locking(struct advisory_locking* locking)
{
	release_sem(locking->lock);
}

/*!	Returns the advisory_locking object of the \a vnode in case it
	has one, and locks it.
	You have to call put_advisory_locking() when you're done with
	it.
	Note, you must not have the vnode mutex locked when calling
	this function.
*/
static struct advisory_locking*
get_advisory_locking(struct vnode* vnode)
{
	rw_lock_read_lock(&sVnodeLock);
	vnode->Lock();

	struct advisory_locking* locking = vnode->advisory_locking;
	sem_id lock = locking != NULL ? locking->lock : B_ERROR;

	vnode->Unlock();
	rw_lock_read_unlock(&sVnodeLock);

	if (lock >= 0)
		lock = acquire_sem(lock);
	if (lock < 0) {
		// This means the locking has been deleted in the meantime
		// or had never existed in the first place - otherwise, we
		// would get the lock at some point.
		return NULL;
	}

	return locking;
}
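
// Illustrative sketch (not part of the build): get_advisory_locking() and
// put_advisory_locking() bracket any inspection of the lock list.
//
//	struct advisory_locking* locking = get_advisory_locking(vnode);
//	if (locking != NULL) {
//		...walk locking->locks...
//		put_advisory_locking(locking);	// releases locking->lock
//	}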


/*!	Creates a locked advisory_locking object, and attaches it to the
	given \a vnode.
	Returns B_OK in case of success; even if the vnode got such an
	object from someone else in the meantime, you'll still get this
	one locked then.
*/
static status_t
create_advisory_locking(struct vnode* vnode)
{
	if (vnode == NULL)
		return B_FILE_ERROR;

	ObjectDeleter<advisory_locking> lockingDeleter;
	struct advisory_locking* locking = NULL;

	while (get_advisory_locking(vnode) == NULL) {
		// no locking object set on the vnode yet, create one
		if (locking == NULL) {
			locking = new(std::nothrow) advisory_locking;
			if (locking == NULL)
				return B_NO_MEMORY;
			lockingDeleter.SetTo(locking);

			locking->wait_sem = create_sem(0, "advisory lock");
			if (locking->wait_sem < 0)
				return locking->wait_sem;

			locking->lock = create_sem(0, "advisory locking");
			if (locking->lock < 0)
				return locking->lock;
		}

		// set our newly created locking object
		ReadLocker _(sVnodeLock);
		AutoLocker<Vnode> nodeLocker(vnode);
		if (vnode->advisory_locking == NULL) {
			vnode->advisory_locking = locking;
			lockingDeleter.Detach();
			return B_OK;
		}
	}

	// The vnode already had a locking object. That's just as well.

	return B_OK;
}


/*! Returns \c true when either \a flock is \c NULL or the \a flock intersects
	with the advisory_lock \a lock.
*/
static bool
advisory_lock_intersects(struct advisory_lock* lock, struct flock* flock)
{
	if (flock == NULL)
		return true;

	return lock->start <= flock->l_start - 1 + flock->l_len
		&& lock->end >= flock->l_start;
}
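
// Worked example (illustrative): a lock spanning [100, 199] intersects an
// flock with l_start = 150 and l_len = 100 (i.e. [150, 249]), since
// 100 <= 150 - 1 + 100 = 249 and 199 >= 150.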


/*!	Tests whether acquiring a lock would block.
*/
static status_t
test_advisory_lock(struct vnode* vnode, struct flock* flock)
{
	flock->l_type = F_UNLCK;

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	team_id team = team_get_current_team_id();

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();

		if (lock->team != team && advisory_lock_intersects(lock, flock)) {
			// locks do overlap
			if (flock->l_type != F_RDLCK || !lock->shared) {
				// collision
				flock->l_type = lock->shared ? F_RDLCK : F_WRLCK;
				flock->l_whence = SEEK_SET;
				flock->l_start = lock->start;
				flock->l_len = lock->end - lock->start + 1;
				flock->l_pid = lock->team;
				break;
			}
		}
	}

	put_advisory_locking(locking);
	return B_OK;
}


/*!	Removes the specified lock, or all locks of the calling team
	if \a flock is NULL.
*/
static status_t
release_advisory_lock(struct vnode* vnode, struct io_context* context,
	struct file_descriptor* descriptor, struct flock* flock)
{
	FUNCTION(("release_advisory_lock(vnode = %p, flock = %p)\n", vnode, flock));

	struct advisory_locking* locking = get_advisory_locking(vnode);
	if (locking == NULL)
		return B_OK;

	// find matching lock entries

	LockList::Iterator iterator = locking->locks.GetIterator();
	while (iterator.HasNext()) {
		struct advisory_lock* lock = iterator.Next();
		bool removeLock = false;

		if (descriptor != NULL && lock->bound_to == descriptor) {
			// Remove flock() locks
			removeLock = true;
		} else if (lock->bound_to == context
				&& advisory_lock_intersects(lock, flock)) {
			// Remove POSIX locks
			bool endsBeyond = false;
			bool startsBefore = false;
			if (flock != NULL) {
				startsBefore = lock->start < flock->l_start;
				endsBeyond = lock->end > flock->l_start - 1 + flock->l_len;
			}

			if (!startsBefore && !endsBeyond) {
				// lock is completely contained in flock
				removeLock = true;
			} else if (startsBefore && !endsBeyond) {
				// cut the end of the lock
				lock->end = flock->l_start - 1;
			} else if (!startsBefore && endsBeyond) {
				// cut the start of the lock
				lock->start = flock->l_start + flock->l_len;
			} else {
				// divide the lock into two locks
				struct advisory_lock* secondLock
					= new(std::nothrow) advisory_lock;
				if (secondLock == NULL) {
					// TODO: we should probably revert the locks we already
					// changed... (ie. allocate upfront)
					put_advisory_locking(locking);
					return B_NO_MEMORY;
				}

				const off_t originalEnd = lock->end;
					// remember the end before cutting the first lock
				lock->end = flock->l_start - 1;

				secondLock->bound_to = context;
				secondLock->team = lock->team;
				secondLock->session = lock->session;
				// values must already be normalized when getting here
				secondLock->start = flock->l_start + flock->l_len;
				secondLock->end = originalEnd;
				secondLock->shared = lock->shared;

				locking->locks.Add(secondLock);
			}
		}

		if (removeLock) {
			// this lock is no longer used
			iterator.Remove();
			delete lock;
		}
	}

	bool removeLocking = locking->locks.IsEmpty();
	release_sem_etc(locking->wait_sem, 1, B_RELEASE_ALL);

	put_advisory_locking(locking);

	if (removeLocking) {
		// We can remove the whole advisory locking structure; it's no
		// longer used
		locking = get_advisory_locking(vnode);
		if (locking != NULL) {
			ReadLocker locker(sVnodeLock);
			AutoLocker<Vnode> nodeLocker(vnode);

			// the locking could have been changed in the meantime
			if (locking->locks.IsEmpty()) {
				vnode->advisory_locking = NULL;
				nodeLocker.Unlock();
				locker.Unlock();

				// we've detached the locking from the vnode, so we can
				// safely delete it
				delete locking;
			} else {
				// the locking is in use again
				nodeLocker.Unlock();
				locker.Unlock();
				release_sem_etc(locking->lock, 1, B_DO_NOT_RESCHEDULE);
			}
		}
	}

	return B_OK;
}


/*!	Acquires an advisory lock for the \a vnode. If \a wait is \c true, it
	will wait for the lock to become available, if there are any collisions
	(if \a wait is \c false, it will return B_PERMISSION_DENIED for POSIX
	locks and B_WOULD_BLOCK for flock() locks in this case).

	If \a descriptor is NULL, POSIX semantics are used for this lock. Otherwise,
	BSD flock() semantics are used, that is, all children can unlock the file
	in question (we even allow parents to remove the lock, though, but that
	seems to be in line with what the BSDs are doing).
*/
static status_t
acquire_advisory_lock(struct vnode* vnode, io_context* context,
	struct file_descriptor* descriptor, struct flock* flock, bool wait)
{
	FUNCTION(("acquire_advisory_lock(vnode = %p, flock = %p, wait = %s)\n",
		vnode, flock, wait ? "yes" : "no"));

	bool shared = flock->l_type == F_RDLCK;
	void* boundTo = descriptor != NULL ? (void*)descriptor : (void*)context;
	status_t status = B_OK;

	// TODO: do deadlock detection!

	struct advisory_locking* locking;

	while (true) {
		// if this vnode has an advisory_locking structure attached,
		// lock that one and search for any colliding file lock
		status = create_advisory_locking(vnode);
		if (status != B_OK)
			return status;

		locking = vnode->advisory_locking;
		team_id team = team_get_current_team_id();
		sem_id waitForLock = -1;

		// test for collisions
		LockList::Iterator iterator = locking->locks.GetIterator();
		while (iterator.HasNext()) {
			struct advisory_lock* lock = iterator.Next();

			// TODO: locks from the same team might be joinable!
			if ((lock->team != team || lock->bound_to != boundTo)
					&& advisory_lock_intersects(lock, flock)) {
				// locks do overlap
				if (!shared || !lock->shared) {
					// we need to wait
					waitForLock = locking->wait_sem;
					break;
				}
			}
		}

		if (waitForLock < 0)
			break;

		// We need to wait. Do that or fail now, if we've been asked not to.

		if (!wait) {
			put_advisory_locking(locking);
			return descriptor != NULL ? B_WOULD_BLOCK : B_PERMISSION_DENIED;
		}

		status = switch_sem_etc(locking->lock, waitForLock, 1,
			B_CAN_INTERRUPT, 0);
		if (status != B_OK && status != B_BAD_SEM_ID)
			return status;

		// We have been notified, but we need to re-lock the locking object. So
		// go another round...
	}

	// install new lock

	struct advisory_lock* lock = new(std::nothrow) advisory_lock;
	if (lock == NULL) {
		put_advisory_locking(locking);
		return B_NO_MEMORY;
	}

	lock->bound_to = boundTo;
	lock->team = team_get_current_team_id();
	lock->session = thread_get_current_thread()->team->session_id;
	// values must already be normalized when getting here
	lock->start = flock->l_start;
	lock->end = flock->l_start - 1 + flock->l_len;
	lock->shared = shared;

	locking->locks.Add(lock);
	put_advisory_locking(locking);

	return status;
}
1808 
1809 
1810 /*!	Normalizes the \a flock structure to make it easier to compare the
1811 	structure with others. The l_start and l_len fields are set to absolute
1812 	values according to the l_whence field.
1813 */
1814 static status_t
1815 normalize_flock(struct file_descriptor* descriptor, struct flock* flock)
1816 {
1817 	switch (flock->l_whence) {
1818 		case SEEK_SET:
1819 			break;
1820 		case SEEK_CUR:
1821 			flock->l_start += descriptor->pos;
1822 			break;
1823 		case SEEK_END:
1824 		{
1825 			struct vnode* vnode = descriptor->u.vnode;
1826 			struct stat stat;
1827 			status_t status;
1828 
1829 			if (!HAS_FS_CALL(vnode, read_stat))
1830 				return B_UNSUPPORTED;
1831 
1832 			status = FS_CALL(vnode, read_stat, &stat);
1833 			if (status != B_OK)
1834 				return status;
1835 
1836 			flock->l_start += stat.st_size;
1837 			break;
1838 		}
1839 		default:
1840 			return B_BAD_VALUE;
1841 	}
1842 
1843 	if (flock->l_start < 0)
1844 		flock->l_start = 0;
1845 	if (flock->l_len == 0)
1846 		flock->l_len = OFF_MAX;
1847 
1848 	// don't let the offset and length overflow
1849 	if (flock->l_start > 0 && OFF_MAX - flock->l_start < flock->l_len)
1850 		flock->l_len = OFF_MAX - flock->l_start;
1851 
1852 	if (flock->l_len < 0) {
1853 		// a negative length reverses the region
1854 		flock->l_start += flock->l_len;
1855 		flock->l_len = -flock->l_len;
1856 	}
1857 
1858 	return B_OK;
1859 }
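

/*	Worked example of the normalization above (values hypothetical): with
	descriptor->pos == 1000, a caller-supplied flock of l_whence = SEEK_CUR,
	l_start = -100, l_len = -50 first becomes l_start = 900; the negative
	length then reverses the region to l_start = 850, l_len = 50, i.e. the
	lock covers bytes [850, 899]. An l_len of 0 would instead have been
	expanded to OFF_MAX ("lock to end of file").
*/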
1860 
1861 
1862 static void
1863 replace_vnode_if_disconnected(struct fs_mount* mount,
1864 	struct vnode* vnodeToDisconnect, struct vnode*& vnode,
1865 	struct vnode* fallBack, bool lockRootLock)
1866 {
1867 	struct vnode* givenVnode = vnode;
1868 	bool vnodeReplaced = false;
1869 
1870 	ReadLocker vnodeReadLocker(sVnodeLock);
1871 
1872 	if (lockRootLock)
1873 		mutex_lock(&sIOContextRootLock);
1874 
1875 	while (vnode != NULL && vnode->mount == mount
1876 		&& (vnodeToDisconnect == NULL || vnodeToDisconnect == vnode)) {
1877 		if (vnode->covers != NULL) {
1878 			// redirect the vnode to the covered vnode
1879 			vnode = vnode->covers;
1880 		} else
1881 			vnode = fallBack;
1882 
1883 		vnodeReplaced = true;
1884 	}
1885 
1886 	// If we've replaced the node, grab a reference for the new one.
1887 	if (vnodeReplaced && vnode != NULL)
1888 		inc_vnode_ref_count(vnode);
1889 
1890 	if (lockRootLock)
1891 		mutex_unlock(&sIOContextRootLock);
1892 
1893 	vnodeReadLocker.Unlock();
1894 
1895 	if (vnodeReplaced)
1896 		put_vnode(givenVnode);
1897 }
1898 
1899 
1900 /*!	Disconnects all file descriptors that are associated with the
1901 	\a vnodeToDisconnect, or if this is NULL, all vnodes of the specified
1902 	\a mount object.
1903 
1904 	Note, after you've called this function, there might still be ongoing
1905 	accesses - they won't be interrupted if they were already in progress.
1906 	However, any subsequent access will fail.
1907 
1908 	This is not a cheap function and should be used with care and rarely.
1909 	TODO: there is currently no means to stop a blocking read/write!
1910 */
1911 static void
1912 disconnect_mount_or_vnode_fds(struct fs_mount* mount,
1913 	struct vnode* vnodeToDisconnect)
1914 {
1915 	// iterate over all teams and peek into their file descriptors
1916 	TeamListIterator teamIterator;
1917 	while (Team* team = teamIterator.Next()) {
1918 		BReference<Team> teamReference(team, true);
1919 		TeamLocker teamLocker(team);
1920 
1921 		// lock the I/O context
1922 		io_context* context = team->io_context;
1923 		if (context == NULL)
1924 			continue;
1925 		MutexLocker contextLocker(context->io_mutex);
1926 
1927 		teamLocker.Unlock();
1928 
1929 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->root,
1930 			sRoot, true);
1931 		replace_vnode_if_disconnected(mount, vnodeToDisconnect, context->cwd,
1932 			sRoot, false);
1933 
1934 		for (uint32 i = 0; i < context->table_size; i++) {
1935 			struct file_descriptor* descriptor = context->fds[i];
1936 			if (descriptor == NULL || (descriptor->open_mode & O_DISCONNECTED) != 0)
1937 				continue;
1938 
1939 			inc_fd_ref_count(descriptor);
1940 
1941 			// if this descriptor points at this mount, we
1942 			// need to disconnect it to be able to unmount
1943 			struct vnode* vnode = fd_vnode(descriptor);
1944 			if (vnodeToDisconnect != NULL) {
1945 				if (vnode == vnodeToDisconnect)
1946 					disconnect_fd(descriptor);
1947 			} else if ((vnode != NULL && vnode->mount == mount)
1948 				|| (vnode == NULL && descriptor->u.mount == mount))
1949 				disconnect_fd(descriptor);
1950 
1951 			put_fd(descriptor);
1952 		}
1953 	}
1954 }
1955 
1956 
1957 /*!	\brief Gets the root node of the current IO context.
1958 	If \a kernel is \c true, the kernel IO context will be used.
1959 	The caller obtains a reference to the returned node.
1960 */
1961 struct vnode*
1962 get_root_vnode(bool kernel)
1963 {
1964 	if (!kernel) {
1965 		// Get the root vnode from the current team's IO context
1966 		struct io_context* context = get_current_io_context(kernel);
1967 
1968 		mutex_lock(&sIOContextRootLock);
1969 
1970 		struct vnode* root = context->root;
1971 		if (root != NULL)
1972 			inc_vnode_ref_count(root);
1973 
1974 		mutex_unlock(&sIOContextRootLock);
1975 
1976 		if (root != NULL)
1977 			return root;
1978 
1979 		// That should never happen.
1980 		dprintf("get_root_vnode(): IO context for team %" B_PRId32 " doesn't "
1981 			"have a root\n", team_get_current_team_id());
1982 	}
1983 
1984 	inc_vnode_ref_count(sRoot);
1985 	return sRoot;
1986 }
1987 
1988 
1989 /*!	\brief Gets the directory path and leaf name for a given path.
1990 
1991 	The supplied \a path is transformed to refer to the directory part of
1992 	the entry identified by the original path, and into the buffer \a filename
1993 	the leaf name of the original entry is written.
1994 	Neither the returned path nor the leaf name can be expected to be
1995 	canonical.
1996 
1997 	\param path The path to be analyzed. Must be able to store at least one
1998 		   additional character.
1999 	\param filename The buffer into which the leaf name will be written.
2000 		   Must be of size B_FILE_NAME_LENGTH at least.
2001 	\return \c B_OK, if everything went fine, \c B_NAME_TOO_LONG, if the leaf
2002 		   name is longer than \c B_FILE_NAME_LENGTH, or \c B_ENTRY_NOT_FOUND,
2003 		   if the given path name is empty.
2004 */
2005 static status_t
2006 get_dir_path_and_leaf(char* path, char* filename)
2007 {
2008 	if (*path == '\0')
2009 		return B_ENTRY_NOT_FOUND;
2010 
2011 	char* last = strrchr(path, '/');
2012 		// '/' is not allowed in file names!
2013 
2014 	FUNCTION(("get_dir_path_and_leaf(path = %s)\n", path));
2015 
2016 	if (last == NULL) {
2017 		// this path is a single segment with no '/' in it,
2018 		// e.g. "foo"
2019 		if (strlcpy(filename, path, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2020 			return B_NAME_TOO_LONG;
2021 
2022 		strcpy(path, ".");
2023 	} else {
2024 		last++;
2025 		if (last[0] == '\0') {
2026 			// special case: the path ends in one or more '/' - remove them
2027 			while (*--last == '/' && last != path);
2028 			last[1] = '\0';
2029 
2030 			if (last == path && last[0] == '/') {
2031 				// This path points to the root of the file system
2032 				strcpy(filename, ".");
2033 				return B_OK;
2034 			}
2035 			for (; last != path && *(last - 1) != '/'; last--);
2036 				// rewind to the start of the leaf before the '/'
2037 		}
2038 
2039 		// normal leaf: replace the leaf portion of the path with a '.'
2040 		if (strlcpy(filename, last, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2041 			return B_NAME_TOO_LONG;
2042 
2043 		last[0] = '.';
2044 		last[1] = '\0';
2045 	}
2046 	return B_OK;
2047 }
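

/*	A few worked examples of the in-place transformation above (the leading
	directories are hypothetical):

		input path			resulting path		filename
		"foo"				"."					"foo"
		"/boot/home"		"/boot/."			"home"
		"/boot/home///"		"/boot/."			"home"
		"/"					"/"					"."
*/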
2048 
2049 
2050 static status_t
2051 entry_ref_to_vnode(dev_t mountID, ino_t directoryID, const char* name,
2052 	bool traverse, bool kernel, VnodePutter& _vnode)
2053 {
2054 	char clonedName[B_FILE_NAME_LENGTH + 1];
2055 	if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH) >= B_FILE_NAME_LENGTH)
2056 		return B_NAME_TOO_LONG;
2057 
2058 	// get the directory vnode and let vnode_path_to_vnode() do the rest
2059 	struct vnode* directory;
2060 
2061 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
2062 	if (status < 0)
2063 		return status;
2064 
2065 	return vnode_path_to_vnode(directory, clonedName, traverse, 0, kernel,
2066 		_vnode, NULL);
2067 }
2068 
2069 
2070 /*!	Looks up the entry with name \a name in the directory represented by \a dir
2071 	and returns the respective vnode.
2072 	On success a reference to the vnode is acquired for the caller.
2073 */
2074 static status_t
2075 lookup_dir_entry(struct vnode* dir, const char* name, struct vnode** _vnode)
2076 {
2077 	ino_t id;
2078 	bool missing;
2079 
2080 	if (dir->mount->entry_cache.Lookup(dir->id, name, id, missing)) {
2081 		return missing ? B_ENTRY_NOT_FOUND
2082 			: get_vnode(dir->device, id, _vnode, true, false);
2083 	}
2084 
2085 	status_t status = FS_CALL(dir, lookup, name, &id);
2086 	if (status != B_OK)
2087 		return status;
2088 
2089 	// The lookup() hook calls get_vnode() or publish_vnode(), so we do already
2090 	// have a reference and just need to look the node up.
2091 	rw_lock_read_lock(&sVnodeLock);
2092 	*_vnode = lookup_vnode(dir->device, id);
2093 	rw_lock_read_unlock(&sVnodeLock);
2094 
2095 	if (*_vnode == NULL) {
2096 		panic("lookup_dir_entry(): could not lookup vnode (mountid 0x%" B_PRIx32
2097 			" vnid 0x%" B_PRIx64 ")\n", dir->device, id);
2098 		return B_ENTRY_NOT_FOUND;
2099 	}
2100 
2101 //	ktrace_printf("lookup_dir_entry(): dir: %p (%ld, %lld), name: \"%s\" -> "
2102 //		"%p (%ld, %lld)", dir, dir->mount->id, dir->id, name, *_vnode,
2103 //		(*_vnode)->mount->id, (*_vnode)->id);
2104 
2105 	return B_OK;
2106 }
2107 
2108 
2109 /*!	Returns the vnode for the relative \a path starting at the specified \a vnode.
2110 
2111 	\param[in,out] path The relative path being searched. Must not be NULL.
2112 	If the function returns successfully, \a path contains the name of the last path
2113 	component. This function clobbers the buffer pointed to by \a path only
2114 	if it contains more than one component.
2115 
2116 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2117 	it is successful or not!
2118 
2119 	\param[out] _vnode If the function returns \c B_OK: points to the found node,
2120 		with a reference acquired for the caller.
2121 	\param[out] _vnode If the function fails and \a leafName is not NULL: set to the
2122 		last existing directory in the path. The caller has the responsibility to
2123 		release it using put_vnode().
2124 	\param[out] _vnode If the function fails and \a leafName is NULL: not used.
2125 */
2128 static status_t
2129 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2130 	int count, struct io_context* ioContext, VnodePutter& _vnode,
2131 	ino_t* _parentID, char* leafName)
2132 {
2133 	FUNCTION(("vnode_path_to_vnode(vnode = %p, path = %s)\n", vnode, path));
2134 	ASSERT(!_vnode.IsSet() || _vnode.Get() != vnode);
2135 
2136 	if (path == NULL) {
2137 		put_vnode(vnode);
2138 		return B_BAD_VALUE;
2139 	}
2140 
2141 	if (*path == '\0') {
2142 		put_vnode(vnode);
2143 		return B_ENTRY_NOT_FOUND;
2144 	}
2145 
2146 	status_t status = B_OK;
2147 	ino_t lastParentID = vnode->id;
2148 	while (true) {
2149 		struct vnode* nextVnode;
2150 		char* nextPath;
2151 
2152 		TRACE(("vnode_path_to_vnode: top of loop. p = %p, p = '%s'\n", path,
2153 			path));
2154 
2155 		// done?
2156 		if (path[0] == '\0')
2157 			break;
2158 
2159 		// walk to find the next path component ("path" will point to a single
2160 		// path component), and filter out multiple slashes
2161 		for (nextPath = path + 1; *nextPath != '\0' && *nextPath != '/';
2162 				nextPath++);
2163 
2164 		bool directoryFound = false;
2165 		if (*nextPath == '/') {
2166 			directoryFound = true;
2167 			*nextPath = '\0';
2168 			do
2169 				nextPath++;
2170 			while (*nextPath == '/');
2171 		}
2172 
2173 		// If the '..' is at a covering vnode, move to the covered vnode,
2174 		// so we pass the '..' path to the underlying file system.
2175 		// Also prevent breaking out of the root of the IO context.
2176 		if (strcmp("..", path) == 0) {
2177 			if (vnode == ioContext->root) {
2178 				// Attempted prison break! Keep it contained.
2179 				path = nextPath;
2180 				continue;
2181 			}
2182 
2183 			if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2184 				nextVnode = coveredVnode;
2185 				put_vnode(vnode);
2186 				vnode = nextVnode;
2187 			}
2188 		}
2189 
2190 		// check if vnode is really a directory
2191 		if (status == B_OK && !S_ISDIR(vnode->Type()))
2192 			status = B_NOT_A_DIRECTORY;
2193 
2194 		// Check if we have the right to search the current directory vnode.
2195 		// If a file system doesn't have the access() function, we assume that
2196 		// searching a directory is always allowed
2197 		if (status == B_OK && HAS_FS_CALL(vnode, access))
2198 			status = FS_CALL(vnode, access, X_OK);
2199 
2200 		// Tell the filesystem to get the vnode of this path component (if we
2201 		// got the permission from the call above)
2202 		if (status == B_OK)
2203 			status = lookup_dir_entry(vnode, path, &nextVnode);
2204 
2205 		if (status != B_OK) {
2206 			if (leafName != NULL) {
2207 				strlcpy(leafName, path, B_FILE_NAME_LENGTH);
2208 				_vnode.SetTo(vnode);
2209 			} else
2210 				put_vnode(vnode);
2211 			return status;
2212 		}
2213 
2214 		// If the new node is a symbolic link, resolve it (if we've been told
2215 		// to do it)
2216 		if (S_ISLNK(nextVnode->Type())
2217 			&& (traverseLeafLink || directoryFound)) {
2218 			size_t bufferSize;
2219 			char* buffer;
2220 
2221 			TRACE(("traverse link\n"));
2222 
2223 			// it's not exactly nice style using goto in this way, but hey,
2224 			// it works :-/
2225 			if (count + 1 > B_MAX_SYMLINKS) {
2226 				status = B_LINK_LIMIT;
2227 				goto resolve_link_error;
2228 			}
2229 
2230 			bufferSize = B_PATH_NAME_LENGTH;
2231 			buffer = (char*)object_cache_alloc(sPathNameCache, 0);
2232 			if (buffer == NULL) {
2233 				status = B_NO_MEMORY;
2234 				goto resolve_link_error;
2235 			}
2236 
2237 			if (HAS_FS_CALL(nextVnode, read_symlink)) {
2238 				bufferSize--;
2239 				status = FS_CALL(nextVnode, read_symlink, buffer, &bufferSize);
2240 				// null-terminate
2241 				if (status >= 0 && bufferSize < B_PATH_NAME_LENGTH)
2242 					buffer[bufferSize] = '\0';
2243 			} else
2244 				status = B_BAD_VALUE;
2245 
2246 			if (status != B_OK) {
2247 				object_cache_free(sPathNameCache, buffer, 0);
2248 
2249 		resolve_link_error:
2250 				put_vnode(vnode);
2251 				put_vnode(nextVnode);
2252 
2253 				return status;
2254 			}
2255 			put_vnode(nextVnode);
2256 
2257 			// Check if we start from the root directory or the current
2258 			// directory ("vnode" still points to that one).
2259 			// Cut off all leading slashes if it's the root directory
2260 			path = buffer;
2261 			bool absoluteSymlink = false;
2262 			if (path[0] == '/') {
2263 				// we don't need the old directory anymore
2264 				put_vnode(vnode);
2265 
2266 				while (*++path == '/')
2267 					;
2268 
2269 				mutex_lock(&sIOContextRootLock);
2270 				vnode = ioContext->root;
2271 				inc_vnode_ref_count(vnode);
2272 				mutex_unlock(&sIOContextRootLock);
2273 
2274 				absoluteSymlink = true;
2275 			}
2276 
2277 			inc_vnode_ref_count(vnode);
2278 				// balance the next recursion - we will decrement the
2279 				// ref_count of the vnode, no matter if we succeeded or not
2280 
2281 			if (absoluteSymlink && *path == '\0') {
2282 				// symlink was just "/"
2283 				nextVnode = vnode;
2284 			} else {
2285 				VnodePutter temp;
2286 				status = vnode_path_to_vnode(vnode, path, true, count + 1,
2287 					ioContext, temp, &lastParentID, leafName);
2288 				nextVnode = temp.Detach();
2289 			}
2290 
2291 			object_cache_free(sPathNameCache, buffer, 0);
2292 
2293 			if (status != B_OK) {
2294 				if (leafName != NULL)
2295 					_vnode.SetTo(nextVnode);
2296 				put_vnode(vnode);
2297 				return status;
2298 			}
2299 		} else
2300 			lastParentID = vnode->id;
2301 
2302 		// decrease the ref count on the old dir we just looked up into
2303 		put_vnode(vnode);
2304 
2305 		path = nextPath;
2306 		vnode = nextVnode;
2307 
2308 		// see if we hit a covered node
2309 		if (Vnode* coveringNode = get_covering_vnode(vnode)) {
2310 			put_vnode(vnode);
2311 			vnode = coveringNode;
2312 		}
2313 	}
2314 
2315 	_vnode.SetTo(vnode);
2316 	if (_parentID)
2317 		*_parentID = lastParentID;
2318 
2319 	return B_OK;
2320 }
2321 
2322 
2323 static status_t
2324 vnode_path_to_vnode(struct vnode* vnode, char* path, bool traverseLeafLink,
2325 	int count, bool kernel, VnodePutter& _vnode, ino_t* _parentID, char* leafName)
2326 {
2327 	return vnode_path_to_vnode(vnode, path, traverseLeafLink, count,
2328 		get_current_io_context(kernel), _vnode, _parentID, leafName);
2329 }
2330 
2331 
2332 static status_t
2333 path_to_vnode(char* path, bool traverseLink, VnodePutter& _vnode,
2334 	ino_t* _parentID, bool kernel)
2335 {
2336 	struct vnode* start = NULL;
2337 
2338 	FUNCTION(("path_to_vnode(path = \"%s\")\n", path));
2339 
2340 	if (!path)
2341 		return B_BAD_VALUE;
2342 
2343 	if (*path == '\0')
2344 		return B_ENTRY_NOT_FOUND;
2345 
2346 	// figure out if we need to start at root or at cwd
2347 	if (*path == '/') {
2348 		if (sRoot == NULL) {
2349 			// we're a bit early, aren't we?
2350 			return B_ERROR;
2351 		}
2352 
2353 		while (*++path == '/')
2354 			;
2355 		start = get_root_vnode(kernel);
2356 
2357 		if (*path == '\0') {
2358 			_vnode.SetTo(start);
2359 			return B_OK;
2360 		}
2361 
2362 	} else {
2363 		struct io_context* context = get_current_io_context(kernel);
2364 
2365 		mutex_lock(&context->io_mutex);
2366 		start = context->cwd;
2367 		if (start != NULL)
2368 			inc_vnode_ref_count(start);
2369 		mutex_unlock(&context->io_mutex);
2370 
2371 		if (start == NULL)
2372 			return B_ERROR;
2373 	}
2374 
2375 	return vnode_path_to_vnode(start, path, traverseLink, 0, kernel, _vnode,
2376 		_parentID);
2377 }
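

/*	Usage sketch (error handling elided; paths hypothetical): the buffers
	must be writable, since vnode_path_to_vnode() modifies them in place.

		VnodePutter vnode;
		char absolute[] = "/boot/home";		// resolved from get_root_vnode()
		char relative[] = "home/config";	// resolved from context->cwd
		status_t status = path_to_vnode(absolute, true, vnode, NULL, true);
*/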
2378 
2379 
2380 /*! Returns the vnode for the next-to-last segment of the path, and writes
2381 	the last path component into \a filename.
2382 	The path buffer must be able to store at least one additional character.
2383 */
2384 static status_t
2385 path_to_dir_vnode(char* path, VnodePutter& _vnode, char* filename,
2386 	bool kernel)
2387 {
2388 	status_t status = get_dir_path_and_leaf(path, filename);
2389 	if (status != B_OK)
2390 		return status;
2391 
2392 	return path_to_vnode(path, true, _vnode, NULL, kernel);
2393 }
2394 
2395 
2396 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2397 		   to by a FD + path pair.
2398 
2399 	\a path must be given in either case. \a fd might be omitted, in which
2400 	case \a path is either an absolute path or one relative to the current
2401 	directory. If both are supplied and \a path is relative, it is reckoned off
2402 	of the directory referred to by \a fd. If \a path is absolute \a fd is
2403 	ignored.
2404 
2405 	The caller has the responsibility to call put_vnode() on the returned
2406 	directory vnode.
2407 
2408 	\param fd The FD. May be < 0.
2409 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2410 	       is modified by this function. It must have at least room for a
2411 	       string one character longer than the path it contains.
2412 	\param _vnode A pointer to a variable the directory vnode shall be written
2413 		   into.
2414 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2415 		   the leaf name of the specified entry will be written.
2416 	\param kernel \c true, if invoked from inside the kernel, \c false if
2417 		   invoked from userland.
2418 	\return \c B_OK, if everything went fine, another error code otherwise.
2419 */
2420 static status_t
2421 fd_and_path_to_dir_vnode(int fd, char* path, VnodePutter& _vnode,
2422 	char* filename, bool kernel)
2423 {
2424 	if (!path)
2425 		return B_BAD_VALUE;
2426 	if (*path == '\0')
2427 		return B_ENTRY_NOT_FOUND;
2428 	if (fd < 0)
2429 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2430 
2431 	status_t status = get_dir_path_and_leaf(path, filename);
2432 	if (status != B_OK)
2433 		return status;
2434 
2435 	return fd_and_path_to_vnode(fd, path, true, _vnode, NULL, kernel);
2436 }
2437 
2438 
2439 /*!	\brief Retrieves the directory vnode and the leaf name of an entry referred
2440 		   to by a vnode + path pair.
2441 
2442 	\a path must be given in either case. \a vnode might be omitted, in which
2443 	case \a path is either an absolute path or one relative to the current
2444 	directory. If both are supplied and \a path is relative, it is reckoned off
2445 	of the directory referred to by \a vnode. If \a path is absolute \a vnode is
2446 	ignored.
2447 
2448 	The caller has the responsibility to call put_vnode() on the returned
2449 	directory vnode.
2450 
2451 	Note, this reduces the ref_count of the starting \a vnode, no matter if
2452 	it is successful or not.
2453 
2454 	\param vnode The vnode. May be \c NULL.
2455 	\param path The absolute or relative path. Must not be \c NULL. The buffer
2456 	       is modified by this function. It must have at least room for a
2457 	       string one character longer than the path it contains.
2458 	\param _vnode A pointer to a variable the directory vnode shall be written
2459 		   into.
2460 	\param filename A buffer of size B_FILE_NAME_LENGTH or larger into which
2461 		   the leaf name of the specified entry will be written.
2462 	\param kernel \c true, if invoked from inside the kernel, \c false if
2463 		   invoked from userland.
2464 	\return \c B_OK, if everything went fine, another error code otherwise.
2465 */
2466 static status_t
2467 vnode_and_path_to_dir_vnode(struct vnode* vnode, char* path,
2468 	VnodePutter& _vnode, char* filename, bool kernel)
2469 {
2470 	if (!path)
2471 		return B_BAD_VALUE;
2472 	if (*path == '\0')
2473 		return B_ENTRY_NOT_FOUND;
2474 	if (vnode == NULL || path[0] == '/')
2475 		return path_to_dir_vnode(path, _vnode, filename, kernel);
2476 
2477 	status_t status = get_dir_path_and_leaf(path, filename);
2478 	if (status != B_OK) {
2479 		put_vnode(vnode);
2480 		return status;
2481 	}
2482 
2483 	return vnode_path_to_vnode(vnode, path, true, 0, kernel, _vnode, NULL);
2484 }
2485 
2486 
2487 /*! Returns a vnode's name in the d_name field of a supplied dirent buffer.
2488 */
2489 static status_t
2490 get_vnode_name(struct vnode* vnode, struct vnode* parent, struct dirent* buffer,
2491 	size_t bufferSize, struct io_context* ioContext)
2492 {
2493 	if (bufferSize < sizeof(struct dirent))
2494 		return B_BAD_VALUE;
2495 
2496 	// See if the vnode is covering another vnode and move to the covered
2497 	// vnode so we get the underlying file system
2498 	VnodePutter vnodePutter;
2499 	if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2500 		vnode = coveredVnode;
2501 		vnodePutter.SetTo(vnode);
2502 	}
2503 
2504 	if (HAS_FS_CALL(vnode, get_vnode_name)) {
2505 		// The FS supports getting the name of a vnode.
2506 		if (FS_CALL(vnode, get_vnode_name, buffer->d_name,
2507 			(char*)buffer + bufferSize - buffer->d_name) == B_OK)
2508 			return B_OK;
2509 	}
2510 
2511 	// The FS doesn't support getting the name of a vnode. So we search the
2512 	// parent directory for the vnode, if the caller let us.
2513 
2514 	if (parent == NULL || !HAS_FS_CALL(parent, read_dir))
2515 		return B_UNSUPPORTED;
2516 
2517 	void* cookie;
2518 
2519 	status_t status = FS_CALL(parent, open_dir, &cookie);
2520 	if (status >= B_OK) {
2521 		while (true) {
2522 			uint32 num = 1;
2523 			// We use the FS hook directly instead of dir_read(), since we don't
2524 			// want the entries to be fixed. We have already resolved vnode to
2525 			// the covered node.
2526 			status = FS_CALL(parent, read_dir, cookie, buffer, bufferSize,
2527 				&num);
2528 			if (status != B_OK)
2529 				break;
2530 			if (num == 0) {
2531 				status = B_ENTRY_NOT_FOUND;
2532 				break;
2533 			}
2534 
2535 			if (vnode->id == buffer->d_ino) {
2536 				// found correct entry!
2537 				break;
2538 			}
2539 		}
2540 
2541 		FS_CALL(parent, close_dir, cookie);
2542 		FS_CALL(parent, free_dir_cookie, cookie);
2543 	}
2544 	return status;
2545 }
2546 
2547 
2548 static status_t
2549 get_vnode_name(struct vnode* vnode, struct vnode* parent, char* name,
2550 	size_t nameSize, bool kernel)
2551 {
2552 	char buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2553 	struct dirent* dirent = (struct dirent*)buffer;
2554 
2555 	status_t status = get_vnode_name(vnode, parent, dirent, sizeof(buffer),
2556 		get_current_io_context(kernel));
2557 	if (status != B_OK)
2558 		return status;
2559 
2560 	if (strlcpy(name, dirent->d_name, nameSize) >= nameSize)
2561 		return B_BUFFER_OVERFLOW;
2562 
2563 	return B_OK;
2564 }
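

/*	Usage sketch for the wrapper above (a hypothetical in-kernel caller):

		char name[B_FILE_NAME_LENGTH];
		if (get_vnode_name(vnode, parent, name, sizeof(name), true) == B_OK)
			dprintf("vnode %p is named \"%s\"\n", vnode, name);
*/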
2565 
2566 
2567 /*!	Gets the full path to a given directory vnode.
2568 	It uses the fs_get_vnode_name() call to get the name of a vnode; if a
2569 	file system doesn't support this call, it will fall back to iterating
2570 	through the parent directory to get the name of the child.
2571 
2572 	To protect against circular loops, it supports a maximum tree depth
2573 	of 256 levels.
2574 
2575 	Note that the path may no longer be correct by the time this function
2576 	returns! It doesn't use any locking to keep the returned path valid, as
2577 	paths aren't stable anyway: the path to a file can change at any time.
2578 
2579 	It might be a good idea, though, to check if the returned path exists
2580 	in the calling function (it's not done here for efficiency reasons).
2581 */
2582 static status_t
2583 dir_vnode_to_path(struct vnode* vnode, char* buffer, size_t bufferSize,
2584 	bool kernel)
2585 {
2586 	FUNCTION(("dir_vnode_to_path(%p, %p, %lu)\n", vnode, buffer, bufferSize));
2587 
2588 	if (vnode == NULL || buffer == NULL || bufferSize == 0)
2589 		return B_BAD_VALUE;
2590 
2591 	if (!S_ISDIR(vnode->Type()))
2592 		return B_NOT_A_DIRECTORY;
2593 
2594 	char* path = buffer;
2595 	int32 insert = bufferSize;
2596 	int32 maxLevel = 256;
2597 	int32 length;
2598 	status_t status = B_OK;
2599 	struct io_context* ioContext = get_current_io_context(kernel);
2600 
2601 	// we don't use get_vnode() here because this call is more
2602 	// efficient and does all we need from get_vnode()
2603 	inc_vnode_ref_count(vnode);
2604 
2605 	path[--insert] = '\0';
2606 		// the path is filled right to left
2607 
2608 	while (true) {
2609 		// If the node is the context's root, bail out. Otherwise resolve mount
2610 		// points.
2611 		if (vnode == ioContext->root)
2612 			break;
2613 
2614 		if (Vnode* coveredVnode = get_covered_vnode(vnode)) {
2615 			put_vnode(vnode);
2616 			vnode = coveredVnode;
2617 		}
2618 
2619 		// lookup the parent vnode
2620 		struct vnode* parentVnode;
2621 		status = lookup_dir_entry(vnode, "..", &parentVnode);
2622 		if (status != B_OK)
2623 			goto out;
2624 
2625 		if (parentVnode == vnode) {
2626 			// The caller apparently got their hands on a node outside of their
2627 			// context's root. Now we've hit the global root.
2628 			put_vnode(parentVnode);
2629 			break;
2630 		}
2631 
2632 		// get the node's name
2633 		char nameBuffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
2634 			// also used for fs_read_dir()
2635 		char* name = &((struct dirent*)nameBuffer)->d_name[0];
2636 		status = get_vnode_name(vnode, parentVnode, (struct dirent*)nameBuffer,
2637 			sizeof(nameBuffer), ioContext);
2638 
2639 		// release the current vnode, we only need its parent from now on
2640 		put_vnode(vnode);
2641 		vnode = parentVnode;
2642 
2643 		if (status != B_OK)
2644 			goto out;
2645 
2646 		// TODO: add an explicit check for loops in about 10 levels to do
2647 		// real loop detection
2648 
2649 		// don't go deeper than 'maxLevel' to prevent circular loops
2650 		if (maxLevel-- < 0) {
2651 			status = B_LINK_LIMIT;
2652 			goto out;
2653 		}
2654 
2655 		// add the name in front of the current path
2656 		name[B_FILE_NAME_LENGTH - 1] = '\0';
2657 		length = strlen(name);
2658 		insert -= length;
2659 		if (insert <= 0) {
2660 			status = B_RESULT_NOT_REPRESENTABLE;
2661 			goto out;
2662 		}
2663 		memcpy(path + insert, name, length);
2664 		path[--insert] = '/';
2665 	}
2666 
2667 	// the root dir will result in an empty path: fix it
2668 	if (path[insert] == '\0')
2669 		path[--insert] = '/';
2670 
2671 	TRACE(("  path is: %s\n", path + insert));
2672 
2673 	// move the path to the start of the buffer
2674 	length = bufferSize - insert;
2675 	memmove(buffer, path + insert, length);
2676 
2677 out:
2678 	put_vnode(vnode);
2679 	return status;
2680 }
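

/*	Illustration of the right-to-left construction above (names
	hypothetical): for a vnode at /boot/home/config the buffer successively
	holds "/config", "/home/config" and "/boot/home/config", each component
	being prefixed with '/', and the result is finally memmove()d to the
	start of the buffer.
*/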
2681 
2682 
2683 /*!	Checks the length of every path component, and adds a '.'
2684 	if the path ends in a slash.
2685 	The given path buffer must be able to store at least one
2686 	additional character.
2687 */
2688 static status_t
2689 check_path(char* to)
2690 {
2691 	int32 length = 0;
2692 
2693 	// check length of every path component
2694 
2695 	while (*to) {
2696 		char* begin;
2697 		if (*to == '/')
2698 			to++, length++;
2699 
2700 		begin = to;
2701 		while (*to != '/' && *to)
2702 			to++, length++;
2703 
2704 		if (to - begin > B_FILE_NAME_LENGTH)
2705 			return B_NAME_TOO_LONG;
2706 	}
2707 
2708 	if (length == 0)
2709 		return B_ENTRY_NOT_FOUND;
2710 
2711 	// complete path if there is a slash at the end
2712 
2713 	if (*(to - 1) == '/') {
2714 		if (length > B_PATH_NAME_LENGTH - 2)
2715 			return B_NAME_TOO_LONG;
2716 
2717 		to[0] = '.';
2718 		to[1] = '\0';
2719 	}
2720 
2721 	return B_OK;
2722 }
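

/*	Worked examples for check_path() (all edits are in place):

		"a/b/"	-> "a/b/."			(trailing slash completed with '.')
		"a//b"	-> unchanged, B_OK
		""		-> B_ENTRY_NOT_FOUND
		a component longer than B_FILE_NAME_LENGTH -> B_NAME_TOO_LONG
*/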
2723 
2724 
2725 static struct file_descriptor*
2726 get_fd_and_vnode(int fd, struct vnode** _vnode, bool kernel)
2727 {
2728 	struct file_descriptor* descriptor
2729 		= get_fd(get_current_io_context(kernel), fd);
2730 	if (descriptor == NULL)
2731 		return NULL;
2732 
2733 	struct vnode* vnode = fd_vnode(descriptor);
2734 	if (vnode == NULL) {
2735 		put_fd(descriptor);
2736 		return NULL;
2737 	}
2738 
2739 	// TODO: when we can close a file descriptor at any point, investigate
2740 	//	whether this is still valid to do (accessing the vnode without
2741 	//	ref_count or locking)
2742 	*_vnode = vnode;
2743 	return descriptor;
2744 }
2745 
2746 
2747 static struct vnode*
2748 get_vnode_from_fd(int fd, bool kernel)
2749 {
2750 	struct file_descriptor* descriptor;
2751 	struct vnode* vnode;
2752 
2753 	descriptor = get_fd(get_current_io_context(kernel), fd);
2754 	if (descriptor == NULL)
2755 		return NULL;
2756 
2757 	vnode = fd_vnode(descriptor);
2758 	if (vnode != NULL)
2759 		inc_vnode_ref_count(vnode);
2760 
2761 	put_fd(descriptor);
2762 	return vnode;
2763 }
2764 
2765 
2766 /*!	Gets the vnode from an FD + path combination. If \a fd is lower than zero,
2767 	only the path will be considered. In this case, the \a path must not be
2768 	NULL.
2769 	If \a fd is a valid file descriptor, \a path may be NULL for directories,
2770 	and should be NULL for files.
2771 */
2772 static status_t
2773 fd_and_path_to_vnode(int fd, char* path, bool traverseLeafLink,
2774 	VnodePutter& _vnode, ino_t* _parentID, bool kernel)
2775 {
2776 	if (fd < 0 && !path)
2777 		return B_BAD_VALUE;
2778 
2779 	if (path != NULL && *path == '\0')
2780 		return B_ENTRY_NOT_FOUND;
2781 
2782 	if (fd < 0 || (path != NULL && path[0] == '/')) {
2783 		// no FD or absolute path
2784 		return path_to_vnode(path, traverseLeafLink, _vnode, _parentID, kernel);
2785 	}
2786 
2787 	// FD only, or FD + relative path
2788 	struct vnode* vnode = get_vnode_from_fd(fd, kernel);
2789 	if (vnode == NULL)
2790 		return B_FILE_ERROR;
2791 
2792 	if (path != NULL) {
2793 		return vnode_path_to_vnode(vnode, path, traverseLeafLink, 0, kernel,
2794 			_vnode, _parentID);
2795 	}
2796 
2797 	// there is no relative path to take into account
2798 
2799 	_vnode.SetTo(vnode);
2800 	if (_parentID)
2801 		*_parentID = -1;
2802 
2803 	return B_OK;
2804 }
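

/*	The resolution matrix implemented above, for reference:

		fd		path		behavior
		< 0		NULL		B_BAD_VALUE
		any		""			B_ENTRY_NOT_FOUND
		< 0		absolute	path_to_vnode() from the IO context root
		< 0		relative	path_to_vnode() off the current directory
		>= 0	absolute	path_to_vnode(); the FD is ignored
		>= 0	relative	vnode_path_to_vnode() off the FD's vnode
		>= 0	NULL		the FD's vnode itself (_parentID set to -1)
*/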
2805 
2806 
2807 static int
2808 get_new_fd(int type, struct fs_mount* mount, struct vnode* vnode,
2809 	void* cookie, int openMode, bool kernel)
2810 {
2811 	struct file_descriptor* descriptor;
2812 	int fd;
2813 
2814 	// If the vnode is locked, we don't allow creating a new file/directory
2815 	// file_descriptor for it
2816 	if (vnode && vnode->mandatory_locked_by != NULL
2817 		&& (type == FDTYPE_FILE || type == FDTYPE_DIR))
2818 		return B_BUSY;
2819 
2820 	if ((openMode & O_RDWR) != 0 && (openMode & O_WRONLY) != 0)
2821 		return B_BAD_VALUE;
2822 
2823 	descriptor = alloc_fd();
2824 	if (!descriptor)
2825 		return B_NO_MEMORY;
2826 
2827 	if (vnode)
2828 		descriptor->u.vnode = vnode;
2829 	else
2830 		descriptor->u.mount = mount;
2831 	descriptor->cookie = cookie;
2832 
2833 	switch (type) {
2834 		// vnode types
2835 		case FDTYPE_FILE:
2836 			descriptor->ops = &sFileOps;
2837 			break;
2838 		case FDTYPE_DIR:
2839 			descriptor->ops = &sDirectoryOps;
2840 			break;
2841 		case FDTYPE_ATTR:
2842 			descriptor->ops = &sAttributeOps;
2843 			break;
2844 		case FDTYPE_ATTR_DIR:
2845 			descriptor->ops = &sAttributeDirectoryOps;
2846 			break;
2847 
2848 		// mount types
2849 		case FDTYPE_INDEX_DIR:
2850 			descriptor->ops = &sIndexDirectoryOps;
2851 			break;
2852 		case FDTYPE_QUERY:
2853 			descriptor->ops = &sQueryOps;
2854 			break;
2855 
2856 		default:
2857 			panic("get_new_fd() called with unknown type %d\n", type);
2858 			break;
2859 	}
2860 	descriptor->type = type;
2861 	descriptor->open_mode = openMode;
2862 
2863 	io_context* context = get_current_io_context(kernel);
2864 	fd = new_fd(context, descriptor);
2865 	if (fd < 0) {
2866 		descriptor->ops = NULL;
2867 		put_fd(descriptor);
2868 		return B_NO_MORE_FDS;
2869 	}
2870 
2871 	mutex_lock(&context->io_mutex);
2872 	fd_set_close_on_exec(context, fd, (openMode & O_CLOEXEC) != 0);
2873 	mutex_unlock(&context->io_mutex);
2874 
2875 	return fd;
2876 }
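

/*	Usage sketch (a simplified version of what the open paths in this file
	do; error handling elided):

		void* cookie;
		if (FS_CALL(vnode, open, openMode, &cookie) == B_OK) {
			int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode,
				kernel);
			// if fd < 0, the caller must close and free the cookie itself
		}
*/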
2877 
2878 
2879 /*!	In-place normalizes \a path. It's otherwise semantically equivalent to
2880 	vfs_normalize_path(). See there for more documentation.
2881 */
2882 static status_t
2883 normalize_path(char* path, size_t pathSize, bool traverseLink, bool kernel)
2884 {
2885 	VnodePutter dir;
2886 	status_t error;
2887 
2888 	for (int i = 0; i < B_MAX_SYMLINKS; i++) {
2889 		// get dir vnode + leaf name
2890 		char leaf[B_FILE_NAME_LENGTH];
2891 		error = vnode_and_path_to_dir_vnode(dir.Detach(), path, dir, leaf, kernel);
2892 		if (error != B_OK)
2893 			return error;
2894 		strcpy(path, leaf);
2895 
2896 		// get file vnode, if we shall resolve links
2897 		bool fileExists = false;
2898 		VnodePutter fileVnode;
2899 		if (traverseLink) {
2900 			inc_vnode_ref_count(dir.Get());
2901 			if (vnode_path_to_vnode(dir.Get(), path, false, 0, kernel, fileVnode,
2902 					NULL) == B_OK) {
2903 				fileExists = true;
2904 			}
2905 		}
2906 
2907 		if (!fileExists || !traverseLink || !S_ISLNK(fileVnode->Type())) {
2908 			// we're done -- construct the path
2909 			bool hasLeaf = true;
2910 			if (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0) {
2911 				// special cases "." and ".." -- get the dir, forget the leaf
2912 				error = vnode_path_to_vnode(dir.Detach(), leaf, false, 0, kernel,
2913 					dir, NULL);
2914 				if (error != B_OK)
2915 					return error;
2916 				hasLeaf = false;
2917 			}
2918 
2919 			// get the directory path
2920 			error = dir_vnode_to_path(dir.Get(), path, B_PATH_NAME_LENGTH, kernel);
2921 			if (error != B_OK)
2922 				return error;
2923 
2924 			// append the leaf name
2925 			if (hasLeaf) {
2926 				// insert a directory separator if this is not the file system
2927 				// root
2928 				if ((strcmp(path, "/") != 0
2929 					&& strlcat(path, "/", pathSize) >= pathSize)
2930 					|| strlcat(path, leaf, pathSize) >= pathSize) {
2931 					return B_NAME_TOO_LONG;
2932 				}
2933 			}
2934 
2935 			return B_OK;
2936 		}
2937 
2938 		// read link
2939 		if (HAS_FS_CALL(fileVnode, read_symlink)) {
2940 			size_t bufferSize = B_PATH_NAME_LENGTH - 1;
2941 			error = FS_CALL(fileVnode.Get(), read_symlink, path, &bufferSize);
2942 			if (error != B_OK)
2943 				return error;
2944 			if (bufferSize < B_PATH_NAME_LENGTH)
2945 				path[bufferSize] = '\0';
2946 		} else
2947 			return B_BAD_VALUE;
2948 	}
2949 
2950 	return B_LINK_LIMIT;
2951 }
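

/*	Example (names hypothetical): normalizing "/boot/home/lnk" where "lnk"
	is a symlink to "/boot/system": the first iteration splits the path
	into dir "/boot/home" and leaf "lnk"; since the leaf resolves to a
	symlink, its target replaces the path and the loop runs again, now
	producing "/boot/system". If no non-link leaf is reached within
	B_MAX_SYMLINKS iterations, B_LINK_LIMIT is returned.
*/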
2952 
2953 
2954 static status_t
2955 resolve_covered_parent(struct vnode* parent, dev_t* _device, ino_t* _node,
2956 	struct io_context* ioContext)
2957 {
2958 	// Make sure the IO context root is not bypassed.
2959 	if (parent == ioContext->root) {
2960 		*_device = parent->device;
2961 		*_node = parent->id;
2962 		return B_OK;
2963 	}
2964 
2965 	inc_vnode_ref_count(parent);
2966 		// vnode_path_to_vnode() puts the node
2967 
2968 	// ".." is guaranteed not to be clobbered by this call
2969 	VnodePutter vnode;
2970 	status_t status = vnode_path_to_vnode(parent, (char*)"..", false, 0,
2971 		ioContext, vnode, NULL);
2972 	if (status == B_OK) {
2973 		*_device = vnode->device;
2974 		*_node = vnode->id;
2975 	}
2976 
2977 	return status;
2978 }
2979 
2980 
2981 #ifdef ADD_DEBUGGER_COMMANDS
2982 
2983 
2984 static void
2985 _dump_advisory_locking(advisory_locking* locking)
2986 {
2987 	if (locking == NULL)
2988 		return;
2989 
2990 	kprintf("   lock:        %" B_PRId32, locking->lock);
2991 	kprintf("   wait_sem:    %" B_PRId32, locking->wait_sem);
2992 
2993 	int32 index = 0;
2994 	LockList::Iterator iterator = locking->locks.GetIterator();
2995 	while (iterator.HasNext()) {
2996 		struct advisory_lock* lock = iterator.Next();
2997 
2998 		kprintf("   [%2" B_PRId32 "] team:   %" B_PRId32 "\n", index++, lock->team);
2999 		kprintf("        start:  %" B_PRIdOFF "\n", lock->start);
3000 		kprintf("        end:    %" B_PRIdOFF "\n", lock->end);
3001 		kprintf("        shared? %s\n", lock->shared ? "yes" : "no");
3002 	}
3003 }
3004 
3005 
3006 static void
3007 _dump_mount(struct fs_mount* mount)
3008 {
3009 	kprintf("MOUNT: %p\n", mount);
3010 	kprintf(" id:            %" B_PRIdDEV "\n", mount->id);
3011 	kprintf(" device_name:   %s\n", mount->device_name);
3012 	kprintf(" root_vnode:    %p\n", mount->root_vnode);
3013 	kprintf(" covers:        %p\n", mount->root_vnode->covers);
3014 	kprintf(" partition:     %p\n", mount->partition);
3015 	kprintf(" lock:          %p\n", &mount->lock);
3016 	kprintf(" flags:        %s%s\n", mount->unmounting ? " unmounting" : "",
3017 		mount->owns_file_device ? " owns_file_device" : "");
3018 
3019 	fs_volume* volume = mount->volume;
3020 	while (volume != NULL) {
3021 		kprintf(" volume %p:\n", volume);
3022 		kprintf("  layer:            %" B_PRId32 "\n", volume->layer);
3023 		kprintf("  private_volume:   %p\n", volume->private_volume);
3024 		kprintf("  ops:              %p\n", volume->ops);
3025 		kprintf("  file_system:      %p\n", volume->file_system);
3026 		kprintf("  file_system_name: %s\n", volume->file_system_name);
3027 		volume = volume->super_volume;
3028 	}
3029 
3030 	set_debug_variable("_volume", (addr_t)mount->volume->private_volume);
3031 	set_debug_variable("_root", (addr_t)mount->root_vnode);
3032 	set_debug_variable("_covers", (addr_t)mount->root_vnode->covers);
3033 	set_debug_variable("_partition", (addr_t)mount->partition);
3034 }
3035 
3036 
3037 static bool
3038 debug_prepend_vnode_name_to_path(char* buffer, size_t& bufferSize,
3039 	const char* name)
3040 {
3041 	bool insertSlash = buffer[bufferSize] != '\0';
3042 	size_t nameLength = strlen(name);
3043 
3044 	if (bufferSize < nameLength + (insertSlash ? 1 : 0))
3045 		return false;
3046 
3047 	if (insertSlash)
3048 		buffer[--bufferSize] = '/';
3049 
3050 	bufferSize -= nameLength;
3051 	memcpy(buffer + bufferSize, name, nameLength);
3052 
3053 	return true;
3054 }
3055 
3056 
3057 static bool
3058 debug_prepend_vnode_id_to_path(char* buffer, size_t& bufferSize, dev_t devID,
3059 	ino_t nodeID)
3060 {
3061 	if (bufferSize == 0)
3062 		return false;
3063 
3064 	bool insertSlash = buffer[bufferSize] != '\0';
3065 	if (insertSlash)
3066 		buffer[--bufferSize] = '/';
3067 
3068 	size_t size = snprintf(buffer, bufferSize,
3069 		"<%" B_PRIdDEV ",%" B_PRIdINO ">", devID, nodeID);
3070 	if (size > bufferSize) {
3071 		if (insertSlash)
3072 			bufferSize++;
3073 		return false;
3074 	}
3075 
3076 	if (size < bufferSize)
3077 		memmove(buffer + bufferSize - size, buffer, size);
3078 
3079 	bufferSize -= size;
3080 	return true;
3081 }
3082 
3083 
3084 static char*
3085 debug_resolve_vnode_path(struct vnode* vnode, char* buffer, size_t bufferSize,
3086 	bool& _truncated)
3087 {
3088 	// null-terminate the path
3089 	buffer[--bufferSize] = '\0';
3090 
3091 	while (true) {
3092 		while (vnode->covers != NULL)
3093 			vnode = vnode->covers;
3094 
3095 		if (vnode == sRoot) {
3096 			_truncated = bufferSize == 0;
3097 			if (!_truncated)
3098 				buffer[--bufferSize] = '/';
3099 			return buffer + bufferSize;
3100 		}
3101 
3102 		// resolve the name
3103 		ino_t dirID;
3104 		const char* name = vnode->mount->entry_cache.DebugReverseLookup(
3105 			vnode->id, dirID);
3106 		if (name == NULL) {
3107 			// Failed to resolve the name -- prepend "<dev,node>/".
3108 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3109 				vnode->mount->id, vnode->id);
3110 			return buffer + bufferSize;
3111 		}
3112 
3113 		// prepend the name
3114 		if (!debug_prepend_vnode_name_to_path(buffer, bufferSize, name)) {
3115 			_truncated = true;
3116 			return buffer + bufferSize;
3117 		}
3118 
3119 		// resolve the directory node
3120 		struct vnode* nextVnode = lookup_vnode(vnode->mount->id, dirID);
3121 		if (nextVnode == NULL) {
3122 			_truncated = !debug_prepend_vnode_id_to_path(buffer, bufferSize,
3123 				vnode->mount->id, dirID);
3124 			return buffer + bufferSize;
3125 		}
3126 
3127 		vnode = nextVnode;
3128 	}
3129 }
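

/*	Sketch of the fallback formatting above: when the entry cache cannot
	reverse-look-up a component's name, it is rendered as "<dev,node>", so
	a partially resolvable path may print as "<3,1542>/home/config" (values
	hypothetical).
*/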
3130 
3131 
3132 static void
3133 _dump_vnode(struct vnode* vnode, bool printPath)
3134 {
3135 	kprintf("VNODE: %p\n", vnode);
3136 	kprintf(" device:        %" B_PRIdDEV "\n", vnode->device);
3137 	kprintf(" id:            %" B_PRIdINO "\n", vnode->id);
3138 	kprintf(" ref_count:     %" B_PRId32 "\n", vnode->ref_count);
3139 	kprintf(" private_node:  %p\n", vnode->private_node);
3140 	kprintf(" mount:         %p\n", vnode->mount);
3141 	kprintf(" covered_by:    %p\n", vnode->covered_by);
3142 	kprintf(" covers:        %p\n", vnode->covers);
3143 	kprintf(" cache:         %p\n", vnode->cache);
3144 	kprintf(" type:          %#" B_PRIx32 "\n", vnode->Type());
3145 	kprintf(" flags:         %s%s%s\n", vnode->IsRemoved() ? "r" : "-",
3146 		vnode->IsBusy() ? "b" : "-", vnode->IsUnpublished() ? "u" : "-");
3147 	kprintf(" advisory_lock: %p\n", vnode->advisory_locking);
3148 
3149 	_dump_advisory_locking(vnode->advisory_locking);
3150 
3151 	if (printPath) {
3152 		void* buffer = debug_malloc(B_PATH_NAME_LENGTH);
3153 		if (buffer != NULL) {
3154 			bool truncated;
3155 			char* path = debug_resolve_vnode_path(vnode, (char*)buffer,
3156 				B_PATH_NAME_LENGTH, truncated);
3157 			if (path != NULL) {
3158 				kprintf(" path:          ");
3159 				if (truncated)
3160 					kputs("<truncated>/");
3161 				kputs(path);
3162 				kputs("\n");
3163 			} else
3164 				kprintf("Failed to resolve vnode path.\n");
3165 
3166 			debug_free(buffer);
3167 		} else
3168 			kprintf("Failed to allocate memory for constructing the path.\n");
3169 	}
3170 
3171 	set_debug_variable("_node", (addr_t)vnode->private_node);
3172 	set_debug_variable("_mount", (addr_t)vnode->mount);
3173 	set_debug_variable("_covered_by", (addr_t)vnode->covered_by);
3174 	set_debug_variable("_covers", (addr_t)vnode->covers);
3175 	set_debug_variable("_adv_lock", (addr_t)vnode->advisory_locking);
3176 }
3177 
3178 
3179 static int
3180 dump_mount(int argc, char** argv)
3181 {
3182 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3183 		kprintf("usage: %s [id|address]\n", argv[0]);
3184 		return 0;
3185 	}
3186 
3187 	ulong val = parse_expression(argv[1]);
3188 	uint32 id = val;
3189 
3190 	struct fs_mount* mount = sMountsTable->Lookup(id);
3191 	if (mount == NULL) {
3192 		if (IS_USER_ADDRESS(id)) {
3193 			kprintf("fs_mount not found\n");
3194 			return 0;
3195 		}
3196 		mount = (fs_mount*)val;
3197 	}
3198 
3199 	_dump_mount(mount);
3200 	return 0;
3201 }
3202 
3203 
3204 static int
3205 dump_mounts(int argc, char** argv)
3206 {
3207 	if (argc != 1) {
3208 		kprintf("usage: %s\n", argv[0]);
3209 		return 0;
3210 	}
3211 
3212 	kprintf("%-*s    id %-*s   %-*s   %-*s   fs_name\n",
3213 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "root",
3214 		B_PRINTF_POINTER_WIDTH, "covers", B_PRINTF_POINTER_WIDTH, "cookie");
3215 
3216 	struct fs_mount* mount;
3217 
3218 	MountTable::Iterator iterator(sMountsTable);
3219 	while (iterator.HasNext()) {
3220 		mount = iterator.Next();
3221 		kprintf("%p%4" B_PRIdDEV " %p %p %p %s\n", mount, mount->id, mount->root_vnode,
3222 			mount->root_vnode->covers, mount->volume->private_volume,
3223 			mount->volume->file_system_name);
3224 
3225 		fs_volume* volume = mount->volume;
3226 		while (volume->super_volume != NULL) {
3227 			volume = volume->super_volume;
3228 			kprintf("                                     %p %s\n",
3229 				volume->private_volume, volume->file_system_name);
3230 		}
3231 	}
3232 
3233 	return 0;
3234 }
3235 
3236 
3237 static int
3238 dump_vnode(int argc, char** argv)
3239 {
3240 	bool printPath = false;
3241 	int argi = 1;
3242 	if (argc >= 2 && strcmp(argv[argi], "-p") == 0) {
3243 		printPath = true;
3244 		argi++;
3245 	}
3246 
3247 	if (argi >= argc || argi + 2 < argc) {
3248 		print_debugger_command_usage(argv[0]);
3249 		return 0;
3250 	}
3251 
3252 	struct vnode* vnode = NULL;
3253 
3254 	if (argi + 1 == argc) {
3255 		vnode = (struct vnode*)parse_expression(argv[argi]);
3256 		if (IS_USER_ADDRESS(vnode)) {
3257 			kprintf("invalid vnode address\n");
3258 			return 0;
3259 		}
3260 		_dump_vnode(vnode, printPath);
3261 		return 0;
3262 	}
3263 
3264 	dev_t device = parse_expression(argv[argi]);
3265 	ino_t id = parse_expression(argv[argi + 1]);
3266 
3267 	VnodeTable::Iterator iterator(sVnodeTable);
3268 	while (iterator.HasNext()) {
3269 		vnode = iterator.Next();
3270 		if (vnode->id != id || vnode->device != device)
3271 			continue;
3272 
3273 		_dump_vnode(vnode, printPath);
3274 	}
3275 
3276 	return 0;
3277 }
3278 
3279 
3280 static int
3281 dump_vnodes(int argc, char** argv)
3282 {
3283 	if (argc != 2 || !strcmp(argv[1], "--help")) {
3284 		kprintf("usage: %s [device]\n", argv[0]);
3285 		return 0;
3286 	}
3287 
3288 	// restrict dumped nodes to a certain device if requested
3289 	dev_t device = parse_expression(argv[1]);
3290 
3291 	struct vnode* vnode;
3292 
3293 	kprintf("%-*s   dev     inode  ref %-*s   %-*s   %-*s   flags\n",
3294 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache",
3295 		B_PRINTF_POINTER_WIDTH, "fs-node", B_PRINTF_POINTER_WIDTH, "locking");
3296 
3297 	VnodeTable::Iterator iterator(sVnodeTable);
3298 	while (iterator.HasNext()) {
3299 		vnode = iterator.Next();
3300 		if (vnode->device != device)
3301 			continue;
3302 
3303 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO "%5" B_PRId32 " %p %p %p %s%s%s\n",
3304 			vnode, vnode->device, vnode->id, vnode->ref_count, vnode->cache,
3305 			vnode->private_node, vnode->advisory_locking,
3306 			vnode->IsRemoved() ? "r" : "-", vnode->IsBusy() ? "b" : "-",
3307 			vnode->IsUnpublished() ? "u" : "-");
3308 	}
3309 
3310 	return 0;
3311 }
3312 
3313 
3314 static int
3315 dump_vnode_caches(int argc, char** argv)
3316 {
3317 	struct vnode* vnode;
3318 
3319 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3320 		kprintf("usage: %s [device]\n", argv[0]);
3321 		return 0;
3322 	}
3323 
3324 	// restrict dumped nodes to a certain device if requested
3325 	dev_t device = -1;
3326 	if (argc > 1)
3327 		device = parse_expression(argv[1]);
3328 
3329 	kprintf("%-*s   dev     inode %-*s       size   pages\n",
3330 		B_PRINTF_POINTER_WIDTH, "address", B_PRINTF_POINTER_WIDTH, "cache");
3331 
3332 	VnodeTable::Iterator iterator(sVnodeTable);
3333 	while (iterator.HasNext()) {
3334 		vnode = iterator.Next();
3335 		if (vnode->cache == NULL)
3336 			continue;
3337 		if (device != -1 && vnode->device != device)
3338 			continue;
3339 
3340 		kprintf("%p%4" B_PRIdDEV "%10" B_PRIdINO " %p %8" B_PRIdOFF "%8" B_PRId32 "\n",
3341 			vnode, vnode->device, vnode->id, vnode->cache,
3342 			(vnode->cache->virtual_end + B_PAGE_SIZE - 1) / B_PAGE_SIZE,
3343 			vnode->cache->page_count);
3344 	}
3345 
3346 	return 0;
3347 }
3348 
3349 
3350 int
3351 dump_io_context(int argc, char** argv)
3352 {
3353 	if (argc > 2 || !strcmp(argv[1], "--help")) {
3354 		kprintf("usage: %s [team-id|address]\n", argv[0]);
3355 		return 0;
3356 	}
3357 
3358 	struct io_context* context = NULL;
3359 
3360 	if (argc > 1) {
3361 		ulong num = parse_expression(argv[1]);
3362 		if (IS_KERNEL_ADDRESS(num))
3363 			context = (struct io_context*)num;
3364 		else {
3365 			Team* team = team_get_team_struct_locked(num);
3366 			if (team == NULL) {
3367 				kprintf("could not find team with ID %lu\n", num);
3368 				return 0;
3369 			}
3370 			context = (struct io_context*)team->io_context;
3371 		}
3372 	} else
3373 		context = get_current_io_context(true);
3374 
3375 	kprintf("I/O CONTEXT: %p\n", context);
3376 	kprintf(" root vnode:\t%p\n", context->root);
3377 	kprintf(" cwd vnode:\t%p\n", context->cwd);
3378 	kprintf(" used fds:\t%" B_PRIu32 "\n", context->num_used_fds);
3379 	kprintf(" max fds:\t%" B_PRIu32 "\n", context->table_size);
3380 
3381 	if (context->num_used_fds) {
3382 		kprintf("   no.  type    %*s  ref  open  mode         pos    %*s\n",
3383 			B_PRINTF_POINTER_WIDTH, "ops", B_PRINTF_POINTER_WIDTH, "cookie");
3384 	}
3385 
3386 	for (uint32 i = 0; i < context->table_size; i++) {
3387 		struct file_descriptor* fd = context->fds[i];
3388 		if (fd == NULL)
3389 			continue;
3390 
3391 		kprintf("  %3" B_PRIu32 ":  %4" B_PRId32 "  %p  %3" B_PRId32 "  %4"
3392 			B_PRIu32 "  %4" B_PRIx32 "  %10" B_PRIdOFF "  %p  %s %p\n", i,
3393 			fd->type, fd->ops, fd->ref_count, fd->open_count, fd->open_mode,
3394 			fd->pos, fd->cookie,
3395 			fd->type >= FDTYPE_INDEX && fd->type <= FDTYPE_QUERY
3396 				? "mount" : "vnode",
3397 			fd->u.vnode);
3398 	}
3399 
3400 	kprintf(" used monitors:\t%" B_PRIu32 "\n", context->num_monitors);
3401 	kprintf(" max monitors:\t%" B_PRIu32 "\n", context->max_monitors);
3402 
3403 	set_debug_variable("_cwd", (addr_t)context->cwd);
3404 
3405 	return 0;
3406 }
3407 
3408 
3409 int
3410 dump_vnode_usage(int argc, char** argv)
3411 {
3412 	if (argc != 1) {
3413 		kprintf("usage: %s\n", argv[0]);
3414 		return 0;
3415 	}
3416 
3417 	kprintf("Unused vnodes: %" B_PRIu32 " (max unused %" B_PRIu32 ")\n",
3418 		sUnusedVnodes, kMaxUnusedVnodes);
3419 
3420 	uint32 count = sVnodeTable->CountElements();
3421 
3422 	kprintf("%" B_PRIu32 " vnodes total (%" B_PRIu32 " in use).\n", count,
3423 		count - sUnusedVnodes);
3424 	return 0;
3425 }
3426 
3427 #endif	// ADD_DEBUGGER_COMMANDS
3428 
3429 
3430 /*!	Clears memory specified by an iovec array.
3431 */
3432 static void
3433 zero_iovecs(const iovec* vecs, size_t vecCount, size_t bytes)
3434 {
3435 	for (size_t i = 0; i < vecCount && bytes > 0; i++) {
3436 		size_t length = std::min(vecs[i].iov_len, bytes);
3437 		memset(vecs[i].iov_base, 0, length);
3438 		bytes -= length;
3439 	}
3440 }
3441 
3442 
3443 /*!	Does the dirty work of combining the file_io_vecs with the iovecs,
3444 	and calls the file system hooks to read/write the request to/from disk.
3445 */
3446 static status_t
3447 common_file_io_vec_pages(struct vnode* vnode, void* cookie,
3448 	const file_io_vec* fileVecs, size_t fileVecCount, const iovec* vecs,
3449 	size_t vecCount, uint32* _vecIndex, size_t* _vecOffset, size_t* _numBytes,
3450 	bool doWrite)
3451 {
3452 	if (fileVecCount == 0) {
3453 		// There are no file vecs at this offset, so we're obviously trying
3454 		// to access the file outside of its bounds
3455 		return B_BAD_VALUE;
3456 	}
3457 
3458 	size_t numBytes = *_numBytes;
3459 	uint32 fileVecIndex;
3460 	size_t vecOffset = *_vecOffset;
3461 	uint32 vecIndex = *_vecIndex;
3462 	status_t status;
3463 	size_t size;
3464 
3465 	if (!doWrite && vecOffset == 0) {
3466 		// now directly read the data from the device
3467 		// the first file_io_vec can be read directly
3468 		// TODO: we could also write directly
3469 
3470 		if (fileVecs[0].length < (off_t)numBytes)
3471 			size = fileVecs[0].length;
3472 		else
3473 			size = numBytes;
3474 
3475 		if (fileVecs[0].offset >= 0) {
3476 			status = FS_CALL(vnode, read_pages, cookie, fileVecs[0].offset,
3477 				&vecs[vecIndex], vecCount - vecIndex, &size);
3478 		} else {
3479 			// sparse read
3480 			zero_iovecs(&vecs[vecIndex], vecCount - vecIndex, size);
3481 			status = B_OK;
3482 		}
3483 		if (status != B_OK)
3484 			return status;
3485 
3486 		ASSERT((off_t)size <= fileVecs[0].length);
3487 
3488 		// If the file portion was contiguous, we're already done now
3489 		if (size == numBytes)
3490 			return B_OK;
3491 
3492 		// if we reached the end of the file, we can return as well
3493 		if ((off_t)size != fileVecs[0].length) {
3494 			*_numBytes = size;
3495 			return B_OK;
3496 		}
3497 
3498 		fileVecIndex = 1;
3499 
3500 		// first, find out where we have to continue in our iovecs
3501 		for (; vecIndex < vecCount; vecIndex++) {
3502 			if (size < vecs[vecIndex].iov_len)
3503 				break;
3504 
3505 			size -= vecs[vecIndex].iov_len;
3506 		}
3507 
3508 		vecOffset = size;
3509 	} else {
3510 		fileVecIndex = 0;
3511 		size = 0;
3512 	}
3513 
3514 	// Too bad, let's process the rest of the file_io_vecs
3515 
3516 	size_t totalSize = size;
3517 	size_t bytesLeft = numBytes - size;
3518 
3519 	for (; fileVecIndex < fileVecCount; fileVecIndex++) {
3520 		const file_io_vec &fileVec = fileVecs[fileVecIndex];
3521 		off_t fileOffset = fileVec.offset;
3522 		off_t fileLeft = min_c(fileVec.length, (off_t)bytesLeft);
3523 
3524 		TRACE(("FILE VEC [%" B_PRIu32 "] length %" B_PRIdOFF "\n", fileVecIndex,
3525 			fileLeft));
3526 
3527 		// process the complete fileVec
3528 		while (fileLeft > 0) {
3529 			iovec tempVecs[MAX_TEMP_IO_VECS];
3530 			uint32 tempCount = 0;
3531 
3532 			// size tracks how much of what is left of the current fileVec
3533 			// (fileLeft) has been assigned to tempVecs
3534 			size = 0;
3535 
3536 			// assign what is left of the current fileVec to the tempVecs
3537 			for (size = 0; (off_t)size < fileLeft && vecIndex < vecCount
3538 					&& tempCount < MAX_TEMP_IO_VECS;) {
3539 				// try to satisfy one iovec per iteration (or as much as
3540 				// possible)
3541 
3542 				// bytes left of the current iovec
3543 				size_t vecLeft = vecs[vecIndex].iov_len - vecOffset;
3544 				if (vecLeft == 0) {
3545 					vecOffset = 0;
3546 					vecIndex++;
3547 					continue;
3548 				}
3549 
3550 				TRACE(("fill vec %" B_PRIu32 ", offset = %lu, size = %lu\n",
3551 					vecIndex, vecOffset, size));
3552 
3553 				// actually available bytes
3554 				size_t tempVecSize = min_c(vecLeft, fileLeft - size);
3555 
3556 				tempVecs[tempCount].iov_base
3557 					= (void*)((addr_t)vecs[vecIndex].iov_base + vecOffset);
3558 				tempVecs[tempCount].iov_len = tempVecSize;
3559 				tempCount++;
3560 
3561 				size += tempVecSize;
3562 				vecOffset += tempVecSize;
3563 			}
3564 
3565 			size_t bytes = size;
3566 
3567 			if (fileOffset == -1) {
3568 				if (doWrite) {
3569 					panic("sparse write attempt: vnode %p", vnode);
3570 					status = B_IO_ERROR;
3571 				} else {
3572 					// sparse read
3573 					zero_iovecs(tempVecs, tempCount, bytes);
3574 					status = B_OK;
3575 				}
3576 			} else if (doWrite) {
3577 				status = FS_CALL(vnode, write_pages, cookie, fileOffset,
3578 					tempVecs, tempCount, &bytes);
3579 			} else {
3580 				status = FS_CALL(vnode, read_pages, cookie, fileOffset,
3581 					tempVecs, tempCount, &bytes);
3582 			}
3583 			if (status != B_OK)
3584 				return status;
3585 
3586 			totalSize += bytes;
3587 			bytesLeft -= size;
3588 			if (fileOffset >= 0)
3589 				fileOffset += size;
3590 			fileLeft -= size;
3591 			//dprintf("-> file left = %Lu\n", fileLeft);
3592 
3593 			if (size != bytes || vecIndex >= vecCount) {
3594 				// there are no more bytes or iovecs, let's bail out
3595 				*_numBytes = totalSize;
3596 				return B_OK;
3597 			}
3598 		}
3599 	}
3600 
3601 	*_vecIndex = vecIndex;
3602 	*_vecOffset = vecOffset;
3603 	*_numBytes = totalSize;
3604 	return B_OK;
3605 }
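

/*	A worked example of the vec combination above (all values hypothetical):
	a 12 KiB read described by two file_io_vecs,

		{ offset = 100 KiB, length = 8 KiB }
		{ offset = 200 KiB, length = 4 KiB }

	targeting three 4 KiB iovecs: the initial read_pages call transfers the
	8 KiB of fileVecs[0] into iovecs 0 and 1; the main loop then continues
	at vecIndex == 2, vecOffset == 0 and issues a second read_pages at
	offset 200 KiB for the remaining 4 KiB. A file_io_vec with offset == -1
	denotes a sparse extent and is zeroed via zero_iovecs() instead of being
	read from disk.
*/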
3606 
3607 
3608 static bool
3609 is_user_in_group(gid_t gid)
3610 {
3611 	if (gid == getegid())
3612 		return true;
3613 
3614 	gid_t groups[NGROUPS_MAX];
3615 	int groupCount = getgroups(NGROUPS_MAX, groups);
3616 	for (int i = 0; i < groupCount; i++) {
3617 		if (gid == groups[i])
3618 			return true;
3619 	}
3620 
3621 	return false;
3622 }
3623 
3624 
3625 static status_t
3626 free_io_context(io_context* context)
3627 {
3628 	uint32 i;
3629 
3630 	TIOC(FreeIOContext(context));
3631 
3632 	if (context->root)
3633 		put_vnode(context->root);
3634 
3635 	if (context->cwd)
3636 		put_vnode(context->cwd);
3637 
3638 	mutex_lock(&context->io_mutex);
3639 
3640 	for (i = 0; i < context->table_size; i++) {
3641 		if (struct file_descriptor* descriptor = context->fds[i]) {
3642 			close_fd(context, descriptor);
3643 			put_fd(descriptor);
3644 		}
3645 	}
3646 
3647 	mutex_destroy(&context->io_mutex);
3648 
3649 	remove_node_monitors(context);
3650 	free(context->fds);
3651 	free(context);
3652 
3653 	return B_OK;
3654 }
3655 
3656 
3657 static status_t
3658 resize_monitor_table(struct io_context* context, const int newSize)
3659 {
3660 	status_t status = B_OK;
3661 
3662 	if (newSize <= 0 || newSize > MAX_NODE_MONITORS)
3663 		return B_BAD_VALUE;
3664 
3665 	mutex_lock(&context->io_mutex);
3666 
3667 	if ((size_t)newSize < context->num_monitors) {
3668 		status = B_BUSY;
3669 		goto out;
3670 	}
3671 	context->max_monitors = newSize;
3672 
3673 out:
3674 	mutex_unlock(&context->io_mutex);
3675 	return status;
3676 }
3677 
3678 
3679 //	#pragma mark - public API for file systems
3680 
3681 
3682 extern "C" status_t
3683 new_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3684 	fs_vnode_ops* ops)
3685 {
3686 	FUNCTION(("new_vnode(volume = %p (%" B_PRId32 "), vnodeID = %" B_PRId64
3687 		", node = %p)\n", volume, volume->id, vnodeID, privateNode));
3688 
3689 	if (privateNode == NULL)
3690 		return B_BAD_VALUE;
3691 
3692 	int32 tries = BUSY_VNODE_RETRIES;
3693 restart:
3694 	// create the node
3695 	bool nodeCreated;
3696 	struct vnode* vnode;
3697 	status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3698 		nodeCreated);
3699 	if (status != B_OK)
3700 		return status;
3701 
3702 	WriteLocker nodeLocker(sVnodeLock, true);
3703 		// create_new_vnode_and_lock() has locked for us
3704 
3705 	if (!nodeCreated && vnode->IsBusy()) {
3706 		nodeLocker.Unlock();
3707 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3708 			return B_BUSY;
3709 		goto restart;
3710 	}
3711 
3712 	// file system integrity check:
3713 	// test if the vnode already exists and bail out if this is the case!
3714 	if (!nodeCreated) {
3715 		panic("vnode %" B_PRIdDEV ":%" B_PRIdINO " already exists (node = %p, "
3716 			"vnode->node = %p)!", volume->id, vnodeID, privateNode,
3717 			vnode->private_node);
3718 		return B_ERROR;
3719 	}
3720 
3721 	vnode->private_node = privateNode;
3722 	vnode->ops = ops;
3723 	vnode->SetUnpublished(true);
3724 
3725 	TRACE(("returns: %s\n", strerror(status)));
3726 
3727 	return status;
3728 }
3729 
3730 
3731 extern "C" status_t
3732 publish_vnode(fs_volume* volume, ino_t vnodeID, void* privateNode,
3733 	fs_vnode_ops* ops, int type, uint32 flags)
3734 {
3735 	FUNCTION(("publish_vnode()\n"));
3736 
3737 	int32 tries = BUSY_VNODE_RETRIES;
3738 restart:
3739 	WriteLocker locker(sVnodeLock);
3740 
3741 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3742 
3743 	bool nodeCreated = false;
3744 	if (vnode == NULL) {
3745 		if (privateNode == NULL)
3746 			return B_BAD_VALUE;
3747 
3748 		// create the node
3749 		locker.Unlock();
3750 			// create_new_vnode_and_lock() will re-lock for us on success
3751 		status_t status = create_new_vnode_and_lock(volume->id, vnodeID, vnode,
3752 			nodeCreated);
3753 		if (status != B_OK)
3754 			return status;
3755 
3756 		locker.SetTo(sVnodeLock, true);
3757 	}
3758 
3759 	if (nodeCreated) {
3760 		vnode->private_node = privateNode;
3761 		vnode->ops = ops;
3762 		vnode->SetUnpublished(true);
3763 	} else if (vnode->IsBusy() && vnode->IsUnpublished()
3764 		&& vnode->private_node == privateNode && vnode->ops == ops) {
3765 		// already known, but not published
3766 	} else if (vnode->IsBusy()) {
3767 		locker.Unlock();
3768 		if (!retry_busy_vnode(tries, volume->id, vnodeID))
3769 			return B_BUSY;
3770 		goto restart;
3771 	} else
3772 		return B_BAD_VALUE;
3773 
3776 	vnode->SetType(type);
3777 	vnode->SetRemoved((flags & B_VNODE_PUBLISH_REMOVED) != 0);
3778 	bool publishSpecialSubNode = is_special_node_type(type)
3779 		&& (flags & B_VNODE_DONT_CREATE_SPECIAL_SUB_NODE) == 0;
3780 
3781 	status_t status = B_OK;
3782 
3783 	// create sub vnodes, if necessary
3784 	if (volume->sub_volume != NULL || publishSpecialSubNode) {
3785 		locker.Unlock();
3786 
3787 		fs_volume* subVolume = volume;
3788 		if (volume->sub_volume != NULL) {
3789 			while (status == B_OK && subVolume->sub_volume != NULL) {
3790 				subVolume = subVolume->sub_volume;
3791 				status = subVolume->ops->create_sub_vnode(subVolume, vnodeID,
3792 					vnode);
3793 			}
3794 		}
3795 
3796 		if (status == B_OK && publishSpecialSubNode)
3797 			status = create_special_sub_node(vnode, flags);
3798 
3799 		if (status != B_OK) {
3800 			// error -- clean up the created sub vnodes
3801 			while (subVolume->super_volume != volume) {
3802 				subVolume = subVolume->super_volume;
3803 				subVolume->ops->delete_sub_vnode(subVolume, vnode);
3804 			}
3805 		}
3806 
3807 		if (status == B_OK) {
3808 			ReadLocker vnodesReadLocker(sVnodeLock);
3809 			AutoLocker<Vnode> nodeLocker(vnode);
3810 			vnode->SetBusy(false);
3811 			vnode->SetUnpublished(false);
3812 		} else {
3813 			locker.Lock();
3814 			sVnodeTable->Remove(vnode);
3815 			remove_vnode_from_mount_list(vnode, vnode->mount);
3816 			object_cache_free(sVnodeCache, vnode, 0);
3817 		}
3818 	} else {
3819 		// we still hold the write lock -- mark the node unbusy and published
3820 		vnode->SetBusy(false);
3821 		vnode->SetUnpublished(false);
3822 	}
3823 
3824 	TRACE(("returns: %s\n", strerror(status)));
3825 
3826 	return status;
3827 }
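

// Example (illustrative sketch, not part of the original source): a file
// system typically announces a freshly created node with new_vnode() (which
// keeps it unpublished while initialization completes) and then makes it
// visible with publish_vnode(). "volume", "inodeID", "privateNode" and
// "gMyVnodeOps" are hypothetical names.
#if 0
	status_t status = new_vnode(volume, inodeID, privateNode, &gMyVnodeOps);
	if (status == B_OK) {
		// ... finish setting up the node; it cannot be looked up yet ...
		status = publish_vnode(volume, inodeID, privateNode, &gMyVnodeOps,
			S_IFREG, 0);
	}
#endif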
3828 
3829 
3830 extern "C" status_t
3831 get_vnode(fs_volume* volume, ino_t vnodeID, void** _privateNode)
3832 {
3833 	struct vnode* vnode;
3834 
3835 	if (volume == NULL)
3836 		return B_BAD_VALUE;
3837 
3838 	status_t status = get_vnode(volume->id, vnodeID, &vnode, true, true);
3839 	if (status != B_OK)
3840 		return status;
3841 
3842 	// If this is a layered FS, we need to get the node cookie for the requested
3843 	// layer.
3844 	if (HAS_FS_CALL(vnode, get_super_vnode)) {
3845 		fs_vnode resolvedNode;
3846 		status_t status = FS_CALL(vnode, get_super_vnode, volume,
3847 			&resolvedNode);
3848 		if (status != B_OK) {
3849 			panic("get_vnode(): Failed to get super node for vnode %p, "
3850 				"volume: %p", vnode, volume);
3851 			put_vnode(vnode);
3852 			return status;
3853 		}
3854 
3855 		if (_privateNode != NULL)
3856 			*_privateNode = resolvedNode.private_node;
3857 	} else if (_privateNode != NULL)
3858 		*_privateNode = vnode->private_node;
3859 
3860 	return B_OK;
3861 }
3862 
3863 
3864 extern "C" status_t
3865 acquire_vnode(fs_volume* volume, ino_t vnodeID)
3866 {
3867 	ReadLocker nodeLocker(sVnodeLock);
3868 
3869 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3870 	if (vnode == NULL)
3871 		return B_BAD_VALUE;
3872 
3873 	inc_vnode_ref_count(vnode);
3874 	return B_OK;
3875 }
3876 
3877 
3878 extern "C" status_t
3879 put_vnode(fs_volume* volume, ino_t vnodeID)
3880 {
3881 	struct vnode* vnode;
3882 
3883 	rw_lock_read_lock(&sVnodeLock);
3884 	vnode = lookup_vnode(volume->id, vnodeID);
3885 	rw_lock_read_unlock(&sVnodeLock);
3886 
3887 	if (vnode == NULL)
3888 		return B_BAD_VALUE;
3889 
3890 	dec_vnode_ref_count(vnode, false, true);
3891 	return B_OK;
3892 }
3893 
3894 
3895 extern "C" status_t
3896 remove_vnode(fs_volume* volume, ino_t vnodeID)
3897 {
3898 	ReadLocker locker(sVnodeLock);
3899 
3900 	struct vnode* vnode = lookup_vnode(volume->id, vnodeID);
3901 	if (vnode == NULL)
3902 		return B_ENTRY_NOT_FOUND;
3903 
3904 	if (vnode->covered_by != NULL || vnode->covers != NULL) {
3905 		// this vnode is still in use (as a covering or covered node)
3906 		return B_BUSY;
3907 	}
3908 
3909 	vnode->Lock();
3910 
3911 	vnode->SetRemoved(true);
3912 	bool removeUnpublished = false;
3913 
3914 	if (vnode->IsUnpublished()) {
3915 		// prepare the vnode for deletion
3916 		removeUnpublished = true;
3917 		vnode->SetBusy(true);
3918 	}
3919 
3920 	vnode->Unlock();
3921 	locker.Unlock();
3922 
3923 	if (removeUnpublished) {
3924 		// If the vnode hasn't been published yet, we delete it here
3925 		atomic_add(&vnode->ref_count, -1);
3926 		free_vnode(vnode, true);
3927 	}
3928 
3929 	return B_OK;
3930 }
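

// Example (illustrative, hypothetical names): a file system's unlink hook
// would typically call remove_vnode() after removing the directory entry, so
// that the node gets deleted once the last reference to it is put.
#if 0
	status_t status = remove_vnode(volume, inode->ID());
#endif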
3931 
3932 
3933 extern "C" status_t
3934 unremove_vnode(fs_volume* volume, ino_t vnodeID)
3935 {
3936 	struct vnode* vnode;
3937 
3938 	rw_lock_read_lock(&sVnodeLock);
3939 
3940 	vnode = lookup_vnode(volume->id, vnodeID);
3941 	if (vnode) {
3942 		AutoLocker<Vnode> nodeLocker(vnode);
3943 		vnode->SetRemoved(false);
3944 	}
3945 
3946 	rw_lock_read_unlock(&sVnodeLock);
3947 	return B_OK;
3948 }
3949 
3950 
3951 extern "C" status_t
3952 get_vnode_removed(fs_volume* volume, ino_t vnodeID, bool* _removed)
3953 {
3954 	ReadLocker _(sVnodeLock);
3955 
3956 	if (struct vnode* vnode = lookup_vnode(volume->id, vnodeID)) {
3957 		if (_removed != NULL)
3958 			*_removed = vnode->IsRemoved();
3959 		return B_OK;
3960 	}
3961 
3962 	return B_BAD_VALUE;
3963 }
3964 
3965 
3966 extern "C" fs_volume*
3967 volume_for_vnode(fs_vnode* _vnode)
3968 {
3969 	if (_vnode == NULL)
3970 		return NULL;
3971 
3972 	struct vnode* vnode = static_cast<struct vnode*>(_vnode);
3973 	return vnode->mount->volume;
3974 }
3975 
3976 
3977 extern "C" status_t
3978 check_access_permissions(int accessMode, mode_t mode, gid_t nodeGroupID,
3979 	uid_t nodeUserID)
3980 {
3981 	// get node permissions
3982 	int userPermissions = (mode & S_IRWXU) >> 6;
3983 	int groupPermissions = (mode & S_IRWXG) >> 3;
3984 	int otherPermissions = mode & S_IRWXO;
3985 
3986 	// get the node permissions for this uid/gid
3987 	int permissions = 0;
3988 	uid_t uid = geteuid();
3989 
3990 	if (uid == 0) {
3991 		// user is root
3992 		// root has always read/write permission, but at least one of the
3993 		// X bits must be set for execute permission
3994 		permissions = userPermissions | groupPermissions | otherPermissions
3995 			| S_IROTH | S_IWOTH;
3996 		if (S_ISDIR(mode))
3997 			permissions |= S_IXOTH;
3998 	} else if (uid == nodeUserID) {
3999 		// user is node owner
4000 		permissions = userPermissions;
4001 	} else if (is_user_in_group(nodeGroupID)) {
4002 		// user is in owning group
4003 		permissions = groupPermissions;
4004 	} else {
4005 		// user is one of the others
4006 		permissions = otherPermissions;
4007 	}
4008 
4009 	return (accessMode & ~permissions) == 0 ? B_OK : B_PERMISSION_DENIED;
4010 }
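

// Worked example (illustrative): for a node with mode 0644 owned by the
// caller, userPermissions is (0644 & S_IRWXU) >> 6 == 6 (read/write). An
// accessMode of R_OK | W_OK (== 6) yields (6 & ~6) == 0, i.e. B_OK, while
// X_OK (== 1) yields (1 & ~6) != 0, i.e. B_PERMISSION_DENIED.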
4011 
4012 
4013 #if 0
4014 extern "C" status_t
4015 read_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4016 	size_t* _numBytes)
4017 {
4018 	struct file_descriptor* descriptor;
4019 	struct vnode* vnode;
4020 
4021 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4022 	if (descriptor == NULL)
4023 		return B_FILE_ERROR;
4024 
4025 	status_t status = vfs_read_pages(vnode, descriptor->cookie, pos, vecs,
4026 		count, 0, _numBytes);
4027 
4028 	put_fd(descriptor);
4029 	return status;
4030 }
4031 
4032 
4033 extern "C" status_t
4034 write_pages(int fd, off_t pos, const iovec* vecs, size_t count,
4035 	size_t* _numBytes)
4036 {
4037 	struct file_descriptor* descriptor;
4038 	struct vnode* vnode;
4039 
4040 	descriptor = get_fd_and_vnode(fd, &vnode, true);
4041 	if (descriptor == NULL)
4042 		return B_FILE_ERROR;
4043 
4044 	status_t status = vfs_write_pages(vnode, descriptor->cookie, pos, vecs,
4045 		count, 0, _numBytes);
4046 
4047 	put_fd(descriptor);
4048 	return status;
4049 }
4050 #endif
4051 
4052 
4053 extern "C" status_t
4054 read_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4055 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4056 	size_t* _bytes)
4057 {
4058 	struct vnode* vnode;
4059 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4060 	if (!descriptor.IsSet())
4061 		return B_FILE_ERROR;
4062 
4063 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4064 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4065 		false);
4066 
4067 	return status;
4068 }
4069 
4070 
4071 extern "C" status_t
4072 write_file_io_vec_pages(int fd, const file_io_vec* fileVecs, size_t fileVecCount,
4073 	const iovec* vecs, size_t vecCount, uint32* _vecIndex, size_t* _vecOffset,
4074 	size_t* _bytes)
4075 {
4076 	struct vnode* vnode;
4077 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, true));
4078 	if (!descriptor.IsSet())
4079 		return B_FILE_ERROR;
4080 
4081 	status_t status = common_file_io_vec_pages(vnode, descriptor->cookie,
4082 		fileVecs, fileVecCount, vecs, vecCount, _vecIndex, _vecOffset, _bytes,
4083 		true);
4084 
4085 	return status;
4086 }
4087 
4088 
4089 extern "C" status_t
4090 entry_cache_add(dev_t mountID, ino_t dirID, const char* name, ino_t nodeID)
4091 {
4092 	// lookup mount -- the caller is required to make sure that the mount
4093 	// won't go away
4094 	ReadLocker locker(sMountLock);
4095 	struct fs_mount* mount = find_mount(mountID);
4096 	if (mount == NULL)
4097 		return B_BAD_VALUE;
4098 	locker.Unlock();
4099 
4100 	return mount->entry_cache.Add(dirID, name, nodeID, false);
4101 }
4102 
4103 
4104 extern "C" status_t
4105 entry_cache_add_missing(dev_t mountID, ino_t dirID, const char* name)
4106 {
4107 	// lookup mount -- the caller is required to make sure that the mount
4108 	// won't go away
4109 	ReadLocker locker(sMountLock);
4110 	struct fs_mount* mount = find_mount(mountID);
4111 	if (mount == NULL)
4112 		return B_BAD_VALUE;
4113 	locker.Unlock();
4114 
4115 	return mount->entry_cache.Add(dirID, name, -1, true);
4116 }
4117 
4118 
4119 extern "C" status_t
4120 entry_cache_remove(dev_t mountID, ino_t dirID, const char* name)
4121 {
4122 	// lookup mount -- the caller is required to make sure that the mount
4123 	// won't go away
4124 	ReadLocker locker(sMountLock);
4125 	struct fs_mount* mount = find_mount(mountID);
4126 	if (mount == NULL)
4127 		return B_BAD_VALUE;
4128 	locker.Unlock();
4129 
4130 	return mount->entry_cache.Remove(dirID, name);
4131 }
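

// Example (illustrative sketch, hypothetical names): a file system that has
// just resolved a directory entry would typically mirror the result into the
// entry cache, recording negative lookups as "missing" entries:
#if 0
	status_t status = my_fs_lookup(directory, name, &inodeID);
	if (status == B_OK)
		entry_cache_add(volume->id, directoryID, name, inodeID);
	else if (status == B_ENTRY_NOT_FOUND)
		entry_cache_add_missing(volume->id, directoryID, name);
#endif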
4132 
4133 
4134 //	#pragma mark - private VFS API
4135 //	Functions the VFS exports for other parts of the kernel
4136 
4137 
4138 /*! Acquires another reference to the vnode that has to be released
4139 	by calling vfs_put_vnode().
4140 */
4141 void
4142 vfs_acquire_vnode(struct vnode* vnode)
4143 {
4144 	inc_vnode_ref_count(vnode);
4145 }
4146 
4147 
4148 /*! This is currently called from file_cache_create() only.
4149 	It's probably a temporary solution as long as devfs requires that
4150 	fs_read_pages()/fs_write_pages() are called with the standard
4151 	open cookie and not with a device cookie.
4152 	If that's done differently, remove this call; it has no other
4153 	purpose.
4154 */
4155 extern "C" status_t
4156 vfs_get_cookie_from_fd(int fd, void** _cookie)
4157 {
4158 	struct file_descriptor* descriptor;
4159 
4160 	descriptor = get_fd(get_current_io_context(true), fd);
4161 	if (descriptor == NULL)
4162 		return B_FILE_ERROR;
4163 
4164 	*_cookie = descriptor->cookie;
4165 	return B_OK;
4166 }
4167 
4168 
4169 extern "C" status_t
4170 vfs_get_vnode_from_fd(int fd, bool kernel, struct vnode** vnode)
4171 {
4172 	*vnode = get_vnode_from_fd(fd, kernel);
4173 
4174 	if (*vnode == NULL)
4175 		return B_FILE_ERROR;
4176 
4177 	return B_OK;
4178 }
4179 
4180 
4181 extern "C" status_t
4182 vfs_get_vnode_from_path(const char* path, bool kernel, struct vnode** _vnode)
4183 {
4184 	TRACE(("vfs_get_vnode_from_path: entry. path = '%s', kernel %d\n",
4185 		path, kernel));
4186 
4187 	KPath pathBuffer;
4188 	if (pathBuffer.InitCheck() != B_OK)
4189 		return B_NO_MEMORY;
4190 
4191 	char* buffer = pathBuffer.LockBuffer();
4192 	strlcpy(buffer, path, pathBuffer.BufferSize());
4193 
4194 	VnodePutter vnode;
4195 	status_t status = path_to_vnode(buffer, true, vnode, NULL, kernel);
4196 	if (status != B_OK)
4197 		return status;
4198 
4199 	*_vnode = vnode.Detach();
4200 	return B_OK;
4201 }
4202 
4203 
4204 extern "C" status_t
4205 vfs_get_vnode(dev_t mountID, ino_t vnodeID, bool canWait, struct vnode** _vnode)
4206 {
4207 	struct vnode* vnode = NULL;
4208 
4209 	status_t status = get_vnode(mountID, vnodeID, &vnode, canWait, false);
4210 	if (status != B_OK)
4211 		return status;
4212 
4213 	*_vnode = vnode;
4214 	return B_OK;
4215 }
4216 
4217 
4218 extern "C" status_t
4219 vfs_entry_ref_to_vnode(dev_t mountID, ino_t directoryID,
4220 	const char* name, struct vnode** _vnode)
4221 {
4222 	VnodePutter vnode;
4223 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, false, true, vnode);
4224 	*_vnode = vnode.Detach();
4225 	return status;
4226 }
4227 
4228 
4229 extern "C" void
4230 vfs_vnode_to_node_ref(struct vnode* vnode, dev_t* _mountID, ino_t* _vnodeID)
4231 {
4232 	*_mountID = vnode->device;
4233 	*_vnodeID = vnode->id;
4234 }
4235 
4236 
4237 /*!
4238 	Helper function abstracting the process of "converting" a given
4239 	vnode-pointer to a fs_vnode-pointer.
4240 	Currently only used in bindfs.
4241 */
4242 extern "C" fs_vnode*
4243 vfs_fsnode_for_vnode(struct vnode* vnode)
4244 {
4245 	return vnode;
4246 }
4247 
4248 
4249 /*!
4250 	Calls fs_open() on the given vnode and returns a new
4251 	file descriptor for it
4252 */
4253 int
4254 vfs_open_vnode(struct vnode* vnode, int openMode, bool kernel)
4255 {
4256 	return open_vnode(vnode, openMode, kernel);
4257 }
4258 
4259 
4260 /*!	Looks up a vnode with the given mount and vnode ID.
4261 	Must only be used with "in-use" vnodes as it doesn't grab a reference
4262 	to the node.
4263 	It's currently only used by file_cache_create().
4264 */
4265 extern "C" status_t
4266 vfs_lookup_vnode(dev_t mountID, ino_t vnodeID, struct vnode** _vnode)
4267 {
4268 	rw_lock_read_lock(&sVnodeLock);
4269 	struct vnode* vnode = lookup_vnode(mountID, vnodeID);
4270 	rw_lock_read_unlock(&sVnodeLock);
4271 
4272 	if (vnode == NULL)
4273 		return B_ERROR;
4274 
4275 	*_vnode = vnode;
4276 	return B_OK;
4277 }
4278 
4279 
4280 extern "C" status_t
4281 vfs_get_fs_node_from_path(fs_volume* volume, const char* path,
4282 	bool traverseLeafLink, bool kernel, void** _node)
4283 {
4284 	TRACE(("vfs_get_fs_node_from_path(volume = %p, path = \"%s\", kernel %d)\n",
4285 		volume, path, kernel));
4286 
4287 	KPath pathBuffer;
4288 	if (pathBuffer.InitCheck() != B_OK)
4289 		return B_NO_MEMORY;
4290 
4291 	fs_mount* mount;
4292 	status_t status = get_mount(volume->id, &mount);
4293 	if (status != B_OK)
4294 		return status;
4295 
4296 	char* buffer = pathBuffer.LockBuffer();
4297 	strlcpy(buffer, path, pathBuffer.BufferSize());
4298 
4299 	VnodePutter vnode;
4300 
4301 	if (buffer[0] == '/')
4302 		status = path_to_vnode(buffer, traverseLeafLink, vnode, NULL, kernel);
4303 	else {
4304 		inc_vnode_ref_count(mount->root_vnode);
4305 			// vnode_path_to_vnode() releases a reference to the starting vnode
4306 		status = vnode_path_to_vnode(mount->root_vnode, buffer, traverseLeafLink, 0,
4307 			kernel, vnode, NULL);
4308 	}
4309 
4310 	put_mount(mount);
4311 
4312 	if (status != B_OK)
4313 		return status;
4314 
4315 	if (vnode->device != volume->id) {
4316 		// wrong mount ID - must not gain access on foreign file system nodes
4317 		return B_BAD_VALUE;
4318 	}
4319 
4320 	// Use get_vnode() to resolve the cookie for the right layer.
4321 	status = get_vnode(volume, vnode->id, _node);
4322 
4323 	return status;
4324 }
4325 
4326 
4327 status_t
4328 vfs_read_stat(int fd, const char* path, bool traverseLeafLink,
4329 	struct stat* stat, bool kernel)
4330 {
4331 	status_t status;
4332 
4333 	if (path != NULL) {
4334 		// path given: get the stat of the node referred to by (fd, path)
4335 		KPath pathBuffer(path);
4336 		if (pathBuffer.InitCheck() != B_OK)
4337 			return B_NO_MEMORY;
4338 
4339 		status = common_path_read_stat(fd, pathBuffer.LockBuffer(),
4340 			traverseLeafLink, stat, kernel);
4341 	} else {
4342 		// no path given: get the FD and use the FD operation
4343 		FileDescriptorPutter descriptor
4344 			(get_fd(get_current_io_context(kernel), fd));
4345 		if (!descriptor.IsSet())
4346 			return B_FILE_ERROR;
4347 
4348 		if (descriptor->ops->fd_read_stat)
4349 			status = descriptor->ops->fd_read_stat(descriptor.Get(), stat);
4350 		else
4351 			status = B_UNSUPPORTED;
4352 	}
4353 
4354 	return status;
4355 }
4356 
4357 
4358 /*!	Finds the full path to the file that contains the module \a moduleName,
4359 	puts it into \a pathBuffer, and returns B_OK for success.
4360 	If \a pathBuffer was too small, it returns \c B_BUFFER_OVERFLOW,
4361 	and \c B_ENTRY_NOT_FOUND if no file could be found.
4362 	\a pathBuffer is clobbered in any case and must not be relied on if this
4363 	function returns unsuccessfully.
4364 	\a basePath and \a pathBuffer must not point to the same space.
4365 */
4366 status_t
4367 vfs_get_module_path(const char* basePath, const char* moduleName,
4368 	char* pathBuffer, size_t bufferSize)
4369 {
4370 	status_t status;
4371 	size_t length;
4372 	char* path;
4373 
4374 	if (bufferSize == 0
4375 		|| strlcpy(pathBuffer, basePath, bufferSize) >= bufferSize)
4376 		return B_BUFFER_OVERFLOW;
4377 
4378 	VnodePutter dir;
4379 	status = path_to_vnode(pathBuffer, true, dir, NULL, true);
4380 	if (status != B_OK)
4381 		return status;
4382 
4383 	// the path buffer had been clobbered by the above call
4384 	length = strlcpy(pathBuffer, basePath, bufferSize);
4385 	if (pathBuffer[length - 1] != '/')
4386 		pathBuffer[length++] = '/';
4387 
4388 	path = pathBuffer + length;
4389 	bufferSize -= length;
4390 
4391 	VnodePutter file;
4392 	while (moduleName) {
4393 		char* nextPath = strchr(moduleName, '/');
4394 		if (nextPath == NULL)
4395 			length = strlen(moduleName);
4396 		else {
4397 			length = nextPath - moduleName;
4398 			nextPath++;
4399 		}
4400 
4401 		if (length + 1 >= bufferSize)
4402 			return B_BUFFER_OVERFLOW;
4403 
4404 		memcpy(path, moduleName, length);
4405 		path[length] = '\0';
4406 		moduleName = nextPath;
4407 
4408 		// vnode_path_to_vnode() assumes ownership of the passed dir
4409 		status = vnode_path_to_vnode(dir.Detach(), path, true, 0, true, file, NULL);
4410 		if (status != B_OK)
4411 			return status;
4412 
4413 		if (S_ISDIR(file->Type())) {
4414 			// go to the next directory
4415 			path[length] = '/';
4416 			path[length + 1] = '\0';
4417 			path += length + 1;
4418 			bufferSize -= length + 1;
4419 
4420 			dir.SetTo(file.Detach());
4421 		} else if (S_ISREG(file->Type())) {
4422 			// it's a file so it should be what we've searched for
4423 			return B_OK;
4424 		} else {
4425 			TRACE(("vfs_get_module_path(): something is strange here: "
4426 				"0x%08" B_PRIx32 "...\n", file->Type()));
4427 			return B_ERROR;
4428 		}
4429 	}
4430 
4431 	// if we got here, the moduleName just pointed to a directory, not to
4432 	// a real module - what should we do in this case?
4433 	return B_ENTRY_NOT_FOUND;
4434 }
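

// Worked example (illustrative): with basePath "/boot/system/add-ons/kernel"
// and moduleName "bus_managers/pci/v1", the loop first descends into the
// "bus_managers" directory; if the "pci" component then turns out to be a
// regular file, that file is taken to contain the module, and
// "/boot/system/add-ons/kernel/bus_managers/pci" is returned in pathBuffer.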
4435 
4436 
4437 /*!	\brief Normalizes a given path.
4438 
4439 	The path must refer to an existing or non-existing entry in an existing
4440 	directory, that is chopping off the leaf component the remaining path must
4441 	refer to an existing directory.
4442 
4443 	The returned path will be canonical in that it will be absolute, will not
4444 	contain any "." or ".." components or duplicate occurrences of '/'s,
4445 	and none of the directory components will be symbolic links.
4446 
4447 	Any two paths referring to the same entry will result in the same
4448 	normalized path (well, that is pretty much the definition of `normalized',
4449 	isn't it :-).
4450 
4451 	\param path The path to be normalized.
4452 	\param buffer The buffer into which the normalized path will be written.
4453 		   May be the same one as \a path.
4454 	\param bufferSize The size of \a buffer.
4455 	\param traverseLink If \c true, the function also resolves leaf symlinks.
4456 	\param kernel \c true, if the IO context of the kernel shall be used,
4457 		   otherwise that of the team this thread belongs to. Only relevant
4458 		   if the path is relative (to get the CWD).
4459 	\return \c B_OK if everything went fine, another error code otherwise.
4460 */
4461 status_t
4462 vfs_normalize_path(const char* path, char* buffer, size_t bufferSize,
4463 	bool traverseLink, bool kernel)
4464 {
4465 	if (!path || !buffer || bufferSize < 1)
4466 		return B_BAD_VALUE;
4467 
4468 	if (path != buffer) {
4469 		if (strlcpy(buffer, path, bufferSize) >= bufferSize)
4470 			return B_BUFFER_OVERFLOW;
4471 	}
4472 
4473 	return normalize_path(buffer, bufferSize, traverseLink, kernel);
4474 }
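

// Example (illustrative sketch): normalizing a path in place, also resolving
// a leaf symlink, in kernel context. "buffer" is a hypothetical
// B_PATH_NAME_LENGTH sized buffer; assuming no symlinks are involved, the
// result would be "/boot/home/Desktop".
#if 0
	char buffer[B_PATH_NAME_LENGTH];
	status_t status = vfs_normalize_path("/boot/home//config/../Desktop",
		buffer, sizeof(buffer), true, true);
#endif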
4475 
4476 
4477 /*!	\brief Gets the parent of the passed in node.
4478 
4479 	Gets the parent of the passed in node, and correctly resolves covered
4480 	nodes.
4481 */
4482 extern "C" status_t
4483 vfs_resolve_parent(struct vnode* parent, dev_t* device, ino_t* node)
4484 {
4485 	return resolve_covered_parent(parent, device, node,
4486 		get_current_io_context(true));
4487 }
4488 
4489 
4490 /*!	\brief Creates a special node in the file system.
4491 
4492 	The caller gets a reference to the newly created node (which is passed
4493 	back through \a _createdVnode) and is responsible for releasing it.
4494 
4495 	\param path The path where to create the entry for the node. Can be \c NULL,
4496 		in which case the node is created without an entry in the root FS -- it
4497 		will automatically be deleted when the last reference has been released.
4498 	\param subVnode The definition of the subnode. Can be \c NULL, in which case
4499 		the target file system will just create the node with its standard
4500 		operations. Depending on the type of the node a subnode might be created
4501 		automatically, though.
4502 	\param mode The type and permissions for the node to be created.
4503 	\param flags Flags to be passed to the creating FS.
4504 	\param kernel \c true, if called in the kernel context (relevant only if
4505 		\a path is not \c NULL and not absolute).
4506 	\param _superVnode Pointer to a pre-allocated structure to be filled by the
4507 		file system creating the node, with the private data pointer and
4508 		operations for the super node. Can be \c NULL.
4509 	\param _createdVnode Pointer to pre-allocated storage in which the pointer
4510 		to the newly created node is stored.
4511 	\return \c B_OK, if everything went fine, another error code otherwise.
4512 */
4513 status_t
4514 vfs_create_special_node(const char* path, fs_vnode* subVnode, mode_t mode,
4515 	uint32 flags, bool kernel, fs_vnode* _superVnode,
4516 	struct vnode** _createdVnode)
4517 {
4518 	VnodePutter dirNode;
4519 	char _leaf[B_FILE_NAME_LENGTH];
4520 	char* leaf = NULL;
4521 
4522 	if (path) {
4523 		// We've got a path. Get the dir vnode and the leaf name.
4524 		KPath tmpPathBuffer;
4525 		if (tmpPathBuffer.InitCheck() != B_OK)
4526 			return B_NO_MEMORY;
4527 
4528 		char* tmpPath = tmpPathBuffer.LockBuffer();
4529 		if (strlcpy(tmpPath, path, B_PATH_NAME_LENGTH) >= B_PATH_NAME_LENGTH)
4530 			return B_NAME_TOO_LONG;
4531 
4532 		// get the dir vnode and the leaf name
4533 		leaf = _leaf;
4534 		status_t error = path_to_dir_vnode(tmpPath, dirNode, leaf, kernel);
4535 		if (error != B_OK)
4536 			return error;
4537 	} else {
4538 		// No path. Create the node in the root FS.
4539 		dirNode.SetTo(sRoot);
4540 		inc_vnode_ref_count(dirNode.Get());
4541 	}
4542 
4543 	// check support for creating special nodes
4544 	if (!HAS_FS_CALL(dirNode, create_special_node))
4545 		return B_UNSUPPORTED;
4546 
4547 	// create the node
4548 	fs_vnode superVnode;
4549 	ino_t nodeID;
4550 	status_t status = FS_CALL(dirNode.Get(), create_special_node, leaf, subVnode,
4551 		mode, flags, _superVnode != NULL ? _superVnode : &superVnode, &nodeID);
4552 	if (status != B_OK)
4553 		return status;
4554 
4555 	// lookup the node
4556 	rw_lock_read_lock(&sVnodeLock);
4557 	*_createdVnode = lookup_vnode(dirNode->mount->id, nodeID);
4558 	rw_lock_read_unlock(&sVnodeLock);
4559 
4560 	if (*_createdVnode == NULL) {
4561 		panic("vfs_create_special_node(): lookup of node failed");
4562 		return B_ERROR;
4563 	}
4564 
4565 	return B_OK;
4566 }
4567 
4568 
4569 extern "C" void
4570 vfs_put_vnode(struct vnode* vnode)
4571 {
4572 	put_vnode(vnode);
4573 }
4574 
4575 
4576 extern "C" status_t
4577 vfs_get_cwd(dev_t* _mountID, ino_t* _vnodeID)
4578 {
4579 	// Get current working directory from io context
4580 	struct io_context* context = get_current_io_context(false);
4581 	status_t status = B_OK;
4582 
4583 	mutex_lock(&context->io_mutex);
4584 
4585 	if (context->cwd != NULL) {
4586 		*_mountID = context->cwd->device;
4587 		*_vnodeID = context->cwd->id;
4588 	} else
4589 		status = B_ERROR;
4590 
4591 	mutex_unlock(&context->io_mutex);
4592 	return status;
4593 }
4594 
4595 
4596 status_t
4597 vfs_unmount(dev_t mountID, uint32 flags)
4598 {
4599 	return fs_unmount(NULL, mountID, flags, true);
4600 }
4601 
4602 
4603 extern "C" status_t
4604 vfs_disconnect_vnode(dev_t mountID, ino_t vnodeID)
4605 {
4606 	struct vnode* vnode;
4607 
4608 	status_t status = get_vnode(mountID, vnodeID, &vnode, true, true);
4609 	if (status != B_OK)
4610 		return status;
4611 
4612 	disconnect_mount_or_vnode_fds(vnode->mount, vnode);
4613 	put_vnode(vnode);
4614 	return B_OK;
4615 }
4616 
4617 
4618 extern "C" void
4619 vfs_free_unused_vnodes(int32 level)
4620 {
4621 	vnode_low_resource_handler(NULL,
4622 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
4623 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
4624 		level);
4625 }
4626 
4627 
4628 extern "C" bool
4629 vfs_can_page(struct vnode* vnode, void* cookie)
4630 {
4631 	FUNCTION(("vfs_can_page: vnode %p\n", vnode));
4632 
4633 	if (HAS_FS_CALL(vnode, can_page))
4634 		return FS_CALL(vnode, can_page, cookie);
4635 	return false;
4636 }
4637 
4638 
4639 extern "C" status_t
4640 vfs_read_pages(struct vnode* vnode, void* cookie, off_t pos,
4641 	const generic_io_vec* vecs, size_t count, uint32 flags,
4642 	generic_size_t* _numBytes)
4643 {
4644 	FUNCTION(("vfs_read_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4645 		vecs, pos));
4646 
4647 #if VFS_PAGES_IO_TRACING
4648 	generic_size_t bytesRequested = *_numBytes;
4649 #endif
4650 
4651 	IORequest request;
4652 	status_t status = request.Init(pos, vecs, count, *_numBytes, false, flags);
4653 	if (status == B_OK) {
4654 		status = vfs_vnode_io(vnode, cookie, &request);
4655 		if (status == B_OK)
4656 			status = request.Wait();
4657 		*_numBytes = request.TransferredBytes();
4658 	}
4659 
4660 	TPIO(ReadPages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4661 		status, *_numBytes));
4662 
4663 	return status;
4664 }
4665 
4666 
4667 extern "C" status_t
4668 vfs_write_pages(struct vnode* vnode, void* cookie, off_t pos,
4669 	const generic_io_vec* vecs, size_t count, uint32 flags,
4670 	generic_size_t* _numBytes)
4671 {
4672 	FUNCTION(("vfs_write_pages: vnode %p, vecs %p, pos %" B_PRIdOFF "\n", vnode,
4673 		vecs, pos));
4674 
4675 #if VFS_PAGES_IO_TRACING
4676 	generic_size_t bytesRequested = *_numBytes;
4677 #endif
4678 
4679 	IORequest request;
4680 	status_t status = request.Init(pos, vecs, count, *_numBytes, true, flags);
4681 	if (status == B_OK) {
4682 		status = vfs_vnode_io(vnode, cookie, &request);
4683 		if (status == B_OK)
4684 			status = request.Wait();
4685 		*_numBytes = request.TransferredBytes();
4686 	}
4687 
4688 	TPIO(WritePages(vnode, cookie, pos, vecs, count, flags, bytesRequested,
4689 		status, *_numBytes));
4690 
4691 	return status;
4692 }
4693 
4694 
4695 /*!	Gets the vnode's VMCache object. If the vnode doesn't have one yet, it
4696 	will be created if \a allocate is \c true.
4697 	On success, the function also acquires a reference to the cache it
4698 	returns.
4699 */
4700 extern "C" status_t
4701 vfs_get_vnode_cache(struct vnode* vnode, VMCache** _cache, bool allocate)
4702 {
4703 	if (vnode->cache != NULL) {
4704 		vnode->cache->AcquireRef();
4705 		*_cache = vnode->cache;
4706 		return B_OK;
4707 	}
4708 
4709 	rw_lock_read_lock(&sVnodeLock);
4710 	vnode->Lock();
4711 
4712 	status_t status = B_OK;
4713 
4714 	// The cache could have been created in the meantime
4715 	if (vnode->cache == NULL) {
4716 		if (allocate) {
4717 			// TODO: actually the vnode needs to be busy already here, or
4718 			//	else this won't work...
4719 			bool wasBusy = vnode->IsBusy();
4720 			vnode->SetBusy(true);
4721 
4722 			vnode->Unlock();
4723 			rw_lock_read_unlock(&sVnodeLock);
4724 
4725 			status = vm_create_vnode_cache(vnode, &vnode->cache);
4726 
4727 			rw_lock_read_lock(&sVnodeLock);
4728 			vnode->Lock();
4729 			vnode->SetBusy(wasBusy);
4730 		} else
4731 			status = B_BAD_VALUE;
4732 	}
4733 
4734 	vnode->Unlock();
4735 	rw_lock_read_unlock(&sVnodeLock);
4736 
4737 	if (status == B_OK) {
4738 		vnode->cache->AcquireRef();
4739 		*_cache = vnode->cache;
4740 	}
4741 
4742 	return status;
4743 }
4744 
4745 
4746 /*!	Sets the vnode's VMCache object, for subsystems that want to manage
4747 	their own.
4748 	On success, a reference to the given cache is acquired.
4750 */
4751 extern "C" status_t
4752 vfs_set_vnode_cache(struct vnode* vnode, VMCache* _cache)
4753 {
4754 	rw_lock_read_lock(&sVnodeLock);
4755 	vnode->Lock();
4756 
4757 	status_t status = B_OK;
4758 	if (vnode->cache != NULL) {
4759 		status = B_NOT_ALLOWED;
4760 	} else {
4761 		vnode->cache = _cache;
4762 		_cache->AcquireRef();
4763 	}
4764 
4765 	vnode->Unlock();
4766 	rw_lock_read_unlock(&sVnodeLock);
4767 	return status;
4768 }
4769 
4770 
4771 status_t
4772 vfs_get_file_map(struct vnode* vnode, off_t offset, size_t size,
4773 	file_io_vec* vecs, size_t* _count)
4774 {
4775 	FUNCTION(("vfs_get_file_map: vnode %p, vecs %p, offset %" B_PRIdOFF
4776 		", size = %" B_PRIuSIZE "\n", vnode, vecs, offset, size));
4777 
4778 	return FS_CALL(vnode, get_file_map, offset, size, vecs, _count);
4779 }
4780 
4781 
4782 status_t
4783 vfs_stat_vnode(struct vnode* vnode, struct stat* stat)
4784 {
4785 	status_t status = FS_CALL(vnode, read_stat, stat);
4786 
4787 	// fill in the st_dev and st_ino fields
4788 	if (status == B_OK) {
4789 		stat->st_dev = vnode->device;
4790 		stat->st_ino = vnode->id;
4791 		// the rdev field must stay unset for non-special files
4792 		if (!S_ISBLK(stat->st_mode) && !S_ISCHR(stat->st_mode))
4793 			stat->st_rdev = -1;
4794 	}
4795 
4796 	return status;
4797 }
4798 
4799 
4800 status_t
4801 vfs_stat_node_ref(dev_t device, ino_t inode, struct stat* stat)
4802 {
4803 	struct vnode* vnode;
4804 	status_t status = get_vnode(device, inode, &vnode, true, false);
4805 	if (status != B_OK)
4806 		return status;
4807 
4808 	status = vfs_stat_vnode(vnode, stat);
4809 
4810 	put_vnode(vnode);
4811 	return status;
4812 }
4813 
4814 
4815 status_t
4816 vfs_get_vnode_name(struct vnode* vnode, char* name, size_t nameSize)
4817 {
4818 	return get_vnode_name(vnode, NULL, name, nameSize, true);
4819 }
4820 
4821 
4822 status_t
4823 vfs_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
4824 	bool kernel, char* path, size_t pathLength)
4825 {
4826 	VnodePutter vnode;
4827 	status_t status;
4828 
4829 	// filter invalid leaf names
4830 	if (leaf != NULL && (leaf[0] == '\0' || strchr(leaf, '/')))
4831 		return B_BAD_VALUE;
4832 
4833 	// get the vnode matching the dir's node_ref
4834 	if (leaf && (strcmp(leaf, ".") == 0 || strcmp(leaf, "..") == 0)) {
4835 		// special cases "." and "..": we can directly get the vnode of the
4836 		// referenced directory
4837 		status = entry_ref_to_vnode(device, inode, leaf, false, kernel, vnode);
4838 		leaf = NULL;
4839 	} else {
4840 		struct vnode* temp = NULL;
4841 		status = get_vnode(device, inode, &temp, true, false);
4842 		vnode.SetTo(temp);
4843 	}
4844 	if (status != B_OK)
4845 		return status;
4846 
4847 	// get the directory path
4848 	status = dir_vnode_to_path(vnode.Get(), path, pathLength, kernel);
4849 	vnode.Unset();
4850 		// we don't need the vnode anymore
4851 	if (status != B_OK)
4852 		return status;
4853 
4854 	// append the leaf name
4855 	if (leaf) {
4856 		// insert a directory separator if this is not the file system root
4857 		if ((strcmp(path, "/") && strlcat(path, "/", pathLength)
4858 				>= pathLength)
4859 			|| strlcat(path, leaf, pathLength) >= pathLength) {
4860 			return B_NAME_TOO_LONG;
4861 		}
4862 	}
4863 
4864 	return B_OK;
4865 }
4866 
4867 
4868 /*!	If the given descriptor locked its vnode, that lock will be released. */
4869 void
4870 vfs_unlock_vnode_if_locked(struct file_descriptor* descriptor)
4871 {
4872 	struct vnode* vnode = fd_vnode(descriptor);
4873 
4874 	if (vnode != NULL && vnode->mandatory_locked_by == descriptor)
4875 		vnode->mandatory_locked_by = NULL;
4876 }
4877 
4878 
4879 /*!	Releases any POSIX locks on the file descriptor. */
4880 status_t
4881 vfs_release_posix_lock(io_context* context, struct file_descriptor* descriptor)
4882 {
4883 	struct vnode* vnode = descriptor->u.vnode;
4884 	if (vnode == NULL)
4885 		return B_OK;
4886 
4887 	if (HAS_FS_CALL(vnode, release_lock))
4888 		return FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
4889 
4890 	return release_advisory_lock(vnode, context, NULL, NULL);
4891 }
4892 
4893 
4894 /*!	Closes all file descriptors of the specified I/O context that
4895 	have the O_CLOEXEC flag set.
4896 */
4897 void
4898 vfs_exec_io_context(io_context* context)
4899 {
4900 	uint32 i;
4901 
4902 	for (i = 0; i < context->table_size; i++) {
4903 		mutex_lock(&context->io_mutex);
4904 
4905 		struct file_descriptor* descriptor = context->fds[i];
4906 		bool remove = false;
4907 
4908 		if (descriptor != NULL && fd_close_on_exec(context, i)) {
4909 			context->fds[i] = NULL;
4910 			context->num_used_fds--;
4911 
4912 			remove = true;
4913 		}
4914 
4915 		mutex_unlock(&context->io_mutex);
4916 
4917 		if (remove) {
4918 			close_fd(context, descriptor);
4919 			put_fd(descriptor);
4920 		}
4921 	}
4922 }
4923 
4924 
4925 /*! Sets up a new io_context structure, and inherits the properties
4926 	of the parent io_context if one is given.
4927 */
4928 io_context*
4929 vfs_new_io_context(io_context* parentContext, bool purgeCloseOnExec)
4930 {
4931 	io_context* context = (io_context*)malloc(sizeof(io_context));
4932 	if (context == NULL)
4933 		return NULL;
4934 
4935 	TIOC(NewIOContext(context, parentContext));
4936 
4937 	memset(context, 0, sizeof(io_context));
4938 	context->ref_count = 1;
4939 
4940 	MutexLocker parentLocker;
4941 
4942 	size_t tableSize;
4943 	if (parentContext != NULL) {
4944 		parentLocker.SetTo(parentContext->io_mutex, false);
4945 		tableSize = parentContext->table_size;
4946 	} else
4947 		tableSize = DEFAULT_FD_TABLE_SIZE;
4948 
4949 	// allocate space for FDs, select infos and the close-on-exec bitmap
4950 	context->fds = (file_descriptor**)malloc(
4951 		sizeof(struct file_descriptor*) * tableSize
4952 		+ sizeof(struct select_info**) * tableSize
4953 		+ (tableSize + 7) / 8);
4954 	if (context->fds == NULL) {
4955 		free(context);
4956 		return NULL;
4957 	}
4958 
4959 	context->select_infos = (select_info**)(context->fds + tableSize);
4960 	context->fds_close_on_exec = (uint8*)(context->select_infos + tableSize);
4961 
4962 	memset(context->fds, 0, sizeof(struct file_descriptor*) * tableSize
4963 		+ sizeof(struct select_info**) * tableSize
4964 		+ (tableSize + 7) / 8);
4965 
4966 	mutex_init(&context->io_mutex, "I/O context");
4967 
4968 	// Copy all parent file descriptors
4969 
4970 	if (parentContext != NULL) {
4971 		size_t i;
4972 
4973 		mutex_lock(&sIOContextRootLock);
4974 		context->root = parentContext->root;
4975 		if (context->root)
4976 			inc_vnode_ref_count(context->root);
4977 		mutex_unlock(&sIOContextRootLock);
4978 
4979 		context->cwd = parentContext->cwd;
4980 		if (context->cwd)
4981 			inc_vnode_ref_count(context->cwd);
4982 
4983 		if (parentContext->inherit_fds) {
4984 			for (i = 0; i < tableSize; i++) {
4985 				struct file_descriptor* descriptor = parentContext->fds[i];
4986 
4987 				if (descriptor != NULL
4988 					&& (descriptor->open_mode & O_DISCONNECTED) == 0) {
4989 					bool closeOnExec = fd_close_on_exec(parentContext, i);
4990 					if (closeOnExec && purgeCloseOnExec)
4991 						continue;
4992 
4993 					TFD(InheritFD(context, i, descriptor, parentContext));
4994 
4995 					context->fds[i] = descriptor;
4996 					context->num_used_fds++;
4997 					atomic_add(&descriptor->ref_count, 1);
4998 					atomic_add(&descriptor->open_count, 1);
4999 
5000 					if (closeOnExec)
5001 						fd_set_close_on_exec(context, i, true);
5002 				}
5003 			}
5004 		}
5005 
5006 		parentLocker.Unlock();
5007 	} else {
5008 		context->root = sRoot;
5009 		context->cwd = sRoot;
5010 
5011 		if (context->root)
5012 			inc_vnode_ref_count(context->root);
5013 
5014 		if (context->cwd)
5015 			inc_vnode_ref_count(context->cwd);
5016 	}
5017 
5018 	context->table_size = tableSize;
5019 	context->inherit_fds = parentContext != NULL;
5020 
5021 	list_init(&context->node_monitors);
5022 	context->max_monitors = DEFAULT_NODE_MONITORS;
5023 
5024 	return context;
5025 }
5026 
5027 
5028 void
5029 vfs_get_io_context(io_context* context)
5030 {
5031 	atomic_add(&context->ref_count, 1);
5032 }
5033 
5034 
5035 void
5036 vfs_put_io_context(io_context* context)
5037 {
5038 	if (atomic_add(&context->ref_count, -1) == 1)
5039 		free_io_context(context);
5040 }
5041 
5042 
5043 status_t
5044 vfs_resize_fd_table(struct io_context* context, uint32 newSize)
5045 {
5046 	if (newSize == 0 || newSize > MAX_FD_TABLE_SIZE)
5047 		return B_BAD_VALUE;
5048 
5049 	TIOC(ResizeIOContext(context, newSize));
5050 
5051 	MutexLocker _(context->io_mutex);
5052 
5053 	uint32 oldSize = context->table_size;
5054 	int oldCloseOnExecBitmapSize = (oldSize + 7) / 8;
5055 	int newCloseOnExecBitmapSize = (newSize + 7) / 8;
5056 
5057 	// If the tables shrink, make sure none of the fds being dropped are in use.
5058 	if (newSize < oldSize) {
5059 		for (uint32 i = oldSize; i-- > newSize;) {
5060 			if (context->fds[i])
5061 				return B_BUSY;
5062 		}
5063 	}
5064 
5065 	// store pointers to the old tables
5066 	file_descriptor** oldFDs = context->fds;
5067 	select_info** oldSelectInfos = context->select_infos;
5068 	uint8* oldCloseOnExecTable = context->fds_close_on_exec;
5069 
5070 	// allocate new tables
5071 	file_descriptor** newFDs = (file_descriptor**)malloc(
5072 		sizeof(struct file_descriptor*) * newSize
5073 		+ sizeof(struct select_info**) * newSize
5074 		+ newCloseOnExecBitmapSize);
5075 	if (newFDs == NULL)
5076 		return B_NO_MEMORY;
5077 
5078 	context->fds = newFDs;
5079 	context->select_infos = (select_info**)(context->fds + newSize);
5080 	context->fds_close_on_exec = (uint8*)(context->select_infos + newSize);
5081 	context->table_size = newSize;
5082 
5083 	// copy entries from old tables
5084 	uint32 toCopy = min_c(oldSize, newSize);
5085 
5086 	memcpy(context->fds, oldFDs, sizeof(void*) * toCopy);
5087 	memcpy(context->select_infos, oldSelectInfos, sizeof(void*) * toCopy);
5088 	memcpy(context->fds_close_on_exec, oldCloseOnExecTable,
5089 		min_c(oldCloseOnExecBitmapSize, newCloseOnExecBitmapSize));
5090 
5091 	// clear additional entries, if the tables grow
5092 	if (newSize > oldSize) {
5093 		memset(context->fds + oldSize, 0, sizeof(void*) * (newSize - oldSize));
5094 		memset(context->select_infos + oldSize, 0,
5095 			sizeof(void*) * (newSize - oldSize));
5096 		memset(context->fds_close_on_exec + oldCloseOnExecBitmapSize, 0,
5097 			newCloseOnExecBitmapSize - oldCloseOnExecBitmapSize);
5098 	}
5099 
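	// Note: oldSelectInfos and oldCloseOnExecTable point into the same
	// allocation as oldFDs, so freeing oldFDs releases all three tables.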
5100 	free(oldFDs);
5101 
5102 	return B_OK;
5103 }
5104 
5105 
5106 /*!	\brief Resolves a vnode to the vnode it is covered by, if any.
5107 
5108 	Given an arbitrary vnode (identified by mount and node ID), the function
5109 	checks whether the vnode is covered by another vnode. If it is, the
5110 	function returns the mount and node ID of the covering vnode. Otherwise
5111 	it simply returns the supplied mount and node ID.
5112 
5113 	In case of error (e.g. the supplied node could not be found) the variables
5114 	for storing the resolved mount and node ID remain untouched and an error
5115 	code is returned.
5116 
5117 	\param mountID The mount ID of the vnode in question.
5118 	\param nodeID The node ID of the vnode in question.
5119 	\param resolvedMountID Pointer to storage for the resolved mount ID.
5120 	\param resolvedNodeID Pointer to storage for the resolved node ID.
5121 	\return
5122 	- \c B_OK, if everything went fine,
5123 	- another error code, if something went wrong.
5124 */
5125 status_t
5126 vfs_resolve_vnode_to_covering_vnode(dev_t mountID, ino_t nodeID,
5127 	dev_t* resolvedMountID, ino_t* resolvedNodeID)
5128 {
5129 	// get the node
5130 	struct vnode* node;
5131 	status_t error = get_vnode(mountID, nodeID, &node, true, false);
5132 	if (error != B_OK)
5133 		return error;
5134 
5135 	// resolve the node
5136 	if (Vnode* coveringNode = get_covering_vnode(node)) {
5137 		put_vnode(node);
5138 		node = coveringNode;
5139 	}
5140 
5141 	// set the return values
5142 	*resolvedMountID = node->device;
5143 	*resolvedNodeID = node->id;
5144 
5145 	put_vnode(node);
5146 
5147 	return B_OK;
5148 }
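

// Example (illustrative): if a volume is mounted at /mnt, passing the node
// ref of the covered /mnt directory to this function yields the node ref of
// the mounted volume's root directory instead.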
5149 
5150 
5151 status_t
5152 vfs_get_mount_point(dev_t mountID, dev_t* _mountPointMountID,
5153 	ino_t* _mountPointNodeID)
5154 {
5155 	ReadLocker nodeLocker(sVnodeLock);
5156 	ReadLocker mountLocker(sMountLock);
5157 
5158 	struct fs_mount* mount = find_mount(mountID);
5159 	if (mount == NULL)
5160 		return B_BAD_VALUE;
5161 
5162 	Vnode* mountPoint = mount->covers_vnode;
5163 
5164 	*_mountPointMountID = mountPoint->device;
5165 	*_mountPointNodeID = mountPoint->id;
5166 
5167 	return B_OK;
5168 }
5169 
5170 
5171 status_t
5172 vfs_bind_mount_directory(dev_t mountID, ino_t nodeID, dev_t coveredMountID,
5173 	ino_t coveredNodeID)
5174 {
5175 	// get the vnodes
5176 	Vnode* vnode;
5177 	status_t error = get_vnode(mountID, nodeID, &vnode, true, false);
5178 	if (error != B_OK)
5179 		return B_BAD_VALUE;
5180 	VnodePutter vnodePutter(vnode);
5181 
5182 	Vnode* coveredVnode;
5183 	error = get_vnode(coveredMountID, coveredNodeID, &coveredVnode, true,
5184 		false);
5185 	if (error != B_OK)
5186 		return B_BAD_VALUE;
5187 	VnodePutter coveredVnodePutter(coveredVnode);
5188 
5189 	// establish the covered/covering links
5190 	WriteLocker locker(sVnodeLock);
5191 
5192 	if (vnode->covers != NULL || coveredVnode->covered_by != NULL
5193 		|| vnode->mount->unmounting || coveredVnode->mount->unmounting) {
5194 		return B_BUSY;
5195 	}
5196 
5197 	vnode->covers = coveredVnode;
5198 	vnode->SetCovering(true);
5199 
5200 	coveredVnode->covered_by = vnode;
5201 	coveredVnode->SetCovered(true);
5202 
5203 	// the vnodes now reference each other
5204 	inc_vnode_ref_count(vnode);
5205 	inc_vnode_ref_count(coveredVnode);
5206 
5207 	return B_OK;
5208 }
5209 
5210 
5211 int
5212 vfs_getrlimit(int resource, struct rlimit* rlp)
5213 {
5214 	if (!rlp)
5215 		return B_BAD_ADDRESS;
5216 
5217 	switch (resource) {
5218 		case RLIMIT_NOFILE:
5219 		{
5220 			struct io_context* context = get_current_io_context(false);
5221 			MutexLocker _(context->io_mutex);
5222 
5223 			rlp->rlim_cur = context->table_size;
5224 			rlp->rlim_max = MAX_FD_TABLE_SIZE;
5225 			return 0;
5226 		}
5227 
5228 		case RLIMIT_NOVMON:
5229 		{
5230 			struct io_context* context = get_current_io_context(false);
5231 			MutexLocker _(context->io_mutex);
5232 
5233 			rlp->rlim_cur = context->max_monitors;
5234 			rlp->rlim_max = MAX_NODE_MONITORS;
5235 			return 0;
5236 		}
5237 
5238 		default:
5239 			return B_BAD_VALUE;
5240 	}
5241 }
5242 
5243 
5244 int
5245 vfs_setrlimit(int resource, const struct rlimit* rlp)
5246 {
5247 	if (!rlp)
5248 		return B_BAD_ADDRESS;
5249 
5250 	switch (resource) {
5251 		case RLIMIT_NOFILE:
5252 			/* TODO: check getuid() */
5253 			if (rlp->rlim_max != RLIM_SAVED_MAX
5254 				&& rlp->rlim_max != MAX_FD_TABLE_SIZE)
5255 				return B_NOT_ALLOWED;
5256 
5257 			return vfs_resize_fd_table(get_current_io_context(false),
5258 				rlp->rlim_cur);
5259 
5260 		case RLIMIT_NOVMON:
5261 			/* TODO: check getuid() */
5262 			if (rlp->rlim_max != RLIM_SAVED_MAX
5263 				&& rlp->rlim_max != MAX_NODE_MONITORS)
5264 				return B_NOT_ALLOWED;
5265 
5266 			return resize_monitor_table(get_current_io_context(false),
5267 				rlp->rlim_cur);
5268 
5269 		default:
5270 			return B_BAD_VALUE;
5271 	}
5272 }
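

// Example (illustrative sketch): how userland would grow its FD table via the
// POSIX API, which ends up in vfs_setrlimit(); rlim_max must be
// RLIM_SAVED_MAX or MAX_FD_TABLE_SIZE to pass the check above.
#if 0
	struct rlimit rl = { 2048, RLIM_SAVED_MAX };
	setrlimit(RLIMIT_NOFILE, &rl);
#endif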
5273 
5274 
5275 status_t
5276 vfs_init(kernel_args* args)
5277 {
5278 	vnode::StaticInit();
5279 
5280 	sVnodeTable = new(std::nothrow) VnodeTable();
5281 	if (sVnodeTable == NULL || sVnodeTable->Init(VNODE_HASH_TABLE_SIZE) != B_OK)
5282 		panic("vfs_init: error creating vnode hash table\n");
5283 
5284 	struct vnode dummy_vnode;
5285 	list_init_etc(&sUnusedVnodeList, offset_of_member(dummy_vnode, unused_link));
5286 
5288 	sMountsTable = new(std::nothrow) MountTable();
5289 	if (sMountsTable == NULL
5290 			|| sMountsTable->Init(MOUNTS_HASH_TABLE_SIZE) != B_OK)
5291 		panic("vfs_init: error creating mounts hash table\n");
5292 
5293 	sPathNameCache = create_object_cache("vfs path names",
5294 		B_PATH_NAME_LENGTH + 1, 8, NULL, NULL, NULL);
5295 	if (sPathNameCache == NULL)
5296 		panic("vfs_init: error creating path name object_cache\n");
5297 
5298 	sVnodeCache = create_object_cache("vfs vnodes",
5299 		sizeof(struct vnode), 8, NULL, NULL, NULL);
5300 	if (sVnodeCache == NULL)
5301 		panic("vfs_init: error creating vnode object_cache\n");
5302 
5303 	sFileDescriptorCache = create_object_cache("vfs fds",
5304 		sizeof(file_descriptor), 8, NULL, NULL, NULL);
5305 	if (sFileDescriptorCache == NULL)
5306 		panic("vfs_init: error creating file descriptor object_cache\n");
5307 
5308 	node_monitor_init();
5309 
5310 	sRoot = NULL;
5311 
5312 	recursive_lock_init(&sMountOpLock, "vfs_mount_op_lock");
5313 
5314 	if (block_cache_init() != B_OK)
5315 		return B_ERROR;
5316 
5317 #ifdef ADD_DEBUGGER_COMMANDS
5318 	// add some debugger commands
5319 	add_debugger_command_etc("vnode", &dump_vnode,
5320 		"Print info about the specified vnode",
5321 		"[ \"-p\" ] ( <vnode> | <devID> <nodeID> )\n"
5322 		"Prints information about the vnode specified by address <vnode> or\n"
5323 		"<devID>, <vnodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5324 		"<devID>, <nodeID> pair. If \"-p\" is given, a path of the vnode is\n"
5325 		"complete path, though.\n",
5326 		0);
5327 	add_debugger_command("vnodes", &dump_vnodes,
5328 		"list all vnodes (from the specified device)");
5329 	add_debugger_command("vnode_caches", &dump_vnode_caches,
5330 		"list all vnode caches");
5331 	add_debugger_command("mount", &dump_mount,
5332 		"info about the specified fs_mount");
5333 	add_debugger_command("mounts", &dump_mounts, "list all fs_mounts");
5334 	add_debugger_command("io_context", &dump_io_context,
5335 		"info about the I/O context");
5336 	add_debugger_command("vnode_usage", &dump_vnode_usage,
5337 		"info about vnode usage");
5338 #endif
5339 
5340 	register_low_resource_handler(&vnode_low_resource_handler, NULL,
5341 		B_KERNEL_RESOURCE_PAGES | B_KERNEL_RESOURCE_MEMORY
5342 			| B_KERNEL_RESOURCE_ADDRESS_SPACE,
5343 		0);
5344 
5345 	fifo_init();
5346 	file_map_init();
5347 
5348 	return file_cache_init();
5349 }
5350 
5351 
5352 //	#pragma mark - fd_ops implementations
5353 
5354 
5355 /*!
5356 	Calls fs_open() on the given vnode and returns a new
5357 	file descriptor for it
5358 */
5359 static int
5360 open_vnode(struct vnode* vnode, int openMode, bool kernel)
5361 {
5362 	void* cookie;
5363 	status_t status = FS_CALL(vnode, open, openMode, &cookie);
5364 	if (status != B_OK)
5365 		return status;
5366 
5367 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode, cookie, openMode, kernel);
5368 	if (fd < 0) {
5369 		FS_CALL(vnode, close, cookie);
5370 		FS_CALL(vnode, free_cookie, cookie);
5371 	}
5372 	return fd;
5373 }
5374 
5375 
5376 /*!
5377 	Looks up the entry \a name in \a directory, creating it first if it
5378 	doesn't exist yet, and returns a new file descriptor for the node.
5379 */
5380 static int
5381 create_vnode(struct vnode* directory, const char* name, int openMode,
5382 	int perms, bool kernel)
5383 {
5384 	bool traverse = ((openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0);
5385 	status_t status = B_ERROR;
5386 	VnodePutter vnode, dirPutter;
5387 	void* cookie;
5388 	ino_t newID;
5389 	char clonedName[B_FILE_NAME_LENGTH + 1];
5390 
5391 	// This is somewhat tricky: If the entry already exists, the FS responsible
5392 	// for the directory might not necessarily also be the one responsible for
5393 	// the node the entry refers to (e.g. in case of mount points or FIFOs). So
5394 	// we can actually never call the create() hook without O_EXCL. Instead we
5395 	// try to look the entry up first. If it already exists, we just open the
5396 	// node (unless O_EXCL), otherwise we call create() with O_EXCL. This
5397 	// introduces a race condition, since someone else might have created the
5398 	// entry in the meantime. In that case we rely on the FS returning the
5399 	// correct error code and retry (up to three times).
5400 
5401 	for (int i = 0; i < 3 && status != B_OK; i++) {
5402 		bool create = false;
5403 
5404 		// look the node up
5405 		{
5406 			struct vnode* entry = NULL;
5407 			status = lookup_dir_entry(directory, name, &entry);
5408 			vnode.SetTo(entry);
5409 		}
5410 		if (status == B_OK) {
5411 			if ((openMode & O_EXCL) != 0)
5412 				return B_FILE_EXISTS;
5413 
5414 			// If the node is a symlink, we have to follow it, unless
5415 			// O_NOTRAVERSE is set.
5416 			if (S_ISLNK(vnode->Type()) && traverse) {
5417 				vnode.Unset();
5418 				if (strlcpy(clonedName, name, B_FILE_NAME_LENGTH)
5419 						>= B_FILE_NAME_LENGTH) {
5420 					return B_NAME_TOO_LONG;
5421 				}
5422 
5423 				inc_vnode_ref_count(directory);
5424 				status = vnode_path_to_vnode(directory, clonedName, true, 0,
5425 					kernel, vnode, NULL, clonedName);
5426 				if (status != B_OK) {
5427 					// The vnode wasn't found, but it may have a parent we
5428 					// can create it from: vnode_path_to_vnode() has set
5429 					// vnode to the deepest directory found in the path.
5430 					if (status == B_ENTRY_NOT_FOUND) {
5431 						directory = vnode.Detach();
5432 						dirPutter.SetTo(directory);
5433 						name = clonedName;
5434 						create = true;
5435 					} else
5436 						return status;
5437 				}
5438 			}
5439 
5440 			if (!create) {
5441 				if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5442 					return B_LINK_LIMIT;
5443 
5444 				int fd = open_vnode(vnode.Get(), openMode & ~O_CREAT, kernel);
5445 				// on success keep the vnode reference for the FD
5446 				if (fd >= 0)
5447 					vnode.Detach();
5448 
5449 				return fd;
5450 			}
5451 		}
5452 
5453 		// it doesn't exist yet -- try to create it
5454 
5455 		if (!HAS_FS_CALL(directory, create))
5456 			return B_READ_ONLY_DEVICE;
5457 
5458 		status = FS_CALL(directory, create, name, openMode | O_EXCL, perms,
5459 			&cookie, &newID);
5460 		if (status != B_OK
5461 			&& ((openMode & O_EXCL) != 0 || status != B_FILE_EXISTS)) {
5462 			return status;
5463 		}
5464 	}
5465 
5466 	if (status != B_OK)
5467 		return status;
5468 
5469 	// the node has been created successfully
5470 
5471 	rw_lock_read_lock(&sVnodeLock);
5472 	vnode.SetTo(lookup_vnode(directory->device, newID));
5473 	rw_lock_read_unlock(&sVnodeLock);
5474 
5475 	if (!vnode.IsSet()) {
5476 		panic("vfs: fs_create() returned success but there is no vnode, "
5477 			"mount ID %" B_PRIdDEV "!\n", directory->device);
5478 		return B_BAD_VALUE;
5479 	}
5480 
5481 	int fd = get_new_fd(FDTYPE_FILE, NULL, vnode.Get(), cookie, openMode, kernel);
5482 	if (fd >= 0) {
5483 		vnode.Detach();
5484 		return fd;
5485 	}
5486 
5487 	status = fd;
5488 
5489 	// something went wrong, clean up
5490 
5491 	FS_CALL(vnode.Get(), close, cookie);
5492 	FS_CALL(vnode.Get(), free_cookie, cookie);
5493 
5494 	FS_CALL(directory, unlink, name);
5495 
5496 	return status;
5497 }
5498 
5499 
5500 /*! Calls fs open_dir() on the given vnode and returns a new
5501 	file descriptor for it
5502 */
5503 static int
5504 open_dir_vnode(struct vnode* vnode, bool kernel)
5505 {
5506 	if (!HAS_FS_CALL(vnode, open_dir))
5507 		return B_UNSUPPORTED;
5508 
5509 	void* cookie;
5510 	status_t status = FS_CALL(vnode, open_dir, &cookie);
5511 	if (status != B_OK)
5512 		return status;
5513 
5514 	// directory is opened, create a fd
5515 	status = get_new_fd(FDTYPE_DIR, NULL, vnode, cookie, O_CLOEXEC, kernel);
5516 	if (status >= 0)
5517 		return status;
5518 
5519 	FS_CALL(vnode, close_dir, cookie);
5520 	FS_CALL(vnode, free_dir_cookie, cookie);
5521 
5522 	return status;
5523 }
5524 
5525 
5526 /*! Calls fs open_attr_dir() on the given vnode and returns a new
5527 	file descriptor for it.
5528 	Used by attr_dir_open(), and attr_dir_open_fd().
5529 */
5530 static int
5531 open_attr_dir_vnode(struct vnode* vnode, bool kernel)
5532 {
5533 	if (!HAS_FS_CALL(vnode, open_attr_dir))
5534 		return B_UNSUPPORTED;
5535 
5536 	void* cookie;
5537 	status_t status = FS_CALL(vnode, open_attr_dir, &cookie);
5538 	if (status != B_OK)
5539 		return status;
5540 
5541 	// directory is opened, create a fd
5542 	status = get_new_fd(FDTYPE_ATTR_DIR, NULL, vnode, cookie, O_CLOEXEC,
5543 		kernel);
5544 	if (status >= 0)
5545 		return status;
5546 
5547 	FS_CALL(vnode, close_attr_dir, cookie);
5548 	FS_CALL(vnode, free_attr_dir_cookie, cookie);
5549 
5550 	return status;
5551 }
5552 
5553 
5554 static int
5555 file_create_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5556 	int openMode, int perms, bool kernel)
5557 {
5558 	FUNCTION(("file_create_entry_ref: name = '%s', omode %x, perms %d, "
5559 		"kernel %d\n", name, openMode, perms, kernel));
5560 
5561 	// get directory to put the new file in
5562 	struct vnode* directory;
5563 	status_t status = get_vnode(mountID, directoryID, &directory, true, false);
5564 	if (status != B_OK)
5565 		return status;
5566 
5567 	status = create_vnode(directory, name, openMode, perms, kernel);
5568 	put_vnode(directory);
5569 
5570 	return status;
5571 }
5572 
5573 
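/*!	Creates and opens the file at the given FD and path combination.
	The path is first resolved to its parent directory and leaf name;
	the actual work is done by create_vnode().
*/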
5574 static int
5575 file_create(int fd, char* path, int openMode, int perms, bool kernel)
5576 {
5577 	FUNCTION(("file_create: path '%s', omode %x, perms %d, kernel %d\n", path,
5578 		openMode, perms, kernel));
5579 
5580 	// get directory to put the new file in
5581 	char name[B_FILE_NAME_LENGTH];
5582 	VnodePutter directory;
5583 	status_t status = fd_and_path_to_dir_vnode(fd, path, directory, name,
5584 		kernel);
5585 	if (status < 0)
5586 		return status;
5587 
5588 	return create_vnode(directory.Get(), name, openMode, perms, kernel);
5589 }
5590 
5591 
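/*!	Opens the file given by the specified entry_ref. Symlinks are only
	traversed if neither O_NOTRAVERSE nor O_NOFOLLOW is set in
	\a openMode. On success the vnode reference is transferred to the
	new file descriptor.
*/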
5592 static int
5593 file_open_entry_ref(dev_t mountID, ino_t directoryID, const char* name,
5594 	int openMode, bool kernel)
5595 {
5596 	if (name == NULL || *name == '\0')
5597 		return B_BAD_VALUE;
5598 
5599 	FUNCTION(("file_open_entry_ref(ref = (%" B_PRId32 ", %" B_PRId64 ", %s), "
5600 		"openMode = %d)\n", mountID, directoryID, name, openMode));
5601 
5602 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5603 
5604 	// get the vnode matching the entry_ref
5605 	VnodePutter vnode;
5606 	status_t status = entry_ref_to_vnode(mountID, directoryID, name, traverse,
5607 		kernel, vnode);
5608 	if (status != B_OK)
5609 		return status;
5610 
5611 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5612 		return B_LINK_LIMIT;
5613 
5614 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5615 	if (newFD >= 0) {
5616 		cache_node_opened(vnode.Get(), FDTYPE_FILE, vnode->cache, mountID,
5617 			directoryID, vnode->id, name);
5618 
5619 		// The vnode reference has been transferred to the FD
5620 		vnode.Detach();
5621 	}
5622 
5623 	return newFD;
5624 }
5625 
5626 
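/*!	Opens the file at the given FD and path combination. Behaves like
	file_open_entry_ref(), but resolves the location from a path
	relative to \a fd.
*/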
5627 static int
5628 file_open(int fd, char* path, int openMode, bool kernel)
5629 {
5630 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
5631 
5632 	FUNCTION(("file_open: fd: %d, entry path = '%s', omode %d, kernel %d\n",
5633 		fd, path, openMode, kernel));
5634 
5635 	// get the vnode matching the vnode + path combination
5636 	VnodePutter vnode;
5637 	ino_t parentID;
5638 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode,
5639 		&parentID, kernel);
5640 	if (status != B_OK)
5641 		return status;
5642 
5643 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
5644 		return B_LINK_LIMIT;
5645 
5646 	// open the vnode
5647 	int newFD = open_vnode(vnode.Get(), openMode, kernel);
5648 	if (newFD >= 0) {
5649 		cache_node_opened(vnode.Get(), FDTYPE_FILE, vnode->cache,
5650 			vnode->device, parentID, vnode->id, NULL);
5651 
5652 		// The vnode reference has been transferred to the FD
5653 		vnode.Detach();
5654 	}
5655 
5656 	return newFD;
5657 }
5658 
5659 
5660 static status_t
5661 file_close(struct file_descriptor* descriptor)
5662 {
5663 	struct vnode* vnode = descriptor->u.vnode;
5664 	status_t status = B_OK;
5665 
5666 	FUNCTION(("file_close(descriptor = %p)\n", descriptor));
5667 
5668 	cache_node_closed(vnode, FDTYPE_FILE, vnode->cache, vnode->device,
5669 		vnode->id);
5670 	if (HAS_FS_CALL(vnode, close)) {
5671 		status = FS_CALL(vnode, close, descriptor->cookie);
5672 	}
5673 
5674 	if (status == B_OK) {
5675 		// remove all outstanding locks for this team
5676 		if (HAS_FS_CALL(vnode, release_lock))
5677 			status = FS_CALL(vnode, release_lock, descriptor->cookie, NULL);
5678 		else
5679 			status = release_advisory_lock(vnode, NULL, descriptor, NULL);
5680 	}
5681 	return status;
5682 }
5683 
5684 
5685 static void
5686 file_free_fd(struct file_descriptor* descriptor)
5687 {
5688 	struct vnode* vnode = descriptor->u.vnode;
5689 
5690 	if (vnode != NULL) {
5691 		FS_CALL(vnode, free_cookie, descriptor->cookie);
5692 		put_vnode(vnode);
5693 	}
5694 }
5695 
5696 
5697 static status_t
5698 file_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
5699 	size_t* length)
5700 {
5701 	struct vnode* vnode = descriptor->u.vnode;
5702 	FUNCTION(("file_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
5703 		pos, length, *length));
5704 
5705 	if (S_ISDIR(vnode->Type()))
5706 		return B_IS_A_DIRECTORY;
5707 
5708 	return FS_CALL(vnode, read, descriptor->cookie, pos, buffer, length);
5709 }
5710 
5711 
5712 static status_t
5713 file_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
5714 	size_t* length)
5715 {
5716 	struct vnode* vnode = descriptor->u.vnode;
5717 	FUNCTION(("file_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
5718 		length));
5719 
5720 	if (S_ISDIR(vnode->Type()))
5721 		return B_IS_A_DIRECTORY;
5722 	if (!HAS_FS_CALL(vnode, write))
5723 		return B_READ_ONLY_DEVICE;
5724 
5725 	return FS_CALL(vnode, write, descriptor->cookie, pos, buffer, length);
5726 }
5727 
5728 
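/*!	Computes and sets the new file position of the given descriptor.
	SEEK_END stat()s the node and, for devices reporting a zero size,
	falls back to the device geometry. SEEK_DATA/SEEK_HOLE first try the
	FIOSEEKDATA/FIOSEEKHOLE ioctls and otherwise use a stat() based
	approximation that treats the whole file as a single data extent.
	Returns the new position, or an error code on failure.
*/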
5729 static off_t
5730 file_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
5731 {
5732 	struct vnode* vnode = descriptor->u.vnode;
5733 	off_t offset;
5734 	bool isDevice = false;
5735 
5736 	FUNCTION(("file_seek(pos = %" B_PRIdOFF ", seekType = %d)\n", pos,
5737 		seekType));
5738 
5739 	// some kinds of files are not seekable
5740 	switch (vnode->Type() & S_IFMT) {
5741 		case S_IFIFO:
5742 		case S_IFSOCK:
5743 			return ESPIPE;
5744 
5745 		// drivers publish block devices as character devices, so accept both
5746 		case S_IFBLK:
5747 		case S_IFCHR:
5748 			isDevice = true;
5749 			break;
5750 		// The Open Group Base Specs give special seek semantics only to pipes,
5751 		// FIFOs, and sockets, so we allow seeking all other file types.
5752 		case S_IFREG:
5753 		case S_IFDIR:
5754 		case S_IFLNK:
5755 			break;
5756 	}
5757 
5758 	switch (seekType) {
5759 		case SEEK_SET:
5760 			offset = 0;
5761 			break;
5762 		case SEEK_CUR:
5763 			offset = descriptor->pos;
5764 			break;
5765 		case SEEK_END:
5766 		{
5767 			// stat() the node
5768 			if (!HAS_FS_CALL(vnode, read_stat))
5769 				return B_UNSUPPORTED;
5770 
5771 			struct stat stat;
5772 			status_t status = FS_CALL(vnode, read_stat, &stat);
5773 			if (status != B_OK)
5774 				return status;
5775 
5776 			offset = stat.st_size;
5777 
5778 			if (offset == 0 && isDevice) {
5779 				// stat() on regular drivers doesn't report size
5780 				device_geometry geometry;
5781 
5782 				if (HAS_FS_CALL(vnode, ioctl)) {
5783 					status = FS_CALL(vnode, ioctl, descriptor->cookie,
5784 						B_GET_GEOMETRY, &geometry, sizeof(geometry));
5785 					if (status == B_OK)
5786 						offset = (off_t)geometry.bytes_per_sector
5787 							* geometry.sectors_per_track
5788 							* geometry.cylinder_count
5789 							* geometry.head_count;
5790 				}
5791 			}
5792 
5793 			break;
5794 		}
5795 		case SEEK_DATA:
5796 		case SEEK_HOLE:
5797 		{
5798 			status_t status = B_BAD_VALUE;
5799 			if (HAS_FS_CALL(vnode, ioctl)) {
5800 				offset = pos;
5801 				status = FS_CALL(vnode, ioctl, descriptor->cookie,
5802 					seekType == SEEK_DATA ? FIOSEEKDATA : FIOSEEKHOLE,
5803 					&offset, sizeof(offset));
5804 				if (status == B_OK) {
5805 					if (offset > pos)
5806 						offset -= pos;
5807 					break;
5808 				}
5809 			}
5810 			if (status != B_BAD_VALUE && status != B_DEV_INVALID_IOCTL)
5811 				return status;
5812 
5813 			// fall back to a basic implementation that stat()s the node
5814 			if (!HAS_FS_CALL(vnode, read_stat) || isDevice)
5815 				return B_BAD_VALUE;
5816 
5817 			struct stat stat;
5818 			status = FS_CALL(vnode, read_stat, &stat);
5819 			if (status != B_OK)
5820 				return status;
5821 
5822 			off_t end = stat.st_size;
5823 			if (pos >= end)
5824 				return ENXIO;
5825 			offset = seekType == SEEK_HOLE ? end - pos : 0;
5826 			break;
5827 		}
5828 		default:
5829 			return B_BAD_VALUE;
5830 	}
5831 
5832 	// assumes off_t is 64 bits wide
5833 	if (offset > 0 && LONGLONG_MAX - offset < pos)
5834 		return B_BUFFER_OVERFLOW;
5835 
5836 	pos += offset;
5837 	if (pos < 0)
5838 		return B_BAD_VALUE;
5839 
5840 	return descriptor->pos = pos;
5841 }
5842 
5843 
5844 static status_t
5845 file_select(struct file_descriptor* descriptor, uint8 event,
5846 	struct selectsync* sync)
5847 {
5848 	FUNCTION(("file_select(%p, %u, %p)\n", descriptor, event, sync));
5849 
5850 	struct vnode* vnode = descriptor->u.vnode;
5851 
5852 	// If the FS has no select() hook, notify select() now.
5853 	if (!HAS_FS_CALL(vnode, select)) {
5854 		if (!SELECT_TYPE_IS_OUTPUT_ONLY(event))
5855 			return notify_select_event(sync, event);
5856 		else
5857 			return B_OK;
5858 	}
5859 
5860 	return FS_CALL(vnode, select, descriptor->cookie, event, sync);
5861 }
5862 
5863 
5864 static status_t
5865 file_deselect(struct file_descriptor* descriptor, uint8 event,
5866 	struct selectsync* sync)
5867 {
5868 	struct vnode* vnode = descriptor->u.vnode;
5869 
5870 	if (!HAS_FS_CALL(vnode, deselect))
5871 		return B_OK;
5872 
5873 	return FS_CALL(vnode, deselect, descriptor->cookie, event, sync);
5874 }
5875 
5876 
5877 static status_t
5878 dir_create_entry_ref(dev_t mountID, ino_t parentID, const char* name, int perms,
5879 	bool kernel)
5880 {
5881 	struct vnode* vnode;
5882 	status_t status;
5883 
5884 	if (name == NULL || *name == '\0')
5885 		return B_BAD_VALUE;
5886 
5887 	FUNCTION(("dir_create_entry_ref(dev = %" B_PRId32 ", ino = %" B_PRId64 ", "
5888 		"name = '%s', perms = %d)\n", mountID, parentID, name, perms));
5889 
5890 	status = get_vnode(mountID, parentID, &vnode, true, false);
5891 	if (status != B_OK)
5892 		return status;
5893 
5894 	if (HAS_FS_CALL(vnode, create_dir))
5895 		status = FS_CALL(vnode, create_dir, name, perms);
5896 	else
5897 		status = B_READ_ONLY_DEVICE;
5898 
5899 	put_vnode(vnode);
5900 	return status;
5901 }
5902 
5903 
5904 static status_t
5905 dir_create(int fd, char* path, int perms, bool kernel)
5906 {
5907 	char filename[B_FILE_NAME_LENGTH];
5908 	status_t status;
5909 
5910 	FUNCTION(("dir_create: path '%s', perms %d, kernel %d\n", path, perms,
5911 		kernel));
5912 
5913 	VnodePutter vnode;
5914 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
5915 	if (status < 0)
5916 		return status;
5917 
5918 	if (HAS_FS_CALL(vnode, create_dir)) {
5919 		status = FS_CALL(vnode.Get(), create_dir, filename, perms);
5920 	} else
5921 		status = B_READ_ONLY_DEVICE;
5922 
5923 	return status;
5924 }
5925 
5926 
5927 static int
5928 dir_open_entry_ref(dev_t mountID, ino_t parentID, const char* name, bool kernel)
5929 {
5930 	FUNCTION(("dir_open_entry_ref()\n"));
5931 
5932 	if (name && name[0] == '\0')
5933 		return B_BAD_VALUE;
5934 
5935 	// get the vnode matching the entry_ref/node_ref
5936 	VnodePutter vnode;
5937 	status_t status;
5938 	if (name) {
5939 		status = entry_ref_to_vnode(mountID, parentID, name, true, kernel,
5940 			vnode);
5941 	} else {
5942 		struct vnode* temp = NULL;
5943 		status = get_vnode(mountID, parentID, &temp, true, false);
5944 		vnode.SetTo(temp);
5945 	}
5946 	if (status != B_OK)
5947 		return status;
5948 
5949 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5950 	if (newFD >= 0) {
5951 		cache_node_opened(vnode.Get(), FDTYPE_DIR, vnode->cache, mountID, parentID,
5952 			vnode->id, name);
5953 
5954 		// The vnode reference has been transferred to the FD
5955 		vnode.Detach();
5956 	}
5957 
5958 	return newFD;
5959 }
5960 
5961 
5962 static int
5963 dir_open(int fd, char* path, bool kernel)
5964 {
5965 	FUNCTION(("dir_open: fd: %d, entry path = '%s', kernel %d\n", fd, path,
5966 		kernel));
5967 
5968 	// get the vnode matching the vnode + path combination
5969 	VnodePutter vnode;
5970 	ino_t parentID;
5971 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, &parentID,
5972 		kernel);
5973 	if (status != B_OK)
5974 		return status;
5975 
5976 	// open the dir
5977 	int newFD = open_dir_vnode(vnode.Get(), kernel);
5978 	if (newFD >= 0) {
5979 		cache_node_opened(vnode.Get(), FDTYPE_DIR, vnode->cache, vnode->device,
5980 			parentID, vnode->id, NULL);
5981 
5982 		// The vnode reference has been transferred to the FD
5983 		vnode.Detach();
5984 	}
5985 
5986 	return newFD;
5987 }
5988 
5989 
5990 static status_t
5991 dir_close(struct file_descriptor* descriptor)
5992 {
5993 	struct vnode* vnode = descriptor->u.vnode;
5994 
5995 	FUNCTION(("dir_close(descriptor = %p)\n", descriptor));
5996 
5997 	cache_node_closed(vnode, FDTYPE_DIR, vnode->cache, vnode->device,
5998 		vnode->id);
5999 	if (HAS_FS_CALL(vnode, close_dir))
6000 		return FS_CALL(vnode, close_dir, descriptor->cookie);
6001 
6002 	return B_OK;
6003 }
6004 
6005 
6006 static void
6007 dir_free_fd(struct file_descriptor* descriptor)
6008 {
6009 	struct vnode* vnode = descriptor->u.vnode;
6010 
6011 	if (vnode != NULL) {
6012 		FS_CALL(vnode, free_dir_cookie, descriptor->cookie);
6013 		put_vnode(vnode);
6014 	}
6015 }
6016 
6017 
6018 static status_t
6019 dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6020 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6021 {
6022 	return dir_read(ioContext, descriptor->u.vnode, descriptor->cookie, buffer,
6023 		bufferSize, _count);
6024 }
6025 
6026 
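/*!	Rewrites a dirent as read from the file system before it is handed
	to userland: sets d_pdev/d_pino from \a parent, resolves the ".."
	entry of a directory that covers another vnode, and replaces the IDs
	of covered vnodes with those of the topmost covering vnode.
*/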
6027 static status_t
6028 fix_dirent(struct vnode* parent, struct dirent* entry,
6029 	struct io_context* ioContext)
6030 {
6031 	// set d_pdev and d_pino
6032 	entry->d_pdev = parent->device;
6033 	entry->d_pino = parent->id;
6034 
6035 	// If this is the ".." entry and the directory is covering another vnode,
6036 	// we need to replace d_dev and d_ino with the actual values.
6037 	if (strcmp(entry->d_name, "..") == 0 && parent->IsCovering()) {
6038 		return resolve_covered_parent(parent, &entry->d_dev, &entry->d_ino,
6039 			ioContext);
6040 	}
6041 
6042 	// resolve covered vnodes
6043 	ReadLocker _(&sVnodeLock);
6044 
6045 	struct vnode* vnode = lookup_vnode(entry->d_dev, entry->d_ino);
6046 	if (vnode != NULL && vnode->covered_by != NULL) {
6047 		do {
6048 			vnode = vnode->covered_by;
6049 		} while (vnode->covered_by != NULL);
6050 
6051 		entry->d_dev = vnode->device;
6052 		entry->d_ino = vnode->id;
6053 	}
6054 
6055 	return B_OK;
6056 }
6057 
6058 
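/*!	Reads the next entries from the given directory vnode into \a buffer
	and adjusts every returned dirent via fix_dirent().
*/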
6059 static status_t
6060 dir_read(struct io_context* ioContext, struct vnode* vnode, void* cookie,
6061 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6062 {
6063 	if (!HAS_FS_CALL(vnode, read_dir))
6064 		return B_UNSUPPORTED;
6065 
6066 	status_t error = FS_CALL(vnode, read_dir, cookie, buffer, bufferSize,
6067 		_count);
6068 	if (error != B_OK)
6069 		return error;
6070 
6071 	// we need to adjust the read dirents
6072 	uint32 count = *_count;
6073 	for (uint32 i = 0; i < count; i++) {
6074 		error = fix_dirent(vnode, buffer, ioContext);
6075 		if (error != B_OK)
6076 			return error;
6077 
6078 		buffer = (struct dirent*)((uint8*)buffer + buffer->d_reclen);
6079 	}
6080 
6081 	return error;
6082 }
6083 
6084 
6085 static status_t
6086 dir_rewind(struct file_descriptor* descriptor)
6087 {
6088 	struct vnode* vnode = descriptor->u.vnode;
6089 
6090 	if (HAS_FS_CALL(vnode, rewind_dir)) {
6091 		return FS_CALL(vnode, rewind_dir, descriptor->cookie);
6092 	}
6093 
6094 	return B_UNSUPPORTED;
6095 }
6096 
6097 
6098 static status_t
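/*!	Removes the directory at the given FD and path combination. Trailing
	slashes and "." components are stripped from the path first;
	removing "." or ".." is refused with B_NOT_ALLOWED.
*/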
6099 dir_remove(int fd, char* path, bool kernel)
6100 {
6101 	char name[B_FILE_NAME_LENGTH];
6102 	status_t status;
6103 
6104 	if (path != NULL) {
6105 		// we need to make sure our path name doesn't end with "/", ".",
6106 		// or ".."
6107 		char* lastSlash;
6108 		while ((lastSlash = strrchr(path, '/')) != NULL) {
6109 			char* leaf = lastSlash + 1;
6110 			if (!strcmp(leaf, ".."))
6111 				return B_NOT_ALLOWED;
6112 
6113 			// skip over multiple consecutive slashes
6114 			while (lastSlash > path && lastSlash[-1] == '/')
6115 				lastSlash--;
6116 
6117 			if (leaf[0] != '\0' && strcmp(leaf, ".") != 0)
6118 				break;
6121 			// "name/" -> "name", or "name/." -> "name"
6122 			lastSlash[0] = '\0';
6123 		}
6124 
6125 		if (!strcmp(path, ".") || !strcmp(path, ".."))
6126 			return B_NOT_ALLOWED;
6127 	}
6128 
6129 	VnodePutter directory;
6130 	status = fd_and_path_to_dir_vnode(fd, path, directory, name, kernel);
6131 	if (status != B_OK)
6132 		return status;
6133 
6134 	if (HAS_FS_CALL(directory, remove_dir))
6135 		status = FS_CALL(directory.Get(), remove_dir, name);
6136 	else
6137 		status = B_READ_ONLY_DEVICE;
6138 
6139 	return status;
6140 }
6141 
6142 
6143 static status_t
6144 common_ioctl(struct file_descriptor* descriptor, ulong op, void* buffer,
6145 	size_t length)
6146 {
6147 	struct vnode* vnode = descriptor->u.vnode;
6148 
6149 	if (HAS_FS_CALL(vnode, ioctl))
6150 		return FS_CALL(vnode, ioctl, descriptor->cookie, op, buffer, length);
6151 
6152 	return B_DEV_INVALID_IOCTL;
6153 }
6154 
6155 
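/*!	Back end for the fcntl() syscalls (kernel and userland). For the
	locking ops (F_GETLK, F_SETLK, F_SETLKW) the flock argument is
	copied in up front; if the file system does not implement its own
	locking hooks, the generic advisory locking implementation is used.
*/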
6156 static status_t
6157 common_fcntl(int fd, int op, size_t argument, bool kernel)
6158 {
6159 	struct flock flock;
6160 
6161 	FUNCTION(("common_fcntl(fd = %d, op = %d, argument = %lx, %s)\n",
6162 		fd, op, argument, kernel ? "kernel" : "user"));
6163 
6164 	struct io_context* context = get_current_io_context(kernel);
6165 
6166 	FileDescriptorPutter descriptor(get_fd(context, fd));
6167 	if (!descriptor.IsSet())
6168 		return B_FILE_ERROR;
6169 
6170 	struct vnode* vnode = fd_vnode(descriptor.Get());
6171 
6172 	status_t status = B_OK;
6173 
6174 	if (op == F_SETLK || op == F_SETLKW || op == F_GETLK) {
6175 		if (descriptor->type != FDTYPE_FILE)
6176 			status = B_BAD_VALUE;
6177 		else if (kernel)
6178 			memcpy(&flock, (struct flock*)argument, sizeof(struct flock));
6179 		else if (user_memcpy(&flock, (struct flock*)argument,
6180 				sizeof(struct flock)) != B_OK)
6181 			status = B_BAD_ADDRESS;
6182 		if (status != B_OK)
6183 			return status;
6184 	}
6185 
6186 	switch (op) {
6187 		case F_SETFD:
6188 		{
6189 			// Set file descriptor flags
6190 
6191 			// FD_CLOEXEC is the only flag available at this time
6192 			mutex_lock(&context->io_mutex);
6193 			fd_set_close_on_exec(context, fd, (argument & FD_CLOEXEC) != 0);
6194 			mutex_unlock(&context->io_mutex);
6195 
6196 			status = B_OK;
6197 			break;
6198 		}
6199 
6200 		case F_GETFD:
6201 		{
6202 			// Get file descriptor flags
6203 			mutex_lock(&context->io_mutex);
6204 			status = fd_close_on_exec(context, fd) ? FD_CLOEXEC : 0;
6205 			mutex_unlock(&context->io_mutex);
6206 			break;
6207 		}
6208 
6209 		case F_SETFL:
6210 			// Set file descriptor open mode
6211 
6212 			// we only accept changes to O_APPEND and O_NONBLOCK
6213 			argument &= O_APPEND | O_NONBLOCK;
6214 			if (descriptor->ops->fd_set_flags != NULL) {
6215 				status = descriptor->ops->fd_set_flags(descriptor.Get(), argument);
6216 			} else if (vnode != NULL && HAS_FS_CALL(vnode, set_flags)) {
6217 				status = FS_CALL(vnode, set_flags, descriptor->cookie,
6218 					(int)argument);
6219 			} else
6220 				status = B_UNSUPPORTED;
6221 
6222 			if (status == B_OK) {
6223 				// update this descriptor's open_mode field
6224 				descriptor->open_mode = (descriptor->open_mode
6225 					& ~(O_APPEND | O_NONBLOCK)) | argument;
6226 			}
6227 
6228 			break;
6229 
6230 		case F_GETFL:
6231 			// Get file descriptor open mode
6232 			status = descriptor->open_mode;
6233 			break;
6234 
6235 		case F_DUPFD:
6236 		case F_DUPFD_CLOEXEC:
6237 		{
6238 			status = new_fd_etc(context, descriptor.Get(), (int)argument);
6239 			if (status >= 0) {
6240 				mutex_lock(&context->io_mutex);
6241 				fd_set_close_on_exec(context, status, op == F_DUPFD_CLOEXEC);
6242 				mutex_unlock(&context->io_mutex);
6243 
6244 				atomic_add(&descriptor->ref_count, 1);
6245 			}
6246 			break;
6247 		}
6248 
6249 		case F_GETLK:
6250 			if (vnode != NULL) {
6251 				struct flock normalizedLock;
6252 
6253 				memcpy(&normalizedLock, &flock, sizeof(struct flock));
6254 				status = normalize_flock(descriptor.Get(), &normalizedLock);
6255 				if (status != B_OK)
6256 					break;
6257 
6258 				if (HAS_FS_CALL(vnode, test_lock)) {
6259 					status = FS_CALL(vnode, test_lock, descriptor->cookie,
6260 						&normalizedLock);
6261 				} else
6262 					status = test_advisory_lock(vnode, &normalizedLock);
6263 				if (status == B_OK) {
6264 					if (normalizedLock.l_type == F_UNLCK) {
6265 						// no conflicting lock found, copy back the same struct
6266 						// we were given except change type to F_UNLCK
6267 						flock.l_type = F_UNLCK;
6268 						if (kernel) {
6269 							memcpy((struct flock*)argument, &flock,
6270 								sizeof(struct flock));
6271 						} else {
6272 							status = user_memcpy((struct flock*)argument,
6273 								&flock, sizeof(struct flock));
6274 						}
6275 					} else {
6276 						// a conflicting lock was found, copy back its range and
6277 						// type
6278 						if (normalizedLock.l_len == OFF_MAX)
6279 							normalizedLock.l_len = 0;
6280 
6281 						if (kernel) {
6282 							memcpy((struct flock*)argument,
6283 								&normalizedLock, sizeof(struct flock));
6284 						} else {
6285 							status = user_memcpy((struct flock*)argument,
6286 								&normalizedLock, sizeof(struct flock));
6287 						}
6288 					}
6289 				}
6290 			} else
6291 				status = B_BAD_VALUE;
6292 			break;
6293 
6294 		case F_SETLK:
6295 		case F_SETLKW:
6296 			status = normalize_flock(descriptor.Get(), &flock);
6297 			if (status != B_OK)
6298 				break;
6299 
6300 			if (vnode == NULL) {
6301 				status = B_BAD_VALUE;
6302 			} else if (flock.l_type == F_UNLCK) {
6303 				if (HAS_FS_CALL(vnode, release_lock)) {
6304 					status = FS_CALL(vnode, release_lock, descriptor->cookie,
6305 						&flock);
6306 				} else {
6307 					status = release_advisory_lock(vnode, context, NULL,
6308 						&flock);
6309 				}
6310 			} else {
6311 				// the open mode must match the lock type
6312 				if (((descriptor->open_mode & O_RWMASK) == O_RDONLY
6313 						&& flock.l_type == F_WRLCK)
6314 					|| ((descriptor->open_mode & O_RWMASK) == O_WRONLY
6315 						&& flock.l_type == F_RDLCK))
6316 					status = B_FILE_ERROR;
6317 				else {
6318 					if (HAS_FS_CALL(vnode, acquire_lock)) {
6319 						status = FS_CALL(vnode, acquire_lock,
6320 							descriptor->cookie, &flock, op == F_SETLKW);
6321 					} else {
6322 						status = acquire_advisory_lock(vnode, context, NULL,
6323 							&flock, op == F_SETLKW);
6324 					}
6325 				}
6326 			}
6327 			break;
6328 
6329 		// TODO: add support for more ops?
6330 
6331 		default:
6332 			status = B_BAD_VALUE;
6333 	}
6334 
6335 	return status;
6336 }
6337 
6338 
6339 static status_t
6340 common_sync(int fd, bool kernel)
6341 {
6342 	FUNCTION(("common_sync: entry. fd %d kernel %d\n", fd, kernel));
6343 
6344 	struct vnode* vnode;
6345 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6346 	if (!descriptor.IsSet())
6347 		return B_FILE_ERROR;
6348 
6349 	status_t status;
6350 	if (HAS_FS_CALL(vnode, fsync))
6351 		status = FS_CALL_NO_PARAMS(vnode, fsync);
6352 	else
6353 		status = B_UNSUPPORTED;
6354 
6355 	return status;
6356 }
6357 
6358 
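/*!	Locks the node referred to by \a fd exclusively for the calling
	descriptor by atomically setting the vnode's mandatory_locked_by
	field. Fails with B_BUSY if another descriptor already holds the
	lock.
*/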
6359 static status_t
6360 common_lock_node(int fd, bool kernel)
6361 {
6362 	struct vnode* vnode;
6363 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6364 	if (!descriptor.IsSet())
6365 		return B_FILE_ERROR;
6366 
6367 	status_t status = B_OK;
6368 
6369 	// We need to set the lock atomically - someone else
6370 	// might set one at the same time
6371 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6372 			descriptor.Get(), (file_descriptor*)NULL) != NULL)
6373 		status = B_BUSY;
6374 
6375 	return status;
6376 }
6377 
6378 
6379 static status_t
6380 common_unlock_node(int fd, bool kernel)
6381 {
6382 	struct vnode* vnode;
6383 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6384 	if (!descriptor.IsSet())
6385 		return B_FILE_ERROR;
6386 
6387 	status_t status = B_OK;
6388 
6389 	// We need to clear the lock atomically - someone else
6390 	// might set one at the same time
6391 	if (atomic_pointer_test_and_set(&vnode->mandatory_locked_by,
6392 			(file_descriptor*)NULL, descriptor.Get()) != descriptor.Get())
6393 		status = B_BAD_VALUE;
6394 
6395 	return status;
6396 }
6397 
6398 
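/*!	Preallocates the given range of the file referred to by \a fd.
	Only regular files opened for writing are supported; if the file
	system has no preallocate() hook, B_UNSUPPORTED is returned (or
	B_READ_ONLY_DEVICE if it cannot be written to at all).
*/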
6399 static status_t
6400 common_preallocate(int fd, off_t offset, off_t length, bool kernel)
6401 {
6402 	if (offset < 0 || length == 0)
6403 		return B_BAD_VALUE;
6404 	if (offset > OFF_MAX - length)
6405 		return B_FILE_TOO_LARGE;
6406 
6407 	struct vnode* vnode;
6408 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6409 	if (!descriptor.IsSet() || (descriptor->open_mode & O_RWMASK) == O_RDONLY)
6410 		return B_FILE_ERROR;
6411 
6412 	switch (vnode->Type() & S_IFMT) {
6413 		case S_IFIFO:
6414 		case S_IFSOCK:
6415 			return ESPIPE;
6416 
6417 		case S_IFBLK:
6418 		case S_IFCHR:
6419 		case S_IFDIR:
6420 		case S_IFLNK:
6421 			return B_DEVICE_NOT_FOUND;
6422 
6423 		case S_IFREG:
6424 			break;
6425 	}
6426 
6427 	status_t status = B_OK;
6428 	if (HAS_FS_CALL(vnode, preallocate)) {
6429 		status = FS_CALL(vnode, preallocate, offset, length);
6430 	} else {
6431 		status = HAS_FS_CALL(vnode, write)
6432 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6433 	}
6434 
6435 	return status;
6436 }
6437 
6438 
6439 static status_t
6440 common_read_link(int fd, char* path, char* buffer, size_t* _bufferSize,
6441 	bool kernel)
6442 {
6443 	VnodePutter vnode;
6444 	status_t status;
6445 
6446 	status = fd_and_path_to_vnode(fd, path, false, vnode, NULL, kernel);
6447 	if (status != B_OK)
6448 		return status;
6449 
6450 	if (HAS_FS_CALL(vnode, read_symlink)) {
6451 		status = FS_CALL(vnode.Get(), read_symlink, buffer, _bufferSize);
6452 	} else
6453 		status = B_BAD_VALUE;
6454 
6455 	return status;
6456 }
6457 
6458 
6459 static status_t
6460 common_create_symlink(int fd, char* path, const char* toPath, int mode,
6461 	bool kernel)
6462 {
6463 	// path validity checks have to be in the calling function!
6464 	char name[B_FILE_NAME_LENGTH];
6465 	status_t status;
6466 
6467 	FUNCTION(("common_create_symlink(fd = %d, path = %s, toPath = %s, "
6468 		"mode = %d, kernel = %d)\n", fd, path, toPath, mode, kernel));
6469 
6470 	VnodePutter vnode;
6471 	status = fd_and_path_to_dir_vnode(fd, path, vnode, name, kernel);
6472 	if (status != B_OK)
6473 		return status;
6474 
6475 	if (HAS_FS_CALL(vnode, create_symlink))
6476 		status = FS_CALL(vnode.Get(), create_symlink, name, toPath, mode);
6477 	else {
6478 		status = HAS_FS_CALL(vnode, write)
6479 			? B_UNSUPPORTED : B_READ_ONLY_DEVICE;
6480 	}
6481 
6482 	return status;
6483 }
6484 
6485 
6486 static status_t
6487 common_create_link(int pathFD, char* path, int toFD, char* toPath,
6488 	bool traverseLeafLink, bool kernel)
6489 {
6490 	// path validity checks have to be in the calling function!
6491 
6492 	FUNCTION(("common_create_link(path = %s, toPath = %s, kernel = %d)\n", path,
6493 		toPath, kernel));
6494 
6495 	char name[B_FILE_NAME_LENGTH];
6496 	VnodePutter directory;
6497 	status_t status = fd_and_path_to_dir_vnode(pathFD, path, directory, name,
6498 		kernel);
6499 	if (status != B_OK)
6500 		return status;
6501 
6502 	VnodePutter vnode;
6503 	status = fd_and_path_to_vnode(toFD, toPath, traverseLeafLink, vnode, NULL,
6504 		kernel);
6505 	if (status != B_OK)
6506 		return status;
6507 
6508 	if (directory->mount != vnode->mount)
6509 		return B_CROSS_DEVICE_LINK;
6510 
6511 	if (HAS_FS_CALL(directory, link))
6512 		status = FS_CALL(directory.Get(), link, name, vnode.Get());
6513 	else
6514 		status = B_READ_ONLY_DEVICE;
6515 
6516 	return status;
6517 }
6518 
6519 
6520 static status_t
6521 common_unlink(int fd, char* path, bool kernel)
6522 {
6523 	char filename[B_FILE_NAME_LENGTH];
6524 	status_t status;
6525 
6526 	FUNCTION(("common_unlink: fd: %d, path '%s', kernel %d\n", fd, path,
6527 		kernel));
6528 
6529 	VnodePutter vnode;
6530 	status = fd_and_path_to_dir_vnode(fd, path, vnode, filename, kernel);
6531 	if (status < 0)
6532 		return status;
6533 
6534 	if (HAS_FS_CALL(vnode, unlink))
6535 		status = FS_CALL(vnode.Get(), unlink, filename);
6536 	else
6537 		status = B_READ_ONLY_DEVICE;
6538 
6539 	return status;
6540 }
6541 
6542 
6543 static status_t
6544 common_access(int fd, char* path, int mode, bool effectiveUserGroup, bool kernel)
6545 {
6546 	status_t status;
6547 
6548 	// TODO: honor effectiveUserGroup argument
6549 
6550 	VnodePutter vnode;
6551 	status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
6552 	if (status != B_OK)
6553 		return status;
6554 
6555 	if (HAS_FS_CALL(vnode, access))
6556 		status = FS_CALL(vnode.Get(), access, mode);
6557 	else
6558 		status = B_OK;
6559 
6560 	return status;
6561 }
6562 
6563 
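/*!	Renames the entry at \a fd + \a path to \a newFD + \a newPath. Both
	locations are resolved to parent directory and leaf name first;
	renames across devices as well as degenerate names ("", ".", "..")
	are refused.
*/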
6564 static status_t
6565 common_rename(int fd, char* path, int newFD, char* newPath, bool kernel)
6566 {
6567 	status_t status;
6568 
6569 	FUNCTION(("common_rename(fd = %d, path = %s, newFD = %d, newPath = %s, "
6570 		"kernel = %d)\n", fd, path, newFD, newPath, kernel));
6571 
6572 	VnodePutter fromVnode;
6573 	char fromName[B_FILE_NAME_LENGTH];
6574 	status = fd_and_path_to_dir_vnode(fd, path, fromVnode, fromName, kernel);
6575 	if (status != B_OK)
6576 		return status;
6577 
6578 	VnodePutter toVnode;
6579 	char toName[B_FILE_NAME_LENGTH];
6580 	status = fd_and_path_to_dir_vnode(newFD, newPath, toVnode, toName, kernel);
6581 	if (status != B_OK)
6582 		return status;
6583 
6584 	if (fromVnode->device != toVnode->device)
6585 		return B_CROSS_DEVICE_LINK;
6586 
6587 	if (fromName[0] == '\0' || toName[0] == '\0'
6588 		|| !strcmp(fromName, ".") || !strcmp(fromName, "..")
6589 		|| !strcmp(toName, ".") || !strcmp(toName, "..")
6590 		|| (fromVnode.Get() == toVnode.Get() && !strcmp(fromName, toName))) {
6591 		return B_BAD_VALUE;
6592 	}
6593 
6594 	if (HAS_FS_CALL(fromVnode, rename))
6595 		status = FS_CALL(fromVnode.Get(), rename, fromName, toVnode.Get(), toName);
6596 	else
6597 		status = B_READ_ONLY_DEVICE;
6598 
6599 	return status;
6600 }
6601 
6602 
6603 static status_t
6604 common_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6605 {
6606 	struct vnode* vnode = descriptor->u.vnode;
6607 
6608 	FUNCTION(("common_read_stat: stat %p\n", stat));
6609 
6610 	// TODO: remove this once all file systems properly set them!
6611 	stat->st_crtim.tv_nsec = 0;
6612 	stat->st_ctim.tv_nsec = 0;
6613 	stat->st_mtim.tv_nsec = 0;
6614 	stat->st_atim.tv_nsec = 0;
6615 
6616 	return vfs_stat_vnode(vnode, stat);
6617 }
6618 
6619 
6620 static status_t
6621 common_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6622 	int statMask)
6623 {
6624 	struct vnode* vnode = descriptor->u.vnode;
6625 
6626 	FUNCTION(("common_write_stat(vnode = %p, stat = %p, statMask = %d)\n",
6627 		vnode, stat, statMask));
6628 
6629 	if ((descriptor->open_mode & O_RWMASK) == O_RDONLY
6630 		&& (statMask & B_STAT_SIZE) != 0) {
6631 		return B_BAD_VALUE;
6632 	}
6633 
6634 	if (!HAS_FS_CALL(vnode, write_stat))
6635 		return B_READ_ONLY_DEVICE;
6636 
6637 	return FS_CALL(vnode, write_stat, stat, statMask);
6638 }
6639 
6640 
6641 static status_t
6642 common_path_read_stat(int fd, char* path, bool traverseLeafLink,
6643 	struct stat* stat, bool kernel)
6644 {
6645 	FUNCTION(("common_path_read_stat: fd: %d, path '%s', stat %p,\n", fd, path,
6646 		stat));
6647 
6648 	VnodePutter vnode;
6649 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6650 		NULL, kernel);
6651 	if (status != B_OK)
6652 		return status;
6653 
6654 	status = vfs_stat_vnode(vnode.Get(), stat);
6655 
6656 	return status;
6657 }
6658 
6659 
6660 static status_t
6661 common_path_write_stat(int fd, char* path, bool traverseLeafLink,
6662 	const struct stat* stat, int statMask, bool kernel)
6663 {
6664 	FUNCTION(("common_write_stat: fd: %d, path '%s', stat %p, stat_mask %d, "
6665 		"kernel %d\n", fd, path, stat, statMask, kernel));
6666 
6667 	VnodePutter vnode;
6668 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6669 		NULL, kernel);
6670 	if (status != B_OK)
6671 		return status;
6672 
6673 	if (HAS_FS_CALL(vnode, write_stat))
6674 		status = FS_CALL(vnode.Get(), write_stat, stat, statMask);
6675 	else
6676 		status = B_READ_ONLY_DEVICE;
6677 
6678 	return status;
6679 }
6680 
6681 
6682 static int
6683 attr_dir_open(int fd, char* path, bool traverseLeafLink, bool kernel)
6684 {
6685 	FUNCTION(("attr_dir_open(fd = %d, path = '%s', kernel = %d)\n", fd, path,
6686 		kernel));
6687 
6688 	VnodePutter vnode;
6689 	status_t status = fd_and_path_to_vnode(fd, path, traverseLeafLink, vnode,
6690 		NULL, kernel);
6691 	if (status != B_OK)
6692 		return status;
6693 
6694 	status = open_attr_dir_vnode(vnode.Get(), kernel);
6695 	if (status >= 0)
6696 		vnode.Detach();
6697 
6698 	return status;
6699 }
6700 
6701 
6702 static status_t
6703 attr_dir_close(struct file_descriptor* descriptor)
6704 {
6705 	struct vnode* vnode = descriptor->u.vnode;
6706 
6707 	FUNCTION(("attr_dir_close(descriptor = %p)\n", descriptor));
6708 
6709 	if (HAS_FS_CALL(vnode, close_attr_dir))
6710 		return FS_CALL(vnode, close_attr_dir, descriptor->cookie);
6711 
6712 	return B_OK;
6713 }
6714 
6715 
6716 static void
6717 attr_dir_free_fd(struct file_descriptor* descriptor)
6718 {
6719 	struct vnode* vnode = descriptor->u.vnode;
6720 
6721 	if (vnode != NULL) {
6722 		FS_CALL(vnode, free_attr_dir_cookie, descriptor->cookie);
6723 		put_vnode(vnode);
6724 	}
6725 }
6726 
6727 
6728 static status_t
6729 attr_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
6730 	struct dirent* buffer, size_t bufferSize, uint32* _count)
6731 {
6732 	struct vnode* vnode = descriptor->u.vnode;
6733 
6734 	FUNCTION(("attr_dir_read(descriptor = %p)\n", descriptor));
6735 
6736 	if (HAS_FS_CALL(vnode, read_attr_dir))
6737 		return FS_CALL(vnode, read_attr_dir, descriptor->cookie, buffer,
6738 			bufferSize, _count);
6739 
6740 	return B_UNSUPPORTED;
6741 }
6742 
6743 
6744 static status_t
6745 attr_dir_rewind(struct file_descriptor* descriptor)
6746 {
6747 	struct vnode* vnode = descriptor->u.vnode;
6748 
6749 	FUNCTION(("attr_dir_rewind(descriptor = %p)\n", descriptor));
6750 
6751 	if (HAS_FS_CALL(vnode, rewind_attr_dir))
6752 		return FS_CALL(vnode, rewind_attr_dir, descriptor->cookie);
6753 
6754 	return B_UNSUPPORTED;
6755 }
6756 
6757 
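/*!	Creates the attribute \a name on the node at the given FD and path
	combination and opens it, returning a new file descriptor. If no
	descriptor can be allocated, the just created attribute is removed
	again.
*/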
6758 static int
6759 attr_create(int fd, char* path, const char* name, uint32 type,
6760 	int openMode, bool kernel)
6761 {
6762 	if (name == NULL || *name == '\0')
6763 		return B_BAD_VALUE;
6764 
6765 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6766 	VnodePutter vnode;
6767 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6768 		kernel);
6769 	if (status != B_OK)
6770 		return status;
6771 
6772 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6773 		return B_LINK_LIMIT;
6774 
6775 	if (!HAS_FS_CALL(vnode, create_attr))
6776 		return B_READ_ONLY_DEVICE;
6777 
6778 	void* cookie;
6779 	status = FS_CALL(vnode.Get(), create_attr, name, type, openMode, &cookie);
6780 	if (status != B_OK)
6781 		return status;
6782 
6783 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode.Get(), cookie, openMode, kernel);
6784 	if (fd >= 0) {
6785 		vnode.Detach();
6786 		return fd;
6787 	}
6788 
6789 	status = fd;
6790 
6791 	FS_CALL(vnode.Get(), close_attr, cookie);
6792 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6793 
6794 	FS_CALL(vnode.Get(), remove_attr, name);
6795 
6796 	return status;
6797 }
6798 
6799 
6800 static int
6801 attr_open(int fd, char* path, const char* name, int openMode, bool kernel)
6802 {
6803 	if (name == NULL || *name == '\0')
6804 		return B_BAD_VALUE;
6805 
6806 	bool traverse = (openMode & (O_NOTRAVERSE | O_NOFOLLOW)) == 0;
6807 	VnodePutter vnode;
6808 	status_t status = fd_and_path_to_vnode(fd, path, traverse, vnode, NULL,
6809 		kernel);
6810 	if (status != B_OK)
6811 		return status;
6812 
6813 	if ((openMode & O_NOFOLLOW) != 0 && S_ISLNK(vnode->Type()))
6814 		return B_LINK_LIMIT;
6815 
6816 	if (!HAS_FS_CALL(vnode, open_attr))
6817 		return B_UNSUPPORTED;
6818 
6819 	void* cookie;
6820 	status = FS_CALL(vnode.Get(), open_attr, name, openMode, &cookie);
6821 	if (status != B_OK)
6822 		return status;
6823 
6824 	// now we only need a file descriptor for this attribute and we're done
6825 	fd = get_new_fd(FDTYPE_ATTR, NULL, vnode.Get(), cookie, openMode, kernel);
6826 	if (fd >= 0) {
6827 		vnode.Detach();
6828 		return fd;
6829 	}
6830 
6831 	status = fd;
6832 
6833 	FS_CALL(vnode.Get(), close_attr, cookie);
6834 	FS_CALL(vnode.Get(), free_attr_cookie, cookie);
6835 
6836 	return status;
6837 }
6838 
6839 
6840 static status_t
6841 attr_close(struct file_descriptor* descriptor)
6842 {
6843 	struct vnode* vnode = descriptor->u.vnode;
6844 
6845 	FUNCTION(("attr_close(descriptor = %p)\n", descriptor));
6846 
6847 	if (HAS_FS_CALL(vnode, close_attr))
6848 		return FS_CALL(vnode, close_attr, descriptor->cookie);
6849 
6850 	return B_OK;
6851 }
6852 
6853 
6854 static void
6855 attr_free_fd(struct file_descriptor* descriptor)
6856 {
6857 	struct vnode* vnode = descriptor->u.vnode;
6858 
6859 	if (vnode != NULL) {
6860 		FS_CALL(vnode, free_attr_cookie, descriptor->cookie);
6861 		put_vnode(vnode);
6862 	}
6863 }
6864 
6865 
6866 static status_t
6867 attr_read(struct file_descriptor* descriptor, off_t pos, void* buffer,
6868 	size_t* length)
6869 {
6870 	struct vnode* vnode = descriptor->u.vnode;
6871 
6872 	FUNCTION(("attr_read: buf %p, pos %" B_PRIdOFF ", len %p = %ld\n", buffer,
6873 		pos, length, *length));
6874 
6875 	if (!HAS_FS_CALL(vnode, read_attr))
6876 		return B_UNSUPPORTED;
6877 
6878 	return FS_CALL(vnode, read_attr, descriptor->cookie, pos, buffer, length);
6879 }
6880 
6881 
6882 static status_t
6883 attr_write(struct file_descriptor* descriptor, off_t pos, const void* buffer,
6884 	size_t* length)
6885 {
6886 	struct vnode* vnode = descriptor->u.vnode;
6887 
6888 	FUNCTION(("attr_write: buf %p, pos %" B_PRIdOFF ", len %p\n", buffer, pos,
6889 		length));
6890 
6891 	if (!HAS_FS_CALL(vnode, write_attr))
6892 		return B_UNSUPPORTED;
6893 
6894 	return FS_CALL(vnode, write_attr, descriptor->cookie, pos, buffer, length);
6895 }
6896 
6897 
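/*!	Computes and sets the new position within an attribute. Like
	file_seek(), except that SEEK_END uses the file system's
	read_attr_stat() hook to determine the attribute's size.
*/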
6898 static off_t
6899 attr_seek(struct file_descriptor* descriptor, off_t pos, int seekType)
6900 {
6901 	off_t offset;
6902 
6903 	switch (seekType) {
6904 		case SEEK_SET:
6905 			offset = 0;
6906 			break;
6907 		case SEEK_CUR:
6908 			offset = descriptor->pos;
6909 			break;
6910 		case SEEK_END:
6911 		{
6912 			struct vnode* vnode = descriptor->u.vnode;
6913 			if (!HAS_FS_CALL(vnode, read_attr_stat))
6914 				return B_UNSUPPORTED;
6915 
6916 			struct stat stat;
6917 			status_t status = FS_CALL(vnode, read_attr_stat, descriptor->cookie,
6918 				&stat);
6919 			if (status != B_OK)
6920 				return status;
6921 
6922 			offset = stat.st_size;
6923 			break;
6924 		}
6925 		default:
6926 			return B_BAD_VALUE;
6927 	}
6928 
6929 	// assumes off_t is 64 bits wide
6930 	if (offset > 0 && LONGLONG_MAX - offset < pos)
6931 		return B_BUFFER_OVERFLOW;
6932 
6933 	pos += offset;
6934 	if (pos < 0)
6935 		return B_BAD_VALUE;
6936 
6937 	return descriptor->pos = pos;
6938 }
6939 
6940 
6941 static status_t
6942 attr_read_stat(struct file_descriptor* descriptor, struct stat* stat)
6943 {
6944 	struct vnode* vnode = descriptor->u.vnode;
6945 
6946 	FUNCTION(("attr_read_stat: stat 0x%p\n", stat));
6947 
6948 	if (!HAS_FS_CALL(vnode, read_attr_stat))
6949 		return B_UNSUPPORTED;
6950 
6951 	return FS_CALL(vnode, read_attr_stat, descriptor->cookie, stat);
6952 }
6953 
6954 
6955 static status_t
6956 attr_write_stat(struct file_descriptor* descriptor, const struct stat* stat,
6957 	int statMask)
6958 {
6959 	struct vnode* vnode = descriptor->u.vnode;
6960 
6961 	FUNCTION(("attr_write_stat: stat = %p, statMask %d\n", stat, statMask));
6962 
6963 	if (!HAS_FS_CALL(vnode, write_attr_stat))
6964 		return B_READ_ONLY_DEVICE;
6965 
6966 	return FS_CALL(vnode, write_attr_stat, descriptor->cookie, stat, statMask);
6967 }
6968 
6969 
6970 static status_t
6971 attr_remove(int fd, const char* name, bool kernel)
6972 {
6973 	if (name == NULL || *name == '\0')
6974 		return B_BAD_VALUE;
6975 
6976 	FUNCTION(("attr_remove: fd = %d, name = \"%s\", kernel %d\n", fd, name,
6977 		kernel));
6978 
6979 	struct vnode* vnode;
6980 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, kernel));
6981 	if (!descriptor.IsSet())
6982 		return B_FILE_ERROR;
6983 
6984 	status_t status;
6985 	if (HAS_FS_CALL(vnode, remove_attr))
6986 		status = FS_CALL(vnode, remove_attr, name);
6987 	else
6988 		status = B_READ_ONLY_DEVICE;
6989 
6990 	return status;
6991 }
6992 
6993 
6994 static status_t
6995 attr_rename(int fromFD, const char* fromName, int toFD, const char* toName,
6996 	bool kernel)
6997 {
6998 	if (fromName == NULL || *fromName == '\0' || toName == NULL
6999 		|| *toName == '\0')
7000 		return B_BAD_VALUE;
7001 
7002 	FUNCTION(("attr_rename: from fd = %d, from name = \"%s\", to fd = %d, to "
7003 		"name = \"%s\", kernel %d\n", fromFD, fromName, toFD, toName, kernel));
7004 
7005 	struct vnode* fromVnode;
7006 	FileDescriptorPutter fromDescriptor(get_fd_and_vnode(fromFD, &fromVnode, kernel));
7007 	if (!fromDescriptor.IsSet())
7008 		return B_FILE_ERROR;
7009 
7010 	struct vnode* toVnode;
7011 	FileDescriptorPutter toDescriptor(get_fd_and_vnode(toFD, &toVnode, kernel));
7012 	if (!toDescriptor.IsSet())
7013 		return B_FILE_ERROR;
7014 
7015 	// are the files on the same volume?
7016 	if (fromVnode->device != toVnode->device)
7017 		return B_CROSS_DEVICE_LINK;
7018 
7019 	status_t status;
7020 	if (HAS_FS_CALL(fromVnode, rename_attr)) {
7021 		status = FS_CALL(fromVnode, rename_attr, fromName, toVnode, toName);
7022 	} else
7023 		status = B_READ_ONLY_DEVICE;
7024 
7025 	return status;
7026 }
7027 
7028 
7029 static int
7030 index_dir_open(dev_t mountID, bool kernel)
7031 {
7032 	struct fs_mount* mount;
7033 	void* cookie;
7034 
7035 	FUNCTION(("index_dir_open(mountID = %" B_PRId32 ", kernel = %d)\n", mountID,
7036 		kernel));
7037 
7038 	status_t status = get_mount(mountID, &mount);
7039 	if (status != B_OK)
7040 		return status;
7041 
7042 	if (!HAS_FS_MOUNT_CALL(mount, open_index_dir)) {
7043 		status = B_UNSUPPORTED;
7044 		goto error;
7045 	}
7046 
7047 	status = FS_MOUNT_CALL(mount, open_index_dir, &cookie);
7048 	if (status != B_OK)
7049 		goto error;
7050 
7051 	// get fd for the index directory
7052 	int fd;
7053 	fd = get_new_fd(FDTYPE_INDEX_DIR, mount, NULL, cookie, O_CLOEXEC, kernel);
7054 	if (fd >= 0)
7055 		return fd;
7056 
7057 	// something went wrong
7058 	FS_MOUNT_CALL(mount, close_index_dir, cookie);
7059 	FS_MOUNT_CALL(mount, free_index_dir_cookie, cookie);
7060 
7061 	status = fd;
7062 
7063 error:
7064 	put_mount(mount);
7065 	return status;
7066 }
7067 
7068 
7069 static status_t
7070 index_dir_close(struct file_descriptor* descriptor)
7071 {
7072 	struct fs_mount* mount = descriptor->u.mount;
7073 
7074 	FUNCTION(("index_dir_close(descriptor = %p)\n", descriptor));
7075 
7076 	if (HAS_FS_MOUNT_CALL(mount, close_index_dir))
7077 		return FS_MOUNT_CALL(mount, close_index_dir, descriptor->cookie);
7078 
7079 	return B_OK;
7080 }
7081 
7082 
7083 static void
7084 index_dir_free_fd(struct file_descriptor* descriptor)
7085 {
7086 	struct fs_mount* mount = descriptor->u.mount;
7087 
7088 	if (mount != NULL) {
7089 		FS_MOUNT_CALL(mount, free_index_dir_cookie, descriptor->cookie);
7090 		put_mount(mount);
7091 	}
7092 }
7093 
7094 
7095 static status_t
7096 index_dir_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7097 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7098 {
7099 	struct fs_mount* mount = descriptor->u.mount;
7100 
7101 	if (HAS_FS_MOUNT_CALL(mount, read_index_dir)) {
7102 		return FS_MOUNT_CALL(mount, read_index_dir, descriptor->cookie, buffer,
7103 			bufferSize, _count);
7104 	}
7105 
7106 	return B_UNSUPPORTED;
7107 }
7108 
7109 
7110 static status_t
7111 index_dir_rewind(struct file_descriptor* descriptor)
7112 {
7113 	struct fs_mount* mount = descriptor->u.mount;
7114 
7115 	if (HAS_FS_MOUNT_CALL(mount, rewind_index_dir))
7116 		return FS_MOUNT_CALL(mount, rewind_index_dir, descriptor->cookie);
7117 
7118 	return B_UNSUPPORTED;
7119 }
7120 
7121 
7122 static status_t
7123 index_create(dev_t mountID, const char* name, uint32 type, uint32 flags,
7124 	bool kernel)
7125 {
7126 	FUNCTION(("index_create(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7127 		mountID, name, kernel));
7128 
7129 	struct fs_mount* mount;
7130 	status_t status = get_mount(mountID, &mount);
7131 	if (status != B_OK)
7132 		return status;
7133 
7134 	if (!HAS_FS_MOUNT_CALL(mount, create_index)) {
7135 		status = B_READ_ONLY_DEVICE;
7136 		goto out;
7137 	}
7138 
7139 	status = FS_MOUNT_CALL(mount, create_index, name, type, flags);
7140 
7141 out:
7142 	put_mount(mount);
7143 	return status;
7144 }
7145 
7146 
7147 #if 0
7148 static status_t
7149 index_read_stat(struct file_descriptor* descriptor, struct stat* stat)
7150 {
7151 	struct vnode* vnode = descriptor->u.vnode;
7152 
7153 	// ToDo: currently unused!
7154 	FUNCTION(("index_read_stat: stat 0x%p\n", stat));
7155 	if (!HAS_FS_CALL(vnode, read_index_stat))
7156 		return B_UNSUPPORTED;
7157 
7158 	return B_UNSUPPORTED;
7159 	//return FS_CALL(vnode, read_index_stat, descriptor->cookie, stat);
7160 }
7161 
7162 
7163 static void
7164 index_free_fd(struct file_descriptor* descriptor)
7165 {
7166 	struct vnode* vnode = descriptor->u.vnode;
7167 
7168 	if (vnode != NULL) {
7169 		FS_CALL(vnode, free_index_cookie, descriptor->cookie);
7170 		put_vnode(vnode);
7171 	}
7172 }
7173 #endif
7174 
7175 
7176 static status_t
7177 index_name_read_stat(dev_t mountID, const char* name, struct stat* stat,
7178 	bool kernel)
7179 {
7180 	FUNCTION(("index_name_read_stat(mountID = %" B_PRId32 ", name = %s, "
7181 		"kernel = %d)\n", mountID, name, kernel));
7182 
7183 	struct fs_mount* mount;
7184 	status_t status = get_mount(mountID, &mount);
7185 	if (status != B_OK)
7186 		return status;
7187 
7188 	if (!HAS_FS_MOUNT_CALL(mount, read_index_stat)) {
7189 		status = B_UNSUPPORTED;
7190 		goto out;
7191 	}
7192 
7193 	status = FS_MOUNT_CALL(mount, read_index_stat, name, stat);
7194 
7195 out:
7196 	put_mount(mount);
7197 	return status;
7198 }
7199 
7200 
7201 static status_t
7202 index_remove(dev_t mountID, const char* name, bool kernel)
7203 {
7204 	FUNCTION(("index_remove(mountID = %" B_PRId32 ", name = %s, kernel = %d)\n",
7205 		mountID, name, kernel));
7206 
7207 	struct fs_mount* mount;
7208 	status_t status = get_mount(mountID, &mount);
7209 	if (status != B_OK)
7210 		return status;
7211 
7212 	if (!HAS_FS_MOUNT_CALL(mount, remove_index)) {
7213 		status = B_READ_ONLY_DEVICE;
7214 		goto out;
7215 	}
7216 
7217 	status = FS_MOUNT_CALL(mount, remove_index, name);
7218 
7219 out:
7220 	put_mount(mount);
7221 	return status;
7222 }
7223 
7224 
7225 /*!	TODO: the query FS API is still pretty much the same as in R5.
7226 		It would be nice if queries received some more kernel support;
7227 		for example, query parsing should be moved into the kernel.
7228 */
7230 static int
7231 query_open(dev_t device, const char* query, uint32 flags, port_id port,
7232 	int32 token, bool kernel)
7233 {
7234 	struct fs_mount* mount;
7235 	void* cookie;
7236 
7237 	FUNCTION(("query_open(device = %" B_PRId32 ", query = \"%s\", kernel = %d)\n",
7238 		device, query, kernel));
7239 
7240 	status_t status = get_mount(device, &mount);
7241 	if (status != B_OK)
7242 		return status;
7243 
7244 	if (!HAS_FS_MOUNT_CALL(mount, open_query)) {
7245 		status = B_UNSUPPORTED;
7246 		goto error;
7247 	}
7248 
7249 	status = FS_MOUNT_CALL(mount, open_query, query, flags, port, token,
7250 		&cookie);
7251 	if (status != B_OK)
7252 		goto error;
7253 
7254 	// get fd for the query
7255 	int fd;
7256 	fd = get_new_fd(FDTYPE_QUERY, mount, NULL, cookie, O_CLOEXEC, kernel);
7257 	if (fd >= 0)
7258 		return fd;
7259 
7260 	status = fd;
7261 
7262 	// something went wrong
7263 	FS_MOUNT_CALL(mount, close_query, cookie);
7264 	FS_MOUNT_CALL(mount, free_query_cookie, cookie);
7265 
7266 error:
7267 	put_mount(mount);
7268 	return status;
7269 }
7270 
7271 
7272 static status_t
7273 query_close(struct file_descriptor* descriptor)
7274 {
7275 	struct fs_mount* mount = descriptor->u.mount;
7276 
7277 	FUNCTION(("query_close(descriptor = %p)\n", descriptor));
7278 
7279 	if (HAS_FS_MOUNT_CALL(mount, close_query))
7280 		return FS_MOUNT_CALL(mount, close_query, descriptor->cookie);
7281 
7282 	return B_OK;
7283 }
7284 
7285 
7286 static void
7287 query_free_fd(struct file_descriptor* descriptor)
7288 {
7289 	struct fs_mount* mount = descriptor->u.mount;
7290 
7291 	if (mount != NULL) {
7292 		FS_MOUNT_CALL(mount, free_query_cookie, descriptor->cookie);
7293 		put_mount(mount);
7294 	}
7295 }
7296 
7297 
7298 static status_t
7299 query_read(struct io_context* ioContext, struct file_descriptor* descriptor,
7300 	struct dirent* buffer, size_t bufferSize, uint32* _count)
7301 {
7302 	struct fs_mount* mount = descriptor->u.mount;
7303 
7304 	if (HAS_FS_MOUNT_CALL(mount, read_query)) {
7305 		return FS_MOUNT_CALL(mount, read_query, descriptor->cookie, buffer,
7306 			bufferSize, _count);
7307 	}
7308 
7309 	return B_UNSUPPORTED;
7310 }
7311 
7312 
7313 static status_t
7314 query_rewind(struct file_descriptor* descriptor)
7315 {
7316 	struct fs_mount* mount = descriptor->u.mount;
7317 
7318 	if (HAS_FS_MOUNT_CALL(mount, rewind_query))
7319 		return FS_MOUNT_CALL(mount, rewind_query, descriptor->cookie);
7320 
7321 	return B_UNSUPPORTED;
7322 }
7323 
7324 
7325 //	#pragma mark - General File System functions
7326 
7327 
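/*!	Mounts the file system \a fsName (or the one recognized by the disk
	device manager, if NULL) at the given path. Layered file systems are
	set up as a chain of fs_volumes. On success the new root vnode is
	linked with the vnode it covers, and the ID of the new mount is
	returned.
*/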
7328 static dev_t
7329 fs_mount(char* path, const char* device, const char* fsName, uint32 flags,
7330 	const char* args, bool kernel)
7331 {
7332 	struct ::fs_mount* mount;
7333 	status_t status = B_OK;
7334 	fs_volume* volume = NULL;
7335 	int32 layer = 0;
7336 	Vnode* coveredNode = NULL;
7337 
7338 	FUNCTION(("fs_mount: path = '%s', device = '%s', fs_name = '%s', flags = %#"
7339 		B_PRIx32 ", args = '%s'\n", path, device, fsName, flags, args));
7340 
7341 	// The path is always safe, we just have to make sure that fsName is
7342 	// at least superficially valid - we can't make any assumptions about
7343 	// args, though.
7344 	// A NULL fsName is OK if a device was given and the FS is not virtual;
7345 	// we'll get it from the DDM later.
7345 	if (fsName == NULL) {
7346 		if (!device || flags & B_MOUNT_VIRTUAL_DEVICE)
7347 			return B_BAD_VALUE;
7348 	} else if (fsName[0] == '\0')
7349 		return B_BAD_VALUE;
7350 
7351 	RecursiveLocker mountOpLocker(sMountOpLock);
7352 
7353 	// Helper to delete a newly created file device on failure.
7354 	// Not exactly beautiful, but helps to keep the code below cleaner.
7355 	struct FileDeviceDeleter {
7356 		FileDeviceDeleter() : id(-1) {}
7357 		~FileDeviceDeleter()
7358 		{
7359 			KDiskDeviceManager::Default()->DeleteFileDevice(id);
7360 		}
7361 
7362 		partition_id id;
7363 	} fileDeviceDeleter;
7364 
7365 	// If the file system is not a "virtual" one, the device argument should
7366 	// point to a real file/device (if given at all).
7367 	// get the partition
7368 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7369 	KPartition* partition = NULL;
7370 	KPath normalizedDevice;
7371 	bool newlyCreatedFileDevice = false;
7372 
7373 	if (!(flags & B_MOUNT_VIRTUAL_DEVICE) && device != NULL) {
7374 		// normalize the device path
7375 		status = normalizedDevice.SetTo(device, true);
7376 		if (status != B_OK)
7377 			return status;
7378 
7379 		// get a corresponding partition from the DDM
7380 		partition = ddm->RegisterPartition(normalizedDevice.Path());
7381 		if (partition == NULL) {
7382 			// Partition not found: this either means the user supplied
7383 			// an invalid path, or the path refers to an image file. We try
7384 			// to let the DDM create a file device for the path.
7385 			partition_id deviceID = ddm->CreateFileDevice(
7386 				normalizedDevice.Path(), &newlyCreatedFileDevice);
7387 			if (deviceID >= 0) {
7388 				partition = ddm->RegisterPartition(deviceID);
7389 				if (newlyCreatedFileDevice)
7390 					fileDeviceDeleter.id = deviceID;
7391 			}
7392 		}
7393 
7394 		if (!partition) {
7395 			TRACE(("fs_mount(): Partition `%s' not found.\n",
7396 				normalizedDevice.Path()));
7397 			return B_ENTRY_NOT_FOUND;
7398 		}
7399 
7400 		device = normalizedDevice.Path();
7401 			// correct path to file device
7402 	}
7403 	PartitionRegistrar partitionRegistrar(partition, true);
7404 
7405 	// Write-lock the partition's device. For the time being, we keep the lock
7406 	// until we're done mounting -- not nice, but it ensures that no one
7407 	// interferes.
7408 	// TODO: Just mark the partition busy while mounting!
7409 	KDiskDevice* diskDevice = NULL;
7410 	if (partition) {
7411 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7412 		if (!diskDevice) {
7413 			TRACE(("fs_mount(): Failed to lock disk device!\n"));
7414 			return B_ERROR;
7415 		}
7416 	}
7417 
7418 	DeviceWriteLocker writeLocker(diskDevice, true);
7419 		// this takes over the write lock acquired before
7420 
7421 	if (partition != NULL) {
7422 		// make sure that the partition is not busy
7423 		if (partition->IsBusy()) {
7424 			TRACE(("fs_mount(): Partition is busy.\n"));
7425 			return B_BUSY;
7426 		}
7427 
7428 		// if no FS name had been supplied, we get it from the partition
7429 		if (fsName == NULL) {
7430 			KDiskSystem* diskSystem = partition->DiskSystem();
7431 			if (!diskSystem) {
7432 				TRACE(("fs_mount(): No FS name was given, and the DDM didn't "
7433 					"recognize it.\n"));
7434 				return B_BAD_VALUE;
7435 			}
7436 
7437 			if (!diskSystem->IsFileSystem()) {
7438 				TRACE(("fs_mount(): No FS name was given, and the DDM found a "
7439 					"partitioning system.\n"));
7440 				return B_BAD_VALUE;
7441 			}
7442 
7443 			// The disk system name will not change, and the KDiskSystem
7444 			// object will not go away while the disk device is locked (and
7445 			// the partition has a reference to it), so this is safe.
7446 			fsName = diskSystem->Name();
7447 		}
7448 	}
7449 
7450 	mount = new(std::nothrow) (struct ::fs_mount);
7451 	if (mount == NULL)
7452 		return B_NO_MEMORY;
7453 
7454 	mount->device_name = strdup(device);
7455 		// "device" can be NULL
7456 
7457 	status = mount->entry_cache.Init();
7458 	if (status != B_OK)
7459 		goto err1;
7460 
7461 	// initialize structure
7462 	mount->id = sNextMountID++;
7463 	mount->partition = NULL;
7464 	mount->root_vnode = NULL;
7465 	mount->covers_vnode = NULL;
7466 	mount->unmounting = false;
7467 	mount->owns_file_device = false;
7468 	mount->volume = NULL;
7469 
7470 	// build up the volume(s)
7471 	while (true) {
7472 		char* layerFSName = get_file_system_name_for_layer(fsName, layer);
7473 		if (layerFSName == NULL) {
7474 			if (layer == 0) {
7475 				status = B_NO_MEMORY;
7476 				goto err1;
7477 			}
7478 
7479 			break;
7480 		}
7481 		MemoryDeleter layerFSNameDeleter(layerFSName);
7482 
7483 		volume = (fs_volume*)malloc(sizeof(fs_volume));
7484 		if (volume == NULL) {
7485 			status = B_NO_MEMORY;
7486 			goto err1;
7487 		}
7488 
7489 		volume->id = mount->id;
7490 		volume->partition = partition != NULL ? partition->ID() : -1;
7491 		volume->layer = layer++;
7492 		volume->private_volume = NULL;
7493 		volume->ops = NULL;
7494 		volume->sub_volume = NULL;
7495 		volume->super_volume = NULL;
7496 		volume->file_system = NULL;
7497 		volume->file_system_name = NULL;
7498 
7499 		volume->file_system_name = get_file_system_name(layerFSName);
7500 		if (volume->file_system_name == NULL) {
7501 			status = B_NO_MEMORY;
7502 			free(volume);
7503 			goto err1;
7504 		}
7505 
7506 		volume->file_system = get_file_system(layerFSName);
7507 		if (volume->file_system == NULL) {
7508 			status = B_DEVICE_NOT_FOUND;
7509 			free(volume->file_system_name);
7510 			free(volume);
7511 			goto err1;
7512 		}
7513 
7514 		if (mount->volume == NULL)
7515 			mount->volume = volume;
7516 		else {
7517 			volume->super_volume = mount->volume;
7518 			mount->volume->sub_volume = volume;
7519 			mount->volume = volume;
7520 		}
7521 	}
7522 
7523 	// insert mount struct into list before we call FS's mount() function
7524 	// so that vnodes can be created for this mount
7525 	rw_lock_write_lock(&sMountLock);
7526 	sMountsTable->Insert(mount);
7527 	rw_lock_write_unlock(&sMountLock);
7528 
7529 	ino_t rootID;
7530 
7531 	if (!sRoot) {
7532 		// we haven't mounted anything yet
7533 		if (strcmp(path, "/") != 0) {
7534 			status = B_ERROR;
7535 			goto err2;
7536 		}
7537 
7538 		status = mount->volume->file_system->mount(mount->volume, device, flags,
7539 			args, &rootID);
7540 		if (status != B_OK || mount->volume->ops == NULL)
7541 			goto err2;
7542 	} else {
7543 		{
7544 			VnodePutter temp;
7545 			status = path_to_vnode(path, true, temp, NULL, kernel);
7546 			coveredNode = temp.Detach();
7547 		}
7548 		if (status != B_OK)
7549 			goto err2;
7550 
7551 		mount->covers_vnode = coveredNode;
7552 
7553 		// make sure the covered vnode is a directory
7554 		if (!S_ISDIR(coveredNode->Type())) {
7555 			status = B_NOT_A_DIRECTORY;
7556 			goto err3;
7557 		}
7558 
7559 		if (coveredNode->IsCovered()) {
7560 			// this is already a covered vnode
7561 			status = B_BUSY;
7562 			goto err3;
7563 		}
7564 
7565 		// mount it/them
7566 		fs_volume* volume = mount->volume;
7567 		while (volume) {
7568 			status = volume->file_system->mount(volume, device, flags, args,
7569 				&rootID);
7570 			if (status != B_OK || volume->ops == NULL) {
7571 				if (status == B_OK && volume->ops == NULL)
7572 					panic("fs_mount: mount() succeeded but ops is NULL!");
7573 				if (volume->sub_volume)
7574 					goto err4;
7575 				goto err3;
7576 			}
7577 
7578 			volume = volume->super_volume;
7579 		}
7580 
7581 		volume = mount->volume;
7582 		while (volume) {
7583 			if (volume->ops->all_layers_mounted != NULL)
7584 				volume->ops->all_layers_mounted(volume);
7585 			volume = volume->super_volume;
7586 		}
7587 	}
7588 
7589 	// the root node is supposed to be owned by the file system - it must
7590 	// exist at this point
7591 	rw_lock_write_lock(&sVnodeLock);
7592 	mount->root_vnode = lookup_vnode(mount->id, rootID);
7593 	if (mount->root_vnode == NULL || mount->root_vnode->ref_count != 1) {
7594 		panic("fs_mount: file system does not own its root node!\n");
7595 		status = B_ERROR;
7596 		rw_lock_write_unlock(&sVnodeLock);
7597 		goto err4;
7598 	}
7599 
7600 	// set up the links between the root vnode and the vnode it covers
7601 	if (coveredNode != NULL) {
7602 		if (coveredNode->IsCovered()) {
7603 			// the vnode is covered now
7604 			status = B_BUSY;
7605 			rw_lock_write_unlock(&sVnodeLock);
7606 			goto err4;
7607 		}
7608 
7609 		mount->root_vnode->covers = coveredNode;
7610 		mount->root_vnode->SetCovering(true);
7611 
7612 		coveredNode->covered_by = mount->root_vnode;
7613 		coveredNode->SetCovered(true);
7614 	}
7615 	rw_lock_write_unlock(&sVnodeLock);
7616 
7617 	if (!sRoot) {
7618 		sRoot = mount->root_vnode;
7619 		mutex_lock(&sIOContextRootLock);
7620 		get_current_io_context(true)->root = sRoot;
7621 		mutex_unlock(&sIOContextRootLock);
7622 		inc_vnode_ref_count(sRoot);
7623 	}
7624 
7625 	// supply the partition (if any) with the mount cookie and mark it mounted
7626 	if (partition) {
7627 		partition->SetMountCookie(mount->volume->private_volume);
7628 		partition->SetVolumeID(mount->id);
7629 
7630 		// keep a partition reference as long as the partition is mounted
7631 		partitionRegistrar.Detach();
7632 		mount->partition = partition;
7633 		mount->owns_file_device = newlyCreatedFileDevice;
7634 		fileDeviceDeleter.id = -1;
7635 	}
7636 
7637 	notify_mount(mount->id,
7638 		coveredNode != NULL ? coveredNode->device : -1,
7639 		coveredNode ? coveredNode->id : -1);
7640 
7641 	return mount->id;
7642 
7643 err4:
7644 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7645 err3:
7646 	if (coveredNode != NULL)
7647 		put_vnode(coveredNode);
7648 err2:
7649 	rw_lock_write_lock(&sMountLock);
7650 	sMountsTable->Remove(mount);
7651 	rw_lock_write_unlock(&sMountLock);
7652 err1:
7653 	delete mount;
7654 
7655 	return status;
7656 }
7657 
7658 
7659 static status_t
7660 fs_unmount(char* path, dev_t mountID, uint32 flags, bool kernel)
7661 {
7662 	struct fs_mount* mount;
7663 	status_t err;
7664 
7665 	FUNCTION(("fs_unmount(path '%s', dev %" B_PRId32 ", kernel %d\n", path,
7666 		mountID, kernel));
7667 
7668 	VnodePutter pathVnode;
7669 	if (path != NULL) {
7670 		err = path_to_vnode(path, true, pathVnode, NULL, kernel);
7671 		if (err != B_OK)
7672 			return B_ENTRY_NOT_FOUND;
7673 	}
7674 
7675 	RecursiveLocker mountOpLocker(sMountOpLock);
7676 	ReadLocker mountLocker(sMountLock);
7677 
7678 	mount = find_mount(path != NULL ? pathVnode->device : mountID);
7679 	if (mount == NULL) {
7680 		panic("fs_unmount: find_mount() failed on root vnode @%p of mount\n",
7681 			pathVnode.Get());
7682 	}
7683 
7684 	mountLocker.Unlock();
7685 
7686 	if (path != NULL) {
7687 		if (mount->root_vnode != pathVnode.Get()) {
7688 			// not mountpoint
7689 			return B_BAD_VALUE;
7690 		}
7691 
7692 		pathVnode.Unset();
7693 	}
7694 
7695 	// if the volume is associated with a partition, lock the device of the
7696 	// partition as long as we are unmounting
7697 	KDiskDeviceManager* ddm = KDiskDeviceManager::Default();
7698 	KPartition* partition = mount->partition;
7699 	KDiskDevice* diskDevice = NULL;
7700 	if (partition != NULL) {
7701 		if (partition->Device() == NULL) {
7702 			dprintf("fs_unmount(): There is no device!\n");
7703 			return B_ERROR;
7704 		}
7705 		diskDevice = ddm->WriteLockDevice(partition->Device()->ID());
7706 		if (!diskDevice) {
7707 			TRACE(("fs_unmount(): Failed to lock disk device!\n"));
7708 			return B_ERROR;
7709 		}
7710 	}
7711 	DeviceWriteLocker writeLocker(diskDevice, true);
7712 
7713 	// make sure that the partition is not busy
7714 	if (partition != NULL) {
7715 		if ((flags & B_UNMOUNT_BUSY_PARTITION) == 0 && partition->IsBusy()) {
7716 			TRACE(("fs_unmount(): Partition is busy.\n"));
7717 			return B_BUSY;
7718 		}
7719 	}
7720 
7721 	// grab the vnode master mutex to keep someone from creating
7722 	// a vnode while we're figuring out if we can continue
7723 	WriteLocker vnodesWriteLocker(&sVnodeLock);
7724 
7725 	bool disconnectedDescriptors = false;
7726 
7727 	while (true) {
7728 		bool busy = false;
7729 
7730 		// cycle through the list of vnodes associated with this mount and
7731 		// make sure none of them is busy or still has references on it
7732 		VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7733 		while (struct vnode* vnode = iterator.Next()) {
7734 			if (vnode->IsBusy()) {
7735 				busy = true;
7736 				break;
7737 			}
7738 
7739 			// check the vnode's ref count -- subtract additional references for
7740 			// covering
7741 			int32 refCount = vnode->ref_count;
7742 			if (vnode->covers != NULL)
7743 				refCount--;
7744 			if (vnode->covered_by != NULL)
7745 				refCount--;
7746 
7747 			if (refCount != 0) {
7748 				// there are still vnodes in use on this mount, so we cannot
7749 				// unmount yet
7750 				busy = true;
7751 				break;
7752 			}
7753 		}
7754 
7755 		if (!busy)
7756 			break;
7757 
7758 		if ((flags & B_FORCE_UNMOUNT) == 0)
7759 			return B_BUSY;
7760 
7761 		if (disconnectedDescriptors) {
7762 			// wait a bit until the last access is finished, and then try again
7763 			vnodesWriteLocker.Unlock();
7764 			snooze(100000);
7765 			// TODO: if there is some kind of bug that prevents the ref counts
7766 			// from getting back to zero, this will fall into an endless loop...
7767 			vnodesWriteLocker.Lock();
7768 			continue;
7769 		}
7770 
7771 		// the file system is still busy - but we're forced to unmount it,
7772 		// so let's disconnect all open file descriptors
7773 
7774 		mount->unmounting = true;
7775 			// prevent new vnodes from being created
7776 
7777 		vnodesWriteLocker.Unlock();
7778 
7779 		disconnect_mount_or_vnode_fds(mount, NULL);
7780 		disconnectedDescriptors = true;
7781 
7782 		vnodesWriteLocker.Lock();
7783 	}
7784 
7785 	// We can safely continue. Mark all of the vnodes busy and put this
7786 	// mount structure into unmounting state. Also undo the vnode
7787 	// covers/covered_by links.
7788 	mount->unmounting = true;
7789 
7790 	VnodeList::Iterator iterator = mount->vnodes.GetIterator();
7791 	while (struct vnode* vnode = iterator.Next()) {
7792 		// Remove all covers/covered_by links from other mounts' nodes to this
7793 		// vnode and adjust the node ref count accordingly. We will release the
7794 		// references to the external vnodes below.
7795 		if (Vnode* coveredNode = vnode->covers) {
7796 			if (Vnode* coveringNode = vnode->covered_by) {
7797 				// We have both covered and covering vnodes, so just remove us
7798 				// from the chain.
7799 				coveredNode->covered_by = coveringNode;
7800 				coveringNode->covers = coveredNode;
7801 				vnode->ref_count -= 2;
7802 
7803 				vnode->covered_by = NULL;
7804 				vnode->covers = NULL;
7805 				vnode->SetCovering(false);
7806 				vnode->SetCovered(false);
7807 			} else {
7808 				// We only have a covered vnode. Remove its link to us.
7809 				coveredNode->covered_by = NULL;
7810 				coveredNode->SetCovered(false);
7811 				vnode->ref_count--;
7812 
7813 				// If the other node is an external vnode, we keep its link
7814 				// around so we can put the reference later on. Otherwise
7815 				// we get rid of it right now.
7816 				if (coveredNode->mount == mount) {
7817 					vnode->covers = NULL;
7818 					coveredNode->ref_count--;
7819 				}
7820 			}
7821 		} else if (Vnode* coveringNode = vnode->covered_by) {
7822 			// We only have a covering vnode. Remove its link to us.
7823 			coveringNode->covers = NULL;
7824 			coveringNode->SetCovering(false);
7825 			vnode->ref_count--;
7826 
7827 			// If the other node is an external vnode, we keep its link
7828 			// around so we can put the reference later on. Otherwise
7829 			// we get rid of it right now.
7830 			if (coveringNode->mount == mount) {
7831 				vnode->covered_by = NULL;
7832 				coveringNode->ref_count--;
7833 			}
7834 		}
7835 
7836 		vnode->SetBusy(true);
7837 		vnode_to_be_freed(vnode);
7838 	}
7839 
7840 	vnodesWriteLocker.Unlock();
7841 
7842 	// Free all vnodes associated with this mount.
7843 	// They will be removed from the mount list by free_vnode(), so
7844 	// we don't have to do that ourselves.
7845 	while (struct vnode* vnode = mount->vnodes.Head()) {
7846 		// Put the references to external covered/covering vnodes we kept above.
7847 		if (Vnode* coveredNode = vnode->covers)
7848 			put_vnode(coveredNode);
7849 		if (Vnode* coveringNode = vnode->covered_by)
7850 			put_vnode(coveringNode);
7851 
7852 		free_vnode(vnode, false);
7853 	}
7854 
7855 	// remove the mount structure from the hash table
7856 	rw_lock_write_lock(&sMountLock);
7857 	sMountsTable->Remove(mount);
7858 	rw_lock_write_unlock(&sMountLock);
7859 
7860 	mountOpLocker.Unlock();
7861 
7862 	FS_MOUNT_CALL_NO_PARAMS(mount, unmount);
7863 	notify_unmount(mount->id);
7864 
7865 	// dereference the partition and mark it unmounted
7866 	if (partition) {
7867 		partition->SetVolumeID(-1);
7868 		partition->SetMountCookie(NULL);
7869 
7870 		if (mount->owns_file_device)
7871 			KDiskDeviceManager::Default()->DeleteFileDevice(partition->ID());
7872 		partition->Unregister();
7873 	}
7874 
7875 	delete mount;
7876 	return B_OK;
7877 }
7878 
7879 
7880 static status_t
7881 fs_sync(dev_t device)
7882 {
7883 	struct fs_mount* mount;
7884 	status_t status = get_mount(device, &mount);
7885 	if (status != B_OK)
7886 		return status;
7887 
7888 	struct vnode marker;
7889 	memset(&marker, 0, sizeof(marker));
7890 	marker.SetBusy(true);
7891 	marker.SetRemoved(true);
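	// The marker is a stack-allocated placeholder vnode: it is re-inserted
	// after each vnode we process, so that the iteration below can resume at
	// the right position after the locks have been dropped in between.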
7892 
7893 	// First, synchronize all file caches
7894 
7895 	while (true) {
7896 		WriteLocker locker(sVnodeLock);
7897 			// Note: That's the easy way, which is probably OK for sync(),
7898 			// since it's a relatively rare call and doesn't need to allow for
7899 			// a lot of concurrency. Using a read lock would be possible, but
7900 			// also more involved, since we would have to lock the individual
7901 			// nodes and take care of the locking order, which we might not
7902 			// want to do while holding fs_mount::lock.
7903 
7904 		// synchronize access to vnode list
7905 		mutex_lock(&mount->lock);
7906 
7907 		struct vnode* vnode;
7908 		if (!marker.IsRemoved()) {
7909 			vnode = mount->vnodes.GetNext(&marker);
7910 			mount->vnodes.Remove(&marker);
7911 			marker.SetRemoved(true);
7912 		} else
7913 			vnode = mount->vnodes.First();
7914 
7915 		while (vnode != NULL && (vnode->cache == NULL
7916 			|| vnode->IsRemoved() || vnode->IsBusy())) {
7917 			// TODO: we could track writes (and writable mapped vnodes)
7918 			//	and have a simple flag that we could test for here
7919 			vnode = mount->vnodes.GetNext(vnode);
7920 		}
7921 
7922 		if (vnode != NULL) {
7923 			// insert marker vnode again
7924 			mount->vnodes.InsertBefore(mount->vnodes.GetNext(vnode), &marker);
7925 			marker.SetRemoved(false);
7926 		}
7927 
7928 		mutex_unlock(&mount->lock);
7929 
7930 		if (vnode == NULL)
7931 			break;
7932 
7933 		vnode = lookup_vnode(mount->id, vnode->id);
7934 		if (vnode == NULL || vnode->IsBusy())
7935 			continue;
7936 
7937 		if (vnode->ref_count == 0) {
7938 			// this vnode has been unused before
7939 			vnode_used(vnode);
7940 		}
7941 		inc_vnode_ref_count(vnode);
7942 
7943 		locker.Unlock();
7944 
7945 		if (vnode->cache != NULL && !vnode->IsRemoved())
7946 			vnode->cache->WriteModified();
7947 
7948 		put_vnode(vnode);
7949 	}
7950 
7951 	// Let the file systems do their synchronizing work
7952 	if (HAS_FS_MOUNT_CALL(mount, sync))
7953 		status = FS_MOUNT_CALL_NO_PARAMS(mount, sync);
7954 
7955 	// Finally, flush the underlying device's write cache (if possible).
7956 	if (mount->partition != NULL && mount->partition->Device() != NULL)
7957 		ioctl(mount->partition->Device()->FD(), B_FLUSH_DRIVE_CACHE);
7958 
7959 	put_mount(mount);
7960 	return status;
7961 }
7962 
7963 
7964 static status_t
7965 fs_read_info(dev_t device, struct fs_info* info)
7966 {
7967 	struct fs_mount* mount;
7968 	status_t status = get_mount(device, &mount);
7969 	if (status != B_OK)
7970 		return status;
7971 
7972 	memset(info, 0, sizeof(struct fs_info));
7973 
7974 	if (HAS_FS_MOUNT_CALL(mount, read_fs_info))
7975 		status = FS_MOUNT_CALL(mount, read_fs_info, info);
7976 
7977 	// fill in info the file system doesn't (have to) know about
7978 	if (status == B_OK) {
7979 		info->dev = mount->id;
7980 		info->root = mount->root_vnode->id;
7981 
7982 		fs_volume* volume = mount->volume;
7983 		while (volume->super_volume != NULL)
7984 			volume = volume->super_volume;
7985 
7986 		strlcpy(info->fsh_name, volume->file_system_name,
7987 			sizeof(info->fsh_name));
7988 		if (mount->device_name != NULL) {
7989 			strlcpy(info->device_name, mount->device_name,
7990 				sizeof(info->device_name));
7991 		}
7992 	}
7993 
7994 	// if the call is not supported by the file system, there are still
7995 	// the parts that we filled out ourselves
7996 
7997 	put_mount(mount);
7998 	return status;
7999 }
8000 
8001 
8002 static status_t
8003 fs_write_info(dev_t device, const struct fs_info* info, int mask)
8004 {
8005 	struct fs_mount* mount;
8006 	status_t status = get_mount(device, &mount);
8007 	if (status != B_OK)
8008 		return status;
8009 
8010 	if (HAS_FS_MOUNT_CALL(mount, write_fs_info))
8011 		status = FS_MOUNT_CALL(mount, write_fs_info, info, mask);
8012 	else
8013 		status = B_READ_ONLY_DEVICE;
8014 
8015 	put_mount(mount);
8016 	return status;
8017 }
8018 
8019 
8020 static dev_t
8021 fs_next_device(int32* _cookie)
8022 {
8023 	struct fs_mount* mount = NULL;
8024 	dev_t device = *_cookie;
8025 
8026 	rw_lock_read_lock(&sMountLock);
8027 
8028 	// Since device IDs are assigned sequentially, this algorithm
8029 	// works well enough. It makes sure that the device list
8030 	// returned is sorted, and that no device is skipped when an
8031 	// already visited device has been unmounted.
8032 
8033 	while (device < sNextMountID) {
8034 		mount = find_mount(device++);
8035 		if (mount != NULL && mount->volume->private_volume != NULL)
8036 			break;
8037 	}
8038 
8039 	*_cookie = device;
8040 
8041 	if (mount != NULL)
8042 		device = mount->id;
8043 	else
8044 		device = B_BAD_VALUE;
8045 
8046 	rw_lock_read_unlock(&sMountLock);
8047 
8048 	return device;
8049 }
8050 
8051 
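/*!	\brief Convenience wrapper that reads an attribute of the node referred
	to by \a fd: the attribute is opened read-only, read from at \a pos, and
	closed again in one go.
	\return The number of bytes read, or an error code.
*/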
8052 ssize_t
8053 fs_read_attr(int fd, const char *attribute, uint32 type, off_t pos,
8054 	void *buffer, size_t readBytes)
8055 {
8056 	int attrFD = attr_open(fd, NULL, attribute, O_RDONLY, true);
8057 	if (attrFD < 0)
8058 		return attrFD;
8059 
8060 	ssize_t bytesRead = _kern_read(attrFD, pos, buffer, readBytes);
8061 
8062 	_kern_close(attrFD);
8063 
8064 	return bytesRead;
8065 }
8066 
8067 
8068 static status_t
8069 get_cwd(char* buffer, size_t size, bool kernel)
8070 {
8071 	// Get current working directory from io context
8072 	struct io_context* context = get_current_io_context(kernel);
8073 	status_t status;
8074 
8075 	FUNCTION(("get_cwd: buf %p, size %ld\n", buffer, size));
8076 
8077 	mutex_lock(&context->io_mutex);
8078 
8079 	struct vnode* vnode = context->cwd;
8080 	if (vnode)
8081 		inc_vnode_ref_count(vnode);
8082 
8083 	mutex_unlock(&context->io_mutex);
8084 
8085 	if (vnode) {
8086 		status = dir_vnode_to_path(vnode, buffer, size, kernel);
8087 		put_vnode(vnode);
8088 	} else
8089 		status = B_ERROR;
8090 
8091 	return status;
8092 }
8093 
8094 
8095 static status_t
8096 set_cwd(int fd, char* path, bool kernel)
8097 {
8098 	struct io_context* context;
8099 	struct vnode* oldDirectory;
8100 
8101 	FUNCTION(("set_cwd: path = \'%s\'\n", path));
8102 
8103 	// Get vnode for passed path, and bail if it failed
8104 	VnodePutter vnode;
8105 	status_t status = fd_and_path_to_vnode(fd, path, true, vnode, NULL, kernel);
8106 	if (status < 0)
8107 		return status;
8108 
8109 	if (!S_ISDIR(vnode->Type())) {
8110 		// nope, can't cwd to here
8111 		return B_NOT_A_DIRECTORY;
8112 	}
8113 
8114 	// We need to have the permission to enter the directory, too
8115 	if (HAS_FS_CALL(vnode, access)) {
8116 		status = FS_CALL(vnode.Get(), access, X_OK);
8117 		if (status != B_OK)
8118 			return status;
8119 	}
8120 
8121 	// Get current io context and lock
8122 	context = get_current_io_context(kernel);
8123 	mutex_lock(&context->io_mutex);
8124 
8125 	// save the old current working directory first
8126 	oldDirectory = context->cwd;
8127 	context->cwd = vnode.Detach();
8128 
8129 	mutex_unlock(&context->io_mutex);
8130 
8131 	if (oldDirectory)
8132 		put_vnode(oldDirectory);
8133 
8134 	return B_NO_ERROR;
8135 }
8136 
8137 
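/*!	\brief Copies a name from userland, rejecting names that don't fit.

	A thin wrapper around user_strlcpy(): returns \c B_OK if the string
	(including its terminating null character) fits into \a length bytes,
	\c B_NAME_TOO_LONG if it would have been truncated, or the error
	returned by user_strlcpy() (e.g. \c B_BAD_ADDRESS).
*/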
8138 static status_t
8139 user_copy_name(char* to, const char* from, size_t length)
8140 {
8141 	ssize_t len = user_strlcpy(to, from, length);
8142 	if (len < 0)
8143 		return len;
8144 	if (len >= (ssize_t)length)
8145 		return B_NAME_TOO_LONG;
8146 	return B_OK;
8147 }
8148 
8149 
8150 //	#pragma mark - kernel mirrored syscalls
8151 
8152 
8153 dev_t
8154 _kern_mount(const char* path, const char* device, const char* fsName,
8155 	uint32 flags, const char* args, size_t argsLength)
8156 {
8157 	KPath pathBuffer(path);
8158 	if (pathBuffer.InitCheck() != B_OK)
8159 		return B_NO_MEMORY;
8160 
8161 	return fs_mount(pathBuffer.LockBuffer(), device, fsName, flags, args, true);
8162 }
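
// Usage sketch (hypothetical device path and mount point): mounting a BFS
// volume from kernel code; a negative return value is an error code:
//
//	dev_t volume = _kern_mount("/mnt", "/dev/disk/usb/0/0/raw", "bfs", 0,
//		NULL, 0);
//	if (volume < 0)
//		dprintf("mount failed: %s\n", strerror(volume));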
8163 
8164 
8165 status_t
8166 _kern_unmount(const char* path, uint32 flags)
8167 {
8168 	KPath pathBuffer(path);
8169 	if (pathBuffer.InitCheck() != B_OK)
8170 		return B_NO_MEMORY;
8171 
8172 	return fs_unmount(pathBuffer.LockBuffer(), -1, flags, true);
8173 }
8174 
8175 
8176 status_t
8177 _kern_read_fs_info(dev_t device, struct fs_info* info)
8178 {
8179 	if (info == NULL)
8180 		return B_BAD_VALUE;
8181 
8182 	return fs_read_info(device, info);
8183 }
8184 
8185 
8186 status_t
8187 _kern_write_fs_info(dev_t device, const struct fs_info* info, int mask)
8188 {
8189 	if (info == NULL)
8190 		return B_BAD_VALUE;
8191 
8192 	return fs_write_info(device, info, mask);
8193 }
8194 
8195 
8196 status_t
8197 _kern_sync(void)
8198 {
8199 	// Note: _kern_sync() is also called from _user_sync()
8200 	int32 cookie = 0;
8201 	dev_t device;
8202 	while ((device = next_dev(&cookie)) >= 0) {
8203 		status_t status = fs_sync(device);
8204 		if (status != B_OK && status != B_BAD_VALUE) {
8205 			dprintf("sync: device %" B_PRIdDEV " couldn't sync: %s\n", device,
8206 				strerror(status));
8207 		}
8208 	}
8209 
8210 	return B_OK;
8211 }
8212 
8213 
8214 dev_t
8215 _kern_next_device(int32* _cookie)
8216 {
8217 	return fs_next_device(_cookie);
8218 }
8219 
8220 
8221 status_t
8222 _kern_get_next_fd_info(team_id teamID, uint32* _cookie, fd_info* info,
8223 	size_t infoSize)
8224 {
8225 	if (infoSize != sizeof(fd_info))
8226 		return B_BAD_VALUE;
8227 
8228 	// get the team
8229 	Team* team = Team::Get(teamID);
8230 	if (team == NULL)
8231 		return B_BAD_TEAM_ID;
8232 	BReference<Team> teamReference(team, true);
8233 
8234 	// now that we have a team reference, its I/O context won't go away
8235 	io_context* context = team->io_context;
8236 	MutexLocker contextLocker(context->io_mutex);
8237 
8238 	uint32 slot = *_cookie;
8239 
8240 	struct file_descriptor* descriptor;
8241 	while (slot < context->table_size
8242 		&& (descriptor = context->fds[slot]) == NULL) {
8243 		slot++;
8244 	}
8245 
8246 	if (slot >= context->table_size)
8247 		return B_ENTRY_NOT_FOUND;
8248 
8249 	info->number = slot;
8250 	info->open_mode = descriptor->open_mode;
8251 
8252 	struct vnode* vnode = fd_vnode(descriptor);
8253 	if (vnode != NULL) {
8254 		info->device = vnode->device;
8255 		info->node = vnode->id;
8256 	} else if (descriptor->u.mount != NULL) {
8257 		info->device = descriptor->u.mount->id;
8258 		info->node = -1;
8259 	}
8260 
8261 	*_cookie = slot + 1;
8262 	return B_OK;
8263 }
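
// Usage sketch (hypothetical teamID): iterating over all open descriptors of
// a team; the cookie is advanced by each call until B_ENTRY_NOT_FOUND ends
// the iteration:
//
//	uint32 cookie = 0;
//	fd_info info;
//	while (_kern_get_next_fd_info(teamID, &cookie, &info, sizeof(info)) == B_OK)
//		dprintf("fd %d: dev %d\n", (int)info.number, (int)info.device);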
8264 
8265 
8266 int
8267 _kern_open_entry_ref(dev_t device, ino_t inode, const char* name, int openMode,
8268 	int perms)
8269 {
8270 	if ((openMode & O_CREAT) != 0) {
8271 		return file_create_entry_ref(device, inode, name, openMode, perms,
8272 			true);
8273 	}
8274 
8275 	return file_open_entry_ref(device, inode, name, openMode, true);
8276 }
8277 
8278 
8279 /*!	\brief Opens a node specified by a FD + path pair.
8280 
8281 	At least one of \a fd and \a path must be specified.
8282 	If only \a fd is given, the function opens the node identified by this
8283 	FD. If only a path is given, this path is opened. If both are given and
8284 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8285 	of the directory (!) identified by \a fd.
8286 
8287 	\param fd The FD. May be < 0.
8288 	\param path The absolute or relative path. May be \c NULL.
8289 	\param openMode The open mode.
8290 	\return A FD referring to the newly opened node, or an error code,
8291 			if an error occurs.
8292 */
8293 int
8294 _kern_open(int fd, const char* path, int openMode, int perms)
8295 {
8296 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8297 	if (pathBuffer.InitCheck() != B_OK)
8298 		return B_NO_MEMORY;
8299 
8300 	if ((openMode & O_CREAT) != 0)
8301 		return file_create(fd, pathBuffer.LockBuffer(), openMode, perms, true);
8302 
8303 	return file_open(fd, pathBuffer.LockBuffer(), openMode, true);
8304 }
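
// Usage sketch (hypothetical names): since the path is relative, it is
// reckoned off of the directory referred to by dirFD; a negative return
// value is an error code:
//
//	int fd = _kern_open(dirFD, "settings/network", O_RDONLY, 0);
//	if (fd >= 0)
//		_kern_close(fd);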
8305 
8306 
8307 /*!	\brief Opens a directory specified by entry_ref or node_ref.
8308 
8309 	The supplied name may be \c NULL, in which case directory identified
8310 	by \a device and \a inode will be opened. Otherwise \a device and
8311 	\a inode identify the parent directory of the directory to be opened
8312 	and \a name its entry name.
8313 
8314 	\param device If \a name is specified the ID of the device the parent
8315 		   directory of the directory to be opened resides on, otherwise
8316 		   the device of the directory itself.
8317 	\param inode If \a name is specified the node ID of the parent
8318 		   directory of the directory to be opened, otherwise node ID of the
8319 		   directory itself.
8320 	\param name The entry name of the directory to be opened. If \c NULL,
8321 		   the \a device + \a inode pair identify the node to be opened.
8322 	\return The FD of the newly opened directory or an error code, if
8323 			something went wrong.
8324 */
8325 int
8326 _kern_open_dir_entry_ref(dev_t device, ino_t inode, const char* name)
8327 {
8328 	return dir_open_entry_ref(device, inode, name, true);
8329 }
8330 
8331 
8332 /*!	\brief Opens a directory specified by a FD + path pair.
8333 
8334 	At least one of \a fd and \a path must be specified.
8335 	If only \a fd is given, the function opens the directory identified by this
8336 	FD. If only a path is given, this path is opened. If both are given and
8337 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8338 	of the directory (!) identified by \a fd.
8339 
8340 	\param fd The FD. May be < 0.
8341 	\param path The absolute or relative path. May be \c NULL.
8342 	\return A FD referring to the newly opened directory, or an error code,
8343 			if an error occurs.
8344 */
8345 int
8346 _kern_open_dir(int fd, const char* path)
8347 {
8348 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8349 	if (pathBuffer.InitCheck() != B_OK)
8350 		return B_NO_MEMORY;
8351 
8352 	return dir_open(fd, pathBuffer.LockBuffer(), true);
8353 }
8354 
8355 
8356 status_t
8357 _kern_fcntl(int fd, int op, size_t argument)
8358 {
8359 	return common_fcntl(fd, op, argument, true);
8360 }
8361 
8362 
8363 status_t
8364 _kern_fsync(int fd)
8365 {
8366 	return common_sync(fd, true);
8367 }
8368 
8369 
8370 status_t
8371 _kern_lock_node(int fd)
8372 {
8373 	return common_lock_node(fd, true);
8374 }
8375 
8376 
8377 status_t
8378 _kern_unlock_node(int fd)
8379 {
8380 	return common_unlock_node(fd, true);
8381 }
8382 
8383 
8384 status_t
8385 _kern_preallocate(int fd, off_t offset, off_t length)
8386 {
8387 	return common_preallocate(fd, offset, length, true);
8388 }
8389 
8390 
8391 status_t
8392 _kern_create_dir_entry_ref(dev_t device, ino_t inode, const char* name,
8393 	int perms)
8394 {
8395 	return dir_create_entry_ref(device, inode, name, perms, true);
8396 }
8397 
8398 
8399 /*!	\brief Creates a directory specified by a FD + path pair.
8400 
8401 	\a path must always be specified (it contains the name of the new directory
8402 	at least). If only a path is given, this path identifies the location at
8403 	which the directory shall be created. If both \a fd and \a path are given
8404 	and the path is absolute, \a fd is ignored; a relative path is reckoned off
8405 	of the directory (!) identified by \a fd.
8406 
8407 	\param fd The FD. May be < 0.
8408 	\param path The absolute or relative path. Must not be \c NULL.
8409 	\param perms The access permissions the new directory shall have.
8410 	\return \c B_OK, if the directory has been created successfully, another
8411 			error code otherwise.
8412 */
8413 status_t
8414 _kern_create_dir(int fd, const char* path, int perms)
8415 {
8416 	KPath pathBuffer(path, KPath::DEFAULT);
8417 	if (pathBuffer.InitCheck() != B_OK)
8418 		return B_NO_MEMORY;
8419 
8420 	return dir_create(fd, pathBuffer.LockBuffer(), perms, true);
8421 }
8422 
8423 
8424 status_t
8425 _kern_remove_dir(int fd, const char* path)
8426 {
8427 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8428 	if (pathBuffer.InitCheck() != B_OK)
8429 		return B_NO_MEMORY;
8430 
8431 	return dir_remove(fd, pathBuffer.LockBuffer(), true);
8432 }
8433 
8434 
8435 /*!	\brief Reads the contents of a symlink referred to by a FD + path pair.
8436 
8437 	At least one of \a fd and \a path must be specified.
8438 	If only \a fd is given, the symlink to be read is the node identified
8439 	by this FD. If only a path is given, this path identifies the
8440 	symlink to be read. If both are given and the path is absolute, \a fd is
8441 	ignored; a relative path is reckoned off of the directory (!) identified
8442 	by \a fd.
8443 	If this function fails with B_BUFFER_OVERFLOW, the \a _bufferSize pointer
8444 	will still be updated to reflect the required buffer size.
8445 
8446 	\param fd The FD. May be < 0.
8447 	\param path The absolute or relative path. May be \c NULL.
8448 	\param buffer The buffer into which the contents of the symlink shall be
8449 		   written.
8450 	\param _bufferSize A pointer to the size of the supplied buffer.
8451 	\return The length of the link on success or an appropriate error code.
8452 */
8453 status_t
8454 _kern_read_link(int fd, const char* path, char* buffer, size_t* _bufferSize)
8455 {
8456 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8457 	if (pathBuffer.InitCheck() != B_OK)
8458 		return B_NO_MEMORY;
8459 
8460 	return common_read_link(fd, pathBuffer.LockBuffer(),
8461 		buffer, _bufferSize, true);
8462 }
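
// Usage sketch (hypothetical link path): on B_BUFFER_OVERFLOW, size is still
// set to the length actually required:
//
//	char buffer[B_PATH_NAME_LENGTH];
//	size_t size = sizeof(buffer);
//	status_t error = _kern_read_link(-1, "/boot/home/link", buffer, &size);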
8463 
8464 
8465 /*!	\brief Creates a symlink specified by a FD + path pair.
8466 
8467 	\a path must always be specified (it contains the name of the new symlink
8468 	at least). If only a path is given, this path identifies the location at
8469 	which the symlink shall be created. If both \a fd and \a path are given and
8470 	the path is absolute, \a fd is ignored; a relative path is reckoned off
8471 	of the directory (!) identified by \a fd.
8472 
8473 	\param fd The FD. May be < 0.
8474 	\param path The absolute or relative path. Must not be \c NULL.
	\param toPath The path the new symlink shall point to.
8475 	\param mode The access permissions the new symlink shall have.
8476 	\return \c B_OK, if the symlink has been created successfully, another
8477 			error code otherwise.
8478 */
8479 status_t
8480 _kern_create_symlink(int fd, const char* path, const char* toPath, int mode)
8481 {
8482 	KPath pathBuffer(path);
8483 	if (pathBuffer.InitCheck() != B_OK)
8484 		return B_NO_MEMORY;
8485 
8486 	return common_create_symlink(fd, pathBuffer.LockBuffer(),
8487 		toPath, mode, true);
8488 }
8489 
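
// Usage sketch (hypothetical paths): fd is ignored here because the path is
// absolute:
//
//	status_t error = _kern_create_symlink(-1, "/boot/home/link",
//		"/boot/home/target", S_IRWXU | S_IRWXG | S_IRWXO);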
8490 
8491 status_t
8492 _kern_create_link(int pathFD, const char* path, int toFD, const char* toPath,
8493 	bool traverseLeafLink)
8494 {
8495 	KPath pathBuffer(path);
8496 	KPath toPathBuffer(toPath);
8497 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
8498 		return B_NO_MEMORY;
8499 
8500 	return common_create_link(pathFD, pathBuffer.LockBuffer(), toFD,
8501 		toPathBuffer.LockBuffer(), traverseLeafLink, true);
8502 }
8503 
8504 
8505 /*!	\brief Removes an entry specified by a FD + path pair from its directory.
8506 
8507 	\a path must always be specified (it contains at least the name of the entry
8508 	to be deleted). If only a path is given, this path identifies the entry
8509 	directly. If both \a fd and \a path are given and the path is absolute,
8510 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8511 	identified by \a fd.
8512 
8513 	\param fd The FD. May be < 0.
8514 	\param path The absolute or relative path. Must not be \c NULL.
8515 	\return \c B_OK, if the entry has been removed successfully, another
8516 			error code otherwise.
8517 */
8518 status_t
8519 _kern_unlink(int fd, const char* path)
8520 {
8521 	KPath pathBuffer(path);
8522 	if (pathBuffer.InitCheck() != B_OK)
8523 		return B_NO_MEMORY;
8524 
8525 	return common_unlink(fd, pathBuffer.LockBuffer(), true);
8526 }
8527 
8528 
8529 /*!	\brief Moves an entry specified by a FD + path pair to an entry specified
8530 		   by another FD + path pair.
8531 
8532 	\a oldPath and \a newPath must always be specified (they contain at least
8533 	the name of the entry). If only a path is given, this path identifies the
8534 	entry directly. If both a FD and a path are given and the path is absolute,
8535 	the FD is ignored; a relative path is reckoned off of the directory (!)
8536 	identified by the respective FD.
8537 
8538 	\param oldFD The FD of the old location. May be < 0.
8539 	\param oldPath The absolute or relative path of the old location. Must not
8540 		   be \c NULL.
8541 	\param newFD The FD of the new location. May be < 0.
8542 	\param newPath The absolute or relative path of the new location. Must not
8543 		   be \c NULL.
8544 	\return \c B_OK, if the entry has been moved successfully, another
8545 			error code otherwise.
8546 */
8547 status_t
8548 _kern_rename(int oldFD, const char* oldPath, int newFD, const char* newPath)
8549 {
8550 	KPath oldPathBuffer(oldPath);
8551 	KPath newPathBuffer(newPath);
8552 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
8553 		return B_NO_MEMORY;
8554 
8555 	return common_rename(oldFD, oldPathBuffer.LockBuffer(),
8556 		newFD, newPathBuffer.LockBuffer(), true);
8557 }
8558 
8559 
8560 status_t
8561 _kern_access(int fd, const char* path, int mode, bool effectiveUserGroup)
8562 {
8563 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8564 	if (pathBuffer.InitCheck() != B_OK)
8565 		return B_NO_MEMORY;
8566 
8567 	return common_access(fd, pathBuffer.LockBuffer(), mode, effectiveUserGroup,
8568 		true);
8569 }
8570 
8571 
8572 /*!	\brief Reads stat data of an entity specified by a FD + path pair.
8573 
8574 	If only \a fd is given, the stat operation associated with the type
8575 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8576 	given, this path identifies the entry for whose node to retrieve the
8577 	stat data. If both \a fd and \a path are given and the path is absolute,
8578 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8579 	identified by \a fd and specifies the entry whose stat data shall be
8580 	retrieved.
8581 
8582 	\param fd The FD. May be < 0.
8583 	\param path The absolute or relative path. May be \c NULL.
8584 	\param traverseLeafLink If \a path is given, \c true specifies that the
8585 		   function shall not stick to symlinks, but traverse them.
8586 	\param stat The buffer the stat data shall be written into.
8587 	\param statSize The size of the supplied stat buffer.
8588 	\return \c B_OK, if the stat data have been read successfully, another
8589 			error code otherwise.
8590 */
8591 status_t
8592 _kern_read_stat(int fd, const char* path, bool traverseLeafLink,
8593 	struct stat* stat, size_t statSize)
8594 {
8595 	struct stat completeStat;
8596 	struct stat* originalStat = NULL;
8597 	status_t status;
8598 
8599 	if (statSize > sizeof(struct stat))
8600 		return B_BAD_VALUE;
8601 
8602 	// this supports different stat extensions
8603 	if (statSize < sizeof(struct stat)) {
8604 		originalStat = stat;
8605 		stat = &completeStat;
8606 	}
8607 
8608 	status = vfs_read_stat(fd, path, traverseLeafLink, stat, true);
8609 
8610 	if (status == B_OK && originalStat != NULL)
8611 		memcpy(originalStat, stat, statSize);
8612 
8613 	return status;
8614 }
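
// Usage sketch: statSize may be smaller than sizeof(struct stat) to support
// callers built against an older struct stat layout; only the first statSize
// bytes are copied back (assuming "fd" is an open descriptor):
//
//	struct stat st;
//	status_t error = _kern_read_stat(fd, NULL, false, &st, sizeof(st));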
8615 
8616 
8617 /*!	\brief Writes stat data of an entity specified by a FD + path pair.
8618 
8619 	If only \a fd is given, the stat operation associated with the type
8620 	of the FD (node, attr, attr dir etc.) is performed. If only \a path is
8621 	given, this path identifies the entry for whose node to write the
8622 	stat data. If both \a fd and \a path are given and the path is absolute,
8623 	\a fd is ignored; a relative path is reckoned off of the directory (!)
8624 	identified by \a fd and specifies the entry whose stat data shall be
8625 	written.
8626 
8627 	\param fd The FD. May be < 0.
8628 	\param path The absolute or relative path. May be \c NULL.
8629 	\param traverseLeafLink If \a path is given, \c true specifies that the
8630 		   function shall not stick to symlinks, but traverse them.
8631 	\param stat The buffer containing the stat data to be written.
8632 	\param statSize The size of the supplied stat buffer.
8633 	\param statMask A mask specifying which parts of the stat data shall be
8634 		   written.
8635 	\return \c B_OK, if the stat data have been written successfully,
8636 			another error code otherwise.
8637 */
8638 status_t
8639 _kern_write_stat(int fd, const char* path, bool traverseLeafLink,
8640 	const struct stat* stat, size_t statSize, int statMask)
8641 {
8642 	struct stat completeStat;
8643 
8644 	if (statSize > sizeof(struct stat))
8645 		return B_BAD_VALUE;
8646 
8647 	// this supports different stat extensions
8648 	if (statSize < sizeof(struct stat)) {
8649 		memset((uint8*)&completeStat + statSize, 0,
8650 			sizeof(struct stat) - statSize);
8651 		memcpy(&completeStat, stat, statSize);
8652 		stat = &completeStat;
8653 	}
8654 
8655 	status_t status;
8656 
8657 	if (path != NULL) {
8658 		// path given: write the stat of the node referred to by (fd, path)
8659 		KPath pathBuffer(path);
8660 		if (pathBuffer.InitCheck() != B_OK)
8661 			return B_NO_MEMORY;
8662 
8663 		status = common_path_write_stat(fd, pathBuffer.LockBuffer(),
8664 			traverseLeafLink, stat, statMask, true);
8665 	} else {
8666 		// no path given: get the FD and use the FD operation
8667 		FileDescriptorPutter descriptor
8668 			(get_fd(get_current_io_context(true), fd));
8669 		if (!descriptor.IsSet())
8670 			return B_FILE_ERROR;
8671 
8672 		if (descriptor->ops->fd_write_stat)
8673 			status = descriptor->ops->fd_write_stat(descriptor.Get(), stat, statMask);
8674 		else
8675 			status = B_UNSUPPORTED;
8676 	}
8677 
8678 	return status;
8679 }
8680 
8681 
8682 int
8683 _kern_open_attr_dir(int fd, const char* path, bool traverseLeafLink)
8684 {
8685 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8686 	if (pathBuffer.InitCheck() != B_OK)
8687 		return B_NO_MEMORY;
8688 
8689 	return attr_dir_open(fd, pathBuffer.LockBuffer(), traverseLeafLink, true);
8690 }
8691 
8692 
8693 int
8694 _kern_open_attr(int fd, const char* path, const char* name, uint32 type,
8695 	int openMode)
8696 {
8697 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8698 	if (pathBuffer.InitCheck() != B_OK)
8699 		return B_NO_MEMORY;
8700 
8701 	if ((openMode & O_CREAT) != 0) {
8702 		return attr_create(fd, pathBuffer.LockBuffer(), name, type, openMode,
8703 			true);
8704 	}
8705 
8706 	return attr_open(fd, pathBuffer.LockBuffer(), name, openMode, true);
8707 }
8708 
8709 
8710 status_t
8711 _kern_remove_attr(int fd, const char* name)
8712 {
8713 	return attr_remove(fd, name, true);
8714 }
8715 
8716 
8717 status_t
8718 _kern_rename_attr(int fromFile, const char* fromName, int toFile,
8719 	const char* toName)
8720 {
8721 	return attr_rename(fromFile, fromName, toFile, toName, true);
8722 }
8723 
8724 
8725 int
8726 _kern_open_index_dir(dev_t device)
8727 {
8728 	return index_dir_open(device, true);
8729 }
8730 
8731 
8732 status_t
8733 _kern_create_index(dev_t device, const char* name, uint32 type, uint32 flags)
8734 {
8735 	return index_create(device, name, type, flags, true);
8736 }
8737 
8738 
8739 status_t
8740 _kern_read_index_stat(dev_t device, const char* name, struct stat* stat)
8741 {
8742 	return index_name_read_stat(device, name, stat, true);
8743 }
8744 
8745 
8746 status_t
8747 _kern_remove_index(dev_t device, const char* name)
8748 {
8749 	return index_remove(device, name, true);
8750 }
8751 
8752 
8753 status_t
8754 _kern_getcwd(char* buffer, size_t size)
8755 {
8756 	TRACE(("_kern_getcwd: buf %p, %ld\n", buffer, size));
8757 
8758 	// Call vfs to get current working directory
8759 	return get_cwd(buffer, size, true);
8760 }
8761 
8762 
8763 status_t
8764 _kern_setcwd(int fd, const char* path)
8765 {
8766 	KPath pathBuffer(path, KPath::LAZY_ALLOC);
8767 	if (pathBuffer.InitCheck() != B_OK)
8768 		return B_NO_MEMORY;
8769 
8770 	return set_cwd(fd, pathBuffer.LockBuffer(), true);
8771 }
8772 
8773 
8774 //	#pragma mark - userland syscalls
8775 
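// The syscalls below share a common pattern: buffers coming from userland
// are validated with IS_USER_ADDRESS() and copied into kernel memory (a
// KPath buffer or a stack buffer, via user_copy_name() or user_memcpy())
// before being handed to the common VFS functions with kernel == false.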
8776 
8777 dev_t
8778 _user_mount(const char* userPath, const char* userDevice,
8779 	const char* userFileSystem, uint32 flags, const char* userArgs,
8780 	size_t argsLength)
8781 {
8782 	char fileSystem[B_FILE_NAME_LENGTH];
8783 	KPath path, device;
8784 	char* args = NULL;
8785 	status_t status;
8786 
8787 	if (!IS_USER_ADDRESS(userPath))
8788 		return B_BAD_ADDRESS;
8789 
8790 	if (path.InitCheck() != B_OK || device.InitCheck() != B_OK)
8791 		return B_NO_MEMORY;
8792 
8793 	status = user_copy_name(path.LockBuffer(), userPath,
8794 		B_PATH_NAME_LENGTH);
8795 	if (status != B_OK)
8796 		return status;
8797 	path.UnlockBuffer();
8798 
8799 	if (userFileSystem != NULL) {
8800 		if (!IS_USER_ADDRESS(userFileSystem))
8801 			return B_BAD_ADDRESS;
8802 
8803 		status = user_copy_name(fileSystem, userFileSystem, sizeof(fileSystem));
8804 		if (status != B_OK)
8805 			return status;
8806 	}
8807 
8808 	if (userDevice != NULL) {
8809 		if (!IS_USER_ADDRESS(userDevice))
8810 			return B_BAD_ADDRESS;
8811 
8812 		status = user_copy_name(device.LockBuffer(), userDevice,
8813 			B_PATH_NAME_LENGTH);
8814 		if (status != B_OK)
8815 			return status;
8816 		device.UnlockBuffer();
8817 	}
8818 
8819 	if (userArgs != NULL && argsLength > 0) {
8820 		if (!IS_USER_ADDRESS(userArgs))
8821 			return B_BAD_ADDRESS;
8822 
8823 		// this is a safety restriction
8824 		if (argsLength >= 65536)
8825 			return B_NAME_TOO_LONG;
8826 
8827 		args = (char*)malloc(argsLength + 1);
8828 		if (args == NULL)
8829 			return B_NO_MEMORY;
8830 
8831 		status = user_copy_name(args, userArgs, argsLength + 1);
8832 		if (status != B_OK) {
8833 			free(args);
8834 			return status;
8835 		}
8836 	}
8837 
8838 	status = fs_mount(path.LockBuffer(),
8839 		userDevice != NULL ? device.Path() : NULL,
8840 		userFileSystem ? fileSystem : NULL, flags, args, false);
8841 
8842 	free(args);
8843 	return status;
8844 }
8845 
8846 
8847 status_t
8848 _user_unmount(const char* userPath, uint32 flags)
8849 {
8850 	if (!IS_USER_ADDRESS(userPath))
8851 		return B_BAD_ADDRESS;
8852 
8853 	KPath pathBuffer;
8854 	if (pathBuffer.InitCheck() != B_OK)
8855 		return B_NO_MEMORY;
8856 
8857 	char* path = pathBuffer.LockBuffer();
8858 
8859 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
8860 	if (status != B_OK)
8861 		return status;
8862 
8863 	return fs_unmount(path, -1, flags & ~B_UNMOUNT_BUSY_PARTITION, false);
8864 }
8865 
8866 
8867 status_t
8868 _user_read_fs_info(dev_t device, struct fs_info* userInfo)
8869 {
8870 	struct fs_info info;
8871 	status_t status;
8872 
8873 	if (userInfo == NULL)
8874 		return B_BAD_VALUE;
8875 
8876 	if (!IS_USER_ADDRESS(userInfo))
8877 		return B_BAD_ADDRESS;
8878 
8879 	status = fs_read_info(device, &info);
8880 	if (status != B_OK)
8881 		return status;
8882 
8883 	if (user_memcpy(userInfo, &info, sizeof(struct fs_info)) != B_OK)
8884 		return B_BAD_ADDRESS;
8885 
8886 	return B_OK;
8887 }
8888 
8889 
8890 status_t
8891 _user_write_fs_info(dev_t device, const struct fs_info* userInfo, int mask)
8892 {
8893 	struct fs_info info;
8894 
8895 	if (userInfo == NULL)
8896 		return B_BAD_VALUE;
8897 
8898 	if (!IS_USER_ADDRESS(userInfo)
8899 		|| user_memcpy(&info, userInfo, sizeof(struct fs_info)) != B_OK)
8900 		return B_BAD_ADDRESS;
8901 
8902 	return fs_write_info(device, &info, mask);
8903 }
8904 
8905 
8906 dev_t
8907 _user_next_device(int32* _userCookie)
8908 {
8909 	int32 cookie;
8910 	dev_t device;
8911 
8912 	if (!IS_USER_ADDRESS(_userCookie)
8913 		|| user_memcpy(&cookie, _userCookie, sizeof(int32)) != B_OK)
8914 		return B_BAD_ADDRESS;
8915 
8916 	device = fs_next_device(&cookie);
8917 
8918 	if (device >= B_OK) {
8919 		// update user cookie
8920 		if (user_memcpy(_userCookie, &cookie, sizeof(int32)) != B_OK)
8921 			return B_BAD_ADDRESS;
8922 	}
8923 
8924 	return device;
8925 }
8926 
8927 
8928 status_t
8929 _user_sync(void)
8930 {
8931 	return _kern_sync();
8932 }
8933 
8934 
8935 status_t
8936 _user_get_next_fd_info(team_id team, uint32* userCookie, fd_info* userInfo,
8937 	size_t infoSize)
8938 {
8939 	struct fd_info info;
8940 	uint32 cookie;
8941 
8942 	// only root can do this
8943 	if (geteuid() != 0)
8944 		return B_NOT_ALLOWED;
8945 
8946 	if (infoSize != sizeof(fd_info))
8947 		return B_BAD_VALUE;
8948 
8949 	if (!IS_USER_ADDRESS(userCookie) || !IS_USER_ADDRESS(userInfo)
8950 		|| user_memcpy(&cookie, userCookie, sizeof(uint32)) != B_OK)
8951 		return B_BAD_ADDRESS;
8952 
8953 	status_t status = _kern_get_next_fd_info(team, &cookie, &info, infoSize);
8954 	if (status != B_OK)
8955 		return status;
8956 
8957 	if (user_memcpy(userCookie, &cookie, sizeof(uint32)) != B_OK
8958 		|| user_memcpy(userInfo, &info, infoSize) != B_OK)
8959 		return B_BAD_ADDRESS;
8960 
8961 	return status;
8962 }
8963 
8964 
8965 status_t
8966 _user_entry_ref_to_path(dev_t device, ino_t inode, const char* leaf,
8967 	char* userPath, size_t pathLength)
8968 {
8969 	if (!IS_USER_ADDRESS(userPath))
8970 		return B_BAD_ADDRESS;
8971 
8972 	KPath path;
8973 	if (path.InitCheck() != B_OK)
8974 		return B_NO_MEMORY;
8975 
8976 	// copy the leaf name onto the stack
8977 	char stackLeaf[B_FILE_NAME_LENGTH];
8978 	if (leaf != NULL) {
8979 		if (!IS_USER_ADDRESS(leaf))
8980 			return B_BAD_ADDRESS;
8981 
8982 		int status = user_copy_name(stackLeaf, leaf, B_FILE_NAME_LENGTH);
8983 		if (status != B_OK)
8984 			return status;
8985 
8986 		leaf = stackLeaf;
8987 	}
8988 
8989 	status_t status = vfs_entry_ref_to_path(device, inode, leaf,
8990 		false, path.LockBuffer(), path.BufferSize());
8991 	if (status != B_OK)
8992 		return status;
8993 
8994 	path.UnlockBuffer();
8995 
8996 	int length = user_strlcpy(userPath, path.Path(), pathLength);
8997 	if (length < 0)
8998 		return length;
8999 	if (length >= (int)pathLength)
9000 		return B_BUFFER_OVERFLOW;
9001 
9002 	return B_OK;
9003 }
9004 
9005 
9006 status_t
9007 _user_normalize_path(const char* userPath, bool traverseLink, char* buffer)
9008 {
9009 	if (userPath == NULL || buffer == NULL)
9010 		return B_BAD_VALUE;
9011 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(buffer))
9012 		return B_BAD_ADDRESS;
9013 
9014 	// copy path from userland
9015 	KPath pathBuffer;
9016 	if (pathBuffer.InitCheck() != B_OK)
9017 		return B_NO_MEMORY;
9018 	char* path = pathBuffer.LockBuffer();
9019 
9020 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9021 	if (status != B_OK)
9022 		return status;
9023 
9024 	status_t error = normalize_path(path, pathBuffer.BufferSize(), traverseLink,
9025 		false);
9026 	if (error != B_OK)
9027 		return error;
9028 
9029 	// copy back to userland
9030 	int len = user_strlcpy(buffer, path, B_PATH_NAME_LENGTH);
9031 	if (len < 0)
9032 		return len;
9033 	if (len >= B_PATH_NAME_LENGTH)
9034 		return B_BUFFER_OVERFLOW;
9035 
9036 	return B_OK;
9037 }
9038 
9039 
9040 int
9041 _user_open_entry_ref(dev_t device, ino_t inode, const char* userName,
9042 	int openMode, int perms)
9043 {
9044 	char name[B_FILE_NAME_LENGTH];
9045 
9046 	if (userName == NULL || device < 0 || inode < 0)
9047 		return B_BAD_VALUE;
9048 	if (!IS_USER_ADDRESS(userName))
9049 		return B_BAD_ADDRESS;
9050 	status_t status = user_copy_name(name, userName, sizeof(name));
9051 	if (status != B_OK)
9052 		return status;
9053 
9054 	if ((openMode & O_CREAT) != 0) {
9055 		return file_create_entry_ref(device, inode, name, openMode, perms,
9056 			false);
9057 	}
9058 
9059 	return file_open_entry_ref(device, inode, name, openMode, false);
9060 }
9061 
9062 
9063 int
9064 _user_open(int fd, const char* userPath, int openMode, int perms)
9065 {
9066 	KPath path;
9067 	if (path.InitCheck() != B_OK)
9068 		return B_NO_MEMORY;
9069 
9070 	char* buffer = path.LockBuffer();
9071 
9072 	if (!IS_USER_ADDRESS(userPath))
9073 		return B_BAD_ADDRESS;
9074 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9075 	if (status != B_OK)
9076 		return status;
9077 
9078 	if ((openMode & O_CREAT) != 0)
9079 		return file_create(fd, buffer, openMode, perms, false);
9080 
9081 	return file_open(fd, buffer, openMode, false);
9082 }
9083 
9084 
9085 int
9086 _user_open_dir_entry_ref(dev_t device, ino_t inode, const char* userName)
9087 {
9088 	if (userName != NULL) {
9089 		char name[B_FILE_NAME_LENGTH];
9090 
9091 		if (!IS_USER_ADDRESS(userName))
9092 			return B_BAD_ADDRESS;
9093 		status_t status = user_copy_name(name, userName, sizeof(name));
9094 		if (status != B_OK)
9095 			return status;
9096 
9097 		return dir_open_entry_ref(device, inode, name, false);
9098 	}
9099 	return dir_open_entry_ref(device, inode, NULL, false);
9100 }
9101 
9102 
9103 int
9104 _user_open_dir(int fd, const char* userPath)
9105 {
9106 	if (userPath == NULL)
9107 		return dir_open(fd, NULL, false);
9108 
9109 	KPath path;
9110 	if (path.InitCheck() != B_OK)
9111 		return B_NO_MEMORY;
9112 
9113 	char* buffer = path.LockBuffer();
9114 
9115 	if (!IS_USER_ADDRESS(userPath))
9116 		return B_BAD_ADDRESS;
9117 	status_t status = user_copy_name(buffer, userPath, B_PATH_NAME_LENGTH);
9118 	if (status != B_OK)
9119 		return status;
9120 
9121 	return dir_open(fd, buffer, false);
9122 }
9123 
9124 
9125 /*!	\brief Opens a directory's parent directory and returns the entry name
9126 		   of the former.
9127 
9128 	Aside from also returning the directory's entry name, this method is
9129 	equivalent to \code _user_open_dir(fd, "..") \endcode. It really is
9130 	equivalent if \a userName is \c NULL.
9131 
9132 	If a name buffer is supplied and the name does not fit the buffer, the
9133 	function fails. A buffer of size \c B_FILE_NAME_LENGTH should be safe.
9134 
9135 	\param fd A FD referring to a directory.
9136 	\param userName Buffer the directory's entry name shall be written into.
9137 		   May be \c NULL.
9138 	\param nameLength Size of the name buffer.
9139 	\return The file descriptor of the opened parent directory, if everything
9140 			went fine, an error code otherwise.
9141 */
9142 int
9143 _user_open_parent_dir(int fd, char* userName, size_t nameLength)
9144 {
9145 	bool kernel = false;
9146 
9147 	if (userName && !IS_USER_ADDRESS(userName))
9148 		return B_BAD_ADDRESS;
9149 
9150 	// open the parent dir
9151 	int parentFD = dir_open(fd, (char*)"..", kernel);
9152 	if (parentFD < 0)
9153 		return parentFD;
9154 	FDCloser fdCloser(parentFD, kernel);
9155 
9156 	if (userName) {
9157 		// get the vnodes
9158 		struct vnode* parentVNode = get_vnode_from_fd(parentFD, kernel);
9159 		struct vnode* dirVNode = get_vnode_from_fd(fd, kernel);
9160 		VnodePutter parentVNodePutter(parentVNode);
9161 		VnodePutter dirVNodePutter(dirVNode);
9162 		if (!parentVNode || !dirVNode)
9163 			return B_FILE_ERROR;
9164 
9165 		// get the vnode name
9166 		char _buffer[offsetof(struct dirent, d_name) + B_FILE_NAME_LENGTH + 1];
9167 		struct dirent* buffer = (struct dirent*)_buffer;
9168 		status_t status = get_vnode_name(dirVNode, parentVNode, buffer,
9169 			sizeof(_buffer), get_current_io_context(false));
9170 		if (status != B_OK)
9171 			return status;
9172 
9173 		// copy the name to the userland buffer
9174 		int len = user_strlcpy(userName, buffer->d_name, nameLength);
9175 		if (len < 0)
9176 			return len;
9177 		if (len >= (int)nameLength)
9178 			return B_BUFFER_OVERFLOW;
9179 	}
9180 
9181 	return fdCloser.Detach();
9182 }
9183 
9184 
9185 status_t
9186 _user_fcntl(int fd, int op, size_t argument)
9187 {
9188 	status_t status = common_fcntl(fd, op, argument, false);
9189 	if (op == F_SETLKW)
9190 		syscall_restart_handle_post(status);
9191 
9192 	return status;
9193 }
9194 
9195 
9196 status_t
9197 _user_fsync(int fd)
9198 {
9199 	return common_sync(fd, false);
9200 }
9201 
9202 
9203 status_t
9204 _user_flock(int fd, int operation)
9205 {
9206 	FUNCTION(("_user_flock(fd = %d, op = %d)\n", fd, operation));
9207 
9208 	// Check if the operation is valid
9209 	switch (operation & ~LOCK_NB) {
9210 		case LOCK_UN:
9211 		case LOCK_SH:
9212 		case LOCK_EX:
9213 			break;
9214 
9215 		default:
9216 			return B_BAD_VALUE;
9217 	}
9218 
9219 	struct vnode* vnode;
9220 	FileDescriptorPutter descriptor(get_fd_and_vnode(fd, &vnode, false));
9221 	if (!descriptor.IsSet())
9222 		return B_FILE_ERROR;
9223 
9224 	if (descriptor->type != FDTYPE_FILE)
9225 		return B_BAD_VALUE;
9226 
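	// translate the flock() operation into a POSIX advisory lock spanning
	// the whole file: LOCK_SH maps to F_RDLCK, LOCK_EX to F_WRLCK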
9227 	struct flock flock;
9228 	flock.l_start = 0;
9229 	flock.l_len = OFF_MAX;
9230 	flock.l_whence = 0;
9231 	flock.l_type = (operation & LOCK_SH) != 0 ? F_RDLCK : F_WRLCK;
9232 
9233 	status_t status;
9234 	if ((operation & LOCK_UN) != 0) {
9235 		if (HAS_FS_CALL(vnode, release_lock))
9236 			status = FS_CALL(vnode, release_lock, descriptor->cookie, &flock);
9237 		else
9238 			status = release_advisory_lock(vnode, NULL, descriptor.Get(), &flock);
9239 	} else {
9240 		if (HAS_FS_CALL(vnode, acquire_lock)) {
9241 			status = FS_CALL(vnode, acquire_lock, descriptor->cookie, &flock,
9242 				(operation & LOCK_NB) == 0);
9243 		} else {
9244 			status = acquire_advisory_lock(vnode, NULL, descriptor.Get(), &flock,
9245 				(operation & LOCK_NB) == 0);
9246 		}
9247 	}
9248 
9249 	syscall_restart_handle_post(status);
9250 
9251 	return status;
9252 }
9253 
9254 
9255 status_t
9256 _user_lock_node(int fd)
9257 {
9258 	return common_lock_node(fd, false);
9259 }
9260 
9261 
9262 status_t
9263 _user_unlock_node(int fd)
9264 {
9265 	return common_unlock_node(fd, false);
9266 }
9267 
9268 
9269 status_t
9270 _user_preallocate(int fd, off_t offset, off_t length)
9271 {
9272 	return common_preallocate(fd, offset, length, false);
9273 }
9274 
9275 
9276 status_t
9277 _user_create_dir_entry_ref(dev_t device, ino_t inode, const char* userName,
9278 	int perms)
9279 {
9280 	char name[B_FILE_NAME_LENGTH];
9281 	status_t status;
9282 
9283 	if (!IS_USER_ADDRESS(userName))
9284 		return B_BAD_ADDRESS;
9285 
9286 	status = user_copy_name(name, userName, sizeof(name));
9287 	if (status != B_OK)
9288 		return status;
9289 
9290 	return dir_create_entry_ref(device, inode, name, perms, false);
9291 }
9292 
9293 
9294 status_t
9295 _user_create_dir(int fd, const char* userPath, int perms)
9296 {
9297 	KPath pathBuffer;
9298 	if (pathBuffer.InitCheck() != B_OK)
9299 		return B_NO_MEMORY;
9300 
9301 	char* path = pathBuffer.LockBuffer();
9302 
9303 	if (!IS_USER_ADDRESS(userPath))
9304 		return B_BAD_ADDRESS;
9305 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9306 	if (status != B_OK)
9307 		return status;
9308 
9309 	return dir_create(fd, path, perms, false);
9310 }
9311 
9312 
9313 status_t
9314 _user_remove_dir(int fd, const char* userPath)
9315 {
9316 	KPath pathBuffer;
9317 	if (pathBuffer.InitCheck() != B_OK)
9318 		return B_NO_MEMORY;
9319 
9320 	char* path = pathBuffer.LockBuffer();
9321 
9322 	if (userPath != NULL) {
9323 		if (!IS_USER_ADDRESS(userPath))
9324 			return B_BAD_ADDRESS;
9325 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9326 		if (status != B_OK)
9327 			return status;
9328 	}
9329 
9330 	return dir_remove(fd, userPath ? path : NULL, false);
9331 }
9332 
9333 
9334 status_t
9335 _user_read_link(int fd, const char* userPath, char* userBuffer,
9336 	size_t* userBufferSize)
9337 {
9338 	KPath pathBuffer, linkBuffer;
9339 	if (pathBuffer.InitCheck() != B_OK || linkBuffer.InitCheck() != B_OK)
9340 		return B_NO_MEMORY;
9341 
9342 	size_t bufferSize;
9343 
9344 	if (!IS_USER_ADDRESS(userBuffer) || !IS_USER_ADDRESS(userBufferSize)
9345 		|| user_memcpy(&bufferSize, userBufferSize, sizeof(size_t)) != B_OK)
9346 		return B_BAD_ADDRESS;
9347 
9348 	char* path = pathBuffer.LockBuffer();
9349 	char* buffer = linkBuffer.LockBuffer();
9350 
9351 	if (userPath) {
9352 		if (!IS_USER_ADDRESS(userPath))
9353 			return B_BAD_ADDRESS;
9354 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9355 		if (status != B_OK)
9356 			return status;
9357 
9358 		if (bufferSize > B_PATH_NAME_LENGTH)
9359 			bufferSize = B_PATH_NAME_LENGTH;
9360 	}
9361 
9362 	size_t newBufferSize = bufferSize;
9363 	status_t status = common_read_link(fd, userPath ? path : NULL, buffer,
9364 		&newBufferSize, false);
9365 
9366 	// we also update the bufferSize in case of errors
9367 	// (the real length will be returned in case of B_BUFFER_OVERFLOW)
9368 	if (user_memcpy(userBufferSize, &newBufferSize, sizeof(size_t)) != B_OK)
9369 		return B_BAD_ADDRESS;
9370 
9371 	if (status != B_OK)
9372 		return status;
9373 
9374 	bufferSize = min_c(newBufferSize, bufferSize);
9375 	if (user_memcpy(userBuffer, buffer, bufferSize) != B_OK)
9376 		return B_BAD_ADDRESS;
9377 
9378 	return B_OK;
9379 }
9380 
9381 
9382 status_t
9383 _user_create_symlink(int fd, const char* userPath, const char* userToPath,
9384 	int mode)
9385 {
9386 	KPath pathBuffer;
9387 	KPath toPathBuffer;
9388 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9389 		return B_NO_MEMORY;
9390 
9391 	char* path = pathBuffer.LockBuffer();
9392 	char* toPath = toPathBuffer.LockBuffer();
9393 
9394 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9395 		return B_BAD_ADDRESS;
9396 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9397 	if (status != B_OK)
9398 		return status;
9399 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9400 	if (status != B_OK)
9401 		return status;
9402 
9403 	return common_create_symlink(fd, path, toPath, mode, false);
9404 }
9405 
9406 
9407 status_t
9408 _user_create_link(int pathFD, const char* userPath, int toFD,
9409 	const char* userToPath, bool traverseLeafLink)
9410 {
9411 	KPath pathBuffer;
9412 	KPath toPathBuffer;
9413 	if (pathBuffer.InitCheck() != B_OK || toPathBuffer.InitCheck() != B_OK)
9414 		return B_NO_MEMORY;
9415 
9416 	char* path = pathBuffer.LockBuffer();
9417 	char* toPath = toPathBuffer.LockBuffer();
9418 
9419 	if (!IS_USER_ADDRESS(userPath) || !IS_USER_ADDRESS(userToPath))
9420 		return B_BAD_ADDRESS;
9421 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9422 	if (status != B_OK)
9423 		return status;
9424 	status = user_copy_name(toPath, userToPath, B_PATH_NAME_LENGTH);
9425 	if (status != B_OK)
9426 		return status;
9427 
9428 	status = check_path(toPath);
9429 	if (status != B_OK)
9430 		return status;
9431 
9432 	return common_create_link(pathFD, path, toFD, toPath, traverseLeafLink,
9433 		false);
9434 }
9435 
9436 
9437 status_t
9438 _user_unlink(int fd, const char* userPath)
9439 {
9440 	KPath pathBuffer;
9441 	if (pathBuffer.InitCheck() != B_OK)
9442 		return B_NO_MEMORY;
9443 
9444 	char* path = pathBuffer.LockBuffer();
9445 
9446 	if (!IS_USER_ADDRESS(userPath))
9447 		return B_BAD_ADDRESS;
9448 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9449 	if (status != B_OK)
9450 		return status;
9451 
9452 	return common_unlink(fd, path, false);
9453 }
9454 
9455 
9456 status_t
9457 _user_rename(int oldFD, const char* userOldPath, int newFD,
9458 	const char* userNewPath)
9459 {
9460 	KPath oldPathBuffer;
9461 	KPath newPathBuffer;
9462 	if (oldPathBuffer.InitCheck() != B_OK || newPathBuffer.InitCheck() != B_OK)
9463 		return B_NO_MEMORY;
9464 
9465 	char* oldPath = oldPathBuffer.LockBuffer();
9466 	char* newPath = newPathBuffer.LockBuffer();
9467 
9468 	if (!IS_USER_ADDRESS(userOldPath) || !IS_USER_ADDRESS(userNewPath))
9469 		return B_BAD_ADDRESS;
9470 	status_t status = user_copy_name(oldPath, userOldPath, B_PATH_NAME_LENGTH);
9471 	if (status != B_OK)
9472 		return status;
9473 	status = user_copy_name(newPath, userNewPath, B_PATH_NAME_LENGTH);
9474 	if (status != B_OK)
9475 		return status;
9476 
9477 	return common_rename(oldFD, oldPath, newFD, newPath, false);
9478 }
9479 
9480 
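/*!	Creates a named FIFO at (fd, userPath) by asking the containing file
	system to create a special node; this is the kernel side of mkfifo().
*/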
9481 status_t
9482 _user_create_fifo(int fd, const char* userPath, mode_t perms)
9483 {
9484 	KPath pathBuffer;
9485 	if (pathBuffer.InitCheck() != B_OK)
9486 		return B_NO_MEMORY;
9487 
9488 	char* path = pathBuffer.LockBuffer();
9489 
9490 	if (!IS_USER_ADDRESS(userPath))
9491 		return B_BAD_ADDRESS;
9492 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9493 	if (status != B_OK)
9494 		return status;
9495 
9496 	// split into directory vnode and filename path
9497 	char filename[B_FILE_NAME_LENGTH];
9498 	VnodePutter dir;
9499 	status = fd_and_path_to_dir_vnode(fd, path, dir, filename, false);
9500 	if (status != B_OK)
9501 		return status;
9502 
9503 	// the underlying FS needs to support creating FIFOs
9504 	if (!HAS_FS_CALL(dir, create_special_node))
9505 		return B_UNSUPPORTED;
9506 
	// create the entry -- the FIFO sub node is set up automatically
9508 	fs_vnode superVnode;
9509 	ino_t nodeID;
9510 	status = FS_CALL(dir.Get(), create_special_node, filename, NULL,
9511 		S_IFIFO | (perms & S_IUMSK), 0, &superVnode, &nodeID);
9512 
9513 	// create_special_node() acquired a reference for us that we don't need.
9514 	if (status == B_OK)
9515 		put_vnode(dir->mount->volume, nodeID);
9516 
9517 	return status;
9518 }
9519 
9520 
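/*!	Kernel side of pipe(): creates an unnamed FIFO node in the root file
	system and returns two FDs referring to its read and write ends.

	A minimal sketch of the userland side (hypothetical usage; assumes the
	libroot pipe() wrapper simply forwards to this syscall as
	_kern_create_pipe(), error handling omitted):

		int fds[2];
		if (_kern_create_pipe(fds) == B_OK) {
			write(fds[1], "x", 1);	// fds[1] is the write end
			char c;
			read(fds[0], &c, 1);	// fds[0] is the read end
		}
*/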
9521 status_t
9522 _user_create_pipe(int* userFDs)
9523 {
9524 	// rootfs should support creating FIFOs, but let's be sure
9525 	if (!HAS_FS_CALL(sRoot, create_special_node))
9526 		return B_UNSUPPORTED;
9527 
	// create the node -- the FIFO sub node is set up automatically
9529 	fs_vnode superVnode;
9530 	ino_t nodeID;
9531 	status_t status = FS_CALL(sRoot, create_special_node, NULL, NULL,
9532 		S_IFIFO | S_IRUSR | S_IWUSR, 0, &superVnode, &nodeID);
9533 	if (status != B_OK)
9534 		return status;
9535 
9536 	// We've got one reference to the node and need another one.
9537 	struct vnode* vnode;
9538 	status = get_vnode(sRoot->mount->id, nodeID, &vnode, true, false);
9539 	if (status != B_OK) {
		// This should not happen.
		dprintf("_user_create_pipe(): Failed to look up vnode (%" B_PRIdDEV ", "
			"%" B_PRIdINO ")\n", sRoot->mount->id, nodeID);
9543 		return status;
9544 	}
9545 
	// Everything looks good so far. Open two FDs, one for reading and one
	// for writing.
9548 	int fds[2];
9549 	fds[0] = open_vnode(vnode, O_RDONLY, false);
9550 	fds[1] = open_vnode(vnode, O_WRONLY, false);
9551 
9552 	FDCloser closer0(fds[0], false);
9553 	FDCloser closer1(fds[1], false);
9554 
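	// If either open failed, its slot holds a negative error code;
	// propagate the first failure (fds[0] takes precedence).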
9555 	status = (fds[0] >= 0 ? (fds[1] >= 0 ? B_OK : fds[1]) : fds[0]);
9556 
9557 	// copy FDs to userland
9558 	if (status == B_OK) {
9559 		if (!IS_USER_ADDRESS(userFDs)
9560 			|| user_memcpy(userFDs, fds, sizeof(fds)) != B_OK) {
9561 			status = B_BAD_ADDRESS;
9562 		}
9563 	}
9564 
	// Keep the FDs if everything went fine.
9566 	if (status == B_OK) {
9567 		closer0.Detach();
9568 		closer1.Detach();
9569 	}
9570 
9571 	return status;
9572 }
9573 
9574 
9575 status_t
9576 _user_access(int fd, const char* userPath, int mode, bool effectiveUserGroup)
9577 {
9578 	KPath pathBuffer;
9579 	if (pathBuffer.InitCheck() != B_OK)
9580 		return B_NO_MEMORY;
9581 
9582 	char* path = pathBuffer.LockBuffer();
9583 
9584 	if (!IS_USER_ADDRESS(userPath))
9585 		return B_BAD_ADDRESS;
9586 	status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9587 	if (status != B_OK)
9588 		return status;
9589 
9590 	return common_access(fd, path, mode, effectiveUserGroup, false);
9591 }
9592 
9593 
9594 status_t
9595 _user_read_stat(int fd, const char* userPath, bool traverseLink,
9596 	struct stat* userStat, size_t statSize)
9597 {
9598 	struct stat stat = {0};
9599 	status_t status;
9600 
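	// The userland struct may be smaller than the kernel's struct stat
	// (presumably for compatibility with differently sized userland
	// structs); only statSize bytes are copied back at the end.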
9601 	if (statSize > sizeof(struct stat))
9602 		return B_BAD_VALUE;
9603 
9604 	if (!IS_USER_ADDRESS(userStat))
9605 		return B_BAD_ADDRESS;
9606 
9607 	if (userPath != NULL) {
9608 		// path given: get the stat of the node referred to by (fd, path)
9609 		if (!IS_USER_ADDRESS(userPath))
9610 			return B_BAD_ADDRESS;
9611 
9612 		KPath pathBuffer;
9613 		if (pathBuffer.InitCheck() != B_OK)
9614 			return B_NO_MEMORY;
9615 
9616 		char* path = pathBuffer.LockBuffer();
9617 
9618 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9619 		if (status != B_OK)
9620 			return status;
9621 
9622 		status = common_path_read_stat(fd, path, traverseLink, &stat, false);
9623 	} else {
9624 		// no path given: get the FD and use the FD operation
9625 		FileDescriptorPutter descriptor
9626 			(get_fd(get_current_io_context(false), fd));
9627 		if (!descriptor.IsSet())
9628 			return B_FILE_ERROR;
9629 
9630 		if (descriptor->ops->fd_read_stat)
9631 			status = descriptor->ops->fd_read_stat(descriptor.Get(), &stat);
9632 		else
9633 			status = B_UNSUPPORTED;
9634 	}
9635 
9636 	if (status != B_OK)
9637 		return status;
9638 
9639 	return user_memcpy(userStat, &stat, statSize);
9640 }
9641 
9642 
9643 status_t
9644 _user_write_stat(int fd, const char* userPath, bool traverseLeafLink,
9645 	const struct stat* userStat, size_t statSize, int statMask)
9646 {
9647 	if (statSize > sizeof(struct stat))
9648 		return B_BAD_VALUE;
9649 
9650 	struct stat stat;
9651 
9652 	if (!IS_USER_ADDRESS(userStat)
9653 		|| user_memcpy(&stat, userStat, statSize) < B_OK)
9654 		return B_BAD_ADDRESS;
9655 
	// Clear any fields the (smaller) userland struct stat did not provide.
9657 	if (statSize < sizeof(struct stat))
9658 		memset((uint8*)&stat + statSize, 0, sizeof(struct stat) - statSize);
9659 
9660 	status_t status;
9661 
9662 	if (userPath != NULL) {
9663 		// path given: write the stat of the node referred to by (fd, path)
9664 		if (!IS_USER_ADDRESS(userPath))
9665 			return B_BAD_ADDRESS;
9666 
9667 		KPath pathBuffer;
9668 		if (pathBuffer.InitCheck() != B_OK)
9669 			return B_NO_MEMORY;
9670 
9671 		char* path = pathBuffer.LockBuffer();
9672 
9673 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9674 		if (status != B_OK)
9675 			return status;
9676 
9677 		status = common_path_write_stat(fd, path, traverseLeafLink, &stat,
9678 			statMask, false);
9679 	} else {
9680 		// no path given: get the FD and use the FD operation
9681 		FileDescriptorPutter descriptor
9682 			(get_fd(get_current_io_context(false), fd));
9683 		if (!descriptor.IsSet())
9684 			return B_FILE_ERROR;
9685 
9686 		if (descriptor->ops->fd_write_stat) {
9687 			status = descriptor->ops->fd_write_stat(descriptor.Get(), &stat,
9688 				statMask);
9689 		} else
9690 			status = B_UNSUPPORTED;
9691 	}
9692 
9693 	return status;
9694 }
9695 
9696 
9697 int
9698 _user_open_attr_dir(int fd, const char* userPath, bool traverseLeafLink)
9699 {
9700 	KPath pathBuffer;
9701 	if (pathBuffer.InitCheck() != B_OK)
9702 		return B_NO_MEMORY;
9703 
9704 	char* path = pathBuffer.LockBuffer();
9705 
9706 	if (userPath != NULL) {
9707 		if (!IS_USER_ADDRESS(userPath))
9708 			return B_BAD_ADDRESS;
9709 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9710 		if (status != B_OK)
9711 			return status;
9712 	}
9713 
9714 	return attr_dir_open(fd, userPath ? path : NULL, traverseLeafLink, false);
9715 }
9716 
9717 
9718 ssize_t
9719 _user_read_attr(int fd, const char* userAttribute, off_t pos, void* userBuffer,
9720 	size_t readBytes)
9721 {
9722 	char attribute[B_FILE_NAME_LENGTH];
9723 
9724 	if (userAttribute == NULL)
9725 		return B_BAD_VALUE;
9726 	if (!IS_USER_ADDRESS(userAttribute))
9727 		return B_BAD_ADDRESS;
9728 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9729 	if (status != B_OK)
9730 		return status;
9731 
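	// Implemented by opening the attribute as a regular FD and funneling
	// the I/O through _user_read(); the FD is closed again afterwards.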
9732 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9733 	if (attr < 0)
9734 		return attr;
9735 
9736 	ssize_t bytes = _user_read(attr, pos, userBuffer, readBytes);
9737 	_user_close(attr);
9738 
9739 	return bytes;
9740 }
9741 
9742 
9743 ssize_t
9744 _user_write_attr(int fd, const char* userAttribute, uint32 type, off_t pos,
9745 	const void* buffer, size_t writeBytes)
9746 {
9747 	char attribute[B_FILE_NAME_LENGTH];
9748 
9749 	if (userAttribute == NULL)
9750 		return B_BAD_VALUE;
9751 	if (!IS_USER_ADDRESS(userAttribute))
9752 		return B_BAD_ADDRESS;
9753 	status_t status = user_copy_name(attribute, userAttribute, sizeof(attribute));
9754 	if (status != B_OK)
9755 		return status;
9756 
	// Try to support both the BeOS-typical truncation semantics and the
	// position argument: only truncate when writing starts at offset 0.
9759 	int attr = attr_create(fd, NULL, attribute, type,
9760 		O_CREAT | O_WRONLY | (pos != 0 ? 0 : O_TRUNC), false);
9761 	if (attr < 0)
9762 		return attr;
9763 
9764 	ssize_t bytes = _user_write(attr, pos, buffer, writeBytes);
9765 	_user_close(attr);
9766 
9767 	return bytes;
9768 }
9769 
9770 
9771 status_t
9772 _user_stat_attr(int fd, const char* userAttribute,
9773 	struct attr_info* userAttrInfo)
9774 {
9775 	char attribute[B_FILE_NAME_LENGTH];
9776 
9777 	if (userAttribute == NULL || userAttrInfo == NULL)
9778 		return B_BAD_VALUE;
9779 	if (!IS_USER_ADDRESS(userAttribute) || !IS_USER_ADDRESS(userAttrInfo))
9780 		return B_BAD_ADDRESS;
9781 	status_t status = user_copy_name(attribute, userAttribute,
9782 		sizeof(attribute));
9783 	if (status != B_OK)
9784 		return status;
9785 
9786 	int attr = attr_open(fd, NULL, attribute, O_RDONLY, false);
9787 	if (attr < 0)
9788 		return attr;
9789 
9790 	struct file_descriptor* descriptor
9791 		= get_fd(get_current_io_context(false), attr);
9792 	if (descriptor == NULL) {
9793 		_user_close(attr);
9794 		return B_FILE_ERROR;
9795 	}
9796 
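	// Read the attribute's stat via the descriptor's ops and distill the
	// interesting fields into an attr_info below.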
9797 	struct stat stat;
9798 	if (descriptor->ops->fd_read_stat)
9799 		status = descriptor->ops->fd_read_stat(descriptor, &stat);
9800 	else
9801 		status = B_UNSUPPORTED;
9802 
9803 	put_fd(descriptor);
9804 	_user_close(attr);
9805 
9806 	if (status == B_OK) {
9807 		attr_info info;
9808 		info.type = stat.st_type;
9809 		info.size = stat.st_size;
9810 
9811 		if (user_memcpy(userAttrInfo, &info, sizeof(struct attr_info)) != B_OK)
9812 			return B_BAD_ADDRESS;
9813 	}
9814 
9815 	return status;
9816 }
9817 
9818 
9819 int
9820 _user_open_attr(int fd, const char* userPath, const char* userName,
9821 	uint32 type, int openMode)
9822 {
9823 	char name[B_FILE_NAME_LENGTH];
9824 
9825 	if (!IS_USER_ADDRESS(userName))
9826 		return B_BAD_ADDRESS;
9827 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9828 	if (status != B_OK)
9829 		return status;
9830 
9831 	KPath pathBuffer;
9832 	if (pathBuffer.InitCheck() != B_OK)
9833 		return B_NO_MEMORY;
9834 
9835 	char* path = pathBuffer.LockBuffer();
9836 
9837 	if (userPath != NULL) {
9838 		if (!IS_USER_ADDRESS(userPath))
9839 			return B_BAD_ADDRESS;
9840 		status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
9841 		if (status != B_OK)
9842 			return status;
9843 	}
9844 
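	// With O_CREAT the attribute is created via attr_create(); otherwise it
	// is merely opened.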
9845 	if ((openMode & O_CREAT) != 0) {
9846 		return attr_create(fd, userPath ? path : NULL, name, type, openMode,
9847 			false);
9848 	}
9849 
9850 	return attr_open(fd, userPath ? path : NULL, name, openMode, false);
9851 }
9852 
9853 
9854 status_t
9855 _user_remove_attr(int fd, const char* userName)
9856 {
9857 	char name[B_FILE_NAME_LENGTH];
9858 
9859 	if (!IS_USER_ADDRESS(userName))
9860 		return B_BAD_ADDRESS;
9861 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9862 	if (status != B_OK)
9863 		return status;
9864 
9865 	return attr_remove(fd, name, false);
9866 }
9867 
9868 
9869 status_t
9870 _user_rename_attr(int fromFile, const char* userFromName, int toFile,
9871 	const char* userToName)
9872 {
9873 	if (!IS_USER_ADDRESS(userFromName)
9874 		|| !IS_USER_ADDRESS(userToName))
9875 		return B_BAD_ADDRESS;
9876 
9877 	KPath fromNameBuffer(B_FILE_NAME_LENGTH);
9878 	KPath toNameBuffer(B_FILE_NAME_LENGTH);
9879 	if (fromNameBuffer.InitCheck() != B_OK || toNameBuffer.InitCheck() != B_OK)
9880 		return B_NO_MEMORY;
9881 
9882 	char* fromName = fromNameBuffer.LockBuffer();
9883 	char* toName = toNameBuffer.LockBuffer();
9884 
9885 	status_t status = user_copy_name(fromName, userFromName, B_FILE_NAME_LENGTH);
9886 	if (status != B_OK)
9887 		return status;
9888 	status = user_copy_name(toName, userToName, B_FILE_NAME_LENGTH);
9889 	if (status != B_OK)
9890 		return status;
9891 
9892 	return attr_rename(fromFile, fromName, toFile, toName, false);
9893 }
9894 
9895 
9896 int
9897 _user_open_index_dir(dev_t device)
9898 {
9899 	return index_dir_open(device, false);
9900 }
9901 
9902 
9903 status_t
9904 _user_create_index(dev_t device, const char* userName, uint32 type,
9905 	uint32 flags)
9906 {
9907 	char name[B_FILE_NAME_LENGTH];
9908 
9909 	if (!IS_USER_ADDRESS(userName))
9910 		return B_BAD_ADDRESS;
9911 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9912 	if (status != B_OK)
9913 		return status;
9914 
9915 	return index_create(device, name, type, flags, false);
9916 }
9917 
9918 
9919 status_t
9920 _user_read_index_stat(dev_t device, const char* userName, struct stat* userStat)
9921 {
9922 	char name[B_FILE_NAME_LENGTH];
9923 	struct stat stat = {0};
9924 	status_t status;
9925 
9926 	if (!IS_USER_ADDRESS(userName) || !IS_USER_ADDRESS(userStat))
9927 		return B_BAD_ADDRESS;
9928 	status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9929 	if (status != B_OK)
9930 		return status;
9931 
9932 	status = index_name_read_stat(device, name, &stat, false);
9933 	if (status == B_OK) {
9934 		if (user_memcpy(userStat, &stat, sizeof(stat)) != B_OK)
9935 			return B_BAD_ADDRESS;
9936 	}
9937 
9938 	return status;
9939 }
9940 
9941 
9942 status_t
9943 _user_remove_index(dev_t device, const char* userName)
9944 {
9945 	char name[B_FILE_NAME_LENGTH];
9946 
9947 	if (!IS_USER_ADDRESS(userName))
9948 		return B_BAD_ADDRESS;
9949 	status_t status = user_copy_name(name, userName, B_FILE_NAME_LENGTH);
9950 	if (status != B_OK)
9951 		return status;
9952 
9953 	return index_remove(device, name, false);
9954 }
9955 
9956 
9957 status_t
9958 _user_getcwd(char* userBuffer, size_t size)
9959 {
9960 	if (size == 0)
9961 		return B_BAD_VALUE;
9962 	if (!IS_USER_ADDRESS(userBuffer))
9963 		return B_BAD_ADDRESS;
9964 
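	// A larger buffer would be pointless; no path can exceed kMaxPathLength.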
9965 	if (size > kMaxPathLength)
9966 		size = kMaxPathLength;
9967 
9968 	KPath pathBuffer(size);
9969 	if (pathBuffer.InitCheck() != B_OK)
9970 		return B_NO_MEMORY;
9971 
	TRACE(("user_getcwd: buf %p, %" B_PRIuSIZE "\n", userBuffer, size));
9973 
9974 	char* path = pathBuffer.LockBuffer();
9975 
9976 	status_t status = get_cwd(path, size, false);
9977 	if (status != B_OK)
9978 		return status;
9979 
9980 	// Copy back the result
9981 	if (user_strlcpy(userBuffer, path, size) < B_OK)
9982 		return B_BAD_ADDRESS;
9983 
9984 	return status;
9985 }
9986 
9987 
9988 status_t
9989 _user_setcwd(int fd, const char* userPath)
9990 {
9991 	TRACE(("user_setcwd: path = %p\n", userPath));
9992 
9993 	KPath pathBuffer;
9994 	if (pathBuffer.InitCheck() != B_OK)
9995 		return B_NO_MEMORY;
9996 
9997 	char* path = pathBuffer.LockBuffer();
9998 
9999 	if (userPath != NULL) {
10000 		if (!IS_USER_ADDRESS(userPath))
10001 			return B_BAD_ADDRESS;
10002 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10003 		if (status != B_OK)
10004 			return status;
10005 	}
10006 
10007 	return set_cwd(fd, userPath != NULL ? path : NULL, false);
10008 }
10009 
10010 
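/*!	Kernel side of chroot(): resolves \a userPath and installs the resulting
	vnode as the calling team's I/O context root. Only the superuser may do
	this.
*/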
10011 status_t
10012 _user_change_root(const char* userPath)
10013 {
10014 	// only root is allowed to chroot()
10015 	if (geteuid() != 0)
10016 		return B_NOT_ALLOWED;
10017 
10018 	// alloc path buffer
10019 	KPath pathBuffer;
10020 	if (pathBuffer.InitCheck() != B_OK)
10021 		return B_NO_MEMORY;
10022 
10023 	// copy userland path to kernel
10024 	char* path = pathBuffer.LockBuffer();
10025 	if (userPath != NULL) {
10026 		if (!IS_USER_ADDRESS(userPath))
10027 			return B_BAD_ADDRESS;
10028 		status_t status = user_copy_name(path, userPath, B_PATH_NAME_LENGTH);
10029 		if (status != B_OK)
10030 			return status;
10031 	}
10032 
10033 	// get the vnode
10034 	VnodePutter vnode;
10035 	status_t status = path_to_vnode(path, true, vnode, NULL, false);
10036 	if (status != B_OK)
10037 		return status;
10038 
10039 	// set the new root
10040 	struct io_context* context = get_current_io_context(false);
10041 	mutex_lock(&sIOContextRootLock);
10042 	struct vnode* oldRoot = context->root;
10043 	context->root = vnode.Detach();
10044 	mutex_unlock(&sIOContextRootLock);
10045 
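	// Release the context's reference to the previous root -- outside the
	// lock, since put_vnode() may need to acquire other locks.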
10046 	put_vnode(oldRoot);
10047 
10048 	return B_OK;
10049 }
10050 
10051 
10052 int
10053 _user_open_query(dev_t device, const char* userQuery, size_t queryLength,
10054 	uint32 flags, port_id port, int32 token)
10055 {
10056 	if (device < 0 || userQuery == NULL || queryLength == 0)
10057 		return B_BAD_VALUE;
10058 
10059 	if (!IS_USER_ADDRESS(userQuery))
10060 		return B_BAD_ADDRESS;
10061 
	// Safety restriction: reject unreasonably long query strings.
10063 	if (queryLength >= 65536)
10064 		return B_NAME_TOO_LONG;
10065 
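	// Short query strings are kept on the stack; longer ones are allocated
	// on the heap.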
10066 	BStackOrHeapArray<char, 128> query(queryLength + 1);
10067 	if (!query.IsValid())
10068 		return B_NO_MEMORY;
10069 
10070 	if (user_strlcpy(query, userQuery, queryLength + 1) < B_OK)
10071 		return B_BAD_ADDRESS;
10072 
10073 	return query_open(device, query, flags, port, token, false);
10074 }
10075 
10076 
10077 #include "vfs_request_io.cpp"
10078